/*
 * Copyright (C) 2011 Francisco Jerez.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */
26848b8605Smrg
27848b8605Smrg#include <fcntl.h>
28848b8605Smrg#include <stdio.h>
29848b8605Smrg#include <sys/stat.h>
30848b8605Smrg#include <inttypes.h>
31848b8605Smrg#include "pipe/p_state.h"
32848b8605Smrg#include "pipe/p_context.h"
33848b8605Smrg#include "pipe/p_screen.h"
34848b8605Smrg#include "pipe/p_defines.h"
35848b8605Smrg#include "pipe/p_shader_tokens.h"
36848b8605Smrg#include "util/u_memory.h"
37848b8605Smrg#include "util/u_inlines.h"
38848b8605Smrg#include "util/u_sampler.h"
39848b8605Smrg#include "util/u_format.h"
40848b8605Smrg#include "tgsi/tgsi_text.h"
41848b8605Smrg#include "pipe-loader/pipe_loader.h"
42848b8605Smrg
/* Number of per-slot entries kept in each array of struct context. */
#define MAX_RESOURCES 4

/*
 * Everything a test needs to talk to the driver, plus the objects the
 * init_*() helpers create so that the matching destroy_*() helpers can
 * find and release them again.
 */
struct context {
        /* Device handle filled in by pipe_loader_probe(). */
        struct pipe_loader_device *dev;
        /* Screen created from dev. */
        struct pipe_screen *screen;
        /* Context created from screen. */
        struct pipe_context *pipe;
        /* Compute state returned by create_compute_state(). */
        void *hwcs;
        /* Sampler states returned by create_sampler_state(). */
        void *hwsmp[MAX_RESOURCES];
        /* Resources created by init_tex(), indexed by slot. */
        struct pipe_resource *tex[MAX_RESOURCES];
        /* Per-slot flag stored by init_tex(); used as surface .writable. */
        bool tex_rw[MAX_RESOURCES];
        /* Sampler views created by init_sampler_views(). */
        struct pipe_sampler_view *view[MAX_RESOURCES];
        /* Surfaces created by init_compute_resources(). */
        struct pipe_surface *surf[MAX_RESOURCES];
};
56848b8605Smrg
/*
 * Query compute parameter `c` through ctx->screen->get_compute_param()
 * and print the returned values as unsigned 64-bit integers.
 *
 * A `struct context *ctx` must be in scope where the macro is expanded.
 * The first argument is not used by the expansion; it is kept so that
 * existing call sites remain valid.
 *
 * The return value is treated as a size in bytes.  A non-positive size
 * prints an empty list, and the element count is clamped to the local
 * buffer so the print loop can never read past it.
 */
#define DUMP_COMPUTE_PARAM(p, c) do {                                   \
                uint64_t dcp_v[4] = { 0 };                              \
                size_t dcp_i, dcp_n = 0;                                \
                int dcp_sz;                                             \
                                                                        \
                dcp_sz = ctx->screen->get_compute_param(ctx->screen,    \
                                                   PIPE_SHADER_IR_TGSI, \
                                                   c, dcp_v);           \
                if (dcp_sz > 0)                                         \
                        dcp_n = (size_t)dcp_sz / sizeof(*dcp_v);        \
                if (dcp_n > sizeof(dcp_v) / sizeof(*dcp_v))             \
                        dcp_n = sizeof(dcp_v) / sizeof(*dcp_v);         \
                                                                        \
                printf("%s: {", #c);                                    \
                                                                        \
                for (dcp_i = 0; dcp_i < dcp_n; ++dcp_i)                 \
                        printf(" %"PRIu64, dcp_v[dcp_i]);               \
                                                                        \
                printf(" }\n");                                         \
        } while (0)
71848b8605Smrg
72848b8605Smrgstatic void init_ctx(struct context *ctx)
73848b8605Smrg{
74848b8605Smrg        int ret;
75848b8605Smrg
76848b8605Smrg        ret = pipe_loader_probe(&ctx->dev, 1);
77848b8605Smrg        assert(ret);
78848b8605Smrg
79b8e80941Smrg        ctx->screen = pipe_loader_create_screen(ctx->dev);
80848b8605Smrg        assert(ctx->screen);
81848b8605Smrg
82b8e80941Smrg        ctx->pipe = ctx->screen->context_create(ctx->screen, NULL, 0);
83848b8605Smrg        assert(ctx->pipe);
84848b8605Smrg
85848b8605Smrg        DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_GRID_DIMENSION);
86848b8605Smrg        DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_GRID_SIZE);
87848b8605Smrg        DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
88848b8605Smrg}
89848b8605Smrg
90848b8605Smrgstatic void destroy_ctx(struct context *ctx)
91848b8605Smrg{
92848b8605Smrg        ctx->pipe->destroy(ctx->pipe);
93848b8605Smrg        ctx->screen->destroy(ctx->screen);
94848b8605Smrg        pipe_loader_release(&ctx->dev, 1);
95848b8605Smrg        FREE(ctx);
96848b8605Smrg}
97848b8605Smrg
/*
 * Run a program text through the external `cpp` command and return the
 * preprocessed result.
 *
 * A fixed header defining RGLOBAL, RLOCAL, RPRIVATE and RINPUT is written
 * to cpp ahead of `src`.  `defs`, when non-NULL, is inserted verbatim into
 * the cpp command line.  The output goes through a temporary file created
 * with mkstemp() under /tmp, which is unlinked before returning.
 *
 * ctx is not used.
 *
 * Returns a NUL-terminated buffer obtained from malloc(); the caller
 * releases it with free().  Every failure is reported through assert(),
 * matching the rest of this file.
 */
static char *
preprocess_prog(struct context *ctx, const char *src, const char *defs)
{
        const char header[] =
                "#define RGLOBAL        RES[32767]\n"
                "#define RLOCAL         RES[32766]\n"
                "#define RPRIVATE       RES[32765]\n"
                "#define RINPUT         RES[32764]\n";
        char cmd[512];
        char tmp[] = "/tmp/test-compute.tgsi-XXXXXX";
        char *buf;
        int fd, ret, len;
        struct stat st;
        FILE *p;

        (void)ctx;

        /* Open a temporary file */
        fd = mkstemp(tmp);
        assert(fd >= 0);

        /* A truncated command line would run something other than intended. */
        len = snprintf(cmd, sizeof(cmd), "cpp -P -nostdinc -undef %s > %s",
                       defs ? defs : "", tmp);
        assert(len >= 0 && (size_t)len < sizeof(cmd));

        /* Preprocess */
        p = popen(cmd, "w");
        assert(p);      /* fwrite() on a NULL stream would crash */
        fwrite(header, strlen(header), 1, p);
        fwrite(src, strlen(src), 1, p);
        ret = pclose(p);
        assert(!ret);

        /* Read back */
        ret = fstat(fd, &st);
        assert(!ret);

        buf = malloc(st.st_size + 1);
        assert(buf);
        ret = read(fd, buf, st.st_size);
        assert(ret == st.st_size);
        buf[ret] = 0;

        /* Clean up */
        close(fd);
        unlink(tmp);

        return buf;
}
141848b8605Smrg
142848b8605Smrgstatic void init_prog(struct context *ctx, unsigned local_sz,
143848b8605Smrg                      unsigned private_sz, unsigned input_sz,
144848b8605Smrg                      const char *src, const char *defs)
145848b8605Smrg{
146848b8605Smrg        struct pipe_context *pipe = ctx->pipe;
147848b8605Smrg        struct tgsi_token prog[1024];
148848b8605Smrg        struct pipe_compute_state cs = {
149b8e80941Smrg                .ir_type = PIPE_SHADER_IR_TGSI,
150848b8605Smrg                .prog = prog,
151848b8605Smrg                .req_local_mem = local_sz,
152848b8605Smrg                .req_private_mem = private_sz,
153848b8605Smrg                .req_input_mem = input_sz
154848b8605Smrg        };
155848b8605Smrg        char *psrc = preprocess_prog(ctx, src, defs);
156848b8605Smrg        int ret;
157848b8605Smrg
158b8e80941Smrg        ret = tgsi_text_translate(psrc, prog, ARRAY_SIZE(prog));
159848b8605Smrg        assert(ret);
160848b8605Smrg        free(psrc);
161848b8605Smrg
162848b8605Smrg        ctx->hwcs = pipe->create_compute_state(pipe, &cs);
163848b8605Smrg        assert(ctx->hwcs);
164848b8605Smrg
165848b8605Smrg        pipe->bind_compute_state(pipe, ctx->hwcs);
166848b8605Smrg}
167848b8605Smrg
168848b8605Smrgstatic void destroy_prog(struct context *ctx)
169848b8605Smrg{
170848b8605Smrg        struct pipe_context *pipe = ctx->pipe;
171848b8605Smrg
172848b8605Smrg        pipe->delete_compute_state(pipe, ctx->hwcs);
173848b8605Smrg        ctx->hwcs = NULL;
174848b8605Smrg}
175848b8605Smrg
176848b8605Smrgstatic void init_tex(struct context *ctx, int slot,
177848b8605Smrg                     enum pipe_texture_target target, bool rw,
178848b8605Smrg                     enum pipe_format format, int w, int h,
179848b8605Smrg                     void (*init)(void *, int, int, int))
180848b8605Smrg{
181848b8605Smrg        struct pipe_context *pipe = ctx->pipe;
182848b8605Smrg        struct pipe_resource **tex = &ctx->tex[slot];
183848b8605Smrg        struct pipe_resource ttex = {
184848b8605Smrg                .target = target,
185848b8605Smrg                .format = format,
186848b8605Smrg                .width0 = w,
187848b8605Smrg                .height0 = h,
188848b8605Smrg                .depth0 = 1,
189848b8605Smrg                .array_size = 1,
190848b8605Smrg                .bind = (PIPE_BIND_SAMPLER_VIEW |
191848b8605Smrg                         PIPE_BIND_COMPUTE_RESOURCE |
192848b8605Smrg                         PIPE_BIND_GLOBAL)
193848b8605Smrg        };
194848b8605Smrg        int dx = util_format_get_blocksize(format);
195848b8605Smrg        int dy = util_format_get_stride(format, w);
196848b8605Smrg        int nx = (target == PIPE_BUFFER ? (w / dx) :
197848b8605Smrg                  util_format_get_nblocksx(format, w));
198848b8605Smrg        int ny = (target == PIPE_BUFFER ? 1 :
199848b8605Smrg                  util_format_get_nblocksy(format, h));
200848b8605Smrg        struct pipe_transfer *xfer;
201848b8605Smrg        char *map;
202848b8605Smrg        int x, y;
203848b8605Smrg
204848b8605Smrg        *tex = ctx->screen->resource_create(ctx->screen, &ttex);
205848b8605Smrg        assert(*tex);
206848b8605Smrg
207848b8605Smrg        map = pipe->transfer_map(pipe, *tex, 0, PIPE_TRANSFER_WRITE,
208848b8605Smrg                                  &(struct pipe_box) { .width = w,
209848b8605Smrg                                                  .height = h,
210848b8605Smrg                                                  .depth = 1 }, &xfer);
211848b8605Smrg        assert(xfer);
212848b8605Smrg        assert(map);
213848b8605Smrg
214848b8605Smrg        for (y = 0; y < ny; ++y) {
215848b8605Smrg                for (x = 0; x < nx; ++x) {
216848b8605Smrg                        init(map + y * dy + x * dx, slot, x, y);
217848b8605Smrg                }
218848b8605Smrg        }
219848b8605Smrg
220848b8605Smrg        pipe->transfer_unmap(pipe, xfer);
221848b8605Smrg
222848b8605Smrg        ctx->tex_rw[slot] = rw;
223848b8605Smrg}
224848b8605Smrg
/* Default comparison for check_tex(): true when the first sz bytes match. */
static bool default_check(void *x, void *y, int sz) {
        return memcmp(x, y, sz) == 0;
}
228848b8605Smrg
229848b8605Smrgstatic void check_tex(struct context *ctx, int slot,
230848b8605Smrg                      void (*expect)(void *, int, int, int),
231848b8605Smrg                      bool (*check)(void *, void *, int))
232848b8605Smrg{
233848b8605Smrg        struct pipe_context *pipe = ctx->pipe;
234848b8605Smrg        struct pipe_resource *tex = ctx->tex[slot];
235848b8605Smrg        int dx = util_format_get_blocksize(tex->format);
236848b8605Smrg        int dy = util_format_get_stride(tex->format, tex->width0);
237848b8605Smrg        int nx = (tex->target == PIPE_BUFFER ? (tex->width0 / dx) :
238848b8605Smrg                  util_format_get_nblocksx(tex->format, tex->width0));
239848b8605Smrg        int ny = (tex->target == PIPE_BUFFER ? 1 :
240848b8605Smrg                  util_format_get_nblocksy(tex->format, tex->height0));
241848b8605Smrg        struct pipe_transfer *xfer;
242848b8605Smrg        char *map;
243b8e80941Smrg        int x = 0, y, i;
244848b8605Smrg        int err = 0;
245848b8605Smrg
246848b8605Smrg        if (!check)
247848b8605Smrg                check = default_check;
248848b8605Smrg
249848b8605Smrg        map = pipe->transfer_map(pipe, tex, 0, PIPE_TRANSFER_READ,
250848b8605Smrg                                  &(struct pipe_box) { .width = tex->width0,
251848b8605Smrg                                        .height = tex->height0,
252848b8605Smrg                                        .depth = 1 }, &xfer);
253848b8605Smrg        assert(xfer);
254848b8605Smrg        assert(map);
255848b8605Smrg
256848b8605Smrg        for (y = 0; y < ny; ++y) {
257848b8605Smrg                for (x = 0; x < nx; ++x) {
258848b8605Smrg                        uint32_t exp[4];
259848b8605Smrg                        uint32_t *res = (uint32_t *)(map + y * dy + x * dx);
260848b8605Smrg
261848b8605Smrg                        expect(exp, slot, x, y);
262848b8605Smrg                        if (check(res, exp, dx) || (++err) > 20)
263848b8605Smrg                                continue;
264848b8605Smrg
265848b8605Smrg                        if (dx < 4) {
266848b8605Smrg                                uint32_t u = 0, v = 0;
267848b8605Smrg
268848b8605Smrg                                for (i = 0; i < dx; i++) {
269848b8605Smrg                                        u |= ((uint8_t *)exp)[i] << (8 * i);
270848b8605Smrg                                        v |= ((uint8_t *)res)[i] << (8 * i);
271848b8605Smrg                                }
272848b8605Smrg                                printf("(%d, %d): got 0x%x, expected 0x%x\n",
273848b8605Smrg                                       x, y, v, u);
274848b8605Smrg                        } else {
275848b8605Smrg                                for (i = 0; i < dx / 4; i++) {
276848b8605Smrg                                        printf("(%d, %d)[%d]: got 0x%x/%f,"
277848b8605Smrg                                               " expected 0x%x/%f\n", x, y, i,
278848b8605Smrg                                               res[i], ((float *)res)[i],
279848b8605Smrg                                               exp[i], ((float *)exp)[i]);
280848b8605Smrg                                }
281848b8605Smrg                        }
282848b8605Smrg                }
283848b8605Smrg        }
284848b8605Smrg
285848b8605Smrg        pipe->transfer_unmap(pipe, xfer);
286848b8605Smrg
287848b8605Smrg        if (err)
288848b8605Smrg                printf("(%d, %d): \x1b[31mFAIL\x1b[0m (%d)\n", x, y, err);
289848b8605Smrg        else
290848b8605Smrg                printf("(%d, %d): \x1b[32mOK\x1b[0m\n", x, y);
291848b8605Smrg}
292848b8605Smrg
293848b8605Smrgstatic void destroy_tex(struct context *ctx)
294848b8605Smrg{
295848b8605Smrg        int i;
296848b8605Smrg
297848b8605Smrg        for (i = 0; i < MAX_RESOURCES; ++i) {
298848b8605Smrg                if (ctx->tex[i])
299848b8605Smrg                        pipe_resource_reference(&ctx->tex[i], NULL);
300848b8605Smrg        }
301848b8605Smrg}
302848b8605Smrg
303848b8605Smrgstatic void init_sampler_views(struct context *ctx, const int *slots)
304848b8605Smrg{
305848b8605Smrg        struct pipe_context *pipe = ctx->pipe;
306848b8605Smrg        struct pipe_sampler_view tview;
307848b8605Smrg        int i;
308848b8605Smrg
309848b8605Smrg        for (i = 0; *slots >= 0; ++i, ++slots) {
310848b8605Smrg                u_sampler_view_default_template(&tview, ctx->tex[*slots],
311848b8605Smrg                                                ctx->tex[*slots]->format);
312848b8605Smrg
313848b8605Smrg                ctx->view[i] = pipe->create_sampler_view(pipe, ctx->tex[*slots],
314848b8605Smrg                                                         &tview);
315848b8605Smrg                assert(ctx->view[i]);
316848b8605Smrg        }
317848b8605Smrg
318848b8605Smrg        pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, i, ctx->view);
319848b8605Smrg}
320848b8605Smrg
321848b8605Smrgstatic void destroy_sampler_views(struct context *ctx)
322848b8605Smrg{
323848b8605Smrg        struct pipe_context *pipe = ctx->pipe;
324848b8605Smrg        int i;
325848b8605Smrg
326848b8605Smrg        pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, MAX_RESOURCES, NULL);
327848b8605Smrg
328848b8605Smrg        for (i = 0; i < MAX_RESOURCES; ++i) {
329848b8605Smrg                if (ctx->view[i]) {
330848b8605Smrg                        pipe->sampler_view_destroy(pipe, ctx->view[i]);
331848b8605Smrg                        ctx->view[i] = NULL;
332848b8605Smrg                }
333848b8605Smrg        }
334848b8605Smrg}
335848b8605Smrg
336848b8605Smrgstatic void init_compute_resources(struct context *ctx, const int *slots)
337848b8605Smrg{
338848b8605Smrg        struct pipe_context *pipe = ctx->pipe;
339848b8605Smrg        int i;
340848b8605Smrg
341848b8605Smrg        for (i = 0; *slots >= 0; ++i, ++slots) {
342848b8605Smrg                struct pipe_surface tsurf = {
343848b8605Smrg                        .format = ctx->tex[*slots]->format,
344848b8605Smrg                        .writable = ctx->tex_rw[*slots]
345848b8605Smrg                };
346848b8605Smrg
347848b8605Smrg                if (ctx->tex[*slots]->target == PIPE_BUFFER)
348848b8605Smrg                        tsurf.u.buf.last_element = ctx->tex[*slots]->width0 - 1;
349848b8605Smrg
350848b8605Smrg                ctx->surf[i] = pipe->create_surface(pipe, ctx->tex[*slots],
351848b8605Smrg                                                    &tsurf);
352848b8605Smrg                assert(ctx->surf[i]);
353848b8605Smrg        }
354848b8605Smrg
355848b8605Smrg        pipe->set_compute_resources(pipe, 0, i, ctx->surf);
356848b8605Smrg}
357848b8605Smrg
358848b8605Smrgstatic void destroy_compute_resources(struct context *ctx)
359848b8605Smrg{
360848b8605Smrg        struct pipe_context *pipe = ctx->pipe;
361848b8605Smrg        int i;
362848b8605Smrg
363848b8605Smrg        pipe->set_compute_resources(pipe, 0, MAX_RESOURCES, NULL);
364848b8605Smrg
365848b8605Smrg        for (i = 0; i < MAX_RESOURCES; ++i) {
366848b8605Smrg                if (ctx->surf[i]) {
367848b8605Smrg                        pipe->surface_destroy(pipe, ctx->surf[i]);
368848b8605Smrg                        ctx->surf[i] = NULL;
369848b8605Smrg                }
370848b8605Smrg        }
371848b8605Smrg}
372848b8605Smrg
373848b8605Smrgstatic void init_sampler_states(struct context *ctx, int n)
374848b8605Smrg{
375848b8605Smrg        struct pipe_context *pipe = ctx->pipe;
376848b8605Smrg        struct pipe_sampler_state smp = {
377848b8605Smrg                .normalized_coords = 1,
378848b8605Smrg        };
379848b8605Smrg        int i;
380848b8605Smrg
381848b8605Smrg        for (i = 0; i < n; ++i) {
382848b8605Smrg                ctx->hwsmp[i] = pipe->create_sampler_state(pipe, &smp);
383848b8605Smrg                assert(ctx->hwsmp[i]);
384848b8605Smrg        }
385848b8605Smrg
386848b8605Smrg        pipe->bind_sampler_states(pipe, PIPE_SHADER_COMPUTE, 0, i, ctx->hwsmp);
387848b8605Smrg}
388848b8605Smrg
389848b8605Smrgstatic void destroy_sampler_states(struct context *ctx)
390848b8605Smrg{
391848b8605Smrg        struct pipe_context *pipe = ctx->pipe;
392848b8605Smrg        int i;
393848b8605Smrg
394848b8605Smrg        pipe->bind_sampler_states(pipe, PIPE_SHADER_COMPUTE,
395848b8605Smrg				  0, MAX_RESOURCES, NULL);
396848b8605Smrg
397848b8605Smrg        for (i = 0; i < MAX_RESOURCES; ++i) {
398848b8605Smrg                if (ctx->hwsmp[i]) {
399848b8605Smrg                        pipe->delete_sampler_state(pipe, ctx->hwsmp[i]);
400848b8605Smrg                        ctx->hwsmp[i] = NULL;
401848b8605Smrg                }
402848b8605Smrg        }
403848b8605Smrg}
404848b8605Smrg
405848b8605Smrgstatic void init_globals(struct context *ctx, const int *slots,
406848b8605Smrg                         uint32_t **handles)
407848b8605Smrg{
408848b8605Smrg        struct pipe_context *pipe = ctx->pipe;
409848b8605Smrg        struct pipe_resource *res[MAX_RESOURCES];
410848b8605Smrg        int i;
411848b8605Smrg
412848b8605Smrg        for (i = 0; *slots >= 0; ++i, ++slots)
413848b8605Smrg                res[i] = ctx->tex[*slots];
414848b8605Smrg
415848b8605Smrg        pipe->set_global_binding(pipe, 0, i, res, handles);
416848b8605Smrg}
417848b8605Smrg
418848b8605Smrgstatic void destroy_globals(struct context *ctx)
419848b8605Smrg{
420848b8605Smrg        struct pipe_context *pipe = ctx->pipe;
421848b8605Smrg
422848b8605Smrg        pipe->set_global_binding(pipe, 0, MAX_RESOURCES, NULL, NULL);
423848b8605Smrg}
424848b8605Smrg
425848b8605Smrgstatic void launch_grid(struct context *ctx, const uint *block_layout,
426848b8605Smrg                        const uint *grid_layout, uint32_t pc,
427b8e80941Smrg                        void *input)
428848b8605Smrg{
429848b8605Smrg        struct pipe_context *pipe = ctx->pipe;
430b8e80941Smrg        struct pipe_grid_info info;
431b8e80941Smrg        int i;
432b8e80941Smrg
433b8e80941Smrg        for (i = 0; i < 3; i++) {
434b8e80941Smrg                info.block[i] = block_layout[i];
435b8e80941Smrg                info.grid[i] = grid_layout[i];
436b8e80941Smrg        }
437b8e80941Smrg        info.pc = pc;
438b8e80941Smrg        info.input = input;
439848b8605Smrg
440b8e80941Smrg        pipe->launch_grid(pipe, &info);
441b8e80941Smrg}
442b8e80941Smrg
/* Element initializer that stores 0xdeadbeef regardless of s, x and y. */
static void test_default_init(void *p, int s, int x, int y)
{
        uint32_t *out = p;

        *out = 0xdeadbeef;
}
447b8e80941Smrg
/* test_system_values */

/*
 * Expected value of 32-bit element x of the output buffer.
 *
 * x is split into an id (x / 16), a group of four elements within that
 * id ((x % 16) / 4) and a component (x % 4).  The id is decomposed into
 * per-component indices, and the group selects what is stored for the
 * component: index / block size, block size, grid size, or
 * index % block size.  s and y are ignored.
 */
static void test_system_values_expect(void *p, int s, int x, int y)
{
        const int id = x / 16;
        const int group = (x % 16) / 4;
        const int comp = x % 4;
        const int index[] = { id % 20, (id % 240) / 20, id / 240, 0 };
        const int block_size[] = { 4, 3, 5, 1 };
        const int grid_size[] = { 5, 4, 1, 1 };
        uint32_t *out = p;

        if (group == 0)
                *out = index[comp] / block_size[comp];
        else if (group == 1)
                *out = block_size[comp];
        else if (group == 2)
                *out = grid_size[comp];
        else if (group == 3)
                *out = index[comp] % block_size[comp];
}
471848b8605Smrg
472848b8605Smrgstatic void test_system_values(struct context *ctx)
473848b8605Smrg{
474848b8605Smrg        const char *src = "COMP\n"
475848b8605Smrg                "DCL RES[0], BUFFER, RAW, WR\n"
476848b8605Smrg                "DCL SV[0], BLOCK_ID[0]\n"
477848b8605Smrg                "DCL SV[1], BLOCK_SIZE[0]\n"
478848b8605Smrg                "DCL SV[2], GRID_SIZE[0]\n"
479848b8605Smrg                "DCL SV[3], THREAD_ID[0]\n"
480848b8605Smrg                "DCL TEMP[0], LOCAL\n"
481848b8605Smrg                "DCL TEMP[1], LOCAL\n"
482848b8605Smrg                "IMM UINT32 { 64, 0, 0, 0 }\n"
483848b8605Smrg                "IMM UINT32 { 16, 0, 0, 0 }\n"
484848b8605Smrg                "IMM UINT32 { 0, 0, 0, 0 }\n"
485848b8605Smrg                "\n"
486848b8605Smrg                "BGNSUB"
487848b8605Smrg                "  UMUL TEMP[0], SV[0], SV[1]\n"
488848b8605Smrg                "  UADD TEMP[0], TEMP[0], SV[3]\n"
489848b8605Smrg                "  UMUL TEMP[1], SV[1], SV[2]\n"
490848b8605Smrg                "  UMUL TEMP[0].w, TEMP[0], TEMP[1].zzzz\n"
491848b8605Smrg                "  UMUL TEMP[0].zw, TEMP[0], TEMP[1].yyyy\n"
492848b8605Smrg                "  UMUL TEMP[0].yzw, TEMP[0], TEMP[1].xxxx\n"
493848b8605Smrg                "  UADD TEMP[0].xy, TEMP[0].xyxy, TEMP[0].zwzw\n"
494848b8605Smrg                "  UADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy\n"
495848b8605Smrg                "  UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
496848b8605Smrg                "  STORE RES[0].xyzw, TEMP[0], SV[0]\n"
497848b8605Smrg                "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
498848b8605Smrg                "  STORE RES[0].xyzw, TEMP[0], SV[1]\n"
499848b8605Smrg                "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
500848b8605Smrg                "  STORE RES[0].xyzw, TEMP[0], SV[2]\n"
501848b8605Smrg                "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
502848b8605Smrg                "  STORE RES[0].xyzw, TEMP[0], SV[3]\n"
503848b8605Smrg                "  RET\n"
504848b8605Smrg                "ENDSUB\n";
505848b8605Smrg
506848b8605Smrg        printf("- %s\n", __func__);
507848b8605Smrg
508848b8605Smrg        init_prog(ctx, 0, 0, 0, src, NULL);
509848b8605Smrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
510b8e80941Smrg                 76800, 0, test_default_init);
511848b8605Smrg        init_compute_resources(ctx, (int []) { 0, -1 });
512848b8605Smrg        launch_grid(ctx, (uint []){4, 3, 5}, (uint []){5, 4, 1}, 0, NULL);
513b8e80941Smrg        check_tex(ctx, 0, test_system_values_expect, NULL);
514848b8605Smrg        destroy_compute_resources(ctx);
515848b8605Smrg        destroy_tex(ctx);
516848b8605Smrg        destroy_prog(ctx);
517848b8605Smrg}
518848b8605Smrg
/* test_resource_access */

/* Element initializer: stores 8.0 minus the element's x index as a float. */
static void test_resource_access_init0(void *p, int s, int x, int y)
{
        float *out = p;

        *out = 8.0 - (float)x;
}
524b8e80941Smrg
/*
 * Expected float at (x, y): 8.0 minus the low six bits of x + 4 * y.
 * s is ignored.
 */
static void test_resource_access_expect(void *p, int s, int x, int y)
{
        const int index = (x + 4 * y) & 0x3f;
        float *out = p;

        *out = 8.0 - (float)index;
}
529b8e80941Smrg
530848b8605Smrgstatic void test_resource_access(struct context *ctx)
531848b8605Smrg{
532848b8605Smrg        const char *src = "COMP\n"
533848b8605Smrg                "DCL RES[0], BUFFER, RAW, WR\n"
534848b8605Smrg                "DCL RES[1], 2D, RAW, WR\n"
535848b8605Smrg                "DCL SV[0], BLOCK_ID[0]\n"
536848b8605Smrg                "DCL TEMP[0], LOCAL\n"
537848b8605Smrg                "DCL TEMP[1], LOCAL\n"
538848b8605Smrg                "IMM UINT32 { 15, 0, 0, 0 }\n"
539848b8605Smrg                "IMM UINT32 { 16, 1, 0, 0 }\n"
540848b8605Smrg                "\n"
541848b8605Smrg                "    BGNSUB\n"
542848b8605Smrg                "       UADD TEMP[0].x, SV[0].xxxx, SV[0].yyyy\n"
543848b8605Smrg                "       AND TEMP[0].x, TEMP[0], IMM[0]\n"
544848b8605Smrg                "       UMUL TEMP[0].x, TEMP[0], IMM[1]\n"
545848b8605Smrg                "       LOAD TEMP[0].xyzw, RES[0], TEMP[0]\n"
546848b8605Smrg                "       UMUL TEMP[1], SV[0], IMM[1]\n"
547848b8605Smrg                "       STORE RES[1].xyzw, TEMP[1], TEMP[0]\n"
548848b8605Smrg                "       RET\n"
549848b8605Smrg                "    ENDSUB\n";
550848b8605Smrg
551848b8605Smrg        printf("- %s\n", __func__);
552848b8605Smrg
553848b8605Smrg        init_prog(ctx, 0, 0, 0, src, NULL);
554848b8605Smrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
555b8e80941Smrg                 256, 0, test_resource_access_init0);
556848b8605Smrg        init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
557b8e80941Smrg                 60, 12, test_default_init);
558848b8605Smrg        init_compute_resources(ctx, (int []) { 0, 1, -1 });
559848b8605Smrg        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){15, 12, 1}, 0, NULL);
560b8e80941Smrg        check_tex(ctx, 1, test_resource_access_expect, NULL);
561848b8605Smrg        destroy_compute_resources(ctx);
562848b8605Smrg        destroy_tex(ctx);
563848b8605Smrg        destroy_prog(ctx);
564848b8605Smrg}
565848b8605Smrg
/* test_function_calls */

/* Element initializer: stores 15 * y + x as a 32-bit unsigned value. */
static void test_function_calls_init(void *p, int s, int x, int y)
{
        uint32_t *out = p;

        *out = 15 * y + x;
}
571b8e80941Smrg
/* Expected value at (x, y): 2 when 15 * y + x is below 4, otherwise 1. */
static void test_function_calls_expect(void *p, int s, int x, int y)
{
        uint32_t *out = p;

        if ((15 * y + x) < 4)
                *out = 2;
        else
                *out = 1;
}
576b8e80941Smrg
577848b8605Smrgstatic void test_function_calls(struct context *ctx)
578848b8605Smrg{
579848b8605Smrg        const char *src = "COMP\n"
580848b8605Smrg                "DCL RES[0], 2D, RAW, WR\n"
581848b8605Smrg                "DCL SV[0], BLOCK_ID[0]\n"
582848b8605Smrg                "DCL SV[1], BLOCK_SIZE[0]\n"
583848b8605Smrg                "DCL SV[2], GRID_SIZE[0]\n"
584848b8605Smrg                "DCL SV[3], THREAD_ID[0]\n"
585848b8605Smrg                "DCL TEMP[0]\n"
586848b8605Smrg                "DCL TEMP[1]\n"
587848b8605Smrg                "DCL TEMP[2], LOCAL\n"
588848b8605Smrg                "IMM UINT32 { 0, 11, 22, 33 }\n"
589848b8605Smrg                "IMM FLT32 { 11, 33, 55, 99 }\n"
590848b8605Smrg                "IMM UINT32 { 4, 1, 0, 0 }\n"
591848b8605Smrg                "IMM UINT32 { 12, 0, 0, 0 }\n"
592848b8605Smrg                "\n"
593848b8605Smrg                "00: BGNSUB\n"
594848b8605Smrg                "01:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
595848b8605Smrg                "02:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
596848b8605Smrg                "03:  USLT TEMP[0].x, TEMP[0], IMM[0]\n"
597848b8605Smrg                "04:  RET\n"
598848b8605Smrg                "05: ENDSUB\n"
599848b8605Smrg                "06: BGNSUB\n"
600848b8605Smrg                "07:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
601848b8605Smrg                "08:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
602848b8605Smrg                "09:  USLT TEMP[0].x, TEMP[0], IMM[0].yyyy\n"
603848b8605Smrg                "10:  IF TEMP[0].xxxx\n"
604848b8605Smrg                "11:   CAL :0\n"
605848b8605Smrg                "12:  ENDIF\n"
606848b8605Smrg                "13:  RET\n"
607848b8605Smrg                "14: ENDSUB\n"
608848b8605Smrg                "15: BGNSUB\n"
609848b8605Smrg                "16:  UMUL TEMP[2], SV[0], SV[1]\n"
610848b8605Smrg                "17:  UADD TEMP[2], TEMP[2], SV[3]\n"
611848b8605Smrg                "18:  UMUL TEMP[2], TEMP[2], IMM[2]\n"
612848b8605Smrg                "00:  MOV TEMP[1].x, IMM[2].wwww\n"
613848b8605Smrg                "19:  LOAD TEMP[0].x, RES[0].xxxx, TEMP[2]\n"
614848b8605Smrg                "20:  CAL :6\n"
615848b8605Smrg                "21:  STORE RES[0].x, TEMP[2], TEMP[1].xxxx\n"
616848b8605Smrg                "22:  RET\n"
617848b8605Smrg                "23: ENDSUB\n";
618848b8605Smrg
619848b8605Smrg        printf("- %s\n", __func__);
620848b8605Smrg
621848b8605Smrg        init_prog(ctx, 0, 0, 0, src, NULL);
622848b8605Smrg        init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
623b8e80941Smrg                 15, 12, test_function_calls_init);
624848b8605Smrg        init_compute_resources(ctx, (int []) { 0, -1 });
625848b8605Smrg        launch_grid(ctx, (uint []){3, 3, 3}, (uint []){5, 4, 1}, 15, NULL);
626b8e80941Smrg        check_tex(ctx, 0, test_function_calls_expect, NULL);
627848b8605Smrg        destroy_compute_resources(ctx);
628848b8605Smrg        destroy_tex(ctx);
629848b8605Smrg        destroy_prog(ctx);
630848b8605Smrg}
631848b8605Smrg
/* test_input_global */

/*
 * Expected value of element x in slot s: 0xdeadbeef everywhere, except
 * element 0, which is reduced by 0x10001 + 2 * s.  y is ignored.
 */
static void test_input_global_expect(void *p, int s, int x, int y)
{
        uint32_t value = 0xdeadbeef;

        if (x == 0)
                value -= 0x10001 + 2 * s;

        *(uint32_t *)p = value;
}
637b8e80941Smrg
638848b8605Smrgstatic void test_input_global(struct context *ctx)
639848b8605Smrg{
640848b8605Smrg        const char *src = "COMP\n"
641848b8605Smrg                "DCL SV[0], THREAD_ID[0]\n"
642848b8605Smrg                "DCL TEMP[0], LOCAL\n"
643848b8605Smrg                "DCL TEMP[1], LOCAL\n"
644848b8605Smrg                "IMM UINT32 { 8, 0, 0, 0 }\n"
645848b8605Smrg                "\n"
646848b8605Smrg                "    BGNSUB\n"
647848b8605Smrg                "       UMUL TEMP[0], SV[0], IMM[0]\n"
648848b8605Smrg                "       LOAD TEMP[1].xy, RINPUT, TEMP[0]\n"
649848b8605Smrg                "       LOAD TEMP[0].x, RGLOBAL, TEMP[1].yyyy\n"
650848b8605Smrg                "       UADD TEMP[1].x, TEMP[0], -TEMP[1]\n"
651848b8605Smrg                "       STORE RGLOBAL.x, TEMP[1].yyyy, TEMP[1]\n"
652848b8605Smrg                "       RET\n"
653848b8605Smrg                "    ENDSUB\n";
654848b8605Smrg        uint32_t input[8] = { 0x10001, 0x10002, 0x10003, 0x10004,
655848b8605Smrg                              0x10005, 0x10006, 0x10007, 0x10008 };
656848b8605Smrg
657848b8605Smrg        printf("- %s\n", __func__);
658848b8605Smrg
659848b8605Smrg        init_prog(ctx, 0, 0, 32, src, NULL);
660b8e80941Smrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,
661b8e80941Smrg                 test_default_init);
662b8e80941Smrg        init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,
663b8e80941Smrg                 test_default_init);
664b8e80941Smrg        init_tex(ctx, 2, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,
665b8e80941Smrg                 test_default_init);
666b8e80941Smrg        init_tex(ctx, 3, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,
667b8e80941Smrg                 test_default_init);
668848b8605Smrg        init_globals(ctx, (int []){ 0, 1, 2, 3, -1 },
669848b8605Smrg                     (uint32_t *[]){ &input[1], &input[3],
670848b8605Smrg                                     &input[5], &input[7] });
671848b8605Smrg        launch_grid(ctx, (uint []){4, 1, 1}, (uint []){1, 1, 1}, 0, input);
672b8e80941Smrg        check_tex(ctx, 0, test_input_global_expect, NULL);
673b8e80941Smrg        check_tex(ctx, 1, test_input_global_expect, NULL);
674b8e80941Smrg        check_tex(ctx, 2, test_input_global_expect, NULL);
675b8e80941Smrg        check_tex(ctx, 3, test_input_global_expect, NULL);
676848b8605Smrg        destroy_globals(ctx);
677848b8605Smrg        destroy_tex(ctx);
678848b8605Smrg        destroy_prog(ctx);
679848b8605Smrg}
680848b8605Smrg
681b8e80941Smrg/* test_private */
/*
 * Expectation callback for test_private: word x must hold the sum of
 * its index within a 32-word group and the number of that group.
 */
static void test_private_expect(void *p, int s, int x, int y)
{
        uint32_t *word = p;
        int group = x / 32;
        int index_in_group = x % 32;

        *word = group + index_in_group;
}
686b8e80941Smrg
687848b8605Smrgstatic void test_private(struct context *ctx)
688848b8605Smrg{
689848b8605Smrg        const char *src = "COMP\n"
690848b8605Smrg                "DCL RES[0], BUFFER, RAW, WR\n"
691848b8605Smrg                "DCL SV[0], BLOCK_ID[0]\n"
692848b8605Smrg                "DCL SV[1], BLOCK_SIZE[0]\n"
693848b8605Smrg                "DCL SV[2], THREAD_ID[0]\n"
694848b8605Smrg                "DCL TEMP[0], LOCAL\n"
695848b8605Smrg                "DCL TEMP[1], LOCAL\n"
696848b8605Smrg                "DCL TEMP[2], LOCAL\n"
697848b8605Smrg                "IMM UINT32 { 128, 0, 0, 0 }\n"
698848b8605Smrg                "IMM UINT32 { 4, 0, 0, 0 }\n"
699848b8605Smrg                "\n"
700848b8605Smrg                "    BGNSUB\n"
701848b8605Smrg                "       UMUL TEMP[0].x, SV[0], SV[1]\n"
702848b8605Smrg                "       UADD TEMP[0].x, TEMP[0], SV[2]\n"
703848b8605Smrg                "       MOV TEMP[1].x, IMM[0].wwww\n"
704848b8605Smrg                "       BGNLOOP\n"
705848b8605Smrg                "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
706848b8605Smrg                "               IF TEMP[2]\n"
707848b8605Smrg                "                       BRK\n"
708848b8605Smrg                "               ENDIF\n"
709848b8605Smrg                "               UDIV TEMP[2].x, TEMP[1], IMM[1]\n"
710848b8605Smrg                "               UADD TEMP[2].x, TEMP[2], TEMP[0]\n"
711848b8605Smrg                "               STORE RPRIVATE.x, TEMP[1], TEMP[2]\n"
712848b8605Smrg                "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
713848b8605Smrg                "       ENDLOOP\n"
714848b8605Smrg                "       MOV TEMP[1].x, IMM[0].wwww\n"
715848b8605Smrg                "       UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
716848b8605Smrg                "       BGNLOOP\n"
717848b8605Smrg                "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
718848b8605Smrg                "               IF TEMP[2]\n"
719848b8605Smrg                "                       BRK\n"
720848b8605Smrg                "               ENDIF\n"
721848b8605Smrg                "               LOAD TEMP[2].x, RPRIVATE, TEMP[1]\n"
722848b8605Smrg                "               STORE RES[0].x, TEMP[0], TEMP[2]\n"
723848b8605Smrg                "               UADD TEMP[0].x, TEMP[0], IMM[1]\n"
724848b8605Smrg                "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
725848b8605Smrg                "       ENDLOOP\n"
726848b8605Smrg                "       RET\n"
727848b8605Smrg                "    ENDSUB\n";
728848b8605Smrg
729848b8605Smrg        printf("- %s\n", __func__);
730848b8605Smrg
731848b8605Smrg        init_prog(ctx, 0, 128, 0, src, NULL);
732848b8605Smrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
733b8e80941Smrg                 32768, 0, test_default_init);
734848b8605Smrg        init_compute_resources(ctx, (int []) { 0, -1 });
735848b8605Smrg        launch_grid(ctx, (uint []){16, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
736b8e80941Smrg        check_tex(ctx, 0, test_private_expect, NULL);
737848b8605Smrg        destroy_compute_resources(ctx);
738848b8605Smrg        destroy_tex(ctx);
739848b8605Smrg        destroy_prog(ctx);
740848b8605Smrg}
741848b8605Smrg
742b8e80941Smrg/* test_local */
/*
 * Expectation callback for test_local: words whose index has bit 5
 * (0x20) set must read 2, all others must read 1.
 */
static void test_local_expect(void *p, int s, int x, int y)
{
        uint32_t *word = p;

        if ((x & 0x20) != 0)
                *word = 2;
        else
                *word = 1;
}
747b8e80941Smrg
748848b8605Smrgstatic void test_local(struct context *ctx)
749848b8605Smrg{
750848b8605Smrg        const char *src = "COMP\n"
751848b8605Smrg                "DCL RES[0], BUFFER, RAW, WR\n"
752848b8605Smrg                "DCL SV[0], BLOCK_ID[0]\n"
753848b8605Smrg                "DCL SV[1], BLOCK_SIZE[0]\n"
754848b8605Smrg                "DCL SV[2], THREAD_ID[0]\n"
755848b8605Smrg                "DCL TEMP[0], LOCAL\n"
756848b8605Smrg                "DCL TEMP[1], LOCAL\n"
757848b8605Smrg                "DCL TEMP[2], LOCAL\n"
758848b8605Smrg                "IMM UINT32 { 1, 0, 0, 0 }\n"
759848b8605Smrg                "IMM UINT32 { 2, 0, 0, 0 }\n"
760848b8605Smrg                "IMM UINT32 { 4, 0, 0, 0 }\n"
761848b8605Smrg                "IMM UINT32 { 32, 0, 0, 0 }\n"
762848b8605Smrg                "IMM UINT32 { 128, 0, 0, 0 }\n"
763848b8605Smrg                "\n"
764848b8605Smrg                "    BGNSUB\n"
765848b8605Smrg                "       UMUL TEMP[0].x, SV[2], IMM[2]\n"
766848b8605Smrg                "       STORE RLOCAL.x, TEMP[0], IMM[0].wwww\n"
767848b8605Smrg                "       MFENCE RLOCAL\n"
768848b8605Smrg                "       USLT TEMP[1].x, SV[2], IMM[3]\n"
769848b8605Smrg                "       IF TEMP[1]\n"
770848b8605Smrg                "               UADD TEMP[1].x, TEMP[0], IMM[4]\n"
771848b8605Smrg                "               BGNLOOP\n"
772848b8605Smrg                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
773848b8605Smrg                "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
774848b8605Smrg                "                       IF TEMP[2]\n"
775848b8605Smrg                "                               BRK\n"
776848b8605Smrg                "                       ENDIF\n"
777848b8605Smrg                "               ENDLOOP\n"
778848b8605Smrg                "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
779848b8605Smrg                "               MFENCE RLOCAL\n"
780848b8605Smrg                "               BGNLOOP\n"
781848b8605Smrg                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
782848b8605Smrg                "                       USEQ TEMP[2].x, TEMP[2], IMM[1]\n"
783848b8605Smrg                "                       IF TEMP[2]\n"
784848b8605Smrg                "                               BRK\n"
785848b8605Smrg                "                       ENDIF\n"
786848b8605Smrg                "               ENDLOOP\n"
787848b8605Smrg                "       ELSE\n"
788848b8605Smrg                "               UADD TEMP[1].x, TEMP[0], -IMM[4]\n"
789848b8605Smrg                "               BGNLOOP\n"
790848b8605Smrg                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
791848b8605Smrg                "                       USEQ TEMP[2].x, TEMP[2], IMM[0].wwww\n"
792848b8605Smrg                "                       IF TEMP[2]\n"
793848b8605Smrg                "                               BRK\n"
794848b8605Smrg                "                       ENDIF\n"
795848b8605Smrg                "               ENDLOOP\n"
796848b8605Smrg                "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
797848b8605Smrg                "               MFENCE RLOCAL\n"
798848b8605Smrg                "               BGNLOOP\n"
799848b8605Smrg                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
800848b8605Smrg                "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
801848b8605Smrg                "                       IF TEMP[2]\n"
802848b8605Smrg                "                               BRK\n"
803848b8605Smrg                "                       ENDIF\n"
804848b8605Smrg                "               ENDLOOP\n"
805848b8605Smrg                "               STORE RLOCAL.x, TEMP[0], IMM[1]\n"
806848b8605Smrg                "               MFENCE RLOCAL\n"
807848b8605Smrg                "       ENDIF\n"
808848b8605Smrg                "       UMUL TEMP[1].x, SV[0], SV[1]\n"
809848b8605Smrg                "       UMUL TEMP[1].x, TEMP[1], IMM[2]\n"
810848b8605Smrg                "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
811848b8605Smrg                "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
812848b8605Smrg                "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
813848b8605Smrg                "       RET\n"
814848b8605Smrg                "    ENDSUB\n";
815848b8605Smrg
816848b8605Smrg        printf("- %s\n", __func__);
817848b8605Smrg
818848b8605Smrg        init_prog(ctx, 256, 0, 0, src, NULL);
819848b8605Smrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
820b8e80941Smrg                 4096, 0, test_default_init);
821848b8605Smrg        init_compute_resources(ctx, (int []) { 0, -1 });
822848b8605Smrg        launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
823b8e80941Smrg        check_tex(ctx, 0, test_local_expect, NULL);
824848b8605Smrg        destroy_compute_resources(ctx);
825848b8605Smrg        destroy_tex(ctx);
826848b8605Smrg        destroy_prog(ctx);
827848b8605Smrg}
828848b8605Smrg
829b8e80941Smrg/* test_sample */
/*
 * Initialisation callback for test_sample: a non-zero s gets a constant
 * 1.0 fill, s == 0 gets the product of the texel coordinates.
 */
static void test_sample_init(void *p, int s, int x, int y)
{
        float *texel = p;

        if (s != 0)
                *texel = 1;
        else
                *texel = x * y;
}
834b8e80941Smrg
/*
 * Expectation callback for test_sample.  Output texels come in groups of
 * four: the first holds (x / 4) * y, the next two hold 0 and the last
 * holds 1.  Any other remainder (negative x) leaves *p untouched.
 */
static void test_sample_expect(void *p, int s, int x, int y)
{
        float *texel = p;
        int component = x % 4;

        if (component == 0)
                *texel = x / 4 * y;
        else if (component == 1 || component == 2)
                *texel = 0;
        else if (component == 3)
                *texel = 1;
}
850b8e80941Smrg
851848b8605Smrgstatic void test_sample(struct context *ctx)
852848b8605Smrg{
853848b8605Smrg        const char *src = "COMP\n"
854848b8605Smrg                "DCL SVIEW[0], 2D, FLOAT\n"
855848b8605Smrg                "DCL RES[0], 2D, RAW, WR\n"
856848b8605Smrg                "DCL SAMP[0]\n"
857848b8605Smrg                "DCL SV[0], BLOCK_ID[0]\n"
858848b8605Smrg                "DCL TEMP[0], LOCAL\n"
859848b8605Smrg                "DCL TEMP[1], LOCAL\n"
860848b8605Smrg                "IMM UINT32 { 16, 1, 0, 0 }\n"
861848b8605Smrg                "IMM FLT32 { 128, 32, 0, 0 }\n"
862848b8605Smrg                "\n"
863848b8605Smrg                "    BGNSUB\n"
864848b8605Smrg                "       I2F TEMP[1], SV[0]\n"
865848b8605Smrg                "       DIV TEMP[1], TEMP[1], IMM[1]\n"
866848b8605Smrg                "       SAMPLE TEMP[1], TEMP[1], SVIEW[0], SAMP[0]\n"
867848b8605Smrg                "       UMUL TEMP[0], SV[0], IMM[0]\n"
868848b8605Smrg                "       STORE RES[0].xyzw, TEMP[0], TEMP[1]\n"
869848b8605Smrg                "       RET\n"
870848b8605Smrg                "    ENDSUB\n";
871848b8605Smrg
872848b8605Smrg        printf("- %s\n", __func__);
873848b8605Smrg
874848b8605Smrg        init_prog(ctx, 0, 0, 0, src, NULL);
875848b8605Smrg        init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
876b8e80941Smrg                 128, 32, test_sample_init);
877848b8605Smrg        init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
878b8e80941Smrg                 512, 32, test_sample_init);
879848b8605Smrg        init_compute_resources(ctx, (int []) { 1, -1 });
880848b8605Smrg        init_sampler_views(ctx, (int []) { 0, -1 });
881848b8605Smrg        init_sampler_states(ctx, 2);
882848b8605Smrg        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, NULL);
883b8e80941Smrg        check_tex(ctx, 1, test_sample_expect, NULL);
884848b8605Smrg        destroy_sampler_states(ctx);
885848b8605Smrg        destroy_sampler_views(ctx);
886848b8605Smrg        destroy_compute_resources(ctx);
887848b8605Smrg        destroy_tex(ctx);
888848b8605Smrg        destroy_prog(ctx);
889848b8605Smrg}
890848b8605Smrg
891b8e80941Smrg/* test_many_kern */
/*
 * Expectation callback for test_many_kern: word x must contain x.
 */
static void test_many_kern_expect(void *p, int s, int x, int y)
{
        uint32_t *word = p;

        *word = x;
}
896b8e80941Smrg
897848b8605Smrgstatic void test_many_kern(struct context *ctx)
898848b8605Smrg{
899848b8605Smrg        const char *src = "COMP\n"
900848b8605Smrg                "DCL RES[0], BUFFER, RAW, WR\n"
901848b8605Smrg                "DCL TEMP[0], LOCAL\n"
902848b8605Smrg                "IMM UINT32 { 0, 1, 2, 3 }\n"
903848b8605Smrg                "IMM UINT32 { 4, 0, 0, 0 }\n"
904848b8605Smrg                "\n"
905848b8605Smrg                "    BGNSUB\n"
906848b8605Smrg                "       UMUL TEMP[0].x, IMM[0].xxxx, IMM[1].xxxx\n"
907848b8605Smrg                "       STORE RES[0].x, TEMP[0], IMM[0].xxxx\n"
908848b8605Smrg                "       RET\n"
909848b8605Smrg                "    ENDSUB\n"
910848b8605Smrg                "    BGNSUB\n"
911848b8605Smrg                "       UMUL TEMP[0].x, IMM[0].yyyy, IMM[1].xxxx\n"
912848b8605Smrg                "       STORE RES[0].x, TEMP[0], IMM[0].yyyy\n"
913848b8605Smrg                "       RET\n"
914848b8605Smrg                "    ENDSUB\n"
915848b8605Smrg                "    BGNSUB\n"
916848b8605Smrg                "       UMUL TEMP[0].x, IMM[0].zzzz, IMM[1].xxxx\n"
917848b8605Smrg                "       STORE RES[0].x, TEMP[0], IMM[0].zzzz\n"
918848b8605Smrg                "       RET\n"
919848b8605Smrg                "    ENDSUB\n"
920848b8605Smrg                "    BGNSUB\n"
921848b8605Smrg                "       UMUL TEMP[0].x, IMM[0].wwww, IMM[1].xxxx\n"
922848b8605Smrg                "       STORE RES[0].x, TEMP[0], IMM[0].wwww\n"
923848b8605Smrg                "       RET\n"
924848b8605Smrg                "    ENDSUB\n";
925848b8605Smrg
926848b8605Smrg        printf("- %s\n", __func__);
927848b8605Smrg
928848b8605Smrg        init_prog(ctx, 0, 0, 0, src, NULL);
929848b8605Smrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
930b8e80941Smrg                 16, 0, test_default_init);
931848b8605Smrg        init_compute_resources(ctx, (int []) { 0, -1 });
932848b8605Smrg        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 0, NULL);
933848b8605Smrg        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 5, NULL);
934848b8605Smrg        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 10, NULL);
935848b8605Smrg        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 15, NULL);
936b8e80941Smrg        check_tex(ctx, 0, test_many_kern_expect, NULL);
937848b8605Smrg        destroy_compute_resources(ctx);
938848b8605Smrg        destroy_tex(ctx);
939848b8605Smrg        destroy_prog(ctx);
940848b8605Smrg}
941848b8605Smrg
942b8e80941Smrg/* test_constant */
/*
 * Initialisation callback for test_constant: s == 0 is filled with the
 * ramp 8.0 - x, any other s with the 0xdeadbeef marker converted to float.
 */
static void test_constant_init(void *p, int s, int x, int y)
{
        float *word = p;

        if (s != 0)
                *word = 0xdeadbeef;
        else
                *word = 8.0 - (float)x;
}
947b8e80941Smrg
/*
 * Expectation callback for test_constant: word x must hold 8.0 - x,
 * i.e. the ramp test_constant_init writes for s == 0.
 */
static void test_constant_expect(void *p, int s, int x, int y)
{
        float *word = p;

        *word = 8.0 - (float)x;
}
952b8e80941Smrg
953848b8605Smrgstatic void test_constant(struct context *ctx)
954848b8605Smrg{
955848b8605Smrg        const char *src = "COMP\n"
956848b8605Smrg                "DCL RES[0], BUFFER, RAW\n"
957848b8605Smrg                "DCL RES[1], BUFFER, RAW, WR\n"
958848b8605Smrg                "DCL SV[0], BLOCK_ID[0]\n"
959848b8605Smrg                "DCL TEMP[0], LOCAL\n"
960848b8605Smrg                "DCL TEMP[1], LOCAL\n"
961848b8605Smrg                "IMM UINT32 { 4, 0, 0, 0 }\n"
962848b8605Smrg                "\n"
963848b8605Smrg                "    BGNSUB\n"
964848b8605Smrg                "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
965848b8605Smrg                "       LOAD TEMP[1].x, RES[0], TEMP[0]\n"
966848b8605Smrg                "       STORE RES[1].x, TEMP[0], TEMP[1]\n"
967848b8605Smrg                "       RET\n"
968848b8605Smrg                "    ENDSUB\n";
969848b8605Smrg
970848b8605Smrg        printf("- %s\n", __func__);
971848b8605Smrg
972848b8605Smrg        init_prog(ctx, 0, 0, 0, src, NULL);
973848b8605Smrg        init_tex(ctx, 0, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
974b8e80941Smrg                 256, 0, test_constant_init);
975848b8605Smrg        init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
976b8e80941Smrg                 256, 0, test_constant_init);
977848b8605Smrg        init_compute_resources(ctx, (int []) { 0, 1, -1 });
978848b8605Smrg        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);
979b8e80941Smrg        check_tex(ctx, 1, test_constant_expect, NULL);
980848b8605Smrg        destroy_compute_resources(ctx);
981848b8605Smrg        destroy_tex(ctx);
982848b8605Smrg        destroy_prog(ctx);
983848b8605Smrg}
984848b8605Smrg
985b8e80941Smrg/* test_resource_indirect */
/*
 * Initialisation callback for test_resource_indirect, selected by s:
 *   0      - 0xdeadbeef filler,
 *   1      - x % 2 (the selector the shader adds to the resource index),
 *   2      - 2 * x,
 *   others - 2 * x + 1.
 */
static void test_resource_indirect_init(void *p, int s, int x, int y)
{
        uint32_t *word = p;

        switch (s) {
        case 0:
                *word = 0xdeadbeef;
                break;
        case 1:
                *word = x % 2;
                break;
        case 2:
                *word = 2 * x;
                break;
        default:
                *word = 2 * x + 1;
                break;
        }
}
993b8e80941Smrg
/*
 * Expectation callback for test_resource_indirect: even words hold
 * 2 * x, odd words hold 2 * x + 1.
 */
static void test_resource_indirect_expect(void *p, int s, int x, int y)
{
        uint32_t *word = p;
        int odd_bonus = 0;

        if (x % 2)
                odd_bonus = 1;

        *word = 2 * x + odd_bonus;
}
998b8e80941Smrg
999848b8605Smrgstatic void test_resource_indirect(struct context *ctx)
1000848b8605Smrg{
1001848b8605Smrg        const char *src = "COMP\n"
1002848b8605Smrg                "DCL RES[0], BUFFER, RAW, WR\n"
1003848b8605Smrg                "DCL RES[1..3], BUFFER, RAW\n"
1004848b8605Smrg                "DCL SV[0], BLOCK_ID[0]\n"
1005848b8605Smrg                "DCL TEMP[0], LOCAL\n"
1006848b8605Smrg                "DCL TEMP[1], LOCAL\n"
1007848b8605Smrg                "IMM UINT32 { 4, 0, 0, 0 }\n"
1008848b8605Smrg                "\n"
1009848b8605Smrg                "    BGNSUB\n"
1010848b8605Smrg                "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
1011848b8605Smrg                "       LOAD TEMP[1].x, RES[1], TEMP[0]\n"
1012848b8605Smrg                "       LOAD TEMP[1].x, RES[TEMP[1].x+2], TEMP[0]\n"
1013848b8605Smrg                "       STORE RES[0].x, TEMP[0], TEMP[1]\n"
1014848b8605Smrg                "       RET\n"
1015848b8605Smrg                "    ENDSUB\n";
1016848b8605Smrg
1017848b8605Smrg        printf("- %s\n", __func__);
1018848b8605Smrg
1019848b8605Smrg        init_prog(ctx, 0, 0, 0, src, NULL);
1020848b8605Smrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
1021b8e80941Smrg                 256, 0, test_resource_indirect_init);
1022848b8605Smrg        init_tex(ctx, 1, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
1023b8e80941Smrg                 256, 0, test_resource_indirect_init);
1024848b8605Smrg        init_tex(ctx, 2, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
1025b8e80941Smrg                 256, 0, test_resource_indirect_init);
1026848b8605Smrg        init_tex(ctx, 3, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
1027b8e80941Smrg                 256, 0, test_resource_indirect_init);
1028848b8605Smrg        init_compute_resources(ctx, (int []) { 0, 1, 2, 3, -1 });
1029848b8605Smrg        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);
1030b8e80941Smrg        check_tex(ctx, 0, test_resource_indirect_expect, NULL);
1031848b8605Smrg        destroy_compute_resources(ctx);
1032848b8605Smrg        destroy_tex(ctx);
1033848b8605Smrg        destroy_prog(ctx);
1034848b8605Smrg}
1035848b8605Smrg
1036b8e80941Smrg/* test_surface_ld */
1037848b8605Smrgenum pipe_format surface_fmts[] = {
1038848b8605Smrg        PIPE_FORMAT_B8G8R8A8_UNORM,
1039848b8605Smrg        PIPE_FORMAT_B8G8R8X8_UNORM,
1040848b8605Smrg        PIPE_FORMAT_A8R8G8B8_UNORM,
1041848b8605Smrg        PIPE_FORMAT_X8R8G8B8_UNORM,
1042848b8605Smrg        PIPE_FORMAT_X8R8G8B8_UNORM,
1043848b8605Smrg        PIPE_FORMAT_L8_UNORM,
1044848b8605Smrg        PIPE_FORMAT_A8_UNORM,
1045848b8605Smrg        PIPE_FORMAT_I8_UNORM,
1046848b8605Smrg        PIPE_FORMAT_L8A8_UNORM,
1047848b8605Smrg        PIPE_FORMAT_R32_FLOAT,
1048848b8605Smrg        PIPE_FORMAT_R32G32_FLOAT,
1049848b8605Smrg        PIPE_FORMAT_R32G32B32A32_FLOAT,
1050848b8605Smrg        PIPE_FORMAT_R32_UNORM,
1051848b8605Smrg        PIPE_FORMAT_R32G32_UNORM,
1052848b8605Smrg        PIPE_FORMAT_R32G32B32A32_UNORM,
1053848b8605Smrg        PIPE_FORMAT_R32_SNORM,
1054848b8605Smrg        PIPE_FORMAT_R32G32_SNORM,
1055848b8605Smrg        PIPE_FORMAT_R32G32B32A32_SNORM,
1056848b8605Smrg        PIPE_FORMAT_R8_UINT,
1057848b8605Smrg        PIPE_FORMAT_R8G8_UINT,
1058848b8605Smrg        PIPE_FORMAT_R8G8B8A8_UINT,
1059848b8605Smrg        PIPE_FORMAT_R8_SINT,
1060848b8605Smrg        PIPE_FORMAT_R8G8_SINT,
1061848b8605Smrg        PIPE_FORMAT_R8G8B8A8_SINT,
1062848b8605Smrg        PIPE_FORMAT_R32_UINT,
1063848b8605Smrg        PIPE_FORMAT_R32G32_UINT,
1064848b8605Smrg        PIPE_FORMAT_R32G32B32A32_UINT,
1065848b8605Smrg        PIPE_FORMAT_R32_SINT,
1066848b8605Smrg        PIPE_FORMAT_R32G32_SINT,
1067848b8605Smrg        PIPE_FORMAT_R32G32B32A32_SINT
1068848b8605Smrg};
1069848b8605Smrg
1070b8e80941Smrgstatic void test_surface_ld_init0f(void *p, int s, int x, int y)
1071b8e80941Smrg{
1072b8e80941Smrg        float v[] = { 1.0, -.75, .50, -.25 };
1073b8e80941Smrg        int i = 0;
1074b8e80941Smrg
1075b8e80941Smrg        util_format_write_4f(surface_fmts[i], v, 0, p, 0, 0, 0, 1, 1);
1076b8e80941Smrg}
1077b8e80941Smrg
1078b8e80941Smrgstatic void test_surface_ld_init0i(void *p, int s, int x, int y)
1079b8e80941Smrg{
1080b8e80941Smrg        int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };
1081b8e80941Smrg        int i = 0;
1082b8e80941Smrg
1083b8e80941Smrg        util_format_write_4i(surface_fmts[i], v, 0, p, 0, 0, 0, 1, 1);
1084b8e80941Smrg}
1085b8e80941Smrg
1086b8e80941Smrgstatic void test_surface_ld_expectf(void *p, int s, int x, int y)
1087b8e80941Smrg{
1088b8e80941Smrg        float v[4], w[4];
1089b8e80941Smrg        int i = 0;
1090b8e80941Smrg
1091b8e80941Smrg        test_surface_ld_init0f(v, s, x / 4, y);
1092b8e80941Smrg        util_format_read_4f(surface_fmts[i], w, 0, v, 0, 0, 0, 1, 1);
1093b8e80941Smrg        *(float *)p = w[x % 4];
1094b8e80941Smrg}
1095b8e80941Smrg
1096b8e80941Smrgstatic void test_surface_ld_expecti(void *p, int s, int x, int y)
1097b8e80941Smrg{
1098b8e80941Smrg        int32_t v[4], w[4];
1099b8e80941Smrg        int i = 0;
1100b8e80941Smrg
1101b8e80941Smrg        test_surface_ld_init0i(v, s, x / 4, y);
1102b8e80941Smrg        util_format_read_4i(surface_fmts[i], w, 0, v, 0, 0, 0, 1, 1);
1103b8e80941Smrg        *(uint32_t *)p = w[x % 4];
1104b8e80941Smrg}
1105b8e80941Smrg
1106848b8605Smrgstatic void test_surface_ld(struct context *ctx)
1107848b8605Smrg{
1108848b8605Smrg        const char *src = "COMP\n"
1109848b8605Smrg                "DCL RES[0], 2D\n"
1110848b8605Smrg                "DCL RES[1], 2D, RAW, WR\n"
1111848b8605Smrg                "DCL SV[0], BLOCK_ID[0]\n"
1112848b8605Smrg                "DCL TEMP[0], LOCAL\n"
1113848b8605Smrg                "DCL TEMP[1], LOCAL\n"
1114848b8605Smrg                "IMM UINT32 { 16, 1, 0, 0 }\n"
1115848b8605Smrg                "\n"
1116848b8605Smrg                "    BGNSUB\n"
1117848b8605Smrg                "       LOAD TEMP[1], RES[0], SV[0]\n"
1118848b8605Smrg                "       UMUL TEMP[0], SV[0], IMM[0]\n"
1119848b8605Smrg                "       STORE RES[1].xyzw, TEMP[0], TEMP[1]\n"
1120848b8605Smrg                "       RET\n"
1121848b8605Smrg                "    ENDSUB\n";
1122848b8605Smrg        int i = 0;
1123848b8605Smrg
1124848b8605Smrg        printf("- %s\n", __func__);
1125848b8605Smrg
1126848b8605Smrg        init_prog(ctx, 0, 0, 0, src, NULL);
1127848b8605Smrg
1128b8e80941Smrg        for (i = 0; i < ARRAY_SIZE(surface_fmts); i++) {
1129848b8605Smrg                bool is_int = util_format_is_pure_integer(surface_fmts[i]);
1130848b8605Smrg
1131848b8605Smrg                printf("   - %s\n", util_format_name(surface_fmts[i]));
1132848b8605Smrg
1133848b8605Smrg                if (!ctx->screen->is_format_supported(ctx->screen,
1134b8e80941Smrg                       surface_fmts[i], PIPE_TEXTURE_2D, 1, 1,
1135848b8605Smrg                       PIPE_BIND_COMPUTE_RESOURCE)) {
1136848b8605Smrg                   printf("(unsupported)\n");
1137848b8605Smrg                   continue;
1138848b8605Smrg                }
1139848b8605Smrg
1140848b8605Smrg                init_tex(ctx, 0, PIPE_TEXTURE_2D, true, surface_fmts[i],
1141b8e80941Smrg                         128, 32, (is_int ? test_surface_ld_init0i : test_surface_ld_init0f));
1142848b8605Smrg                init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
1143b8e80941Smrg                         512, 32, test_default_init);
1144848b8605Smrg                init_compute_resources(ctx, (int []) { 0, 1, -1 });
1145848b8605Smrg                init_sampler_states(ctx, 2);
1146848b8605Smrg                launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
1147848b8605Smrg                            NULL);
1148b8e80941Smrg                check_tex(ctx, 1, (is_int ? test_surface_ld_expecti : test_surface_ld_expectf), NULL);
1149848b8605Smrg                destroy_sampler_states(ctx);
1150848b8605Smrg                destroy_compute_resources(ctx);
1151848b8605Smrg                destroy_tex(ctx);
1152848b8605Smrg        }
1153848b8605Smrg
1154848b8605Smrg        destroy_prog(ctx);
1155848b8605Smrg}
1156848b8605Smrg
1157b8e80941Smrg/* test_surface_st */
1158b8e80941Smrgstatic void test_surface_st_init0f(void *p, int s, int x, int y)
1159b8e80941Smrg{
1160b8e80941Smrg        float v[] = { 1.0, -.75, 0.5, -.25 };
1161b8e80941Smrg        *(float *)p = v[x % 4];
1162b8e80941Smrg}
1163b8e80941Smrg
1164b8e80941Smrgstatic void test_surface_st_init0i(void *p, int s, int x, int y)
1165b8e80941Smrg{
1166b8e80941Smrg        int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };
1167b8e80941Smrg        *(int32_t *)p = v[x % 4];
1168b8e80941Smrg}
1169b8e80941Smrg
1170b8e80941Smrgstatic void test_surface_st_init1(void *p, int s, int x, int y)
1171b8e80941Smrg{
1172b8e80941Smrg        int i = 0;
1173b8e80941Smrg        memset(p, 1, util_format_get_blocksize(surface_fmts[i]));
1174b8e80941Smrg}
1175b8e80941Smrg
1176b8e80941Smrgstatic void test_surface_st_expectf(void *p, int s, int x, int y)
1177b8e80941Smrg{
1178b8e80941Smrg        float vf[4];
1179b8e80941Smrg        int i = 0, j;
1180b8e80941Smrg
1181b8e80941Smrg        for (j = 0; j < 4; j++)
1182b8e80941Smrg                test_surface_st_init0f(&vf[j], s, 4 * x + j, y);
1183b8e80941Smrg        util_format_write_4f(surface_fmts[i], vf, 0, p, 0, 0, 0, 1, 1);
1184b8e80941Smrg}
1185b8e80941Smrg
1186b8e80941Smrgstatic void test_surface_st_expects(void *p, int s, int x, int y)
1187b8e80941Smrg{
1188b8e80941Smrg        int32_t v[4];
1189b8e80941Smrg        int i = 0, j;
1190b8e80941Smrg
1191b8e80941Smrg        for (j = 0; j < 4; j++)
1192b8e80941Smrg                test_surface_st_init0i(&v[j], s, 4 * x + j, y);
1193b8e80941Smrg        util_format_write_4i(surface_fmts[i], v, 0, p, 0, 0, 0, 1, 1);
1194b8e80941Smrg}
1195b8e80941Smrg
1196b8e80941Smrgstatic void test_surface_st_expectu(void *p, int s, int x, int y)
1197b8e80941Smrg{
1198b8e80941Smrg        uint32_t v[4];
1199b8e80941Smrg        int i = 0, j;
1200b8e80941Smrg
1201b8e80941Smrg        for (j = 0; j < 4; j++)
1202b8e80941Smrg                test_surface_st_init0i(&v[j], s, 4 * x + j, y);
1203b8e80941Smrg        util_format_write_4ui(surface_fmts[i], v, 0, p, 0, 0, 0, 1, 1);
1204b8e80941Smrg}
1205b8e80941Smrg
1206b8e80941Smrgstatic bool test_surface_st_check(void *x, void *y, int sz)
1207b8e80941Smrg{
1208b8e80941Smrg        int i = 0, j;
1209b8e80941Smrg
1210b8e80941Smrg        if (util_format_is_float(surface_fmts[i])) {
1211b8e80941Smrg                return fabs(*(float *)x - *(float *)y) < 3.92156863e-3;
1212b8e80941Smrg
1213b8e80941Smrg        } else if ((sz % 4) == 0) {
1214b8e80941Smrg                for (j = 0; j < sz / 4; j++)
1215b8e80941Smrg                        if (abs(((uint32_t *)x)[j] -
1216b8e80941Smrg                                ((uint32_t *)y)[j]) > 1)
1217b8e80941Smrg                                return false;
1218b8e80941Smrg                return true;
1219b8e80941Smrg        } else {
1220b8e80941Smrg                return !memcmp(x, y, sz);
1221b8e80941Smrg        }
1222b8e80941Smrg}
1223b8e80941Smrg
1224848b8605Smrgstatic void test_surface_st(struct context *ctx)
1225848b8605Smrg{
1226848b8605Smrg        const char *src = "COMP\n"
1227848b8605Smrg                "DCL RES[0], 2D, RAW\n"
1228848b8605Smrg                "DCL RES[1], 2D, WR\n"
1229848b8605Smrg                "DCL SV[0], BLOCK_ID[0]\n"
1230848b8605Smrg                "DCL TEMP[0], LOCAL\n"
1231848b8605Smrg                "DCL TEMP[1], LOCAL\n"
1232848b8605Smrg                "IMM UINT32 { 16, 1, 0, 0 }\n"
1233848b8605Smrg                "\n"
1234848b8605Smrg                "    BGNSUB\n"
1235848b8605Smrg                "       UMUL TEMP[0], SV[0], IMM[0]\n"
1236848b8605Smrg                "       LOAD TEMP[1], RES[0], TEMP[0]\n"
1237848b8605Smrg                "       STORE RES[1], SV[0], TEMP[1]\n"
1238848b8605Smrg                "       RET\n"
1239848b8605Smrg                "    ENDSUB\n";
1240848b8605Smrg        int i = 0;
1241848b8605Smrg
1242848b8605Smrg        printf("- %s\n", __func__);
1243848b8605Smrg
1244848b8605Smrg        init_prog(ctx, 0, 0, 0, src, NULL);
1245848b8605Smrg
1246b8e80941Smrg        for (i = 0; i < ARRAY_SIZE(surface_fmts); i++) {
1247848b8605Smrg                bool is_signed = (util_format_description(surface_fmts[i])
1248848b8605Smrg                                  ->channel[0].type == UTIL_FORMAT_TYPE_SIGNED);
1249848b8605Smrg                bool is_int = util_format_is_pure_integer(surface_fmts[i]);
1250848b8605Smrg
1251848b8605Smrg                printf("   - %s\n", util_format_name(surface_fmts[i]));
1252848b8605Smrg
1253848b8605Smrg                if (!ctx->screen->is_format_supported(ctx->screen,
1254b8e80941Smrg                       surface_fmts[i], PIPE_TEXTURE_2D, 1, 1,
1255848b8605Smrg                       PIPE_BIND_COMPUTE_RESOURCE)) {
1256848b8605Smrg                   printf("(unsupported)\n");
1257848b8605Smrg                   continue;
1258848b8605Smrg                }
1259848b8605Smrg
1260848b8605Smrg                init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
1261b8e80941Smrg                         512, 32, (is_int ? test_surface_st_init0i : test_surface_st_init0f));
1262848b8605Smrg                init_tex(ctx, 1, PIPE_TEXTURE_2D, true, surface_fmts[i],
1263b8e80941Smrg                         128, 32, test_surface_st_init1);
1264848b8605Smrg                init_compute_resources(ctx, (int []) { 0, 1, -1 });
1265848b8605Smrg                init_sampler_states(ctx, 2);
1266848b8605Smrg                launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
1267848b8605Smrg                            NULL);
1268b8e80941Smrg                check_tex(ctx, 1, (is_int && is_signed ? test_surface_st_expects :
1269b8e80941Smrg                                   is_int && !is_signed ? test_surface_st_expectu :
1270b8e80941Smrg                                   test_surface_st_expectf), test_surface_st_check);
1271848b8605Smrg                destroy_sampler_states(ctx);
1272848b8605Smrg                destroy_compute_resources(ctx);
1273848b8605Smrg                destroy_tex(ctx);
1274848b8605Smrg        }
1275848b8605Smrg
1276848b8605Smrg        destroy_prog(ctx);
1277848b8605Smrg}
1278848b8605Smrg
1279b8e80941Smrg/* test_barrier */
/*
 * Expectation callback for test_barrier: every 32-bit element is expected
 * to hold 31, regardless of the element size and coordinates passed in.
 */
static void test_barrier_expect(void *p, int s, int x, int y)
{
        uint32_t *element = p;

        element[0] = 31;
}
1284b8e80941Smrg
1285848b8605Smrgstatic void test_barrier(struct context *ctx)
1286848b8605Smrg{
1287848b8605Smrg        const char *src = "COMP\n"
1288848b8605Smrg                "DCL RES[0], BUFFER, RAW, WR\n"
1289848b8605Smrg                "DCL SV[0], BLOCK_ID[0]\n"
1290848b8605Smrg                "DCL SV[1], BLOCK_SIZE[0]\n"
1291848b8605Smrg                "DCL SV[2], THREAD_ID[0]\n"
1292848b8605Smrg                "DCL TEMP[0], LOCAL\n"
1293848b8605Smrg                "DCL TEMP[1], LOCAL\n"
1294848b8605Smrg                "DCL TEMP[2], LOCAL\n"
1295848b8605Smrg                "DCL TEMP[3], LOCAL\n"
1296848b8605Smrg                "IMM UINT32 { 1, 0, 0, 0 }\n"
1297848b8605Smrg                "IMM UINT32 { 4, 0, 0, 0 }\n"
1298848b8605Smrg                "IMM UINT32 { 32, 0, 0, 0 }\n"
1299848b8605Smrg                "\n"
1300848b8605Smrg                "    BGNSUB\n"
1301848b8605Smrg                "       UMUL TEMP[0].x, SV[2], IMM[1]\n"
1302848b8605Smrg                "       MOV TEMP[1].x, IMM[0].wwww\n"
1303848b8605Smrg                "       BGNLOOP\n"
1304848b8605Smrg                "               BARRIER\n"
1305848b8605Smrg                "               STORE RLOCAL.x, TEMP[0], TEMP[1]\n"
1306848b8605Smrg                "               BARRIER\n"
1307848b8605Smrg                "               MOV TEMP[2].x, IMM[0].wwww\n"
1308848b8605Smrg                "               BGNLOOP\n"
1309848b8605Smrg                "                       UMUL TEMP[3].x, TEMP[2], IMM[1]\n"
1310848b8605Smrg                "                       LOAD TEMP[3].x, RLOCAL, TEMP[3]\n"
1311848b8605Smrg                "                       USNE TEMP[3].x, TEMP[3], TEMP[1]\n"
1312848b8605Smrg                "                       IF TEMP[3]\n"
1313848b8605Smrg                "                               END\n"
1314848b8605Smrg                "                       ENDIF\n"
1315848b8605Smrg                "                       UADD TEMP[2].x, TEMP[2], IMM[0]\n"
1316848b8605Smrg                "                       USEQ TEMP[3].x, TEMP[2], SV[1]\n"
1317848b8605Smrg                "                       IF TEMP[3]\n"
1318848b8605Smrg                "                               BRK\n"
1319848b8605Smrg                "                       ENDIF\n"
1320848b8605Smrg                "               ENDLOOP\n"
1321848b8605Smrg                "               UADD TEMP[1].x, TEMP[1], IMM[0]\n"
1322848b8605Smrg                "               USEQ TEMP[2].x, TEMP[1], IMM[2]\n"
1323848b8605Smrg                "               IF TEMP[2]\n"
1324848b8605Smrg                "                       BRK\n"
1325848b8605Smrg                "               ENDIF\n"
1326848b8605Smrg                "       ENDLOOP\n"
1327848b8605Smrg                "       UMUL TEMP[1].x, SV[0], SV[1]\n"
1328848b8605Smrg                "       UMUL TEMP[1].x, TEMP[1], IMM[1]\n"
1329848b8605Smrg                "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
1330848b8605Smrg                "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
1331848b8605Smrg                "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
1332848b8605Smrg                "       RET\n"
1333848b8605Smrg                "    ENDSUB\n";
1334848b8605Smrg
1335848b8605Smrg        printf("- %s\n", __func__);
1336848b8605Smrg
1337848b8605Smrg        init_prog(ctx, 256, 0, 0, src, NULL);
1338848b8605Smrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
1339b8e80941Smrg                 4096, 0, test_default_init);
1340848b8605Smrg        init_compute_resources(ctx, (int []) { 0, -1 });
1341848b8605Smrg        launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
1342b8e80941Smrg        check_tex(ctx, 0, test_barrier_expect, NULL);
1343848b8605Smrg        destroy_compute_resources(ctx);
1344848b8605Smrg        destroy_tex(ctx);
1345848b8605Smrg        destroy_prog(ctx);
1346848b8605Smrg}
1347848b8605Smrg
1348b8e80941Smrg/* test_atom_ops */
/*
 * Initializer callback for test_atom_ops: fills each 32-bit element with
 * the marker value 0xbad.
 */
static void test_atom_ops_init(void *p, int s, int x, int y)
{
        uint32_t *element = p;

        element[0] = 0xbad;
}
1353b8e80941Smrg
/*
 * Expectation callback for test_atom_ops: writes the expected 32-bit
 * value for element index x (0..9).  For any other x the destination is
 * left untouched.
 */
static void test_atom_ops_expect(void *p, int s, int x, int y)
{
        /* Expected value per element index. */
        static const uint32_t expected[] = {
                0xce6c8eef,     /* x == 0 */
                0xdeadbeef,     /* x == 1 */
                0x11111111,     /* x == 2 */
                0x10011001,     /* x == 3 */
                0xdfbdbfff,     /* x == 4 */
                0x11111111,     /* x == 5 */
                0x11111111,     /* x == 6 */
                0xdeadbeef,     /* x == 7 */
                0xdeadbeef,     /* x == 8 */
                0x11111111,     /* x == 9 */
        };
        const int count = (int)(sizeof(expected) / sizeof(expected[0]));

        if (x < 0 || x >= count)
                return;

        *(uint32_t *)p = expected[x];
}
1389b8e80941Smrg
1390848b8605Smrgstatic void test_atom_ops(struct context *ctx, bool global)
1391848b8605Smrg{
1392848b8605Smrg        const char *src = "COMP\n"
1393848b8605Smrg                "#ifdef TARGET_GLOBAL\n"
1394848b8605Smrg                "#define target RES[0]\n"
1395848b8605Smrg                "#else\n"
1396848b8605Smrg                "#define target RLOCAL\n"
1397848b8605Smrg                "#endif\n"
1398848b8605Smrg                ""
1399848b8605Smrg                "DCL RES[0], BUFFER, RAW, WR\n"
1400848b8605Smrg                "#define threadid SV[0]\n"
1401848b8605Smrg                "DCL threadid, THREAD_ID[0]\n"
1402848b8605Smrg                ""
1403848b8605Smrg                "#define offset TEMP[0]\n"
1404848b8605Smrg                "DCL offset, LOCAL\n"
1405848b8605Smrg                "#define tmp TEMP[1]\n"
1406848b8605Smrg                "DCL tmp, LOCAL\n"
1407848b8605Smrg                ""
1408848b8605Smrg                "#define k0 IMM[0]\n"
1409848b8605Smrg                "IMM UINT32 { 0, 0, 0, 0 }\n"
1410848b8605Smrg                "#define k1 IMM[1]\n"
1411848b8605Smrg                "IMM UINT32 { 1, 0, 0, 0 }\n"
1412848b8605Smrg                "#define k2 IMM[2]\n"
1413848b8605Smrg                "IMM UINT32 { 2, 0, 0, 0 }\n"
1414848b8605Smrg                "#define k3 IMM[3]\n"
1415848b8605Smrg                "IMM UINT32 { 3, 0, 0, 0 }\n"
1416848b8605Smrg                "#define k4 IMM[4]\n"
1417848b8605Smrg                "IMM UINT32 { 4, 0, 0, 0 }\n"
1418848b8605Smrg                "#define k5 IMM[5]\n"
1419848b8605Smrg                "IMM UINT32 { 5, 0, 0, 0 }\n"
1420848b8605Smrg                "#define k6 IMM[6]\n"
1421848b8605Smrg                "IMM UINT32 { 6, 0, 0, 0 }\n"
1422848b8605Smrg                "#define k7 IMM[7]\n"
1423848b8605Smrg                "IMM UINT32 { 7, 0, 0, 0 }\n"
1424848b8605Smrg                "#define k8 IMM[8]\n"
1425848b8605Smrg                "IMM UINT32 { 8, 0, 0, 0 }\n"
1426848b8605Smrg                "#define k9 IMM[9]\n"
1427848b8605Smrg                "IMM UINT32 { 9, 0, 0, 0 }\n"
1428848b8605Smrg                "#define korig IMM[10].xxxx\n"
1429848b8605Smrg                "#define karg IMM[10].yyyy\n"
1430848b8605Smrg                "IMM UINT32 { 3735928559, 286331153, 0, 0 }\n"
1431848b8605Smrg                "\n"
1432848b8605Smrg                "    BGNSUB\n"
1433848b8605Smrg                "       UMUL offset.x, threadid, k4\n"
1434848b8605Smrg                "       STORE target.x, offset, korig\n"
1435848b8605Smrg                "       USEQ tmp.x, threadid, k0\n"
1436848b8605Smrg                "       IF tmp\n"
1437848b8605Smrg                "               ATOMUADD tmp.x, target, offset, karg\n"
1438848b8605Smrg                "               ATOMUADD tmp.x, target, offset, tmp\n"
1439848b8605Smrg                "       ENDIF\n"
1440848b8605Smrg                "       USEQ tmp.x, threadid, k1\n"
1441848b8605Smrg                "       IF tmp\n"
1442848b8605Smrg                "               ATOMXCHG tmp.x, target, offset, karg\n"
1443848b8605Smrg                "               ATOMXCHG tmp.x, target, offset, tmp\n"
1444848b8605Smrg                "       ENDIF\n"
1445848b8605Smrg                "       USEQ tmp.x, threadid, k2\n"
1446848b8605Smrg                "       IF tmp\n"
1447848b8605Smrg                "               ATOMCAS tmp.x, target, offset, korig, karg\n"
1448848b8605Smrg                "               ATOMCAS tmp.x, target, offset, tmp, k0\n"
1449848b8605Smrg                "       ENDIF\n"
1450848b8605Smrg                "       USEQ tmp.x, threadid, k3\n"
1451848b8605Smrg                "       IF tmp\n"
1452848b8605Smrg                "               ATOMAND tmp.x, target, offset, karg\n"
1453848b8605Smrg                "               ATOMAND tmp.x, target, offset, tmp\n"
1454848b8605Smrg                "       ENDIF\n"
1455848b8605Smrg                "       USEQ tmp.x, threadid, k4\n"
1456848b8605Smrg                "       IF tmp\n"
1457848b8605Smrg                "               ATOMOR tmp.x, target, offset, karg\n"
1458848b8605Smrg                "               ATOMOR tmp.x, target, offset, tmp\n"
1459848b8605Smrg                "       ENDIF\n"
1460848b8605Smrg                "       USEQ tmp.x, threadid, k5\n"
1461848b8605Smrg                "       IF tmp\n"
1462848b8605Smrg                "               ATOMXOR tmp.x, target, offset, karg\n"
1463848b8605Smrg                "               ATOMXOR tmp.x, target, offset, tmp\n"
1464848b8605Smrg                "       ENDIF\n"
1465848b8605Smrg                "       USEQ tmp.x, threadid, k6\n"
1466848b8605Smrg                "       IF tmp\n"
1467848b8605Smrg                "               ATOMUMIN tmp.x, target, offset, karg\n"
1468848b8605Smrg                "               ATOMUMIN tmp.x, target, offset, tmp\n"
1469848b8605Smrg                "       ENDIF\n"
1470848b8605Smrg                "       USEQ tmp.x, threadid, k7\n"
1471848b8605Smrg                "       IF tmp\n"
1472848b8605Smrg                "               ATOMUMAX tmp.x, target, offset, karg\n"
1473848b8605Smrg                "               ATOMUMAX tmp.x, target, offset, tmp\n"
1474848b8605Smrg                "       ENDIF\n"
1475848b8605Smrg                "       USEQ tmp.x, threadid, k8\n"
1476848b8605Smrg                "       IF tmp\n"
1477848b8605Smrg                "               ATOMIMIN tmp.x, target, offset, karg\n"
1478848b8605Smrg                "               ATOMIMIN tmp.x, target, offset, tmp\n"
1479848b8605Smrg                "       ENDIF\n"
1480848b8605Smrg                "       USEQ tmp.x, threadid, k9\n"
1481848b8605Smrg                "       IF tmp\n"
1482848b8605Smrg                "               ATOMIMAX tmp.x, target, offset, karg\n"
1483848b8605Smrg                "               ATOMIMAX tmp.x, target, offset, tmp\n"
1484848b8605Smrg                "       ENDIF\n"
1485848b8605Smrg                "#ifdef TARGET_LOCAL\n"
1486848b8605Smrg                "       LOAD tmp.x, RLOCAL, offset\n"
1487848b8605Smrg                "       STORE RES[0].x, offset, tmp\n"
1488848b8605Smrg                "#endif\n"
1489848b8605Smrg                "       RET\n"
1490848b8605Smrg                "    ENDSUB\n";
1491848b8605Smrg
1492848b8605Smrg        printf("- %s (%s)\n", __func__, global ? "global" : "local");
1493848b8605Smrg
1494848b8605Smrg        init_prog(ctx, 40, 0, 0, src,
1495848b8605Smrg                  (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));
1496848b8605Smrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
1497b8e80941Smrg                 40, 0, test_atom_ops_init);
1498848b8605Smrg        init_compute_resources(ctx, (int []) { 0, -1 });
1499848b8605Smrg        launch_grid(ctx, (uint []){10, 1, 1}, (uint []){1, 1, 1}, 0, NULL);
1500b8e80941Smrg        check_tex(ctx, 0, test_atom_ops_expect, NULL);
1501848b8605Smrg        destroy_compute_resources(ctx);
1502848b8605Smrg        destroy_tex(ctx);
1503848b8605Smrg        destroy_prog(ctx);
1504848b8605Smrg}
1505848b8605Smrg
1506b8e80941Smrg/* test_atom_race */
/*
 * Expectation callback for test_atom_race: elements whose index x has bit
 * 0x20 set are expected to hold 0x11111111, all others 0xffffffff.
 */
static void test_atom_race_expect(void *p, int s, int x, int y)
{
        uint32_t *element = p;

        if ((x & 0x20) != 0)
                *element = 0x11111111;
        else
                *element = 0xffffffff;
}
1511b8e80941Smrg
1512848b8605Smrgstatic void test_atom_race(struct context *ctx, bool global)
1513848b8605Smrg{
1514848b8605Smrg        const char *src = "COMP\n"
1515848b8605Smrg                "#ifdef TARGET_GLOBAL\n"
1516848b8605Smrg                "#define target RES[0]\n"
1517848b8605Smrg                "#else\n"
1518848b8605Smrg                "#define target RLOCAL\n"
1519848b8605Smrg                "#endif\n"
1520848b8605Smrg                ""
1521848b8605Smrg                "DCL RES[0], BUFFER, RAW, WR\n"
1522848b8605Smrg                ""
1523848b8605Smrg                "#define blockid SV[0]\n"
1524848b8605Smrg                "DCL blockid, BLOCK_ID[0]\n"
1525848b8605Smrg                "#define blocksz SV[1]\n"
1526848b8605Smrg                "DCL blocksz, BLOCK_SIZE[0]\n"
1527848b8605Smrg                "#define threadid SV[2]\n"
1528848b8605Smrg                "DCL threadid, THREAD_ID[0]\n"
1529848b8605Smrg                ""
1530848b8605Smrg                "#define offset TEMP[0]\n"
1531848b8605Smrg                "DCL offset, LOCAL\n"
1532848b8605Smrg                "#define arg TEMP[1]\n"
1533848b8605Smrg                "DCL arg, LOCAL\n"
1534848b8605Smrg                "#define count TEMP[2]\n"
1535848b8605Smrg                "DCL count, LOCAL\n"
1536848b8605Smrg                "#define vlocal TEMP[3]\n"
1537848b8605Smrg                "DCL vlocal, LOCAL\n"
1538848b8605Smrg                "#define vshared TEMP[4]\n"
1539848b8605Smrg                "DCL vshared, LOCAL\n"
1540848b8605Smrg                "#define last TEMP[5]\n"
1541848b8605Smrg                "DCL last, LOCAL\n"
1542848b8605Smrg                "#define tmp0 TEMP[6]\n"
1543848b8605Smrg                "DCL tmp0, LOCAL\n"
1544848b8605Smrg                "#define tmp1 TEMP[7]\n"
1545848b8605Smrg                "DCL tmp1, LOCAL\n"
1546848b8605Smrg                ""
1547848b8605Smrg                "#define k0 IMM[0]\n"
1548848b8605Smrg                "IMM UINT32 { 0, 0, 0, 0 }\n"
1549848b8605Smrg                "#define k1 IMM[1]\n"
1550848b8605Smrg                "IMM UINT32 { 1, 0, 0, 0 }\n"
1551848b8605Smrg                "#define k4 IMM[2]\n"
1552848b8605Smrg                "IMM UINT32 { 4, 0, 0, 0 }\n"
1553848b8605Smrg                "#define k32 IMM[3]\n"
1554848b8605Smrg                "IMM UINT32 { 32, 0, 0, 0 }\n"
1555848b8605Smrg                "#define k128 IMM[4]\n"
1556848b8605Smrg                "IMM UINT32 { 128, 0, 0, 0 }\n"
1557848b8605Smrg                "#define kdeadcafe IMM[5]\n"
1558848b8605Smrg                "IMM UINT32 { 3735931646, 0, 0, 0 }\n"
1559848b8605Smrg                "#define kallowed_set IMM[6]\n"
1560848b8605Smrg                "IMM UINT32 { 559035650, 0, 0, 0 }\n"
1561848b8605Smrg                "#define k11111111 IMM[7]\n"
1562848b8605Smrg                "IMM UINT32 { 286331153, 0, 0, 0 }\n"
1563848b8605Smrg                "\n"
1564848b8605Smrg                "    BGNSUB\n"
1565848b8605Smrg                "       MOV offset.x, threadid\n"
1566848b8605Smrg                "#ifdef TARGET_GLOBAL\n"
1567848b8605Smrg                "       UMUL tmp0.x, blockid, blocksz\n"
1568848b8605Smrg                "       UADD offset.x, offset, tmp0\n"
1569848b8605Smrg                "#endif\n"
1570848b8605Smrg                "       UMUL offset.x, offset, k4\n"
1571848b8605Smrg                "       USLT tmp0.x, threadid, k32\n"
1572848b8605Smrg                "       STORE target.x, offset, k0\n"
1573848b8605Smrg                "       BARRIER\n"
1574848b8605Smrg                "       IF tmp0\n"
1575848b8605Smrg                "               MOV vlocal.x, k0\n"
1576848b8605Smrg                "               MOV arg.x, kdeadcafe\n"
1577848b8605Smrg                "               BGNLOOP\n"
1578848b8605Smrg                "                       INEG arg.x, arg\n"
1579848b8605Smrg                "                       ATOMUADD vshared.x, target, offset, arg\n"
1580848b8605Smrg                "                       SFENCE target\n"
1581848b8605Smrg                "                       USNE tmp0.x, vshared, vlocal\n"
1582848b8605Smrg                "                       IF tmp0\n"
1583848b8605Smrg                "                               BRK\n"
1584848b8605Smrg                "                       ENDIF\n"
1585848b8605Smrg                "                       UADD vlocal.x, vlocal, arg\n"
1586848b8605Smrg                "               ENDLOOP\n"
1587848b8605Smrg                "               UADD vlocal.x, vshared, arg\n"
1588848b8605Smrg                "               LOAD vshared.x, target, offset\n"
1589848b8605Smrg                "               USEQ tmp0.x, vshared, vlocal\n"
1590848b8605Smrg                "               STORE target.x, offset, tmp0\n"
1591848b8605Smrg                "       ELSE\n"
1592848b8605Smrg                "               UADD offset.x, offset, -k128\n"
1593848b8605Smrg                "               MOV count.x, k0\n"
1594848b8605Smrg                "               MOV last.x, k0\n"
1595848b8605Smrg                "               BGNLOOP\n"
1596848b8605Smrg                "                       LOAD vshared.x, target, offset\n"
1597848b8605Smrg                "                       USEQ tmp0.x, vshared, kallowed_set.xxxx\n"
1598848b8605Smrg                "                       USEQ tmp1.x, vshared, kallowed_set.yyyy\n"
1599848b8605Smrg                "                       OR tmp0.x, tmp0, tmp1\n"
1600848b8605Smrg                "                       IF tmp0\n"
1601848b8605Smrg                "                               USEQ tmp0.x, vshared, last\n"
1602848b8605Smrg                "                               IF tmp0\n"
1603848b8605Smrg                "                                       CONT\n"
1604848b8605Smrg                "                               ENDIF\n"
1605848b8605Smrg                "                               MOV last.x, vshared\n"
1606848b8605Smrg                "                       ELSE\n"
1607848b8605Smrg                "                               END\n"
1608848b8605Smrg                "                       ENDIF\n"
1609848b8605Smrg                "                       UADD count.x, count, k1\n"
1610848b8605Smrg                "                       USEQ tmp0.x, count, k128\n"
1611848b8605Smrg                "                       IF tmp0\n"
1612848b8605Smrg                "                               BRK\n"
1613848b8605Smrg                "                       ENDIF\n"
1614848b8605Smrg                "               ENDLOOP\n"
1615848b8605Smrg                "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
1616848b8605Smrg                "               UADD offset.x, offset, k128\n"
1617848b8605Smrg                "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
1618848b8605Smrg                "               SFENCE target\n"
1619848b8605Smrg                "       ENDIF\n"
1620848b8605Smrg                "#ifdef TARGET_LOCAL\n"
1621848b8605Smrg                "       LOAD tmp0.x, RLOCAL, offset\n"
1622848b8605Smrg                "       UMUL tmp1.x, blockid, blocksz\n"
1623848b8605Smrg                "       UMUL tmp1.x, tmp1, k4\n"
1624848b8605Smrg                "       UADD offset.x, offset, tmp1\n"
1625848b8605Smrg                "       STORE RES[0].x, offset, tmp0\n"
1626848b8605Smrg                "#endif\n"
1627848b8605Smrg                "       RET\n"
1628848b8605Smrg                "    ENDSUB\n";
1629848b8605Smrg
1630848b8605Smrg        printf("- %s (%s)\n", __func__, global ? "global" : "local");
1631848b8605Smrg
1632848b8605Smrg        init_prog(ctx, 256, 0, 0, src,
1633848b8605Smrg                  (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));
1634848b8605Smrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
1635b8e80941Smrg                 4096, 0, test_default_init);
1636848b8605Smrg        init_compute_resources(ctx, (int []) { 0, -1 });
1637848b8605Smrg        launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
1638b8e80941Smrg        check_tex(ctx, 0, test_atom_race_expect, NULL);
1639848b8605Smrg        destroy_compute_resources(ctx);
1640848b8605Smrg        destroy_tex(ctx);
1641848b8605Smrg        destroy_prog(ctx);
1642848b8605Smrg}
1643848b8605Smrg
1644848b8605Smrgint main(int argc, char *argv[])
1645848b8605Smrg{
1646848b8605Smrg        struct context *ctx = CALLOC_STRUCT(context);
1647848b8605Smrg
1648848b8605Smrg        unsigned tests = (argc > 1) ? strtoul(argv[1], NULL, 0) : ~0;
1649848b8605Smrg
1650848b8605Smrg        init_ctx(ctx);
1651848b8605Smrg
1652848b8605Smrg        if (tests & (1 << 0))
1653848b8605Smrg           test_system_values(ctx);
1654848b8605Smrg        if (tests & (1 << 1))
1655848b8605Smrg           test_resource_access(ctx);
1656848b8605Smrg        if (tests & (1 << 2))
1657848b8605Smrg           test_function_calls(ctx);
1658848b8605Smrg        if (tests & (1 << 3))
1659848b8605Smrg           test_input_global(ctx);
1660848b8605Smrg        if (tests & (1 << 4))
1661848b8605Smrg           test_private(ctx);
1662848b8605Smrg        if (tests & (1 << 5))
1663848b8605Smrg           test_local(ctx);
1664848b8605Smrg        if (tests & (1 << 6))
1665848b8605Smrg           test_sample(ctx);
1666848b8605Smrg        if (tests & (1 << 7))
1667848b8605Smrg           test_many_kern(ctx);
1668848b8605Smrg        if (tests & (1 << 8))
1669848b8605Smrg           test_constant(ctx);
1670848b8605Smrg        if (tests & (1 << 9))
1671848b8605Smrg           test_resource_indirect(ctx);
1672848b8605Smrg        if (tests & (1 << 10))
1673848b8605Smrg           test_surface_ld(ctx);
1674848b8605Smrg        if (tests & (1 << 11))
1675848b8605Smrg           test_surface_st(ctx);
1676848b8605Smrg        if (tests & (1 << 12))
1677848b8605Smrg           test_barrier(ctx);
1678848b8605Smrg        if (tests & (1 << 13))
1679848b8605Smrg           test_atom_ops(ctx, true);
1680848b8605Smrg        if (tests & (1 << 14))
1681848b8605Smrg           test_atom_race(ctx, true);
1682848b8605Smrg        if (tests & (1 << 15))
1683848b8605Smrg           test_atom_ops(ctx, false);
1684848b8605Smrg        if (tests & (1 << 16))
1685848b8605Smrg           test_atom_race(ctx, false);
1686848b8605Smrg
1687848b8605Smrg        destroy_ctx(ctx);
1688848b8605Smrg
1689848b8605Smrg        return 0;
1690848b8605Smrg}
1691