/* compute.c revision 848b8605 */
/*
 * Copyright (C) 2011 Francisco Jerez.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_sampler.h"
#include "util/u_format.h"
#include "tgsi/tgsi_text.h"
#include "pipe-loader/pipe_loader.h"

/* Number of slots in each per-context object array below. */
#define MAX_RESOURCES 4

/*
 * State shared by the tests in this file.
 *
 * The init_*() helpers fill the members in and the matching destroy_*()
 * helpers release them again.
 */
struct context {
        struct pipe_loader_device *dev;                 /* probed device */
        struct pipe_screen *screen;                     /* screen created on dev */
        struct pipe_context *pipe;                      /* context created on screen */
        void *hwcs;                                     /* bound compute state object */
        void *hwsmp[MAX_RESOURCES];                     /* sampler state objects */
        struct pipe_resource *tex[MAX_RESOURCES];       /* buffers/textures, by slot */
        bool tex_rw[MAX_RESOURCES];                     /* per slot: surface writable? */
        struct pipe_sampler_view *view[MAX_RESOURCES];  /* sampler views */
        struct pipe_surface *surf[MAX_RESOURCES];       /* compute resource surfaces */
};

/*
 * Query compute capability `c' from ctx->screen and print its values as
 * "<cap name>: { v0 v1 ... }".
 *
 * A `struct context *ctx' must be in scope where the macro is expanded.
 * The first argument is kept for compatibility with existing callers and
 * is not used.  The value returned by get_compute_param() is treated as
 * a size in bytes; a non-positive result prints an empty list, and the
 * number of values printed is clamped to the size of the local array.
 */
#define DUMP_COMPUTE_PARAM(p, c) do {                                   \
                uint64_t vals_[4];                                      \
                const size_t max_ = sizeof(vals_) / sizeof(*vals_);     \
                size_t cnt_, idx_;                                      \
                int sz_;                                                \
                                                                        \
                sz_ = ctx->screen->get_compute_param(ctx->screen, c, vals_); \
                cnt_ = sz_ > 0 ? (size_t)sz_ / sizeof(*vals_) : 0;      \
                if (cnt_ > max_)                                        \
                        cnt_ = max_;                                    \
                printf("%s: {", #c);                                    \
                                                                        \
                for (idx_ = 0; idx_ < cnt_; ++idx_)                     \
                        printf(" %"PRIu64, vals_[idx_]);                \
                                                                        \
                printf(" }\n");                                         \
        } while (0)

70static void init_ctx(struct context *ctx)
71{
72        int ret;
73
74        ret = pipe_loader_probe(&ctx->dev, 1);
75        assert(ret);
76
77        ctx->screen = pipe_loader_create_screen(ctx->dev, PIPE_SEARCH_DIR);
78        assert(ctx->screen);
79
80        ctx->pipe = ctx->screen->context_create(ctx->screen, NULL);
81        assert(ctx->pipe);
82
83        DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_GRID_DIMENSION);
84        DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_GRID_SIZE);
85        DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
86}
87
88static void destroy_ctx(struct context *ctx)
89{
90        ctx->pipe->destroy(ctx->pipe);
91        ctx->screen->destroy(ctx->screen);
92        pipe_loader_release(&ctx->dev, 1);
93        FREE(ctx);
94}
95
/*
 * Run `src' through the C preprocessor and return the result.
 *
 * A small header defining RGLOBAL, RLOCAL, RPRIVATE and RINPUT is
 * prepended to `src'; `defs', when not NULL, is inserted verbatim into
 * the cpp command line.  The output goes through a temporary file that
 * is removed before returning.
 *
 * `ctx' is unused.  Returns a NUL-terminated buffer obtained from
 * malloc() that the caller must free().  All failures are reported
 * through assert(), like the rest of this file.
 */
static char *
preprocess_prog(struct context *ctx, const char *src, const char *defs)
{
        const char header[] =
                "#define RGLOBAL        RES[32767]\n"
                "#define RLOCAL         RES[32766]\n"
                "#define RPRIVATE       RES[32765]\n"
                "#define RINPUT         RES[32764]\n";
        char cmd[512];
        char tmp[] = "/tmp/test-compute.tgsi-XXXXXX";
        char *buf;
        int fd, ret;
        ssize_t nread;
        struct stat st;
        FILE *p;

        (void)ctx;

        /* Open a temporary file */
        fd = mkstemp(tmp);
        assert(fd >= 0);
        ret = snprintf(cmd, sizeof(cmd), "cpp -P -nostdinc -undef %s > %s",
                       defs ? defs : "", tmp);
        /* A truncated command line would run the wrong command. */
        assert(ret >= 0 && (size_t)ret < sizeof(cmd));

        /* Preprocess */
        p = popen(cmd, "w");
        assert(p);
        fwrite(header, strlen(header), 1, p);
        fwrite(src, strlen(src), 1, p);
        ret = pclose(p);
        assert(!ret);

        /* Read back: fd still refers to the file cpp has just written. */
        ret = fstat(fd, &st);
        assert(!ret);

        buf = malloc(st.st_size + 1);
        assert(buf);
        nread = read(fd, buf, st.st_size);
        assert(nread == st.st_size);
        buf[nread] = 0;

        /* Clean up */
        close(fd);
        unlink(tmp);

        return buf;
}

140static void init_prog(struct context *ctx, unsigned local_sz,
141                      unsigned private_sz, unsigned input_sz,
142                      const char *src, const char *defs)
143{
144        struct pipe_context *pipe = ctx->pipe;
145        struct tgsi_token prog[1024];
146        struct pipe_compute_state cs = {
147                .prog = prog,
148                .req_local_mem = local_sz,
149                .req_private_mem = private_sz,
150                .req_input_mem = input_sz
151        };
152        char *psrc = preprocess_prog(ctx, src, defs);
153        int ret;
154
155        ret = tgsi_text_translate(psrc, prog, Elements(prog));
156        assert(ret);
157        free(psrc);
158
159        ctx->hwcs = pipe->create_compute_state(pipe, &cs);
160        assert(ctx->hwcs);
161
162        pipe->bind_compute_state(pipe, ctx->hwcs);
163}
164
165static void destroy_prog(struct context *ctx)
166{
167        struct pipe_context *pipe = ctx->pipe;
168
169        pipe->delete_compute_state(pipe, ctx->hwcs);
170        ctx->hwcs = NULL;
171}
172
173static void init_tex(struct context *ctx, int slot,
174                     enum pipe_texture_target target, bool rw,
175                     enum pipe_format format, int w, int h,
176                     void (*init)(void *, int, int, int))
177{
178        struct pipe_context *pipe = ctx->pipe;
179        struct pipe_resource **tex = &ctx->tex[slot];
180        struct pipe_resource ttex = {
181                .target = target,
182                .format = format,
183                .width0 = w,
184                .height0 = h,
185                .depth0 = 1,
186                .array_size = 1,
187                .bind = (PIPE_BIND_SAMPLER_VIEW |
188                         PIPE_BIND_COMPUTE_RESOURCE |
189                         PIPE_BIND_GLOBAL)
190        };
191        int dx = util_format_get_blocksize(format);
192        int dy = util_format_get_stride(format, w);
193        int nx = (target == PIPE_BUFFER ? (w / dx) :
194                  util_format_get_nblocksx(format, w));
195        int ny = (target == PIPE_BUFFER ? 1 :
196                  util_format_get_nblocksy(format, h));
197        struct pipe_transfer *xfer;
198        char *map;
199        int x, y;
200
201        *tex = ctx->screen->resource_create(ctx->screen, &ttex);
202        assert(*tex);
203
204        map = pipe->transfer_map(pipe, *tex, 0, PIPE_TRANSFER_WRITE,
205                                  &(struct pipe_box) { .width = w,
206                                                  .height = h,
207                                                  .depth = 1 }, &xfer);
208        assert(xfer);
209        assert(map);
210
211        for (y = 0; y < ny; ++y) {
212                for (x = 0; x < nx; ++x) {
213                        init(map + y * dy + x * dx, slot, x, y);
214                }
215        }
216
217        pipe->transfer_unmap(pipe, xfer);
218
219        ctx->tex_rw[slot] = rw;
220}
221
/*
 * Default comparison used by check_tex(): true when the first `sz'
 * bytes at `x' and `y' are identical.
 */
static bool default_check(void *x, void *y, int sz) {
        return !memcmp(x, y, sz);
}

226static void check_tex(struct context *ctx, int slot,
227                      void (*expect)(void *, int, int, int),
228                      bool (*check)(void *, void *, int))
229{
230        struct pipe_context *pipe = ctx->pipe;
231        struct pipe_resource *tex = ctx->tex[slot];
232        int dx = util_format_get_blocksize(tex->format);
233        int dy = util_format_get_stride(tex->format, tex->width0);
234        int nx = (tex->target == PIPE_BUFFER ? (tex->width0 / dx) :
235                  util_format_get_nblocksx(tex->format, tex->width0));
236        int ny = (tex->target == PIPE_BUFFER ? 1 :
237                  util_format_get_nblocksy(tex->format, tex->height0));
238        struct pipe_transfer *xfer;
239        char *map;
240        int x, y, i;
241        int err = 0;
242
243        if (!check)
244                check = default_check;
245
246        map = pipe->transfer_map(pipe, tex, 0, PIPE_TRANSFER_READ,
247                                  &(struct pipe_box) { .width = tex->width0,
248                                        .height = tex->height0,
249                                        .depth = 1 }, &xfer);
250        assert(xfer);
251        assert(map);
252
253        for (y = 0; y < ny; ++y) {
254                for (x = 0; x < nx; ++x) {
255                        uint32_t exp[4];
256                        uint32_t *res = (uint32_t *)(map + y * dy + x * dx);
257
258                        expect(exp, slot, x, y);
259                        if (check(res, exp, dx) || (++err) > 20)
260                                continue;
261
262                        if (dx < 4) {
263                                uint32_t u = 0, v = 0;
264
265                                for (i = 0; i < dx; i++) {
266                                        u |= ((uint8_t *)exp)[i] << (8 * i);
267                                        v |= ((uint8_t *)res)[i] << (8 * i);
268                                }
269                                printf("(%d, %d): got 0x%x, expected 0x%x\n",
270                                       x, y, v, u);
271                        } else {
272                                for (i = 0; i < dx / 4; i++) {
273                                        printf("(%d, %d)[%d]: got 0x%x/%f,"
274                                               " expected 0x%x/%f\n", x, y, i,
275                                               res[i], ((float *)res)[i],
276                                               exp[i], ((float *)exp)[i]);
277                                }
278                        }
279                }
280        }
281
282        pipe->transfer_unmap(pipe, xfer);
283
284        if (err)
285                printf("(%d, %d): \x1b[31mFAIL\x1b[0m (%d)\n", x, y, err);
286        else
287                printf("(%d, %d): \x1b[32mOK\x1b[0m\n", x, y);
288}
289
290static void destroy_tex(struct context *ctx)
291{
292        int i;
293
294        for (i = 0; i < MAX_RESOURCES; ++i) {
295                if (ctx->tex[i])
296                        pipe_resource_reference(&ctx->tex[i], NULL);
297        }
298}
299
300static void init_sampler_views(struct context *ctx, const int *slots)
301{
302        struct pipe_context *pipe = ctx->pipe;
303        struct pipe_sampler_view tview;
304        int i;
305
306        for (i = 0; *slots >= 0; ++i, ++slots) {
307                u_sampler_view_default_template(&tview, ctx->tex[*slots],
308                                                ctx->tex[*slots]->format);
309
310                ctx->view[i] = pipe->create_sampler_view(pipe, ctx->tex[*slots],
311                                                         &tview);
312                assert(ctx->view[i]);
313        }
314
315        pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, i, ctx->view);
316}
317
318static void destroy_sampler_views(struct context *ctx)
319{
320        struct pipe_context *pipe = ctx->pipe;
321        int i;
322
323        pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, MAX_RESOURCES, NULL);
324
325        for (i = 0; i < MAX_RESOURCES; ++i) {
326                if (ctx->view[i]) {
327                        pipe->sampler_view_destroy(pipe, ctx->view[i]);
328                        ctx->view[i] = NULL;
329                }
330        }
331}
332
333static void init_compute_resources(struct context *ctx, const int *slots)
334{
335        struct pipe_context *pipe = ctx->pipe;
336        int i;
337
338        for (i = 0; *slots >= 0; ++i, ++slots) {
339                struct pipe_surface tsurf = {
340                        .format = ctx->tex[*slots]->format,
341                        .writable = ctx->tex_rw[*slots]
342                };
343
344                if (ctx->tex[*slots]->target == PIPE_BUFFER)
345                        tsurf.u.buf.last_element = ctx->tex[*slots]->width0 - 1;
346
347                ctx->surf[i] = pipe->create_surface(pipe, ctx->tex[*slots],
348                                                    &tsurf);
349                assert(ctx->surf[i]);
350        }
351
352        pipe->set_compute_resources(pipe, 0, i, ctx->surf);
353}
354
355static void destroy_compute_resources(struct context *ctx)
356{
357        struct pipe_context *pipe = ctx->pipe;
358        int i;
359
360        pipe->set_compute_resources(pipe, 0, MAX_RESOURCES, NULL);
361
362        for (i = 0; i < MAX_RESOURCES; ++i) {
363                if (ctx->surf[i]) {
364                        pipe->surface_destroy(pipe, ctx->surf[i]);
365                        ctx->surf[i] = NULL;
366                }
367        }
368}
369
370static void init_sampler_states(struct context *ctx, int n)
371{
372        struct pipe_context *pipe = ctx->pipe;
373        struct pipe_sampler_state smp = {
374                .normalized_coords = 1,
375        };
376        int i;
377
378        for (i = 0; i < n; ++i) {
379                ctx->hwsmp[i] = pipe->create_sampler_state(pipe, &smp);
380                assert(ctx->hwsmp[i]);
381        }
382
383        pipe->bind_sampler_states(pipe, PIPE_SHADER_COMPUTE, 0, i, ctx->hwsmp);
384}
385
386static void destroy_sampler_states(struct context *ctx)
387{
388        struct pipe_context *pipe = ctx->pipe;
389        int i;
390
391        pipe->bind_sampler_states(pipe, PIPE_SHADER_COMPUTE,
392				  0, MAX_RESOURCES, NULL);
393
394        for (i = 0; i < MAX_RESOURCES; ++i) {
395                if (ctx->hwsmp[i]) {
396                        pipe->delete_sampler_state(pipe, ctx->hwsmp[i]);
397                        ctx->hwsmp[i] = NULL;
398                }
399        }
400}
401
402static void init_globals(struct context *ctx, const int *slots,
403                         uint32_t **handles)
404{
405        struct pipe_context *pipe = ctx->pipe;
406        struct pipe_resource *res[MAX_RESOURCES];
407        int i;
408
409        for (i = 0; *slots >= 0; ++i, ++slots)
410                res[i] = ctx->tex[*slots];
411
412        pipe->set_global_binding(pipe, 0, i, res, handles);
413}
414
415static void destroy_globals(struct context *ctx)
416{
417        struct pipe_context *pipe = ctx->pipe;
418
419        pipe->set_global_binding(pipe, 0, MAX_RESOURCES, NULL, NULL);
420}
421
422static void launch_grid(struct context *ctx, const uint *block_layout,
423                        const uint *grid_layout, uint32_t pc,
424                        const void *input)
425{
426        struct pipe_context *pipe = ctx->pipe;
427
428        pipe->launch_grid(pipe, block_layout, grid_layout, pc, input);
429}
430
431static void test_system_values(struct context *ctx)
432{
433        const char *src = "COMP\n"
434                "DCL RES[0], BUFFER, RAW, WR\n"
435                "DCL SV[0], BLOCK_ID[0]\n"
436                "DCL SV[1], BLOCK_SIZE[0]\n"
437                "DCL SV[2], GRID_SIZE[0]\n"
438                "DCL SV[3], THREAD_ID[0]\n"
439                "DCL TEMP[0], LOCAL\n"
440                "DCL TEMP[1], LOCAL\n"
441                "IMM UINT32 { 64, 0, 0, 0 }\n"
442                "IMM UINT32 { 16, 0, 0, 0 }\n"
443                "IMM UINT32 { 0, 0, 0, 0 }\n"
444                "\n"
445                "BGNSUB"
446                "  UMUL TEMP[0], SV[0], SV[1]\n"
447                "  UADD TEMP[0], TEMP[0], SV[3]\n"
448                "  UMUL TEMP[1], SV[1], SV[2]\n"
449                "  UMUL TEMP[0].w, TEMP[0], TEMP[1].zzzz\n"
450                "  UMUL TEMP[0].zw, TEMP[0], TEMP[1].yyyy\n"
451                "  UMUL TEMP[0].yzw, TEMP[0], TEMP[1].xxxx\n"
452                "  UADD TEMP[0].xy, TEMP[0].xyxy, TEMP[0].zwzw\n"
453                "  UADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy\n"
454                "  UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
455                "  STORE RES[0].xyzw, TEMP[0], SV[0]\n"
456                "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
457                "  STORE RES[0].xyzw, TEMP[0], SV[1]\n"
458                "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
459                "  STORE RES[0].xyzw, TEMP[0], SV[2]\n"
460                "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
461                "  STORE RES[0].xyzw, TEMP[0], SV[3]\n"
462                "  RET\n"
463                "ENDSUB\n";
464        void init(void *p, int s, int x, int y) {
465                *(uint32_t *)p = 0xdeadbeef;
466        }
467        void expect(void *p, int s, int x, int y) {
468                int id = x / 16, sv = (x % 16) / 4, c = x % 4;
469                int tid[] = { id % 20, (id % 240) / 20, id / 240, 0 };
470                int bsz[] = { 4, 3, 5, 1};
471                int gsz[] = { 5, 4, 1, 1};
472
473                switch (sv) {
474                case 0:
475                        *(uint32_t *)p = tid[c] / bsz[c];
476                        break;
477                case 1:
478                        *(uint32_t *)p = bsz[c];
479                        break;
480                case 2:
481                        *(uint32_t *)p = gsz[c];
482                        break;
483                case 3:
484                        *(uint32_t *)p = tid[c] % bsz[c];
485                        break;
486                }
487        }
488
489        printf("- %s\n", __func__);
490
491        init_prog(ctx, 0, 0, 0, src, NULL);
492        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
493                 76800, 0, init);
494        init_compute_resources(ctx, (int []) { 0, -1 });
495        launch_grid(ctx, (uint []){4, 3, 5}, (uint []){5, 4, 1}, 0, NULL);
496        check_tex(ctx, 0, expect, NULL);
497        destroy_compute_resources(ctx);
498        destroy_tex(ctx);
499        destroy_prog(ctx);
500}
501
502static void test_resource_access(struct context *ctx)
503{
504        const char *src = "COMP\n"
505                "DCL RES[0], BUFFER, RAW, WR\n"
506                "DCL RES[1], 2D, RAW, WR\n"
507                "DCL SV[0], BLOCK_ID[0]\n"
508                "DCL TEMP[0], LOCAL\n"
509                "DCL TEMP[1], LOCAL\n"
510                "IMM UINT32 { 15, 0, 0, 0 }\n"
511                "IMM UINT32 { 16, 1, 0, 0 }\n"
512                "\n"
513                "    BGNSUB\n"
514                "       UADD TEMP[0].x, SV[0].xxxx, SV[0].yyyy\n"
515                "       AND TEMP[0].x, TEMP[0], IMM[0]\n"
516                "       UMUL TEMP[0].x, TEMP[0], IMM[1]\n"
517                "       LOAD TEMP[0].xyzw, RES[0], TEMP[0]\n"
518                "       UMUL TEMP[1], SV[0], IMM[1]\n"
519                "       STORE RES[1].xyzw, TEMP[1], TEMP[0]\n"
520                "       RET\n"
521                "    ENDSUB\n";
522        void init0(void *p, int s, int x, int y) {
523                *(float *)p = 8.0 - (float)x;
524        }
525        void init1(void *p, int s, int x, int y) {
526                *(uint32_t *)p = 0xdeadbeef;
527        }
528        void expect(void *p, int s, int x, int y) {
529                *(float *)p = 8.0 - (float)((x + 4*y) & 0x3f);
530        }
531
532        printf("- %s\n", __func__);
533
534        init_prog(ctx, 0, 0, 0, src, NULL);
535        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
536                 256, 0, init0);
537        init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
538                 60, 12, init1);
539        init_compute_resources(ctx, (int []) { 0, 1, -1 });
540        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){15, 12, 1}, 0, NULL);
541        check_tex(ctx, 1, expect, NULL);
542        destroy_compute_resources(ctx);
543        destroy_tex(ctx);
544        destroy_prog(ctx);
545}
546
547static void test_function_calls(struct context *ctx)
548{
549        const char *src = "COMP\n"
550                "DCL RES[0], 2D, RAW, WR\n"
551                "DCL SV[0], BLOCK_ID[0]\n"
552                "DCL SV[1], BLOCK_SIZE[0]\n"
553                "DCL SV[2], GRID_SIZE[0]\n"
554                "DCL SV[3], THREAD_ID[0]\n"
555                "DCL TEMP[0]\n"
556                "DCL TEMP[1]\n"
557                "DCL TEMP[2], LOCAL\n"
558                "IMM UINT32 { 0, 11, 22, 33 }\n"
559                "IMM FLT32 { 11, 33, 55, 99 }\n"
560                "IMM UINT32 { 4, 1, 0, 0 }\n"
561                "IMM UINT32 { 12, 0, 0, 0 }\n"
562                "\n"
563                "00: BGNSUB\n"
564                "01:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
565                "02:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
566                "03:  USLT TEMP[0].x, TEMP[0], IMM[0]\n"
567                "04:  RET\n"
568                "05: ENDSUB\n"
569                "06: BGNSUB\n"
570                "07:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
571                "08:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
572                "09:  USLT TEMP[0].x, TEMP[0], IMM[0].yyyy\n"
573                "10:  IF TEMP[0].xxxx\n"
574                "11:   CAL :0\n"
575                "12:  ENDIF\n"
576                "13:  RET\n"
577                "14: ENDSUB\n"
578                "15: BGNSUB\n"
579                "16:  UMUL TEMP[2], SV[0], SV[1]\n"
580                "17:  UADD TEMP[2], TEMP[2], SV[3]\n"
581                "18:  UMUL TEMP[2], TEMP[2], IMM[2]\n"
582                "00:  MOV TEMP[1].x, IMM[2].wwww\n"
583                "19:  LOAD TEMP[0].x, RES[0].xxxx, TEMP[2]\n"
584                "20:  CAL :6\n"
585                "21:  STORE RES[0].x, TEMP[2], TEMP[1].xxxx\n"
586                "22:  RET\n"
587                "23: ENDSUB\n";
588        void init(void *p, int s, int x, int y) {
589                *(uint32_t *)p = 15 * y + x;
590        }
591        void expect(void *p, int s, int x, int y) {
592                *(uint32_t *)p = (15 * y + x) < 4 ? 2 : 1 ;
593        }
594
595        printf("- %s\n", __func__);
596
597        init_prog(ctx, 0, 0, 0, src, NULL);
598        init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
599                 15, 12, init);
600        init_compute_resources(ctx, (int []) { 0, -1 });
601        launch_grid(ctx, (uint []){3, 3, 3}, (uint []){5, 4, 1}, 15, NULL);
602        check_tex(ctx, 0, expect, NULL);
603        destroy_compute_resources(ctx);
604        destroy_tex(ctx);
605        destroy_prog(ctx);
606}
607
608static void test_input_global(struct context *ctx)
609{
610        const char *src = "COMP\n"
611                "DCL SV[0], THREAD_ID[0]\n"
612                "DCL TEMP[0], LOCAL\n"
613                "DCL TEMP[1], LOCAL\n"
614                "IMM UINT32 { 8, 0, 0, 0 }\n"
615                "\n"
616                "    BGNSUB\n"
617                "       UMUL TEMP[0], SV[0], IMM[0]\n"
618                "       LOAD TEMP[1].xy, RINPUT, TEMP[0]\n"
619                "       LOAD TEMP[0].x, RGLOBAL, TEMP[1].yyyy\n"
620                "       UADD TEMP[1].x, TEMP[0], -TEMP[1]\n"
621                "       STORE RGLOBAL.x, TEMP[1].yyyy, TEMP[1]\n"
622                "       RET\n"
623                "    ENDSUB\n";
624        void init(void *p, int s, int x, int y) {
625                *(uint32_t *)p = 0xdeadbeef;
626        }
627        void expect(void *p, int s, int x, int y) {
628                *(uint32_t *)p = 0xdeadbeef - (x == 0 ? 0x10001 + 2 * s : 0);
629        }
630        uint32_t input[8] = { 0x10001, 0x10002, 0x10003, 0x10004,
631                              0x10005, 0x10006, 0x10007, 0x10008 };
632
633        printf("- %s\n", __func__);
634
635        init_prog(ctx, 0, 0, 32, src, NULL);
636        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
637        init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
638        init_tex(ctx, 2, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
639        init_tex(ctx, 3, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
640        init_globals(ctx, (int []){ 0, 1, 2, 3, -1 },
641                     (uint32_t *[]){ &input[1], &input[3],
642                                     &input[5], &input[7] });
643        launch_grid(ctx, (uint []){4, 1, 1}, (uint []){1, 1, 1}, 0, input);
644        check_tex(ctx, 0, expect, NULL);
645        check_tex(ctx, 1, expect, NULL);
646        check_tex(ctx, 2, expect, NULL);
647        check_tex(ctx, 3, expect, NULL);
648        destroy_globals(ctx);
649        destroy_tex(ctx);
650        destroy_prog(ctx);
651}
652
653static void test_private(struct context *ctx)
654{
655        const char *src = "COMP\n"
656                "DCL RES[0], BUFFER, RAW, WR\n"
657                "DCL SV[0], BLOCK_ID[0]\n"
658                "DCL SV[1], BLOCK_SIZE[0]\n"
659                "DCL SV[2], THREAD_ID[0]\n"
660                "DCL TEMP[0], LOCAL\n"
661                "DCL TEMP[1], LOCAL\n"
662                "DCL TEMP[2], LOCAL\n"
663                "IMM UINT32 { 128, 0, 0, 0 }\n"
664                "IMM UINT32 { 4, 0, 0, 0 }\n"
665                "\n"
666                "    BGNSUB\n"
667                "       UMUL TEMP[0].x, SV[0], SV[1]\n"
668                "       UADD TEMP[0].x, TEMP[0], SV[2]\n"
669                "       MOV TEMP[1].x, IMM[0].wwww\n"
670                "       BGNLOOP\n"
671                "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
672                "               IF TEMP[2]\n"
673                "                       BRK\n"
674                "               ENDIF\n"
675                "               UDIV TEMP[2].x, TEMP[1], IMM[1]\n"
676                "               UADD TEMP[2].x, TEMP[2], TEMP[0]\n"
677                "               STORE RPRIVATE.x, TEMP[1], TEMP[2]\n"
678                "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
679                "       ENDLOOP\n"
680                "       MOV TEMP[1].x, IMM[0].wwww\n"
681                "       UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
682                "       BGNLOOP\n"
683                "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
684                "               IF TEMP[2]\n"
685                "                       BRK\n"
686                "               ENDIF\n"
687                "               LOAD TEMP[2].x, RPRIVATE, TEMP[1]\n"
688                "               STORE RES[0].x, TEMP[0], TEMP[2]\n"
689                "               UADD TEMP[0].x, TEMP[0], IMM[1]\n"
690                "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
691                "       ENDLOOP\n"
692                "       RET\n"
693                "    ENDSUB\n";
694        void init(void *p, int s, int x, int y) {
695                *(uint32_t *)p = 0xdeadbeef;
696        }
697        void expect(void *p, int s, int x, int y) {
698                *(uint32_t *)p = (x / 32) + x % 32;
699        }
700
701        printf("- %s\n", __func__);
702
703        init_prog(ctx, 0, 128, 0, src, NULL);
704        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
705                 32768, 0, init);
706        init_compute_resources(ctx, (int []) { 0, -1 });
707        launch_grid(ctx, (uint []){16, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
708        check_tex(ctx, 0, expect, NULL);
709        destroy_compute_resources(ctx);
710        destroy_tex(ctx);
711        destroy_prog(ctx);
712}
713
714static void test_local(struct context *ctx)
715{
716        const char *src = "COMP\n"
717                "DCL RES[0], BUFFER, RAW, WR\n"
718                "DCL SV[0], BLOCK_ID[0]\n"
719                "DCL SV[1], BLOCK_SIZE[0]\n"
720                "DCL SV[2], THREAD_ID[0]\n"
721                "DCL TEMP[0], LOCAL\n"
722                "DCL TEMP[1], LOCAL\n"
723                "DCL TEMP[2], LOCAL\n"
724                "IMM UINT32 { 1, 0, 0, 0 }\n"
725                "IMM UINT32 { 2, 0, 0, 0 }\n"
726                "IMM UINT32 { 4, 0, 0, 0 }\n"
727                "IMM UINT32 { 32, 0, 0, 0 }\n"
728                "IMM UINT32 { 128, 0, 0, 0 }\n"
729                "\n"
730                "    BGNSUB\n"
731                "       UMUL TEMP[0].x, SV[2], IMM[2]\n"
732                "       STORE RLOCAL.x, TEMP[0], IMM[0].wwww\n"
733                "       MFENCE RLOCAL\n"
734                "       USLT TEMP[1].x, SV[2], IMM[3]\n"
735                "       IF TEMP[1]\n"
736                "               UADD TEMP[1].x, TEMP[0], IMM[4]\n"
737                "               BGNLOOP\n"
738                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
739                "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
740                "                       IF TEMP[2]\n"
741                "                               BRK\n"
742                "                       ENDIF\n"
743                "               ENDLOOP\n"
744                "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
745                "               MFENCE RLOCAL\n"
746                "               BGNLOOP\n"
747                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
748                "                       USEQ TEMP[2].x, TEMP[2], IMM[1]\n"
749                "                       IF TEMP[2]\n"
750                "                               BRK\n"
751                "                       ENDIF\n"
752                "               ENDLOOP\n"
753                "       ELSE\n"
754                "               UADD TEMP[1].x, TEMP[0], -IMM[4]\n"
755                "               BGNLOOP\n"
756                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
757                "                       USEQ TEMP[2].x, TEMP[2], IMM[0].wwww\n"
758                "                       IF TEMP[2]\n"
759                "                               BRK\n"
760                "                       ENDIF\n"
761                "               ENDLOOP\n"
762                "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
763                "               MFENCE RLOCAL\n"
764                "               BGNLOOP\n"
765                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
766                "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
767                "                       IF TEMP[2]\n"
768                "                               BRK\n"
769                "                       ENDIF\n"
770                "               ENDLOOP\n"
771                "               STORE RLOCAL.x, TEMP[0], IMM[1]\n"
772                "               MFENCE RLOCAL\n"
773                "       ENDIF\n"
774                "       UMUL TEMP[1].x, SV[0], SV[1]\n"
775                "       UMUL TEMP[1].x, TEMP[1], IMM[2]\n"
776                "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
777                "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
778                "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
779                "       RET\n"
780                "    ENDSUB\n";
781        void init(void *p, int s, int x, int y) {
782                *(uint32_t *)p = 0xdeadbeef;
783        }
784        void expect(void *p, int s, int x, int y) {
785                *(uint32_t *)p = x & 0x20 ? 2 : 1;
786        }
787
788        printf("- %s\n", __func__);
789
790        init_prog(ctx, 256, 0, 0, src, NULL);
791        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
792                 4096, 0, init);
793        init_compute_resources(ctx, (int []) { 0, -1 });
794        launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
795        check_tex(ctx, 0, expect, NULL);
796        destroy_compute_resources(ctx);
797        destroy_tex(ctx);
798        destroy_prog(ctx);
799}
800
801static void test_sample(struct context *ctx)
802{
803        const char *src = "COMP\n"
804                "DCL SVIEW[0], 2D, FLOAT\n"
805                "DCL RES[0], 2D, RAW, WR\n"
806                "DCL SAMP[0]\n"
807                "DCL SV[0], BLOCK_ID[0]\n"
808                "DCL TEMP[0], LOCAL\n"
809                "DCL TEMP[1], LOCAL\n"
810                "IMM UINT32 { 16, 1, 0, 0 }\n"
811                "IMM FLT32 { 128, 32, 0, 0 }\n"
812                "\n"
813                "    BGNSUB\n"
814                "       I2F TEMP[1], SV[0]\n"
815                "       DIV TEMP[1], TEMP[1], IMM[1]\n"
816                "       SAMPLE TEMP[1], TEMP[1], SVIEW[0], SAMP[0]\n"
817                "       UMUL TEMP[0], SV[0], IMM[0]\n"
818                "       STORE RES[0].xyzw, TEMP[0], TEMP[1]\n"
819                "       RET\n"
820                "    ENDSUB\n";
821        void init(void *p, int s, int x, int y) {
822                *(float *)p = s ? 1 : x * y;
823        }
824        void expect(void *p, int s, int x, int y) {
825                switch (x % 4) {
826                case 0:
827                        *(float *)p = x / 4 * y;
828                        break;
829                case 1:
830                case 2:
831                        *(float *)p = 0;
832                        break;
833                case 3:
834                        *(float *)p = 1;
835                        break;
836                }
837        }
838
839        printf("- %s\n", __func__);
840
841        init_prog(ctx, 0, 0, 0, src, NULL);
842        init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
843                 128, 32, init);
844        init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
845                 512, 32, init);
846        init_compute_resources(ctx, (int []) { 1, -1 });
847        init_sampler_views(ctx, (int []) { 0, -1 });
848        init_sampler_states(ctx, 2);
849        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, NULL);
850        check_tex(ctx, 1, expect, NULL);
851        destroy_sampler_states(ctx);
852        destroy_sampler_views(ctx);
853        destroy_compute_resources(ctx);
854        destroy_tex(ctx);
855        destroy_prog(ctx);
856}
857
858static void test_many_kern(struct context *ctx)
859{
860        const char *src = "COMP\n"
861                "DCL RES[0], BUFFER, RAW, WR\n"
862                "DCL TEMP[0], LOCAL\n"
863                "IMM UINT32 { 0, 1, 2, 3 }\n"
864                "IMM UINT32 { 4, 0, 0, 0 }\n"
865                "\n"
866                "    BGNSUB\n"
867                "       UMUL TEMP[0].x, IMM[0].xxxx, IMM[1].xxxx\n"
868                "       STORE RES[0].x, TEMP[0], IMM[0].xxxx\n"
869                "       RET\n"
870                "    ENDSUB\n"
871                "    BGNSUB\n"
872                "       UMUL TEMP[0].x, IMM[0].yyyy, IMM[1].xxxx\n"
873                "       STORE RES[0].x, TEMP[0], IMM[0].yyyy\n"
874                "       RET\n"
875                "    ENDSUB\n"
876                "    BGNSUB\n"
877                "       UMUL TEMP[0].x, IMM[0].zzzz, IMM[1].xxxx\n"
878                "       STORE RES[0].x, TEMP[0], IMM[0].zzzz\n"
879                "       RET\n"
880                "    ENDSUB\n"
881                "    BGNSUB\n"
882                "       UMUL TEMP[0].x, IMM[0].wwww, IMM[1].xxxx\n"
883                "       STORE RES[0].x, TEMP[0], IMM[0].wwww\n"
884                "       RET\n"
885                "    ENDSUB\n";
886        void init(void *p, int s, int x, int y) {
887                *(uint32_t *)p = 0xdeadbeef;
888        }
889        void expect(void *p, int s, int x, int y) {
890                *(uint32_t *)p = x;
891        }
892
893        printf("- %s\n", __func__);
894
895        init_prog(ctx, 0, 0, 0, src, NULL);
896        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
897                 16, 0, init);
898        init_compute_resources(ctx, (int []) { 0, -1 });
899        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 0, NULL);
900        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 5, NULL);
901        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 10, NULL);
902        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 15, NULL);
903        check_tex(ctx, 0, expect, NULL);
904        destroy_compute_resources(ctx);
905        destroy_tex(ctx);
906        destroy_prog(ctx);
907}
908
909static void test_constant(struct context *ctx)
910{
911        const char *src = "COMP\n"
912                "DCL RES[0], BUFFER, RAW\n"
913                "DCL RES[1], BUFFER, RAW, WR\n"
914                "DCL SV[0], BLOCK_ID[0]\n"
915                "DCL TEMP[0], LOCAL\n"
916                "DCL TEMP[1], LOCAL\n"
917                "IMM UINT32 { 4, 0, 0, 0 }\n"
918                "\n"
919                "    BGNSUB\n"
920                "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
921                "       LOAD TEMP[1].x, RES[0], TEMP[0]\n"
922                "       STORE RES[1].x, TEMP[0], TEMP[1]\n"
923                "       RET\n"
924                "    ENDSUB\n";
925        void init(void *p, int s, int x, int y) {
926                *(float *)p = s ? 0xdeadbeef : 8.0 - (float)x;
927        }
928        void expect(void *p, int s, int x, int y) {
929                *(float *)p = 8.0 - (float)x;
930        }
931
932        printf("- %s\n", __func__);
933
934        init_prog(ctx, 0, 0, 0, src, NULL);
935        init_tex(ctx, 0, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
936                 256, 0, init);
937        init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
938                 256, 0, init);
939        init_compute_resources(ctx, (int []) { 0, 1, -1 });
940        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);
941        check_tex(ctx, 1, expect, NULL);
942        destroy_compute_resources(ctx);
943        destroy_tex(ctx);
944        destroy_prog(ctx);
945}
946
947static void test_resource_indirect(struct context *ctx)
948{
949        const char *src = "COMP\n"
950                "DCL RES[0], BUFFER, RAW, WR\n"
951                "DCL RES[1..3], BUFFER, RAW\n"
952                "DCL SV[0], BLOCK_ID[0]\n"
953                "DCL TEMP[0], LOCAL\n"
954                "DCL TEMP[1], LOCAL\n"
955                "IMM UINT32 { 4, 0, 0, 0 }\n"
956                "\n"
957                "    BGNSUB\n"
958                "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
959                "       LOAD TEMP[1].x, RES[1], TEMP[0]\n"
960                "       LOAD TEMP[1].x, RES[TEMP[1].x+2], TEMP[0]\n"
961                "       STORE RES[0].x, TEMP[0], TEMP[1]\n"
962                "       RET\n"
963                "    ENDSUB\n";
964        void init(void *p, int s, int x, int y) {
965                *(uint32_t *)p = s == 0 ? 0xdeadbeef :
966                   s == 1 ? x % 2 :
967                   s == 2 ? 2 * x :
968                   2 * x + 1;
969        }
970        void expect(void *p, int s, int x, int y) {
971           *(uint32_t *)p = 2 * x + (x % 2 ? 1 : 0);
972        }
973
974        printf("- %s\n", __func__);
975
976        init_prog(ctx, 0, 0, 0, src, NULL);
977        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
978                 256, 0, init);
979        init_tex(ctx, 1, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
980                 256, 0, init);
981        init_tex(ctx, 2, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
982                 256, 0, init);
983        init_tex(ctx, 3, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
984                 256, 0, init);
985        init_compute_resources(ctx, (int []) { 0, 1, 2, 3, -1 });
986        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);
987        check_tex(ctx, 0, expect, NULL);
988        destroy_compute_resources(ctx);
989        destroy_tex(ctx);
990        destroy_prog(ctx);
991}
992
993enum pipe_format surface_fmts[] = {
994        PIPE_FORMAT_B8G8R8A8_UNORM,
995        PIPE_FORMAT_B8G8R8X8_UNORM,
996        PIPE_FORMAT_A8R8G8B8_UNORM,
997        PIPE_FORMAT_X8R8G8B8_UNORM,
998        PIPE_FORMAT_X8R8G8B8_UNORM,
999        PIPE_FORMAT_L8_UNORM,
1000        PIPE_FORMAT_A8_UNORM,
1001        PIPE_FORMAT_I8_UNORM,
1002        PIPE_FORMAT_L8A8_UNORM,
1003        PIPE_FORMAT_R32_FLOAT,
1004        PIPE_FORMAT_R32G32_FLOAT,
1005        PIPE_FORMAT_R32G32B32A32_FLOAT,
1006        PIPE_FORMAT_R32_UNORM,
1007        PIPE_FORMAT_R32G32_UNORM,
1008        PIPE_FORMAT_R32G32B32A32_UNORM,
1009        PIPE_FORMAT_R32_SNORM,
1010        PIPE_FORMAT_R32G32_SNORM,
1011        PIPE_FORMAT_R32G32B32A32_SNORM,
1012        PIPE_FORMAT_R8_UINT,
1013        PIPE_FORMAT_R8G8_UINT,
1014        PIPE_FORMAT_R8G8B8A8_UINT,
1015        PIPE_FORMAT_R8_SINT,
1016        PIPE_FORMAT_R8G8_SINT,
1017        PIPE_FORMAT_R8G8B8A8_SINT,
1018        PIPE_FORMAT_R32_UINT,
1019        PIPE_FORMAT_R32G32_UINT,
1020        PIPE_FORMAT_R32G32B32A32_UINT,
1021        PIPE_FORMAT_R32_SINT,
1022        PIPE_FORMAT_R32G32_SINT,
1023        PIPE_FORMAT_R32G32B32A32_SINT
1024};
1025
1026static void test_surface_ld(struct context *ctx)
1027{
1028        const char *src = "COMP\n"
1029                "DCL RES[0], 2D\n"
1030                "DCL RES[1], 2D, RAW, WR\n"
1031                "DCL SV[0], BLOCK_ID[0]\n"
1032                "DCL TEMP[0], LOCAL\n"
1033                "DCL TEMP[1], LOCAL\n"
1034                "IMM UINT32 { 16, 1, 0, 0 }\n"
1035                "\n"
1036                "    BGNSUB\n"
1037                "       LOAD TEMP[1], RES[0], SV[0]\n"
1038                "       UMUL TEMP[0], SV[0], IMM[0]\n"
1039                "       STORE RES[1].xyzw, TEMP[0], TEMP[1]\n"
1040                "       RET\n"
1041                "    ENDSUB\n";
1042        int i = 0;
1043        void init0f(void *p, int s, int x, int y) {
1044                float v[] = { 1.0, -.75, .50, -.25 };
1045                util_format_write_4f(surface_fmts[i], v, 0,
1046                                     p, 0, 0, 0, 1, 1);
1047        }
1048        void init0i(void *p, int s, int x, int y) {
1049                int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };
1050                util_format_write_4i(surface_fmts[i], v, 0,
1051                                     p, 0, 0, 0, 1, 1);
1052        }
1053        void init1(void *p, int s, int x, int y) {
1054                *(uint32_t *)p = 0xdeadbeef;
1055        }
1056        void expectf(void *p, int s, int x, int y) {
1057                float v[4], w[4];
1058                init0f(v, s, x / 4, y);
1059                util_format_read_4f(surface_fmts[i], w, 0,
1060                                    v, 0, 0, 0, 1, 1);
1061                *(float *)p = w[x % 4];
1062        }
1063        void expecti(void *p, int s, int x, int y) {
1064                int32_t v[4], w[4];
1065                init0i(v, s, x / 4, y);
1066                util_format_read_4i(surface_fmts[i], w, 0,
1067                                    v, 0, 0, 0, 1, 1);
1068                *(uint32_t *)p = w[x % 4];
1069        }
1070
1071        printf("- %s\n", __func__);
1072
1073        init_prog(ctx, 0, 0, 0, src, NULL);
1074
1075        for (i = 0; i < Elements(surface_fmts); i++) {
1076                bool is_int = util_format_is_pure_integer(surface_fmts[i]);
1077
1078                printf("   - %s\n", util_format_name(surface_fmts[i]));
1079
1080                if (!ctx->screen->is_format_supported(ctx->screen,
1081                       surface_fmts[i], PIPE_TEXTURE_2D, 1,
1082                       PIPE_BIND_COMPUTE_RESOURCE)) {
1083                   printf("(unsupported)\n");
1084                   continue;
1085                }
1086
1087                init_tex(ctx, 0, PIPE_TEXTURE_2D, true, surface_fmts[i],
1088                         128, 32, (is_int ? init0i : init0f));
1089                init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
1090                         512, 32, init1);
1091                init_compute_resources(ctx, (int []) { 0, 1, -1 });
1092                init_sampler_states(ctx, 2);
1093                launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
1094                            NULL);
1095                check_tex(ctx, 1, (is_int ? expecti : expectf), NULL);
1096                destroy_sampler_states(ctx);
1097                destroy_compute_resources(ctx);
1098                destroy_tex(ctx);
1099        }
1100
1101        destroy_prog(ctx);
1102}
1103
1104static void test_surface_st(struct context *ctx)
1105{
1106        const char *src = "COMP\n"
1107                "DCL RES[0], 2D, RAW\n"
1108                "DCL RES[1], 2D, WR\n"
1109                "DCL SV[0], BLOCK_ID[0]\n"
1110                "DCL TEMP[0], LOCAL\n"
1111                "DCL TEMP[1], LOCAL\n"
1112                "IMM UINT32 { 16, 1, 0, 0 }\n"
1113                "\n"
1114                "    BGNSUB\n"
1115                "       UMUL TEMP[0], SV[0], IMM[0]\n"
1116                "       LOAD TEMP[1], RES[0], TEMP[0]\n"
1117                "       STORE RES[1], SV[0], TEMP[1]\n"
1118                "       RET\n"
1119                "    ENDSUB\n";
1120        int i = 0;
1121        void init0f(void *p, int s, int x, int y) {
1122                float v[] = { 1.0, -.75, 0.5, -.25 };
1123                *(float *)p = v[x % 4];
1124        }
1125        void init0i(void *p, int s, int x, int y) {
1126                int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };
1127                *(int32_t *)p = v[x % 4];
1128        }
1129        void init1(void *p, int s, int x, int y) {
1130                memset(p, 1, util_format_get_blocksize(surface_fmts[i]));
1131        }
1132        void expectf(void *p, int s, int x, int y) {
1133                float vf[4];
1134                int j;
1135
1136                for (j = 0; j < 4; j++)
1137                        init0f(&vf[j], s, 4 * x + j, y);
1138                util_format_write_4f(surface_fmts[i], vf, 0,
1139                                     p, 0, 0, 0, 1, 1);
1140        }
1141        void expects(void *p, int s, int x, int y) {
1142                int32_t v[4];
1143                int j;
1144
1145                for (j = 0; j < 4; j++)
1146                        init0i(&v[j], s, 4 * x + j, y);
1147                util_format_write_4i(surface_fmts[i], v, 0,
1148                                     p, 0, 0, 0, 1, 1);
1149        }
1150        void expectu(void *p, int s, int x, int y) {
1151                uint32_t v[4];
1152                int j;
1153
1154                for (j = 0; j < 4; j++)
1155                        init0i(&v[j], s, 4 * x + j, y);
1156                util_format_write_4ui(surface_fmts[i], v, 0,
1157                                      p, 0, 0, 0, 1, 1);
1158        }
1159        bool check(void *x, void *y, int sz) {
1160                int j;
1161
1162                if (util_format_is_float(surface_fmts[i])) {
1163                        return fabs(*(float *)x - *(float *)y) < 3.92156863e-3;
1164
1165                } else if ((sz % 4) == 0) {
1166                        for (j = 0; j < sz / 4; j++)
1167                                if (abs(((uint32_t *)x)[j] -
1168                                        ((uint32_t *)y)[j]) > 1)
1169                                        return false;
1170                        return true;
1171                } else {
1172                        return !memcmp(x, y, sz);
1173                }
1174        }
1175
1176        printf("- %s\n", __func__);
1177
1178        init_prog(ctx, 0, 0, 0, src, NULL);
1179
1180        for (i = 0; i < Elements(surface_fmts); i++) {
1181                bool is_signed = (util_format_description(surface_fmts[i])
1182                                  ->channel[0].type == UTIL_FORMAT_TYPE_SIGNED);
1183                bool is_int = util_format_is_pure_integer(surface_fmts[i]);
1184
1185                printf("   - %s\n", util_format_name(surface_fmts[i]));
1186
1187                if (!ctx->screen->is_format_supported(ctx->screen,
1188                       surface_fmts[i], PIPE_TEXTURE_2D, 1,
1189                       PIPE_BIND_COMPUTE_RESOURCE)) {
1190                   printf("(unsupported)\n");
1191                   continue;
1192                }
1193
1194                init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
1195                         512, 32, (is_int ? init0i : init0f));
1196                init_tex(ctx, 1, PIPE_TEXTURE_2D, true, surface_fmts[i],
1197                         128, 32, init1);
1198                init_compute_resources(ctx, (int []) { 0, 1, -1 });
1199                init_sampler_states(ctx, 2);
1200                launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
1201                            NULL);
1202                check_tex(ctx, 1, (is_int && is_signed ? expects :
1203                                   is_int && !is_signed ? expectu :
1204                                   expectf), check);
1205                destroy_sampler_states(ctx);
1206                destroy_compute_resources(ctx);
1207                destroy_tex(ctx);
1208        }
1209
1210        destroy_prog(ctx);
1211}
1212
1213static void test_barrier(struct context *ctx)
1214{
1215        const char *src = "COMP\n"
1216                "DCL RES[0], BUFFER, RAW, WR\n"
1217                "DCL SV[0], BLOCK_ID[0]\n"
1218                "DCL SV[1], BLOCK_SIZE[0]\n"
1219                "DCL SV[2], THREAD_ID[0]\n"
1220                "DCL TEMP[0], LOCAL\n"
1221                "DCL TEMP[1], LOCAL\n"
1222                "DCL TEMP[2], LOCAL\n"
1223                "DCL TEMP[3], LOCAL\n"
1224                "IMM UINT32 { 1, 0, 0, 0 }\n"
1225                "IMM UINT32 { 4, 0, 0, 0 }\n"
1226                "IMM UINT32 { 32, 0, 0, 0 }\n"
1227                "\n"
1228                "    BGNSUB\n"
1229                "       UMUL TEMP[0].x, SV[2], IMM[1]\n"
1230                "       MOV TEMP[1].x, IMM[0].wwww\n"
1231                "       BGNLOOP\n"
1232                "               BARRIER\n"
1233                "               STORE RLOCAL.x, TEMP[0], TEMP[1]\n"
1234                "               BARRIER\n"
1235                "               MOV TEMP[2].x, IMM[0].wwww\n"
1236                "               BGNLOOP\n"
1237                "                       UMUL TEMP[3].x, TEMP[2], IMM[1]\n"
1238                "                       LOAD TEMP[3].x, RLOCAL, TEMP[3]\n"
1239                "                       USNE TEMP[3].x, TEMP[3], TEMP[1]\n"
1240                "                       IF TEMP[3]\n"
1241                "                               END\n"
1242                "                       ENDIF\n"
1243                "                       UADD TEMP[2].x, TEMP[2], IMM[0]\n"
1244                "                       USEQ TEMP[3].x, TEMP[2], SV[1]\n"
1245                "                       IF TEMP[3]\n"
1246                "                               BRK\n"
1247                "                       ENDIF\n"
1248                "               ENDLOOP\n"
1249                "               UADD TEMP[1].x, TEMP[1], IMM[0]\n"
1250                "               USEQ TEMP[2].x, TEMP[1], IMM[2]\n"
1251                "               IF TEMP[2]\n"
1252                "                       BRK\n"
1253                "               ENDIF\n"
1254                "       ENDLOOP\n"
1255                "       UMUL TEMP[1].x, SV[0], SV[1]\n"
1256                "       UMUL TEMP[1].x, TEMP[1], IMM[1]\n"
1257                "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
1258                "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
1259                "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
1260                "       RET\n"
1261                "    ENDSUB\n";
1262        void init(void *p, int s, int x, int y) {
1263                *(uint32_t *)p = 0xdeadbeef;
1264        }
1265        void expect(void *p, int s, int x, int y) {
1266                *(uint32_t *)p = 31;
1267        }
1268
1269        printf("- %s\n", __func__);
1270
1271        init_prog(ctx, 256, 0, 0, src, NULL);
1272        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
1273                 4096, 0, init);
1274        init_compute_resources(ctx, (int []) { 0, -1 });
1275        launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
1276        check_tex(ctx, 0, expect, NULL);
1277        destroy_compute_resources(ctx);
1278        destroy_tex(ctx);
1279        destroy_prog(ctx);
1280}
1281
1282static void test_atom_ops(struct context *ctx, bool global)
1283{
1284        const char *src = "COMP\n"
1285                "#ifdef TARGET_GLOBAL\n"
1286                "#define target RES[0]\n"
1287                "#else\n"
1288                "#define target RLOCAL\n"
1289                "#endif\n"
1290                ""
1291                "DCL RES[0], BUFFER, RAW, WR\n"
1292                "#define threadid SV[0]\n"
1293                "DCL threadid, THREAD_ID[0]\n"
1294                ""
1295                "#define offset TEMP[0]\n"
1296                "DCL offset, LOCAL\n"
1297                "#define tmp TEMP[1]\n"
1298                "DCL tmp, LOCAL\n"
1299                ""
1300                "#define k0 IMM[0]\n"
1301                "IMM UINT32 { 0, 0, 0, 0 }\n"
1302                "#define k1 IMM[1]\n"
1303                "IMM UINT32 { 1, 0, 0, 0 }\n"
1304                "#define k2 IMM[2]\n"
1305                "IMM UINT32 { 2, 0, 0, 0 }\n"
1306                "#define k3 IMM[3]\n"
1307                "IMM UINT32 { 3, 0, 0, 0 }\n"
1308                "#define k4 IMM[4]\n"
1309                "IMM UINT32 { 4, 0, 0, 0 }\n"
1310                "#define k5 IMM[5]\n"
1311                "IMM UINT32 { 5, 0, 0, 0 }\n"
1312                "#define k6 IMM[6]\n"
1313                "IMM UINT32 { 6, 0, 0, 0 }\n"
1314                "#define k7 IMM[7]\n"
1315                "IMM UINT32 { 7, 0, 0, 0 }\n"
1316                "#define k8 IMM[8]\n"
1317                "IMM UINT32 { 8, 0, 0, 0 }\n"
1318                "#define k9 IMM[9]\n"
1319                "IMM UINT32 { 9, 0, 0, 0 }\n"
1320                "#define korig IMM[10].xxxx\n"
1321                "#define karg IMM[10].yyyy\n"
1322                "IMM UINT32 { 3735928559, 286331153, 0, 0 }\n"
1323                "\n"
1324                "    BGNSUB\n"
1325                "       UMUL offset.x, threadid, k4\n"
1326                "       STORE target.x, offset, korig\n"
1327                "       USEQ tmp.x, threadid, k0\n"
1328                "       IF tmp\n"
1329                "               ATOMUADD tmp.x, target, offset, karg\n"
1330                "               ATOMUADD tmp.x, target, offset, tmp\n"
1331                "       ENDIF\n"
1332                "       USEQ tmp.x, threadid, k1\n"
1333                "       IF tmp\n"
1334                "               ATOMXCHG tmp.x, target, offset, karg\n"
1335                "               ATOMXCHG tmp.x, target, offset, tmp\n"
1336                "       ENDIF\n"
1337                "       USEQ tmp.x, threadid, k2\n"
1338                "       IF tmp\n"
1339                "               ATOMCAS tmp.x, target, offset, korig, karg\n"
1340                "               ATOMCAS tmp.x, target, offset, tmp, k0\n"
1341                "       ENDIF\n"
1342                "       USEQ tmp.x, threadid, k3\n"
1343                "       IF tmp\n"
1344                "               ATOMAND tmp.x, target, offset, karg\n"
1345                "               ATOMAND tmp.x, target, offset, tmp\n"
1346                "       ENDIF\n"
1347                "       USEQ tmp.x, threadid, k4\n"
1348                "       IF tmp\n"
1349                "               ATOMOR tmp.x, target, offset, karg\n"
1350                "               ATOMOR tmp.x, target, offset, tmp\n"
1351                "       ENDIF\n"
1352                "       USEQ tmp.x, threadid, k5\n"
1353                "       IF tmp\n"
1354                "               ATOMXOR tmp.x, target, offset, karg\n"
1355                "               ATOMXOR tmp.x, target, offset, tmp\n"
1356                "       ENDIF\n"
1357                "       USEQ tmp.x, threadid, k6\n"
1358                "       IF tmp\n"
1359                "               ATOMUMIN tmp.x, target, offset, karg\n"
1360                "               ATOMUMIN tmp.x, target, offset, tmp\n"
1361                "       ENDIF\n"
1362                "       USEQ tmp.x, threadid, k7\n"
1363                "       IF tmp\n"
1364                "               ATOMUMAX tmp.x, target, offset, karg\n"
1365                "               ATOMUMAX tmp.x, target, offset, tmp\n"
1366                "       ENDIF\n"
1367                "       USEQ tmp.x, threadid, k8\n"
1368                "       IF tmp\n"
1369                "               ATOMIMIN tmp.x, target, offset, karg\n"
1370                "               ATOMIMIN tmp.x, target, offset, tmp\n"
1371                "       ENDIF\n"
1372                "       USEQ tmp.x, threadid, k9\n"
1373                "       IF tmp\n"
1374                "               ATOMIMAX tmp.x, target, offset, karg\n"
1375                "               ATOMIMAX tmp.x, target, offset, tmp\n"
1376                "       ENDIF\n"
1377                "#ifdef TARGET_LOCAL\n"
1378                "       LOAD tmp.x, RLOCAL, offset\n"
1379                "       STORE RES[0].x, offset, tmp\n"
1380                "#endif\n"
1381                "       RET\n"
1382                "    ENDSUB\n";
1383
1384        void init(void *p, int s, int x, int y) {
1385                *(uint32_t *)p = 0xbad;
1386        }
1387        void expect(void *p, int s, int x, int y) {
1388                switch (x) {
1389                case 0:
1390                        *(uint32_t *)p = 0xce6c8eef;
1391                        break;
1392                case 1:
1393                        *(uint32_t *)p = 0xdeadbeef;
1394                        break;
1395                case 2:
1396                        *(uint32_t *)p = 0x11111111;
1397                        break;
1398                case 3:
1399                        *(uint32_t *)p = 0x10011001;
1400                        break;
1401                case 4:
1402                        *(uint32_t *)p = 0xdfbdbfff;
1403                        break;
1404                case 5:
1405                        *(uint32_t *)p = 0x11111111;
1406                        break;
1407                case 6:
1408                        *(uint32_t *)p = 0x11111111;
1409                        break;
1410                case 7:
1411                        *(uint32_t *)p = 0xdeadbeef;
1412                        break;
1413                case 8:
1414                        *(uint32_t *)p = 0xdeadbeef;
1415                        break;
1416                case 9:
1417                        *(uint32_t *)p = 0x11111111;
1418                        break;
1419                }
1420        }
1421
1422        printf("- %s (%s)\n", __func__, global ? "global" : "local");
1423
1424        init_prog(ctx, 40, 0, 0, src,
1425                  (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));
1426        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
1427                 40, 0, init);
1428        init_compute_resources(ctx, (int []) { 0, -1 });
1429        launch_grid(ctx, (uint []){10, 1, 1}, (uint []){1, 1, 1}, 0, NULL);
1430        check_tex(ctx, 0, expect, NULL);
1431        destroy_compute_resources(ctx);
1432        destroy_tex(ctx);
1433        destroy_prog(ctx);
1434}
1435
1436static void test_atom_race(struct context *ctx, bool global)
1437{
1438        const char *src = "COMP\n"
1439                "#ifdef TARGET_GLOBAL\n"
1440                "#define target RES[0]\n"
1441                "#else\n"
1442                "#define target RLOCAL\n"
1443                "#endif\n"
1444                ""
1445                "DCL RES[0], BUFFER, RAW, WR\n"
1446                ""
1447                "#define blockid SV[0]\n"
1448                "DCL blockid, BLOCK_ID[0]\n"
1449                "#define blocksz SV[1]\n"
1450                "DCL blocksz, BLOCK_SIZE[0]\n"
1451                "#define threadid SV[2]\n"
1452                "DCL threadid, THREAD_ID[0]\n"
1453                ""
1454                "#define offset TEMP[0]\n"
1455                "DCL offset, LOCAL\n"
1456                "#define arg TEMP[1]\n"
1457                "DCL arg, LOCAL\n"
1458                "#define count TEMP[2]\n"
1459                "DCL count, LOCAL\n"
1460                "#define vlocal TEMP[3]\n"
1461                "DCL vlocal, LOCAL\n"
1462                "#define vshared TEMP[4]\n"
1463                "DCL vshared, LOCAL\n"
1464                "#define last TEMP[5]\n"
1465                "DCL last, LOCAL\n"
1466                "#define tmp0 TEMP[6]\n"
1467                "DCL tmp0, LOCAL\n"
1468                "#define tmp1 TEMP[7]\n"
1469                "DCL tmp1, LOCAL\n"
1470                ""
1471                "#define k0 IMM[0]\n"
1472                "IMM UINT32 { 0, 0, 0, 0 }\n"
1473                "#define k1 IMM[1]\n"
1474                "IMM UINT32 { 1, 0, 0, 0 }\n"
1475                "#define k4 IMM[2]\n"
1476                "IMM UINT32 { 4, 0, 0, 0 }\n"
1477                "#define k32 IMM[3]\n"
1478                "IMM UINT32 { 32, 0, 0, 0 }\n"
1479                "#define k128 IMM[4]\n"
1480                "IMM UINT32 { 128, 0, 0, 0 }\n"
1481                "#define kdeadcafe IMM[5]\n"
1482                "IMM UINT32 { 3735931646, 0, 0, 0 }\n"
1483                "#define kallowed_set IMM[6]\n"
1484                "IMM UINT32 { 559035650, 0, 0, 0 }\n"
1485                "#define k11111111 IMM[7]\n"
1486                "IMM UINT32 { 286331153, 0, 0, 0 }\n"
1487                "\n"
1488                "    BGNSUB\n"
1489                "       MOV offset.x, threadid\n"
1490                "#ifdef TARGET_GLOBAL\n"
1491                "       UMUL tmp0.x, blockid, blocksz\n"
1492                "       UADD offset.x, offset, tmp0\n"
1493                "#endif\n"
1494                "       UMUL offset.x, offset, k4\n"
1495                "       USLT tmp0.x, threadid, k32\n"
1496                "       STORE target.x, offset, k0\n"
1497                "       BARRIER\n"
1498                "       IF tmp0\n"
1499                "               MOV vlocal.x, k0\n"
1500                "               MOV arg.x, kdeadcafe\n"
1501                "               BGNLOOP\n"
1502                "                       INEG arg.x, arg\n"
1503                "                       ATOMUADD vshared.x, target, offset, arg\n"
1504                "                       SFENCE target\n"
1505                "                       USNE tmp0.x, vshared, vlocal\n"
1506                "                       IF tmp0\n"
1507                "                               BRK\n"
1508                "                       ENDIF\n"
1509                "                       UADD vlocal.x, vlocal, arg\n"
1510                "               ENDLOOP\n"
1511                "               UADD vlocal.x, vshared, arg\n"
1512                "               LOAD vshared.x, target, offset\n"
1513                "               USEQ tmp0.x, vshared, vlocal\n"
1514                "               STORE target.x, offset, tmp0\n"
1515                "       ELSE\n"
1516                "               UADD offset.x, offset, -k128\n"
1517                "               MOV count.x, k0\n"
1518                "               MOV last.x, k0\n"
1519                "               BGNLOOP\n"
1520                "                       LOAD vshared.x, target, offset\n"
1521                "                       USEQ tmp0.x, vshared, kallowed_set.xxxx\n"
1522                "                       USEQ tmp1.x, vshared, kallowed_set.yyyy\n"
1523                "                       OR tmp0.x, tmp0, tmp1\n"
1524                "                       IF tmp0\n"
1525                "                               USEQ tmp0.x, vshared, last\n"
1526                "                               IF tmp0\n"
1527                "                                       CONT\n"
1528                "                               ENDIF\n"
1529                "                               MOV last.x, vshared\n"
1530                "                       ELSE\n"
1531                "                               END\n"
1532                "                       ENDIF\n"
1533                "                       UADD count.x, count, k1\n"
1534                "                       USEQ tmp0.x, count, k128\n"
1535                "                       IF tmp0\n"
1536                "                               BRK\n"
1537                "                       ENDIF\n"
1538                "               ENDLOOP\n"
1539                "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
1540                "               UADD offset.x, offset, k128\n"
1541                "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
1542                "               SFENCE target\n"
1543                "       ENDIF\n"
1544                "#ifdef TARGET_LOCAL\n"
1545                "       LOAD tmp0.x, RLOCAL, offset\n"
1546                "       UMUL tmp1.x, blockid, blocksz\n"
1547                "       UMUL tmp1.x, tmp1, k4\n"
1548                "       UADD offset.x, offset, tmp1\n"
1549                "       STORE RES[0].x, offset, tmp0\n"
1550                "#endif\n"
1551                "       RET\n"
1552                "    ENDSUB\n";
1553
1554        void init(void *p, int s, int x, int y) {
1555                *(uint32_t *)p = 0xdeadbeef;
1556        }
1557        void expect(void *p, int s, int x, int y) {
1558                *(uint32_t *)p = x & 0x20 ? 0x11111111 : 0xffffffff;
1559        }
1560
1561        printf("- %s (%s)\n", __func__, global ? "global" : "local");
1562
1563        init_prog(ctx, 256, 0, 0, src,
1564                  (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));
1565        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
1566                 4096, 0, init);
1567        init_compute_resources(ctx, (int []) { 0, -1 });
1568        launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
1569        check_tex(ctx, 0, expect, NULL);
1570        destroy_compute_resources(ctx);
1571        destroy_tex(ctx);
1572        destroy_prog(ctx);
1573}
1574
1575int main(int argc, char *argv[])
1576{
1577        struct context *ctx = CALLOC_STRUCT(context);
1578
1579        unsigned tests = (argc > 1) ? strtoul(argv[1], NULL, 0) : ~0;
1580
1581        init_ctx(ctx);
1582
1583        if (tests & (1 << 0))
1584           test_system_values(ctx);
1585        if (tests & (1 << 1))
1586           test_resource_access(ctx);
1587        if (tests & (1 << 2))
1588           test_function_calls(ctx);
1589        if (tests & (1 << 3))
1590           test_input_global(ctx);
1591        if (tests & (1 << 4))
1592           test_private(ctx);
1593        if (tests & (1 << 5))
1594           test_local(ctx);
1595        if (tests & (1 << 6))
1596           test_sample(ctx);
1597        if (tests & (1 << 7))
1598           test_many_kern(ctx);
1599        if (tests & (1 << 8))
1600           test_constant(ctx);
1601        if (tests & (1 << 9))
1602           test_resource_indirect(ctx);
1603        if (tests & (1 << 10))
1604           test_surface_ld(ctx);
1605        if (tests & (1 << 11))
1606           test_surface_st(ctx);
1607        if (tests & (1 << 12))
1608           test_barrier(ctx);
1609        if (tests & (1 << 13))
1610           test_atom_ops(ctx, true);
1611        if (tests & (1 << 14))
1612           test_atom_race(ctx, true);
1613        if (tests & (1 << 15))
1614           test_atom_ops(ctx, false);
1615        if (tests & (1 << 16))
1616           test_atom_race(ctx, false);
1617
1618        destroy_ctx(ctx);
1619
1620        return 0;
1621}
1622