1/**************************************************************************
2 *
3 * Copyright 2019 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
 * Authors: James Zhu <james.zhu@amd.com>
27 *
28 **************************************************************************/
29
30#include <assert.h>
31
32#include "tgsi/tgsi_text.h"
33#include "vl_compositor_cs.h"
34
35struct cs_viewport {
36   float scale_x;
37   float scale_y;
38   struct u_rect area;
39   int translate_x;
40   int translate_y;
41   float sampler0_w;
42   float sampler0_h;
43};
44
/*
 * TGSI compute shader that samples three RECT textures, runs the result
 * through a 3x4 matrix and stores it into IMAGE[0].
 *
 * Each thread handles one destination pixel; threads are grouped in 8x8x1
 * blocks.
 *
 * Constant buffer layout, as filled in by set_viewport() in this file:
 *   CONST[0..2]   rows of the colour-space-conversion matrix
 *   CONST[3].xy   lower / upper threshold used to derive alpha
 *                 (presumably luma-key limits; written outside this file's
 *                 visible code)
 *   CONST[3].zw   scale_x, scale_y
 *   CONST[4]      drawn area x0, y0, x1, y1 (integers)
 *   CONST[5].xy   translate_x, translate_y (integers)
 *   CONST[5].zw   sampler0_w, sampler0_h (unused by this shader)
 */
const char *compute_shader_video_buffer =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"

      "DCL CONST[0..5]\n"
      "DCL SVIEW[0..2], RECT, FLOAT\n"
      "DCL SAMP[0..2]\n"

      "DCL IMAGE[0], 2D, WR\n"
      "DCL TEMP[0..7]\n"

      "IMM[0] UINT32 { 8, 8, 1, 0}\n"
      "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"

      /* TEMP[0].xy = BLOCK_ID * 8 + THREAD_ID: the destination pixel */
      "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"

      /* Drawn area check: x >= x0, y >= y0, x < x1 and y < y1 must all hold */
      "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
      "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

      "UIF TEMP[1].xxxx\n"
         /* Translate: TEMP[2] is the pixel relative to the layer origin,
          * TEMP[3] the same position halved (used for samplers 1 and 2,
          * presumably half-resolution chroma planes) */
         "UADD TEMP[2].xy, TEMP[0].xyyy, -CONST[5].xyxy\n"
         "U2F TEMP[2].xy, TEMP[2].xyyy\n"
         "DIV TEMP[3].xy, TEMP[2].xyyy, IMM[1].yyyy\n"

         /* Scale: divide both coordinate sets by (scale_x, scale_y) */
         "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwww\n"
         "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwww\n"

         /* Fetch texels: one component from each of the three samplers */
         "TEX_LZ TEMP[4].x, TEMP[2].xyyy, SAMP[0], RECT\n"
         "TEX_LZ TEMP[4].y, TEMP[3].xyyy, SAMP[1], RECT\n"
         "TEX_LZ TEMP[4].z, TEMP[3].xyyy, SAMP[2], RECT\n"

         /* w = 1.0 so the fourth matrix column acts as a constant offset */
         "MOV TEMP[4].w, IMM[1].xxxx\n"

         /* Color Space Conversion */
         "DP4 TEMP[7].x, CONST[0], TEMP[4]\n"
         "DP4 TEMP[7].y, CONST[1], TEMP[4]\n"
         "DP4 TEMP[7].z, CONST[2], TEMP[4]\n"

         /* Alpha: 1.0 when the fetched z component is <= CONST[3].x or
          * > CONST[3].y, otherwise 0.0 */
         "MOV TEMP[5].w, TEMP[4].zzzz\n"
         "SLE TEMP[6].w, TEMP[5].wwww, CONST[3].xxxx\n"
         "SGT TEMP[5].w, TEMP[5].wwww, CONST[3].yyyy\n"

         "MAX TEMP[7].w, TEMP[5].wwww, TEMP[6].wwww\n"

         "STORE IMAGE[0], TEMP[0].xyyy, TEMP[7], 2D\n"
      "ENDIF\n"

      "END\n";
105
/*
 * TGSI compute shader that reads three two-layer 2D array textures, blends
 * the samples taken from layer 0 and layer 1, runs the result through a 3x4
 * matrix and stores it into IMAGE[0].  (The two layers are presumably the
 * top and bottom fields of an interlaced frame, as the "Top"/"Down" comments
 * below suggest.)
 *
 * Each thread handles one destination pixel; threads are grouped in 8x8x1
 * blocks.
 *
 * Constant buffer layout, as filled in by set_viewport() in this file:
 *   CONST[0..2]   rows of the colour-space-conversion matrix
 *   CONST[3].xy   lower / upper threshold used to derive alpha
 *                 (presumably luma-key limits; written outside this file's
 *                 visible code)
 *   CONST[3].zw   scale_x, scale_y
 *   CONST[4]      drawn area x0, y0, x1, y1 (integers)
 *   CONST[5].xy   translate_x, translate_y (integers)
 *   CONST[5].zw   sampler0_w, sampler0_h, used to normalize coordinates
 */
const char *compute_shader_weave =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"

      "DCL CONST[0..5]\n"
      "DCL SVIEW[0..2], 2D_ARRAY, FLOAT\n"
      "DCL SAMP[0..2]\n"

      "DCL IMAGE[0], 2D, WR\n"
      "DCL TEMP[0..15]\n"

      "IMM[0] UINT32 { 8, 8, 1, 0}\n"
      "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"
      "IMM[2] UINT32 { 1, 2, 4, 0}\n"
      "IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n"

      /* TEMP[0].xy = BLOCK_ID * 8 + THREAD_ID: the destination pixel */
      "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"

      /* Drawn area check: x >= x0, y >= y0, x < x1 and y < y1 must all hold */
      "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
      "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

      "UIF TEMP[1].xxxx\n"
         "MOV TEMP[2].xy, TEMP[0].xyyy\n"
         /* Translate */
         "UADD TEMP[2].xy, TEMP[2].xyyy, -CONST[5].xyxy\n"

         /* Top Y: TEMP[2] = (x, y / 2), sampled from layer 0 with SAMP[0] */
         "U2F TEMP[2].xy, TEMP[2].xyyy\n"
         "DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n"
         /* Down Y: TEMP[12] starts as a copy, sampled from layer 1 */
         "MOV TEMP[12].xy, TEMP[2].xyyy\n"

         /* Top UV: TEMP[3] = TEMP[2] / 2, sampled with SAMP[1] and SAMP[2] */
         "MOV TEMP[3].xy, TEMP[2].xyyy\n"
         "DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n"
         /* Down UV */
         "MOV TEMP[13].xy, TEMP[3].xyyy\n"

         /* Texture offset: (+0.5, +0.25) for TEMP[2]/TEMP[12],
          * (+0.25, +0.125) for TEMP[3]/TEMP[13] */
         "ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n"
         "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
         "ADD TEMP[12].x, TEMP[12].xxxx, IMM[3].yyyy\n"
         "ADD TEMP[12].y, TEMP[12].yyyy, IMM[3].xxxx\n"

         "ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].xxxx\n"
         "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].wwww\n"
         "ADD TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx\n"
         "ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n"

         /* Scale: divide all four coordinate sets by (scale_x, scale_y) */
         "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n"
         "DIV TEMP[12].xy, TEMP[12].xyyy, CONST[3].zwzw\n"
         "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwzw\n"
         "DIV TEMP[13].xy, TEMP[13].xyyy, CONST[3].zwzw\n"

         /* Weave offset: +0.25 in y for the layer-0 coordinates,
          * -0.25 in y for the layer-1 coordinates */
         "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n"
         "ADD TEMP[12].y, TEMP[12].yyyy, -IMM[3].xxxx\n"
         "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].xxxx\n"
         "ADD TEMP[13].y, TEMP[13].yyyy, -IMM[3].xxxx\n"

         /* Texture layer: TEMP[14] = 2 * |y - round(y)| per plane, used
          * further down as the LRP weight between the two layers */
         "MOV TEMP[14].x, TEMP[2].yyyy\n"
         "MOV TEMP[14].yz, TEMP[3].yyyy\n"
         "ROUND TEMP[15].xyz, TEMP[14].xyzz\n"
         "ADD TEMP[14].xyz, TEMP[14].xyzz, -TEMP[15].xyzz\n"
         "MOV TEMP[14].xyz, |TEMP[14].xyzz|\n"
         "MUL TEMP[14].xyz, TEMP[14].xyzz, IMM[1].yyyy\n"

         /* Normalize: TEMP[2]/TEMP[12] by the sampler 0 size,
          * TEMP[3]/TEMP[13] by half that size */
         "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[5].zwzw\n"
         "DIV TEMP[12].xy, TEMP[12].xyyy, CONST[5].zwzw\n"
         "DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n"
         "DIV TEMP[3].xy, TEMP[3].xyyy, TEMP[15].xyxy\n"
         "DIV TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xyxy\n"

         /* Fetch texels: z = 0.0 selects array layer 0 ... */
         "MOV TEMP[2].z, IMM[1].wwww\n"
         "MOV TEMP[3].z, IMM[1].wwww\n"
         "TEX_LZ TEMP[10].x, TEMP[2].xyzz, SAMP[0], 2D_ARRAY\n"
         "TEX_LZ TEMP[10].y, TEMP[3].xyzz, SAMP[1], 2D_ARRAY\n"
         "TEX_LZ TEMP[10].z, TEMP[3].xyzz, SAMP[2], 2D_ARRAY\n"

         /* ... and z = 1.0 selects array layer 1 */
         "MOV TEMP[12].z, IMM[1].xxxx\n"
         "MOV TEMP[13].z, IMM[1].xxxx\n"
         "TEX_LZ TEMP[11].x, TEMP[12].xyzz, SAMP[0], 2D_ARRAY\n"
         "TEX_LZ TEMP[11].y, TEMP[13].xyzz, SAMP[1], 2D_ARRAY\n"
         "TEX_LZ TEMP[11].z, TEMP[13].xyzz, SAMP[2], 2D_ARRAY\n"

         /* Blend both layers; w = 1.0 so the fourth matrix column acts as
          * a constant offset */
         "LRP TEMP[6].xyz, TEMP[14].xyzz, TEMP[10].xyzz, TEMP[11].xyzz\n"
         "MOV TEMP[6].w, IMM[1].xxxx\n"

         /* Color Space Conversion */
         "DP4 TEMP[9].x, CONST[0], TEMP[6]\n"
         "DP4 TEMP[9].y, CONST[1], TEMP[6]\n"
         "DP4 TEMP[9].z, CONST[2], TEMP[6]\n"

         /* Alpha: 1.0 when the blended z component is <= CONST[3].x or
          * > CONST[3].y, otherwise 0.0 */
         "MOV TEMP[7].w, TEMP[6].zzzz\n"
         "SLE TEMP[8].w, TEMP[7].wwww, CONST[3].xxxx\n"
         "SGT TEMP[7].w, TEMP[7].wwww, CONST[3].yyyy\n"

         "MAX TEMP[9].w, TEMP[7].wwww, TEMP[8].wwww\n"

         "STORE IMAGE[0], TEMP[0].xyyy, TEMP[9], 2D\n"
      "ENDIF\n"

      "END\n";
222
/*
 * TGSI compute shader that copies texels from a single RECT texture into
 * IMAGE[0], applying only translation and scaling (no matrix, no alpha
 * computation).
 *
 * Each thread handles one destination pixel; threads are grouped in 8x8x1
 * blocks.
 *
 * Constants used here, as filled in by set_viewport() in this file:
 *   CONST[3].zw   scale_x, scale_y
 *   CONST[4]      drawn area x0, y0, x1, y1 (integers)
 *   CONST[5].xy   translate_x, translate_y (integers)
 */
const char *compute_shader_rgba =
      "COMP\n"
      "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
      "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"

      "DCL SV[0], THREAD_ID\n"
      "DCL SV[1], BLOCK_ID\n"

      "DCL CONST[0..5]\n"
      "DCL SVIEW[0], RECT, FLOAT\n"
      "DCL SAMP[0]\n"

      "DCL IMAGE[0], 2D, WR\n"
      "DCL TEMP[0..3]\n"

      "IMM[0] UINT32 { 8, 8, 1, 0}\n"
      "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n"

      /* TEMP[0].xy = BLOCK_ID * 8 + THREAD_ID: the destination pixel */
      "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n"

      /* Drawn area check: x >= x0, y >= y0, x < x1 and y < y1 must all hold */
      "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n"
      "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n"
      "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n"

      "UIF TEMP[1].xxxx\n"
         /* Translate: pixel position relative to the layer origin */
         "UADD TEMP[2].xy, TEMP[0].xyyy, -CONST[5].xyxy\n"
         "U2F TEMP[2].xy, TEMP[2].xyyy\n"

         /* Scale: divide by (scale_x, scale_y) to get source texel coords */
         "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n"

         /* Fetch texels: all four components come from sampler 0 */
         "TEX_LZ TEMP[3], TEMP[2].xyyy, SAMP[0], RECT\n"

         "STORE IMAGE[0], TEMP[0].xyyy, TEMP[3], 2D\n"
      "ENDIF\n"

      "END\n";
266
267static void
268cs_launch(struct vl_compositor *c,
269          void                 *cs,
270          const struct u_rect  *draw_area)
271{
272   struct pipe_context *ctx = c->pipe;
273
274   /* Bind the image */
275   struct pipe_image_view image = {};
276   image.resource = c->fb_state.cbufs[0]->texture;
277   image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE;
278   image.format = c->fb_state.cbufs[0]->texture->format;
279
280   ctx->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, &image);
281
282   /* Bind compute shader */
283   ctx->bind_compute_state(ctx, cs);
284
285   /* Dispatch compute */
286   struct pipe_grid_info info = {};
287   info.block[0] = 8;
288   info.block[1] = 8;
289   info.block[2] = 1;
290   info.grid[0] = DIV_ROUND_UP(draw_area->x1, info.block[0]);
291   info.grid[1] = DIV_ROUND_UP(draw_area->y1, info.block[1]);
292   info.grid[2] = 1;
293
294   ctx->launch_grid(ctx, &info);
295
296   /* Make the result visible to all clients. */
297   ctx->memory_barrier(ctx, PIPE_BARRIER_ALL);
298
299}
300
301static inline struct u_rect
302calc_drawn_area(struct vl_compositor_state *s,
303                struct vl_compositor_layer *layer)
304{
305   struct vertex2f tl, br;
306   struct u_rect result;
307
308   assert(s && layer);
309
310   tl = layer->dst.tl;
311   br = layer->dst.br;
312
313   /* Scale */
314   result.x0 = tl.x * layer->viewport.scale[0] + layer->viewport.translate[0];
315   result.y0 = tl.y * layer->viewport.scale[1] + layer->viewport.translate[1];
316   result.x1 = br.x * layer->viewport.scale[0] + layer->viewport.translate[0];
317   result.y1 = br.y * layer->viewport.scale[1] + layer->viewport.translate[1];
318
319   /* Clip */
320   result.x0 = MAX2(result.x0, s->scissor.minx);
321   result.y0 = MAX2(result.y0, s->scissor.miny);
322   result.x1 = MIN2(result.x1, s->scissor.maxx);
323   result.y1 = MIN2(result.y1, s->scissor.maxy);
324   return result;
325}
326
327static bool
328set_viewport(struct vl_compositor_state *s,
329             struct cs_viewport         *drawn)
330{
331   struct pipe_transfer *buf_transfer;
332
333   assert(s && drawn);
334
335   void *ptr = pipe_buffer_map(s->pipe, s->shader_params,
336                               PIPE_TRANSFER_READ | PIPE_TRANSFER_WRITE,
337                               &buf_transfer);
338
339   if (!ptr)
340     return false;
341
342   float *ptr_float = (float *)ptr;
343   ptr_float += sizeof(vl_csc_matrix)/sizeof(float) + 2;
344   *ptr_float++ = drawn->scale_x;
345   *ptr_float++ = drawn->scale_y;
346
347   int *ptr_int = (int *)ptr_float;
348   *ptr_int++ = drawn->area.x0;
349   *ptr_int++ = drawn->area.y0;
350   *ptr_int++ = drawn->area.x1;
351   *ptr_int++ = drawn->area.y1;
352   *ptr_int++ = drawn->translate_x;
353   *ptr_int++ = drawn->translate_y;
354
355   ptr_float = (float *)ptr_int;
356   *ptr_float++ = drawn->sampler0_w;
357   *ptr_float = drawn->sampler0_h;
358   pipe_buffer_unmap(s->pipe, buf_transfer);
359
360   return true;
361}
362
363static void
364draw_layers(struct vl_compositor       *c,
365            struct vl_compositor_state *s,
366            struct u_rect              *dirty)
367{
368   unsigned i;
369
370   assert(c);
371
372   for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
373      if (s->used_layers & (1 << i)) {
374         struct vl_compositor_layer *layer = &s->layers[i];
375         struct pipe_sampler_view **samplers = &layer->sampler_views[0];
376         unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;
377         struct cs_viewport drawn;
378
379         drawn.area = calc_drawn_area(s, layer);
380         drawn.scale_x = layer->viewport.scale[0] /
381                  (float)layer->sampler_views[0]->texture->width0;
382         drawn.scale_y = drawn.scale_x;
383         drawn.translate_x = (int)layer->viewport.translate[0];
384         drawn.translate_y = (int)layer->viewport.translate[1];
385         drawn.sampler0_w = (float)layer->sampler_views[0]->texture->width0;
386         drawn.sampler0_h = (float)layer->sampler_views[0]->texture->height0;
387         set_viewport(s, &drawn);
388
389         c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
390                        num_sampler_views, layer->samplers);
391         c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0,
392                        num_sampler_views, samplers);
393
394         cs_launch(c, layer->cs, &(drawn.area));
395
396         /* Unbind. */
397         c->pipe->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, NULL);
398         c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, NULL);
399         c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_FRAGMENT, 0,
400                        num_sampler_views, NULL);
401         c->pipe->bind_compute_state(c->pipe, NULL);
402         c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0,
403                        num_sampler_views, NULL);
404
405         if (dirty) {
406            struct u_rect drawn = calc_drawn_area(s, layer);
407            dirty->x0 = MIN2(drawn.x0, dirty->x0);
408            dirty->y0 = MIN2(drawn.y0, dirty->y0);
409            dirty->x1 = MAX2(drawn.x1, dirty->x1);
410            dirty->y1 = MAX2(drawn.y1, dirty->y1);
411         }
412      }
413   }
414}
415
416void *
417vl_compositor_cs_create_shader(struct vl_compositor *c,
418                               const char           *compute_shader_text)
419{
420   assert(c && compute_shader_text);
421
422   struct tgsi_token tokens[1024];
423   if (!tgsi_text_translate(compute_shader_text, tokens, ARRAY_SIZE(tokens))) {
424      assert(0);
425      return NULL;
426   }
427
428   struct pipe_compute_state state = {};
429   state.ir_type = PIPE_SHADER_IR_TGSI;
430   state.prog = tokens;
431
432   /* create compute shader */
433   return c->pipe->create_compute_state(c->pipe, &state);
434}
435
436void
437vl_compositor_cs_render(struct vl_compositor_state *s,
438                        struct vl_compositor       *c,
439                        struct pipe_surface        *dst_surface,
440                        struct u_rect              *dirty_area,
441                        bool                        clear_dirty)
442{
443   assert(c && s);
444   assert(dst_surface);
445
446   c->fb_state.width = dst_surface->width;
447   c->fb_state.height = dst_surface->height;
448   c->fb_state.cbufs[0] = dst_surface;
449
450   if (!s->scissor_valid) {
451      s->scissor.minx = 0;
452      s->scissor.miny = 0;
453      s->scissor.maxx = dst_surface->width;
454      s->scissor.maxy = dst_surface->height;
455   }
456
457   if (clear_dirty && dirty_area &&
458       (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) {
459
460      c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color,
461                       0, 0, dst_surface->width, dst_surface->height, false);
462      dirty_area->x0 = dirty_area->y0 = VL_COMPOSITOR_MAX_DIRTY;
463      dirty_area->x1 = dirty_area->y1 = VL_COMPOSITOR_MIN_DIRTY;
464   }
465
466   pipe_set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, s->shader_params);
467
468   draw_layers(c, s, dirty_area);
469}
470