1/************************************************************************** 2 * 3 * Copyright 2019 Advanced Micro Devices, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 * Authors: James Zhu <james.zhu<@amd.com> 27 * 28 **************************************************************************/ 29 30#include <assert.h> 31 32#include "tgsi/tgsi_text.h" 33#include "vl_compositor_cs.h" 34 35struct cs_viewport { 36 float scale_x; 37 float scale_y; 38 struct u_rect area; 39 int translate_x; 40 int translate_y; 41 float sampler0_w; 42 float sampler0_h; 43}; 44 45const char *compute_shader_video_buffer = 46 "COMP\n" 47 "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n" 48 "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n" 49 "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" 50 51 "DCL SV[0], THREAD_ID\n" 52 "DCL SV[1], BLOCK_ID\n" 53 54 "DCL CONST[0..5]\n" 55 "DCL SVIEW[0..2], RECT, FLOAT\n" 56 "DCL SAMP[0..2]\n" 57 58 "DCL IMAGE[0], 2D, WR\n" 59 "DCL TEMP[0..7]\n" 60 61 "IMM[0] UINT32 { 8, 8, 1, 0}\n" 62 "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n" 63 64 "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n" 65 66 /* Drawn area check */ 67 "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n" 68 "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n" 69 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n" 70 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n" 71 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n" 72 73 "UIF TEMP[1].xxxx\n" 74 /* Translate */ 75 "UADD TEMP[2].xy, TEMP[0].xyyy, -CONST[5].xyxy\n" 76 "U2F TEMP[2].xy, TEMP[2].xyyy\n" 77 "DIV TEMP[3].xy, TEMP[2].xyyy, IMM[1].yyyy\n" 78 79 /* Scale */ 80 "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwww\n" 81 "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwww\n" 82 83 /* Fetch texels */ 84 "TEX_LZ TEMP[4].x, TEMP[2].xyyy, SAMP[0], RECT\n" 85 "TEX_LZ TEMP[4].y, TEMP[3].xyyy, SAMP[1], RECT\n" 86 "TEX_LZ TEMP[4].z, TEMP[3].xyyy, SAMP[2], RECT\n" 87 88 "MOV TEMP[4].w, IMM[1].xxxx\n" 89 90 /* Color Space Conversion */ 91 "DP4 TEMP[7].x, CONST[0], TEMP[4]\n" 92 "DP4 TEMP[7].y, CONST[1], TEMP[4]\n" 93 "DP4 TEMP[7].z, CONST[2], TEMP[4]\n" 94 95 "MOV TEMP[5].w, TEMP[4].zzzz\n" 96 "SLE TEMP[6].w, TEMP[5].wwww, CONST[3].xxxx\n" 97 "SGT TEMP[5].w, TEMP[5].wwww, CONST[3].yyyy\n" 98 99 "MAX TEMP[7].w, TEMP[5].wwww, TEMP[6].wwww\n" 100 101 "STORE IMAGE[0], TEMP[0].xyyy, TEMP[7], 2D\n" 102 "ENDIF\n" 103 104 "END\n"; 105 106const char *compute_shader_weave = 107 "COMP\n" 108 "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n" 109 "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n" 110 "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" 111 112 "DCL SV[0], THREAD_ID\n" 113 "DCL SV[1], BLOCK_ID\n" 114 115 "DCL CONST[0..5]\n" 116 "DCL SVIEW[0..2], 2D_ARRAY, FLOAT\n" 117 "DCL SAMP[0..2]\n" 118 119 "DCL IMAGE[0], 2D, WR\n" 120 "DCL TEMP[0..15]\n" 121 122 "IMM[0] UINT32 { 8, 8, 1, 0}\n" 123 "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n" 124 "IMM[2] UINT32 { 1, 2, 4, 0}\n" 125 "IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n" 126 127 "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n" 128 129 /* Drawn area check */ 130 "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n" 131 "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n" 132 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n" 133 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n" 134 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n" 135 136 "UIF TEMP[1].xxxx\n" 137 "MOV TEMP[2].xy, TEMP[0].xyyy\n" 138 /* Translate */ 139 "UADD TEMP[2].xy, TEMP[2].xyyy, -CONST[5].xyxy\n" 140 141 /* Top Y */ 142 "U2F TEMP[2].xy, TEMP[2].xyyy\n" 143 "DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n" 144 /* Down Y */ 145 "MOV TEMP[12].xy, TEMP[2].xyyy\n" 146 147 /* Top UV */ 148 "MOV TEMP[3].xy, TEMP[2].xyyy\n" 149 "DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n" 150 /* Down UV */ 151 "MOV TEMP[13].xy, TEMP[3].xyyy\n" 152 153 /* Texture offset */ 154 "ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n" 155 "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n" 156 "ADD TEMP[12].x, TEMP[12].xxxx, IMM[3].yyyy\n" 157 "ADD TEMP[12].y, TEMP[12].yyyy, IMM[3].xxxx\n" 158 159 "ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].xxxx\n" 160 "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].wwww\n" 161 "ADD TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx\n" 162 "ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n" 163 164 /* Scale */ 165 "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n" 166 "DIV TEMP[12].xy, TEMP[12].xyyy, CONST[3].zwzw\n" 167 "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwzw\n" 168 "DIV TEMP[13].xy, TEMP[13].xyyy, CONST[3].zwzw\n" 169 170 /* Weave offset */ 171 "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n" 172 "ADD TEMP[12].y, TEMP[12].yyyy, -IMM[3].xxxx\n" 173 "ADD TEMP[3].y, TEMP[3].yyyy, IMM[3].xxxx\n" 174 "ADD TEMP[13].y, TEMP[13].yyyy, -IMM[3].xxxx\n" 175 176 /* Texture layer */ 177 "MOV TEMP[14].x, TEMP[2].yyyy\n" 178 "MOV TEMP[14].yz, TEMP[3].yyyy\n" 179 "ROUND TEMP[15].xyz, TEMP[14].xyzz\n" 180 "ADD TEMP[14].xyz, TEMP[14].xyzz, -TEMP[15].xyzz\n" 181 "MOV TEMP[14].xyz, |TEMP[14].xyzz|\n" 182 "MUL TEMP[14].xyz, TEMP[14].xyzz, IMM[1].yyyy\n" 183 184 /* Normalize */ 185 "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[5].zwzw\n" 186 "DIV TEMP[12].xy, TEMP[12].xyyy, CONST[5].zwzw\n" 187 "DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n" 188 "DIV TEMP[3].xy, TEMP[3].xyyy, TEMP[15].xyxy\n" 189 "DIV TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xyxy\n" 190 191 /* Fetch texels */ 192 "MOV TEMP[2].z, IMM[1].wwww\n" 193 "MOV TEMP[3].z, IMM[1].wwww\n" 194 "TEX_LZ TEMP[10].x, TEMP[2].xyzz, SAMP[0], 2D_ARRAY\n" 195 "TEX_LZ TEMP[10].y, TEMP[3].xyzz, SAMP[1], 2D_ARRAY\n" 196 "TEX_LZ TEMP[10].z, TEMP[3].xyzz, SAMP[2], 2D_ARRAY\n" 197 198 "MOV TEMP[12].z, IMM[1].xxxx\n" 199 "MOV TEMP[13].z, IMM[1].xxxx\n" 200 "TEX_LZ TEMP[11].x, TEMP[12].xyzz, SAMP[0], 2D_ARRAY\n" 201 "TEX_LZ TEMP[11].y, TEMP[13].xyzz, SAMP[1], 2D_ARRAY\n" 202 "TEX_LZ TEMP[11].z, TEMP[13].xyzz, SAMP[2], 2D_ARRAY\n" 203 204 "LRP TEMP[6].xyz, TEMP[14].xyzz, TEMP[10].xyzz, TEMP[11].xyzz\n" 205 "MOV TEMP[6].w, IMM[1].xxxx\n" 206 207 /* Color Space Conversion */ 208 "DP4 TEMP[9].x, CONST[0], TEMP[6]\n" 209 "DP4 TEMP[9].y, CONST[1], TEMP[6]\n" 210 "DP4 TEMP[9].z, CONST[2], TEMP[6]\n" 211 212 "MOV TEMP[7].w, TEMP[6].zzzz\n" 213 "SLE TEMP[8].w, TEMP[7].wwww, CONST[3].xxxx\n" 214 "SGT TEMP[7].w, TEMP[7].wwww, CONST[3].yyyy\n" 215 216 "MAX TEMP[9].w, TEMP[7].wwww, TEMP[8].wwww\n" 217 218 "STORE IMAGE[0], TEMP[0].xyyy, TEMP[9], 2D\n" 219 "ENDIF\n" 220 221 "END\n"; 222 223const char *compute_shader_rgba = 224 "COMP\n" 225 "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n" 226 "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n" 227 "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" 228 229 "DCL SV[0], THREAD_ID\n" 230 "DCL SV[1], BLOCK_ID\n" 231 232 "DCL CONST[0..5]\n" 233 "DCL SVIEW[0], RECT, FLOAT\n" 234 "DCL SAMP[0]\n" 235 236 "DCL IMAGE[0], 2D, WR\n" 237 "DCL TEMP[0..3]\n" 238 239 "IMM[0] UINT32 { 8, 8, 1, 0}\n" 240 "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n" 241 242 "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n" 243 244 /* Drawn area check */ 245 "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n" 246 "USLT TEMP[1].zw, TEMP[0].xyxy, CONST[4].zwzw\n" 247 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy\n" 248 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n" 249 "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n" 250 251 "UIF TEMP[1].xxxx\n" 252 /* Translate */ 253 "UADD TEMP[2].xy, TEMP[0].xyyy, -CONST[5].xyxy\n" 254 "U2F TEMP[2].xy, TEMP[2].xyyy\n" 255 256 /* Scale */ 257 "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n" 258 259 /* Fetch texels */ 260 "TEX_LZ TEMP[3], TEMP[2].xyyy, SAMP[0], RECT\n" 261 262 "STORE IMAGE[0], TEMP[0].xyyy, TEMP[3], 2D\n" 263 "ENDIF\n" 264 265 "END\n"; 266 267static void 268cs_launch(struct vl_compositor *c, 269 void *cs, 270 const struct u_rect *draw_area) 271{ 272 struct pipe_context *ctx = c->pipe; 273 274 /* Bind the image */ 275 struct pipe_image_view image = {}; 276 image.resource = c->fb_state.cbufs[0]->texture; 277 image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE; 278 image.format = c->fb_state.cbufs[0]->texture->format; 279 280 ctx->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, &image); 281 282 /* Bind compute shader */ 283 ctx->bind_compute_state(ctx, cs); 284 285 /* Dispatch compute */ 286 struct pipe_grid_info info = {}; 287 info.block[0] = 8; 288 info.block[1] = 8; 289 info.block[2] = 1; 290 info.grid[0] = DIV_ROUND_UP(draw_area->x1, info.block[0]); 291 info.grid[1] = DIV_ROUND_UP(draw_area->y1, info.block[1]); 292 info.grid[2] = 1; 293 294 ctx->launch_grid(ctx, &info); 295 296 /* Make the result visible to all clients. */ 297 ctx->memory_barrier(ctx, PIPE_BARRIER_ALL); 298 299} 300 301static inline struct u_rect 302calc_drawn_area(struct vl_compositor_state *s, 303 struct vl_compositor_layer *layer) 304{ 305 struct vertex2f tl, br; 306 struct u_rect result; 307 308 assert(s && layer); 309 310 tl = layer->dst.tl; 311 br = layer->dst.br; 312 313 /* Scale */ 314 result.x0 = tl.x * layer->viewport.scale[0] + layer->viewport.translate[0]; 315 result.y0 = tl.y * layer->viewport.scale[1] + layer->viewport.translate[1]; 316 result.x1 = br.x * layer->viewport.scale[0] + layer->viewport.translate[0]; 317 result.y1 = br.y * layer->viewport.scale[1] + layer->viewport.translate[1]; 318 319 /* Clip */ 320 result.x0 = MAX2(result.x0, s->scissor.minx); 321 result.y0 = MAX2(result.y0, s->scissor.miny); 322 result.x1 = MIN2(result.x1, s->scissor.maxx); 323 result.y1 = MIN2(result.y1, s->scissor.maxy); 324 return result; 325} 326 327static bool 328set_viewport(struct vl_compositor_state *s, 329 struct cs_viewport *drawn) 330{ 331 struct pipe_transfer *buf_transfer; 332 333 assert(s && drawn); 334 335 void *ptr = pipe_buffer_map(s->pipe, s->shader_params, 336 PIPE_TRANSFER_READ | PIPE_TRANSFER_WRITE, 337 &buf_transfer); 338 339 if (!ptr) 340 return false; 341 342 float *ptr_float = (float *)ptr; 343 ptr_float += sizeof(vl_csc_matrix)/sizeof(float) + 2; 344 *ptr_float++ = drawn->scale_x; 345 *ptr_float++ = drawn->scale_y; 346 347 int *ptr_int = (int *)ptr_float; 348 *ptr_int++ = drawn->area.x0; 349 *ptr_int++ = drawn->area.y0; 350 *ptr_int++ = drawn->area.x1; 351 *ptr_int++ = drawn->area.y1; 352 *ptr_int++ = drawn->translate_x; 353 *ptr_int++ = drawn->translate_y; 354 355 ptr_float = (float *)ptr_int; 356 *ptr_float++ = drawn->sampler0_w; 357 *ptr_float = drawn->sampler0_h; 358 pipe_buffer_unmap(s->pipe, buf_transfer); 359 360 return true; 361} 362 363static void 364draw_layers(struct vl_compositor *c, 365 struct vl_compositor_state *s, 366 struct u_rect *dirty) 367{ 368 unsigned i; 369 370 assert(c); 371 372 for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) { 373 if (s->used_layers & (1 << i)) { 374 struct vl_compositor_layer *layer = &s->layers[i]; 375 struct pipe_sampler_view **samplers = &layer->sampler_views[0]; 376 unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3; 377 struct cs_viewport drawn; 378 379 drawn.area = calc_drawn_area(s, layer); 380 drawn.scale_x = layer->viewport.scale[0] / 381 (float)layer->sampler_views[0]->texture->width0; 382 drawn.scale_y = drawn.scale_x; 383 drawn.translate_x = (int)layer->viewport.translate[0]; 384 drawn.translate_y = (int)layer->viewport.translate[1]; 385 drawn.sampler0_w = (float)layer->sampler_views[0]->texture->width0; 386 drawn.sampler0_h = (float)layer->sampler_views[0]->texture->height0; 387 set_viewport(s, &drawn); 388 389 c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0, 390 num_sampler_views, layer->samplers); 391 c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_COMPUTE, 0, 392 num_sampler_views, samplers); 393 394 cs_launch(c, layer->cs, &(drawn.area)); 395 396 /* Unbind. */ 397 c->pipe->set_shader_images(c->pipe, PIPE_SHADER_COMPUTE, 0, 1, NULL); 398 c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, NULL); 399 c->pipe->set_sampler_views(c->pipe, PIPE_SHADER_FRAGMENT, 0, 400 num_sampler_views, NULL); 401 c->pipe->bind_compute_state(c->pipe, NULL); 402 c->pipe->bind_sampler_states(c->pipe, PIPE_SHADER_COMPUTE, 0, 403 num_sampler_views, NULL); 404 405 if (dirty) { 406 struct u_rect drawn = calc_drawn_area(s, layer); 407 dirty->x0 = MIN2(drawn.x0, dirty->x0); 408 dirty->y0 = MIN2(drawn.y0, dirty->y0); 409 dirty->x1 = MAX2(drawn.x1, dirty->x1); 410 dirty->y1 = MAX2(drawn.y1, dirty->y1); 411 } 412 } 413 } 414} 415 416void * 417vl_compositor_cs_create_shader(struct vl_compositor *c, 418 const char *compute_shader_text) 419{ 420 assert(c && compute_shader_text); 421 422 struct tgsi_token tokens[1024]; 423 if (!tgsi_text_translate(compute_shader_text, tokens, ARRAY_SIZE(tokens))) { 424 assert(0); 425 return NULL; 426 } 427 428 struct pipe_compute_state state = {}; 429 state.ir_type = PIPE_SHADER_IR_TGSI; 430 state.prog = tokens; 431 432 /* create compute shader */ 433 return c->pipe->create_compute_state(c->pipe, &state); 434} 435 436void 437vl_compositor_cs_render(struct vl_compositor_state *s, 438 struct vl_compositor *c, 439 struct pipe_surface *dst_surface, 440 struct u_rect *dirty_area, 441 bool clear_dirty) 442{ 443 assert(c && s); 444 assert(dst_surface); 445 446 c->fb_state.width = dst_surface->width; 447 c->fb_state.height = dst_surface->height; 448 c->fb_state.cbufs[0] = dst_surface; 449 450 if (!s->scissor_valid) { 451 s->scissor.minx = 0; 452 s->scissor.miny = 0; 453 s->scissor.maxx = dst_surface->width; 454 s->scissor.maxy = dst_surface->height; 455 } 456 457 if (clear_dirty && dirty_area && 458 (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) { 459 460 c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color, 461 0, 0, dst_surface->width, dst_surface->height, false); 462 dirty_area->x0 = dirty_area->y0 = VL_COMPOSITOR_MAX_DIRTY; 463 dirty_area->x1 = dirty_area->y1 = VL_COMPOSITOR_MIN_DIRTY; 464 } 465 466 pipe_set_constant_buffer(c->pipe, PIPE_SHADER_COMPUTE, 0, s->shader_params); 467 468 draw_layers(c, s, dirty_area); 469} 470