1/* 2 * Copyright © 2016 Red Hat 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include <stdbool.h> 25 26#include "st_tgsi_lower_yuv.h" 27#include "tgsi/tgsi_transform.h" 28#include "tgsi/tgsi_scan.h" 29#include "util/u_debug.h" 30 31#include "util/bitscan.h" 32 33struct tgsi_yuv_transform { 34 struct tgsi_transform_context base; 35 struct tgsi_shader_info info; 36 struct tgsi_full_src_register imm[4]; 37 struct { 38 struct tgsi_full_src_register src; 39 struct tgsi_full_dst_register dst; 40 } tmp[2]; 41#define A 0 42#define B 1 43 44 /* Maps a primary sampler (used for Y) to the U or UV sampler. In 45 * case of 3-plane YUV format, the V plane is next sampler after U. 46 */ 47 unsigned char sampler_map[PIPE_MAX_SAMPLERS][2]; 48 49 bool first_instruction_emitted; 50 unsigned free_slots; 51 unsigned lower_nv12; 52 unsigned lower_iyuv; 53}; 54 55static inline struct tgsi_yuv_transform * 56tgsi_yuv_transform(struct tgsi_transform_context *tctx) 57{ 58 return (struct tgsi_yuv_transform *)tctx; 59} 60 61static void 62reg_dst(struct tgsi_full_dst_register *dst, 63 const struct tgsi_full_dst_register *orig_dst, unsigned wrmask) 64{ 65 *dst = *orig_dst; 66 dst->Register.WriteMask &= wrmask; 67 assert(dst->Register.WriteMask); 68} 69 70static inline void 71get_swiz(unsigned *swiz, const struct tgsi_src_register *src) 72{ 73 swiz[0] = src->SwizzleX; 74 swiz[1] = src->SwizzleY; 75 swiz[2] = src->SwizzleZ; 76 swiz[3] = src->SwizzleW; 77} 78 79static void 80reg_src(struct tgsi_full_src_register *src, 81 const struct tgsi_full_src_register *orig_src, 82 unsigned sx, unsigned sy, unsigned sz, unsigned sw) 83{ 84 unsigned swiz[4]; 85 get_swiz(swiz, &orig_src->Register); 86 *src = *orig_src; 87 src->Register.SwizzleX = swiz[sx]; 88 src->Register.SwizzleY = swiz[sy]; 89 src->Register.SwizzleZ = swiz[sz]; 90 src->Register.SwizzleW = swiz[sw]; 91} 92 93#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */ 94#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \ 95 TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w 96 97static inline struct tgsi_full_instruction 98tex_instruction(unsigned samp) 99{ 100 struct tgsi_full_instruction inst; 101 102 inst = tgsi_default_full_instruction(); 103 inst.Instruction.Opcode = TGSI_OPCODE_TEX; 104 inst.Instruction.Texture = 1; 105 inst.Texture.Texture = TGSI_TEXTURE_2D; 106 inst.Instruction.NumDstRegs = 1; 107 inst.Instruction.NumSrcRegs = 2; 108 inst.Src[1].Register.File = TGSI_FILE_SAMPLER; 109 inst.Src[1].Register.Index = samp; 110 111 return inst; 112} 113 114static inline struct tgsi_full_instruction 115mov_instruction(void) 116{ 117 struct tgsi_full_instruction inst; 118 119 inst = tgsi_default_full_instruction(); 120 inst.Instruction.Opcode = TGSI_OPCODE_MOV; 121 inst.Instruction.Saturate = 0; 122 inst.Instruction.NumDstRegs = 1; 123 inst.Instruction.NumSrcRegs = 1; 124 125 return inst; 126} 127 128static inline struct tgsi_full_instruction 129dp3_instruction(void) 130{ 131 struct tgsi_full_instruction inst; 132 133 inst = tgsi_default_full_instruction(); 134 inst.Instruction.Opcode = TGSI_OPCODE_DP3; 135 inst.Instruction.NumDstRegs = 1; 136 inst.Instruction.NumSrcRegs = 2; 137 138 return inst; 139} 140 141 142 143static void 144emit_immed(struct tgsi_transform_context *tctx, int idx, 145 float x, float y, float z, float w) 146{ 147 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); 148 struct tgsi_shader_info *info = &ctx->info; 149 struct tgsi_full_immediate immed; 150 151 immed = tgsi_default_full_immediate(); 152 immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */ 153 immed.u[0].Float = x; 154 immed.u[1].Float = y; 155 immed.u[2].Float = z; 156 immed.u[3].Float = w; 157 tctx->emit_immediate(tctx, &immed); 158 159 ctx->imm[idx].Register.File = TGSI_FILE_IMMEDIATE; 160 ctx->imm[idx].Register.Index = info->immediate_count + idx; 161 ctx->imm[idx].Register.SwizzleX = TGSI_SWIZZLE_X; 162 ctx->imm[idx].Register.SwizzleY = TGSI_SWIZZLE_Y; 163 ctx->imm[idx].Register.SwizzleZ = TGSI_SWIZZLE_Z; 164 ctx->imm[idx].Register.SwizzleW = TGSI_SWIZZLE_W; 165} 166 167static void 168emit_samp(struct tgsi_transform_context *tctx, unsigned samp) 169{ 170 tgsi_transform_sampler_decl(tctx, samp); 171 tgsi_transform_sampler_view_decl(tctx, samp, PIPE_TEXTURE_2D, 172 TGSI_RETURN_TYPE_FLOAT); 173} 174 175/* Emit extra declarations we need: 176 * + 2 TEMP to hold intermediate results 177 * + 1 (for 2-plane YUV) or 2 (for 3-plane YUV) extra samplers per 178 * lowered YUV sampler 179 * + extra immediates for doing CSC 180 */ 181static void 182emit_decls(struct tgsi_transform_context *tctx) 183{ 184 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); 185 struct tgsi_shader_info *info = &ctx->info; 186 unsigned mask, tempbase, i; 187 struct tgsi_full_declaration decl; 188 189 /* 190 * Declare immediates for CSC conversion: 191 */ 192 193 /* ITU-R BT.601 conversion */ 194 emit_immed(tctx, 0, 1.164f, 0.000f, 1.596f, 0.0f); 195 emit_immed(tctx, 1, 1.164f, -0.392f, -0.813f, 0.0f); 196 emit_immed(tctx, 2, 1.164f, 2.017f, 0.000f, 0.0f); 197 emit_immed(tctx, 3, 0.0625f, 0.500f, 0.500f, 1.0f); 198 199 /* 200 * Declare extra samplers / sampler-views: 201 */ 202 203 mask = ctx->lower_nv12 | ctx->lower_iyuv; 204 while (mask) { 205 unsigned extra, y_samp = u_bit_scan(&mask); 206 207 extra = u_bit_scan(&ctx->free_slots); 208 ctx->sampler_map[y_samp][0] = extra; 209 emit_samp(tctx, extra); 210 211 if (ctx->lower_iyuv & (1 << y_samp)) { 212 extra = u_bit_scan(&ctx->free_slots); 213 ctx->sampler_map[y_samp][1] = extra; 214 emit_samp(tctx, extra); 215 } 216 } 217 218 /* 219 * Declare extra temp: 220 */ 221 222 tempbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; 223 224 for (i = 0; i < 2; i++) { 225 decl = tgsi_default_full_declaration(); 226 decl.Declaration.File = TGSI_FILE_TEMPORARY; 227 decl.Range.First = decl.Range.Last = tempbase + i; 228 tctx->emit_declaration(tctx, &decl); 229 230 ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY; 231 ctx->tmp[i].src.Register.Index = tempbase + i; 232 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X; 233 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y; 234 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z; 235 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W; 236 237 ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY; 238 ctx->tmp[i].dst.Register.Index = tempbase + i; 239 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW; 240 } 241} 242 243/* call with YUV in tmpA.xyz */ 244static void 245yuv_to_rgb(struct tgsi_transform_context *tctx, 246 struct tgsi_full_dst_register *dst) 247{ 248 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); 249 struct tgsi_full_instruction inst; 250 251 /* 252 * IMM[0] FLT32 { 1.164, 0.000, 1.596, 0.0 } 253 * IMM[1] FLT32 { 1.164, -0.392, -0.813, 0.0 } 254 * IMM[2] FLT32 { 1.164, 2.017, 0.000, 0.0 } 255 * IMM[3] FLT32 { 0.0625, 0.500, 0.500, 1.0 } 256 */ 257 258 /* SUB tmpA.xyz, tmpA, imm[3] */ 259 inst = tgsi_default_full_instruction(); 260 inst.Instruction.Opcode = TGSI_OPCODE_ADD; 261 inst.Instruction.Saturate = 0; 262 inst.Instruction.NumDstRegs = 1; 263 inst.Instruction.NumSrcRegs = 2; 264 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ); 265 reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _)); 266 reg_src(&inst.Src[1], &ctx->imm[3], SWIZ(X, Y, Z, _)); 267 inst.Src[1].Register.Negate = 1; 268 tctx->emit_instruction(tctx, &inst); 269 270 /* DP3 dst.x, tmpA, imm[0] */ 271 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { 272 inst = dp3_instruction(); 273 reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X); 274 reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 275 reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W)); 276 tctx->emit_instruction(tctx, &inst); 277 } 278 279 /* DP3 dst.y, tmpA, imm[1] */ 280 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 281 inst = dp3_instruction(); 282 reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y); 283 reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 284 reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W)); 285 tctx->emit_instruction(tctx, &inst); 286 } 287 288 /* DP3 dst.z, tmpA, imm[2] */ 289 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 290 inst = dp3_instruction(); 291 reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z); 292 reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 293 reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W)); 294 tctx->emit_instruction(tctx, &inst); 295 } 296 297 /* MOV dst.w, imm[0].x */ 298 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 299 inst = mov_instruction(); 300 reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W); 301 reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W)); 302 tctx->emit_instruction(tctx, &inst); 303 } 304} 305 306static void 307lower_nv12(struct tgsi_transform_context *tctx, 308 struct tgsi_full_instruction *originst) 309{ 310 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); 311 struct tgsi_full_instruction inst; 312 struct tgsi_full_src_register *coord = &originst->Src[0]; 313 unsigned samp = originst->Src[1].Register.Index; 314 315 /* sample Y: 316 * TEX tempA.x, coord, texture[samp], 2D; 317 */ 318 inst = tex_instruction(samp); 319 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 320 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W)); 321 tctx->emit_instruction(tctx, &inst); 322 323 /* sample UV: 324 * TEX tempB.xy, coord, texture[sampler_map[samp][0]], 2D; 325 * MOV tempA.yz, tempB._xy_ 326 */ 327 inst = tex_instruction(ctx->sampler_map[samp][0]); 328 reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XY); 329 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W)); 330 tctx->emit_instruction(tctx, &inst); 331 332 inst = mov_instruction(); 333 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_YZ); 334 reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, Y, _)); 335 tctx->emit_instruction(tctx, &inst); 336 337 /* At this point, we have YUV in tempA.xyz, rest is common: */ 338 yuv_to_rgb(tctx, &originst->Dst[0]); 339} 340 341static void 342lower_iyuv(struct tgsi_transform_context *tctx, 343 struct tgsi_full_instruction *originst) 344{ 345 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); 346 struct tgsi_full_instruction inst; 347 struct tgsi_full_src_register *coord = &originst->Src[0]; 348 unsigned samp = originst->Src[1].Register.Index; 349 350 /* sample Y: 351 * TEX tempA.x, coord, texture[samp], 2D; 352 */ 353 inst = tex_instruction(samp); 354 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 355 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W)); 356 tctx->emit_instruction(tctx, &inst); 357 358 /* sample U: 359 * TEX tempB.x, coord, texture[sampler_map[samp][0]], 2D; 360 * MOV tempA.y, tempB._x__ 361 */ 362 inst = tex_instruction(ctx->sampler_map[samp][0]); 363 reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X); 364 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W)); 365 tctx->emit_instruction(tctx, &inst); 366 367 inst = mov_instruction(); 368 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 369 reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, _, _)); 370 tctx->emit_instruction(tctx, &inst); 371 372 /* sample V: 373 * TEX tempB.x, coord, texture[sampler_map[samp][1]], 2D; 374 * MOV tempA.z, tempB.__x_ 375 */ 376 inst = tex_instruction(ctx->sampler_map[samp][1]); 377 reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X); 378 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W)); 379 tctx->emit_instruction(tctx, &inst); 380 381 inst = mov_instruction(); 382 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 383 reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, _, X, _)); 384 tctx->emit_instruction(tctx, &inst); 385 386 /* At this point, we have YUV in tempA.xyz, rest is common: */ 387 yuv_to_rgb(tctx, &originst->Dst[0]); 388} 389 390static void 391transform_instr(struct tgsi_transform_context *tctx, 392 struct tgsi_full_instruction *inst) 393{ 394 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); 395 396 if (!ctx->first_instruction_emitted) { 397 emit_decls(tctx); 398 ctx->first_instruction_emitted = true; 399 } 400 401 switch (inst->Instruction.Opcode) { 402 /* TODO what other tex opcode's can be used w/ external eglimgs? */ 403 case TGSI_OPCODE_TEX: { 404 unsigned samp = inst->Src[1].Register.Index; 405 if (ctx->lower_nv12 & (1 << samp)) { 406 lower_nv12(tctx, inst); 407 } else if (ctx->lower_iyuv & (1 << samp)) { 408 lower_iyuv(tctx, inst); 409 } else { 410 goto skip; 411 } 412 break; 413 } 414 default: 415 skip: 416 tctx->emit_instruction(tctx, inst); 417 return; 418 } 419} 420 421extern const struct tgsi_token * 422st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots, 423 unsigned lower_nv12, unsigned lower_iyuv) 424{ 425 struct tgsi_yuv_transform ctx; 426 struct tgsi_token *newtoks; 427 int newlen; 428 429 assert(!(lower_nv12 & lower_iyuv)); /* bitmasks should be mutually exclusive */ 430 431// tgsi_dump(tokens, 0); 432// debug_printf("\n"); 433 434 memset(&ctx, 0, sizeof(ctx)); 435 ctx.base.transform_instruction = transform_instr; 436 ctx.free_slots = free_slots; 437 ctx.lower_nv12 = lower_nv12; 438 ctx.lower_iyuv = lower_iyuv; 439 tgsi_scan_shader(tokens, &ctx.info); 440 441 /* TODO better job of figuring out how many extra tokens we need.. 442 * this is a pain about tgsi_transform :-/ 443 */ 444 newlen = tgsi_num_tokens(tokens) + 300; 445 newtoks = tgsi_alloc_tokens(newlen); 446 if (!newtoks) 447 return NULL; 448 449 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); 450 451// tgsi_dump(newtoks, 0); 452// debug_printf("\n"); 453 454 return newtoks; 455} 456