1/* 2 * Copyright © 2016 Red Hat 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include <stdbool.h> 25 26#include "st_tgsi_lower_yuv.h" 27#include "tgsi/tgsi_transform.h" 28#include "tgsi/tgsi_scan.h" 29#include "tgsi/tgsi_dump.h" 30#include "util/u_debug.h" 31 32#include "util/bitscan.h" 33 34struct tgsi_yuv_transform { 35 struct tgsi_transform_context base; 36 struct tgsi_shader_info info; 37 struct tgsi_full_src_register imm[4]; 38 struct { 39 struct tgsi_full_src_register src; 40 struct tgsi_full_dst_register dst; 41 } tmp[2]; 42#define A 0 43#define B 1 44 45 /* Maps a primary sampler (used for Y) to the U or UV sampler. In 46 * case of 3-plane YUV format, the V plane is next sampler after U. 47 */ 48 unsigned char sampler_map[PIPE_MAX_SAMPLERS][2]; 49 50 bool first_instruction_emitted; 51 unsigned free_slots; 52 unsigned lower_nv12; 53 unsigned lower_iyuv; 54}; 55 56static inline struct tgsi_yuv_transform * 57tgsi_yuv_transform(struct tgsi_transform_context *tctx) 58{ 59 return (struct tgsi_yuv_transform *)tctx; 60} 61 62static void 63reg_dst(struct tgsi_full_dst_register *dst, 64 const struct tgsi_full_dst_register *orig_dst, unsigned wrmask) 65{ 66 *dst = *orig_dst; 67 dst->Register.WriteMask &= wrmask; 68 assert(dst->Register.WriteMask); 69} 70 71static inline void 72get_swiz(unsigned *swiz, const struct tgsi_src_register *src) 73{ 74 swiz[0] = src->SwizzleX; 75 swiz[1] = src->SwizzleY; 76 swiz[2] = src->SwizzleZ; 77 swiz[3] = src->SwizzleW; 78} 79 80static void 81reg_src(struct tgsi_full_src_register *src, 82 const struct tgsi_full_src_register *orig_src, 83 unsigned sx, unsigned sy, unsigned sz, unsigned sw) 84{ 85 unsigned swiz[4]; 86 get_swiz(swiz, &orig_src->Register); 87 *src = *orig_src; 88 src->Register.SwizzleX = swiz[sx]; 89 src->Register.SwizzleY = swiz[sy]; 90 src->Register.SwizzleZ = swiz[sz]; 91 src->Register.SwizzleW = swiz[sw]; 92} 93 94#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */ 95#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \ 96 TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w 97 98static inline struct tgsi_full_instruction 99tex_instruction(unsigned samp) 100{ 101 struct tgsi_full_instruction inst; 102 103 inst = tgsi_default_full_instruction(); 104 inst.Instruction.Opcode = TGSI_OPCODE_TEX; 105 inst.Instruction.Texture = 1; 106 inst.Texture.Texture = TGSI_TEXTURE_2D; 107 inst.Instruction.NumDstRegs = 1; 108 inst.Instruction.NumSrcRegs = 2; 109 inst.Src[1].Register.File = TGSI_FILE_SAMPLER; 110 inst.Src[1].Register.Index = samp; 111 112 return inst; 113} 114 115static inline struct tgsi_full_instruction 116mov_instruction(void) 117{ 118 struct tgsi_full_instruction inst; 119 120 inst = tgsi_default_full_instruction(); 121 inst.Instruction.Opcode = TGSI_OPCODE_MOV; 122 inst.Instruction.Saturate = 0; 123 inst.Instruction.NumDstRegs = 1; 124 inst.Instruction.NumSrcRegs = 1; 125 126 return inst; 127} 128 129static inline struct tgsi_full_instruction 130dp3_instruction(void) 131{ 132 struct tgsi_full_instruction inst; 133 134 inst = tgsi_default_full_instruction(); 135 inst.Instruction.Opcode = TGSI_OPCODE_DP3; 136 inst.Instruction.NumDstRegs = 1; 137 inst.Instruction.NumSrcRegs = 2; 138 139 return inst; 140} 141 142 143 144static void 145emit_immed(struct tgsi_transform_context *tctx, int idx, 146 float x, float y, float z, float w) 147{ 148 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); 149 struct tgsi_shader_info *info = &ctx->info; 150 struct tgsi_full_immediate immed; 151 152 immed = tgsi_default_full_immediate(); 153 immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */ 154 immed.u[0].Float = x; 155 immed.u[1].Float = y; 156 immed.u[2].Float = z; 157 immed.u[3].Float = w; 158 tctx->emit_immediate(tctx, &immed); 159 160 ctx->imm[idx].Register.File = TGSI_FILE_IMMEDIATE; 161 ctx->imm[idx].Register.Index = info->immediate_count + idx; 162 ctx->imm[idx].Register.SwizzleX = TGSI_SWIZZLE_X; 163 ctx->imm[idx].Register.SwizzleY = TGSI_SWIZZLE_Y; 164 ctx->imm[idx].Register.SwizzleZ = TGSI_SWIZZLE_Z; 165 ctx->imm[idx].Register.SwizzleW = TGSI_SWIZZLE_W; 166} 167 168static void 169emit_samp(struct tgsi_transform_context *tctx, unsigned samp) 170{ 171 tgsi_transform_sampler_decl(tctx, samp); 172 tgsi_transform_sampler_view_decl(tctx, samp, PIPE_TEXTURE_2D, 173 TGSI_RETURN_TYPE_FLOAT); 174} 175 176/* Emit extra declarations we need: 177 * + 2 TEMP to hold intermediate results 178 * + 1 (for 2-plane YUV) or 2 (for 3-plane YUV) extra samplers per 179 * lowered YUV sampler 180 * + extra immediates for doing CSC 181 */ 182static void 183emit_decls(struct tgsi_transform_context *tctx) 184{ 185 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); 186 struct tgsi_shader_info *info = &ctx->info; 187 unsigned mask, tempbase, i; 188 struct tgsi_full_declaration decl; 189 190 /* 191 * Declare immediates for CSC conversion: 192 */ 193 194 /* ITU-R BT.601 conversion */ 195 emit_immed(tctx, 0, 1.164, 0.000, 1.596, 0.0); 196 emit_immed(tctx, 1, 1.164, -0.392, -0.813, 0.0); 197 emit_immed(tctx, 2, 1.164, 2.017, 0.000, 0.0); 198 emit_immed(tctx, 3, 0.0625, 0.500, 0.500, 1.0); 199 200 /* 201 * Declare extra samplers / sampler-views: 202 */ 203 204 mask = ctx->lower_nv12 | ctx->lower_iyuv; 205 while (mask) { 206 unsigned extra, y_samp = u_bit_scan(&mask); 207 208 extra = u_bit_scan(&ctx->free_slots); 209 ctx->sampler_map[y_samp][0] = extra; 210 emit_samp(tctx, extra); 211 212 if (ctx->lower_iyuv & (1 << y_samp)) { 213 extra = u_bit_scan(&ctx->free_slots); 214 ctx->sampler_map[y_samp][1] = extra; 215 emit_samp(tctx, extra); 216 } 217 } 218 219 /* 220 * Declare extra temp: 221 */ 222 223 tempbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; 224 225 for (i = 0; i < 2; i++) { 226 decl = tgsi_default_full_declaration(); 227 decl.Declaration.File = TGSI_FILE_TEMPORARY; 228 decl.Range.First = decl.Range.Last = tempbase + i; 229 tctx->emit_declaration(tctx, &decl); 230 231 ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY; 232 ctx->tmp[i].src.Register.Index = tempbase + i; 233 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X; 234 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y; 235 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z; 236 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W; 237 238 ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY; 239 ctx->tmp[i].dst.Register.Index = tempbase + i; 240 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW; 241 } 242} 243 244/* call with YUV in tmpA.xyz */ 245static void 246yuv_to_rgb(struct tgsi_transform_context *tctx, 247 struct tgsi_full_dst_register *dst) 248{ 249 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); 250 struct tgsi_full_instruction inst; 251 252 /* 253 * IMM[0] FLT32 { 1.164, 0.000, 1.596, 0.0 } 254 * IMM[1] FLT32 { 1.164, -0.392, -0.813, 0.0 } 255 * IMM[2] FLT32 { 1.164, 2.017, 0.000, 0.0 } 256 * IMM[3] FLT32 { 0.0625, 0.500, 0.500, 1.0 } 257 */ 258 259 /* SUB tmpA.xyz, tmpA, imm[3] */ 260 inst = tgsi_default_full_instruction(); 261 inst.Instruction.Opcode = TGSI_OPCODE_ADD; 262 inst.Instruction.Saturate = 0; 263 inst.Instruction.NumDstRegs = 1; 264 inst.Instruction.NumSrcRegs = 2; 265 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ); 266 reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _)); 267 reg_src(&inst.Src[1], &ctx->imm[3], SWIZ(X, Y, Z, _)); 268 inst.Src[1].Register.Negate = 1; 269 tctx->emit_instruction(tctx, &inst); 270 271 /* DP3 dst.x, tmpA, imm[0] */ 272 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { 273 inst = dp3_instruction(); 274 reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X); 275 reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 276 reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W)); 277 tctx->emit_instruction(tctx, &inst); 278 } 279 280 /* DP3 dst.y, tmpA, imm[1] */ 281 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 282 inst = dp3_instruction(); 283 reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y); 284 reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 285 reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W)); 286 tctx->emit_instruction(tctx, &inst); 287 } 288 289 /* DP3 dst.z, tmpA, imm[2] */ 290 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 291 inst = dp3_instruction(); 292 reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z); 293 reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 294 reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W)); 295 tctx->emit_instruction(tctx, &inst); 296 } 297 298 /* MOV dst.w, imm[0].x */ 299 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 300 inst = mov_instruction(); 301 reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W); 302 reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W)); 303 tctx->emit_instruction(tctx, &inst); 304 } 305} 306 307static void 308lower_nv12(struct tgsi_transform_context *tctx, 309 struct tgsi_full_instruction *originst) 310{ 311 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); 312 struct tgsi_full_instruction inst; 313 struct tgsi_full_src_register *coord = &originst->Src[0]; 314 unsigned samp = originst->Src[1].Register.Index; 315 316 /* sample Y: 317 * TEX tempA.x, coord, texture[samp], 2D; 318 */ 319 inst = tex_instruction(samp); 320 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 321 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W)); 322 tctx->emit_instruction(tctx, &inst); 323 324 /* sample UV: 325 * TEX tempB.xy, coord, texture[sampler_map[samp][0]], 2D; 326 * MOV tempA.yz, tempB._xy_ 327 */ 328 inst = tex_instruction(ctx->sampler_map[samp][0]); 329 reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XY); 330 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W)); 331 tctx->emit_instruction(tctx, &inst); 332 333 inst = mov_instruction(); 334 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_YZ); 335 reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, Y, _)); 336 tctx->emit_instruction(tctx, &inst); 337 338 /* At this point, we have YUV in tempA.xyz, rest is common: */ 339 yuv_to_rgb(tctx, &originst->Dst[0]); 340} 341 342static void 343lower_iyuv(struct tgsi_transform_context *tctx, 344 struct tgsi_full_instruction *originst) 345{ 346 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); 347 struct tgsi_full_instruction inst; 348 struct tgsi_full_src_register *coord = &originst->Src[0]; 349 unsigned samp = originst->Src[1].Register.Index; 350 351 /* sample Y: 352 * TEX tempA.x, coord, texture[samp], 2D; 353 */ 354 inst = tex_instruction(samp); 355 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 356 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W)); 357 tctx->emit_instruction(tctx, &inst); 358 359 /* sample U: 360 * TEX tempB.x, coord, texture[sampler_map[samp][0]], 2D; 361 * MOV tempA.y, tempB._x__ 362 */ 363 inst = tex_instruction(ctx->sampler_map[samp][0]); 364 reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X); 365 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W)); 366 tctx->emit_instruction(tctx, &inst); 367 368 inst = mov_instruction(); 369 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 370 reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, _, _)); 371 tctx->emit_instruction(tctx, &inst); 372 373 /* sample V: 374 * TEX tempB.x, coord, texture[sampler_map[samp][1]], 2D; 375 * MOV tempA.z, tempB.__x_ 376 */ 377 inst = tex_instruction(ctx->sampler_map[samp][1]); 378 reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X); 379 reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W)); 380 tctx->emit_instruction(tctx, &inst); 381 382 inst = mov_instruction(); 383 reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 384 reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, _, X, _)); 385 tctx->emit_instruction(tctx, &inst); 386 387 /* At this point, we have YUV in tempA.xyz, rest is common: */ 388 yuv_to_rgb(tctx, &originst->Dst[0]); 389} 390 391static void 392transform_instr(struct tgsi_transform_context *tctx, 393 struct tgsi_full_instruction *inst) 394{ 395 struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx); 396 397 if (!ctx->first_instruction_emitted) { 398 emit_decls(tctx); 399 ctx->first_instruction_emitted = true; 400 } 401 402 switch (inst->Instruction.Opcode) { 403 /* TODO what other tex opcode's can be used w/ external eglimgs? */ 404 case TGSI_OPCODE_TEX: { 405 unsigned samp = inst->Src[1].Register.Index; 406 if (ctx->lower_nv12 & (1 << samp)) { 407 lower_nv12(tctx, inst); 408 } else if (ctx->lower_iyuv & (1 << samp)) { 409 lower_iyuv(tctx, inst); 410 } else { 411 goto skip; 412 } 413 break; 414 } 415 default: 416 skip: 417 tctx->emit_instruction(tctx, inst); 418 return; 419 } 420} 421 422extern const struct tgsi_token * 423st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots, 424 unsigned lower_nv12, unsigned lower_iyuv) 425{ 426 struct tgsi_yuv_transform ctx; 427 struct tgsi_token *newtoks; 428 int newlen; 429 430 assert(!(lower_nv12 & lower_iyuv)); /* bitmasks should be mutually exclusive */ 431 432// tgsi_dump(tokens, 0); 433// debug_printf("\n"); 434 435 memset(&ctx, 0, sizeof(ctx)); 436 ctx.base.transform_instruction = transform_instr; 437 ctx.free_slots = free_slots; 438 ctx.lower_nv12 = lower_nv12; 439 ctx.lower_iyuv = lower_iyuv; 440 tgsi_scan_shader(tokens, &ctx.info); 441 442 /* TODO better job of figuring out how many extra tokens we need.. 443 * this is a pain about tgsi_transform :-/ 444 */ 445 newlen = tgsi_num_tokens(tokens) + 300; 446 newtoks = tgsi_alloc_tokens(newlen); 447 if (!newtoks) 448 return NULL; 449 450 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); 451 452// tgsi_dump(newtoks, 0); 453// debug_printf("\n"); 454 455 return newtoks; 456} 457