/*
 * Copyright © 2016 Red Hat
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23
24#include <stdbool.h>
25
26#include "st_tgsi_lower_yuv.h"
27#include "tgsi/tgsi_transform.h"
28#include "tgsi/tgsi_scan.h"
29#include "util/u_debug.h"
30
31#include "util/bitscan.h"
32
33struct tgsi_yuv_transform {
34   struct tgsi_transform_context base;
35   struct tgsi_shader_info info;
36   struct tgsi_full_src_register imm[4];
37   struct {
38      struct tgsi_full_src_register src;
39      struct tgsi_full_dst_register dst;
40   } tmp[2];
41#define A 0
42#define B 1
43
44   /* Maps a primary sampler (used for Y) to the U or UV sampler.  In
45    * case of 3-plane YUV format, the V plane is next sampler after U.
46    */
47   unsigned char sampler_map[PIPE_MAX_SAMPLERS][2];
48
49   bool first_instruction_emitted;
50   unsigned free_slots;
51   unsigned lower_nv12;
52   unsigned lower_iyuv;
53};
54
/* Recover the YUV-lowering state from a generic transform context.
 *
 * This is a plain pointer cast, so it is only meaningful for a context that
 * is the leading 'base' member of a struct tgsi_yuv_transform.
 */
static inline struct tgsi_yuv_transform *
tgsi_yuv_transform(struct tgsi_transform_context *tctx)
{
   struct tgsi_yuv_transform *yuv = (struct tgsi_yuv_transform *)tctx;

   return yuv;
}
60
61static void
62reg_dst(struct tgsi_full_dst_register *dst,
63        const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
64{
65   *dst = *orig_dst;
66   dst->Register.WriteMask &= wrmask;
67   assert(dst->Register.WriteMask);
68}
69
70static inline void
71get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
72{
73   swiz[0] = src->SwizzleX;
74   swiz[1] = src->SwizzleY;
75   swiz[2] = src->SwizzleZ;
76   swiz[3] = src->SwizzleW;
77}
78
79static void
80reg_src(struct tgsi_full_src_register *src,
81        const struct tgsi_full_src_register *orig_src,
82        unsigned sx, unsigned sy, unsigned sz, unsigned sw)
83{
84   unsigned swiz[4];
85   get_swiz(swiz, &orig_src->Register);
86   *src = *orig_src;
87   src->Register.SwizzleX = swiz[sx];
88   src->Register.SwizzleY = swiz[sy];
89   src->Register.SwizzleZ = swiz[sz];
90   src->Register.SwizzleW = swiz[sw];
91}
92
/* Placeholder for SWIZ() slots whose value is a don't-care; it is simply an
 * alias for the X channel.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X

/* Expand four channel letters (X, Y, Z, W or _) into the four
 * comma-separated TGSI_SWIZZLE_* arguments expected by reg_src().
 */
#define SWIZ(x,y,z,w)   \
   TGSI_SWIZZLE_ ## x,  \
   TGSI_SWIZZLE_ ## y,  \
   TGSI_SWIZZLE_ ## z,  \
   TGSI_SWIZZLE_ ## w
96
97static inline struct tgsi_full_instruction
98tex_instruction(unsigned samp)
99{
100   struct tgsi_full_instruction inst;
101
102   inst = tgsi_default_full_instruction();
103   inst.Instruction.Opcode = TGSI_OPCODE_TEX;
104   inst.Instruction.Texture = 1;
105   inst.Texture.Texture = TGSI_TEXTURE_2D;
106   inst.Instruction.NumDstRegs = 1;
107   inst.Instruction.NumSrcRegs = 2;
108   inst.Src[1].Register.File  = TGSI_FILE_SAMPLER;
109   inst.Src[1].Register.Index = samp;
110
111   return inst;
112}
113
114static inline struct tgsi_full_instruction
115mov_instruction(void)
116{
117   struct tgsi_full_instruction inst;
118
119   inst = tgsi_default_full_instruction();
120   inst.Instruction.Opcode = TGSI_OPCODE_MOV;
121   inst.Instruction.Saturate = 0;
122   inst.Instruction.NumDstRegs = 1;
123   inst.Instruction.NumSrcRegs = 1;
124
125   return inst;
126}
127
128static inline struct tgsi_full_instruction
129dp3_instruction(void)
130{
131   struct tgsi_full_instruction inst;
132
133   inst = tgsi_default_full_instruction();
134   inst.Instruction.Opcode = TGSI_OPCODE_DP3;
135   inst.Instruction.NumDstRegs = 1;
136   inst.Instruction.NumSrcRegs = 2;
137
138   return inst;
139}
140
141
142
143static void
144emit_immed(struct tgsi_transform_context *tctx, int idx,
145           float x, float y, float z, float w)
146{
147   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
148   struct tgsi_shader_info *info = &ctx->info;
149   struct tgsi_full_immediate immed;
150
151   immed = tgsi_default_full_immediate();
152   immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
153   immed.u[0].Float = x;
154   immed.u[1].Float = y;
155   immed.u[2].Float = z;
156   immed.u[3].Float = w;
157   tctx->emit_immediate(tctx, &immed);
158
159   ctx->imm[idx].Register.File = TGSI_FILE_IMMEDIATE;
160   ctx->imm[idx].Register.Index = info->immediate_count + idx;
161   ctx->imm[idx].Register.SwizzleX = TGSI_SWIZZLE_X;
162   ctx->imm[idx].Register.SwizzleY = TGSI_SWIZZLE_Y;
163   ctx->imm[idx].Register.SwizzleZ = TGSI_SWIZZLE_Z;
164   ctx->imm[idx].Register.SwizzleW = TGSI_SWIZZLE_W;
165}
166
167static void
168emit_samp(struct tgsi_transform_context *tctx, unsigned samp)
169{
170   tgsi_transform_sampler_decl(tctx, samp);
171   tgsi_transform_sampler_view_decl(tctx, samp, PIPE_TEXTURE_2D,
172                                    TGSI_RETURN_TYPE_FLOAT);
173}
174
175/* Emit extra declarations we need:
176 *  + 2 TEMP to hold intermediate results
177 *  + 1 (for 2-plane YUV) or 2 (for 3-plane YUV) extra samplers per
178 *    lowered YUV sampler
179 *  + extra immediates for doing CSC
180 */
181static void
182emit_decls(struct tgsi_transform_context *tctx)
183{
184   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
185   struct tgsi_shader_info *info = &ctx->info;
186   unsigned mask, tempbase, i;
187   struct tgsi_full_declaration decl;
188
189   /*
190    * Declare immediates for CSC conversion:
191    */
192
193   /* ITU-R BT.601 conversion */
194   emit_immed(tctx, 0, 1.164f,  0.000f,  1.596f,  0.0f);
195   emit_immed(tctx, 1, 1.164f, -0.392f, -0.813f,  0.0f);
196   emit_immed(tctx, 2, 1.164f,  2.017f,  0.000f,  0.0f);
197   emit_immed(tctx, 3, 0.0625f, 0.500f,  0.500f,  1.0f);
198
199   /*
200    * Declare extra samplers / sampler-views:
201    */
202
203   mask = ctx->lower_nv12 | ctx->lower_iyuv;
204   while (mask) {
205      unsigned extra, y_samp = u_bit_scan(&mask);
206
207      extra = u_bit_scan(&ctx->free_slots);
208      ctx->sampler_map[y_samp][0] = extra;
209      emit_samp(tctx, extra);
210
211      if (ctx->lower_iyuv & (1 << y_samp)) {
212         extra = u_bit_scan(&ctx->free_slots);
213         ctx->sampler_map[y_samp][1] = extra;
214         emit_samp(tctx, extra);
215      }
216   }
217
218   /*
219    * Declare extra temp:
220    */
221
222   tempbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
223
224   for (i = 0; i < 2; i++) {
225      decl = tgsi_default_full_declaration();
226      decl.Declaration.File = TGSI_FILE_TEMPORARY;
227      decl.Range.First = decl.Range.Last = tempbase + i;
228      tctx->emit_declaration(tctx, &decl);
229
230      ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
231      ctx->tmp[i].src.Register.Index = tempbase + i;
232      ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
233      ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
234      ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
235      ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
236
237      ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
238      ctx->tmp[i].dst.Register.Index = tempbase + i;
239      ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
240   }
241}
242
243/* call with YUV in tmpA.xyz */
244static void
245yuv_to_rgb(struct tgsi_transform_context *tctx,
246           struct tgsi_full_dst_register *dst)
247{
248   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
249   struct tgsi_full_instruction inst;
250
251   /*
252    * IMM[0] FLT32 { 1.164,  0.000,  1.596,  0.0 }
253    * IMM[1] FLT32 { 1.164, -0.392, -0.813,  0.0 }
254    * IMM[2] FLT32 { 1.164,  2.017,  0.000,  0.0 }
255    * IMM[3] FLT32 { 0.0625, 0.500,  0.500,  1.0 }
256    */
257
258   /* SUB tmpA.xyz, tmpA, imm[3] */
259   inst = tgsi_default_full_instruction();
260   inst.Instruction.Opcode = TGSI_OPCODE_ADD;
261   inst.Instruction.Saturate = 0;
262   inst.Instruction.NumDstRegs = 1;
263   inst.Instruction.NumSrcRegs = 2;
264   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
265   reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
266   reg_src(&inst.Src[1], &ctx->imm[3], SWIZ(X, Y, Z, _));
267   inst.Src[1].Register.Negate = 1;
268   tctx->emit_instruction(tctx, &inst);
269
270   /* DP3 dst.x, tmpA, imm[0] */
271   if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
272      inst = dp3_instruction();
273      reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X);
274      reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
275      reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W));
276      tctx->emit_instruction(tctx, &inst);
277   }
278
279   /* DP3 dst.y, tmpA, imm[1] */
280   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
281      inst = dp3_instruction();
282      reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y);
283      reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
284      reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W));
285      tctx->emit_instruction(tctx, &inst);
286   }
287
288   /* DP3 dst.z, tmpA, imm[2] */
289   if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
290      inst = dp3_instruction();
291      reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z);
292      reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
293      reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W));
294      tctx->emit_instruction(tctx, &inst);
295   }
296
297   /* MOV dst.w, imm[0].x */
298   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
299      inst = mov_instruction();
300      reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W);
301      reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W));
302      tctx->emit_instruction(tctx, &inst);
303   }
304}
305
306static void
307lower_nv12(struct tgsi_transform_context *tctx,
308           struct tgsi_full_instruction *originst)
309{
310   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
311   struct tgsi_full_instruction inst;
312   struct tgsi_full_src_register *coord = &originst->Src[0];
313   unsigned samp = originst->Src[1].Register.Index;
314
315   /* sample Y:
316    *    TEX tempA.x, coord, texture[samp], 2D;
317    */
318   inst = tex_instruction(samp);
319   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
320   reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
321   tctx->emit_instruction(tctx, &inst);
322
323   /* sample UV:
324    *    TEX tempB.xy, coord, texture[sampler_map[samp][0]], 2D;
325    *    MOV tempA.yz, tempB._xy_
326    */
327   inst = tex_instruction(ctx->sampler_map[samp][0]);
328   reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XY);
329   reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
330   tctx->emit_instruction(tctx, &inst);
331
332   inst = mov_instruction();
333   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_YZ);
334   reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, Y, _));
335   tctx->emit_instruction(tctx, &inst);
336
337   /* At this point, we have YUV in tempA.xyz, rest is common: */
338   yuv_to_rgb(tctx, &originst->Dst[0]);
339}
340
341static void
342lower_iyuv(struct tgsi_transform_context *tctx,
343           struct tgsi_full_instruction *originst)
344{
345   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
346   struct tgsi_full_instruction inst;
347   struct tgsi_full_src_register *coord = &originst->Src[0];
348   unsigned samp = originst->Src[1].Register.Index;
349
350   /* sample Y:
351    *    TEX tempA.x, coord, texture[samp], 2D;
352    */
353   inst = tex_instruction(samp);
354   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
355   reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
356   tctx->emit_instruction(tctx, &inst);
357
358   /* sample U:
359    *    TEX tempB.x, coord, texture[sampler_map[samp][0]], 2D;
360    *    MOV tempA.y, tempB._x__
361    */
362   inst = tex_instruction(ctx->sampler_map[samp][0]);
363   reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
364   reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
365   tctx->emit_instruction(tctx, &inst);
366
367   inst = mov_instruction();
368   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
369   reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, X, _, _));
370   tctx->emit_instruction(tctx, &inst);
371
372   /* sample V:
373    *    TEX tempB.x, coord, texture[sampler_map[samp][1]], 2D;
374    *    MOV tempA.z, tempB.__x_
375    */
376   inst = tex_instruction(ctx->sampler_map[samp][1]);
377   reg_dst(&inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
378   reg_src(&inst.Src[0], coord, SWIZ(X, Y, Z, W));
379   tctx->emit_instruction(tctx, &inst);
380
381   inst = mov_instruction();
382   reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
383   reg_src(&inst.Src[0], &ctx->tmp[B].src, SWIZ(_, _, X, _));
384   tctx->emit_instruction(tctx, &inst);
385
386   /* At this point, we have YUV in tempA.xyz, rest is common: */
387   yuv_to_rgb(tctx, &originst->Dst[0]);
388}
389
390static void
391transform_instr(struct tgsi_transform_context *tctx,
392                struct tgsi_full_instruction *inst)
393{
394   struct tgsi_yuv_transform *ctx = tgsi_yuv_transform(tctx);
395
396   if (!ctx->first_instruction_emitted) {
397      emit_decls(tctx);
398      ctx->first_instruction_emitted = true;
399   }
400
401   switch (inst->Instruction.Opcode) {
402   /* TODO what other tex opcode's can be used w/ external eglimgs? */
403   case TGSI_OPCODE_TEX: {
404      unsigned samp = inst->Src[1].Register.Index;
405      if (ctx->lower_nv12 & (1 << samp)) {
406         lower_nv12(tctx, inst);
407      } else if (ctx->lower_iyuv & (1 << samp)) {
408         lower_iyuv(tctx, inst);
409      } else {
410         goto skip;
411      }
412      break;
413   }
414   default:
415   skip:
416      tctx->emit_instruction(tctx, inst);
417      return;
418   }
419}
420
421extern const struct tgsi_token *
422st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots,
423                  unsigned lower_nv12, unsigned lower_iyuv)
424{
425   struct tgsi_yuv_transform ctx;
426   struct tgsi_token *newtoks;
427   int newlen;
428
429   assert(!(lower_nv12 & lower_iyuv)); /* bitmasks should be mutually exclusive */
430
431//   tgsi_dump(tokens, 0);
432//   debug_printf("\n");
433
434   memset(&ctx, 0, sizeof(ctx));
435   ctx.base.transform_instruction = transform_instr;
436   ctx.free_slots = free_slots;
437   ctx.lower_nv12 = lower_nv12;
438   ctx.lower_iyuv = lower_iyuv;
439   tgsi_scan_shader(tokens, &ctx.info);
440
441   /* TODO better job of figuring out how many extra tokens we need..
442    * this is a pain about tgsi_transform :-/
443    */
444   newlen = tgsi_num_tokens(tokens) + 300;
445   newtoks = tgsi_alloc_tokens(newlen);
446   if (!newtoks)
447      return NULL;
448
449   tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
450
451//   tgsi_dump(newtoks, 0);
452//   debug_printf("\n");
453
454   return newtoks;
455}
456