1af69d88dSmrg/**************************************************************************
2af69d88dSmrg *
3af69d88dSmrg * Copyright 2009 Younes Manton.
4af69d88dSmrg * All Rights Reserved.
5af69d88dSmrg *
6af69d88dSmrg * Permission is hereby granted, free of charge, to any person obtaining a
7af69d88dSmrg * copy of this software and associated documentation files (the
8af69d88dSmrg * "Software"), to deal in the Software without restriction, including
9af69d88dSmrg * without limitation the rights to use, copy, modify, merge, publish,
10af69d88dSmrg * distribute, sub license, and/or sell copies of the Software, and to
11af69d88dSmrg * permit persons to whom the Software is furnished to do so, subject to
12af69d88dSmrg * the following conditions:
13af69d88dSmrg *
14af69d88dSmrg * The above copyright notice and this permission notice (including the
15af69d88dSmrg * next paragraph) shall be included in all copies or substantial portions
16af69d88dSmrg * of the Software.
17af69d88dSmrg *
18af69d88dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19af69d88dSmrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20af69d88dSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21af69d88dSmrg * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22af69d88dSmrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23af69d88dSmrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24af69d88dSmrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25af69d88dSmrg *
26af69d88dSmrg **************************************************************************/
27af69d88dSmrg
28af69d88dSmrg#include <assert.h>
29af69d88dSmrg
30af69d88dSmrg#include "pipe/p_context.h"
31af69d88dSmrg
32af69d88dSmrg#include "util/u_sampler.h"
33af69d88dSmrg#include "util/u_draw.h"
34af69d88dSmrg
35af69d88dSmrg#include "tgsi/tgsi_ureg.h"
36af69d88dSmrg
37af69d88dSmrg#include "vl_defines.h"
38af69d88dSmrg#include "vl_vertex_buffers.h"
39af69d88dSmrg#include "vl_mc.h"
40af69d88dSmrg#include "vl_idct.h"
41af69d88dSmrg
/*
 * Semantic indices used for vertex shader outputs and the matching fragment
 * shader inputs.
 *
 * The reference (motion vector) shaders and the ycbcr shaders are separate
 * programs, so they share the same two generic slots under different names.
 */
enum VS_OUTPUT
{
   /* index used with TGSI_SEMANTIC_POSITION */
   VS_O_VPOS = 0,

   /* reference shaders: top / bottom motion vector outputs */
   VS_O_VTOP = 0,
   VS_O_VBOTTOM = 1,

   /* ycbcr shaders: aliases of the two generic slots above */
   VS_O_FLAGS = VS_O_VTOP,
   VS_O_VTEX = VS_O_VBOTTOM
};
51af69d88dSmrg
52af69d88dSmrgstatic struct ureg_dst
53af69d88dSmrgcalc_position(struct vl_mc *r, struct ureg_program *shader, struct ureg_src block_scale)
54af69d88dSmrg{
55af69d88dSmrg   struct ureg_src vrect, vpos;
56af69d88dSmrg   struct ureg_dst t_vpos;
57af69d88dSmrg   struct ureg_dst o_vpos;
58af69d88dSmrg
59af69d88dSmrg   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
60af69d88dSmrg   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
61af69d88dSmrg
62af69d88dSmrg   t_vpos = ureg_DECL_temporary(shader);
63af69d88dSmrg
64af69d88dSmrg   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
65af69d88dSmrg
66af69d88dSmrg   /*
67af69d88dSmrg    * block_scale = (VL_MACROBLOCK_WIDTH, VL_MACROBLOCK_HEIGHT) / (dst.width, dst.height)
68af69d88dSmrg    *
69af69d88dSmrg    * t_vpos = (vpos + vrect) * block_scale
70af69d88dSmrg    * o_vpos.xy = t_vpos
71af69d88dSmrg    * o_vpos.zw = vpos
72af69d88dSmrg    */
73af69d88dSmrg   ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
74af69d88dSmrg   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale);
75af69d88dSmrg   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
76af69d88dSmrg   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
77af69d88dSmrg
78af69d88dSmrg   return t_vpos;
79af69d88dSmrg}
80af69d88dSmrg
81af69d88dSmrgstatic struct ureg_dst
8201e04c3fSmrgcalc_line(struct pipe_screen *screen, struct ureg_program *shader)
83af69d88dSmrg{
84af69d88dSmrg   struct ureg_dst tmp;
85af69d88dSmrg   struct ureg_src pos;
86af69d88dSmrg
87af69d88dSmrg   tmp = ureg_DECL_temporary(shader);
88af69d88dSmrg
8901e04c3fSmrg   if (screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL))
9001e04c3fSmrg      pos = ureg_DECL_system_value(shader, TGSI_SEMANTIC_POSITION, 0);
9101e04c3fSmrg   else
9201e04c3fSmrg      pos = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS,
9301e04c3fSmrg                               TGSI_INTERPOLATE_LINEAR);
94af69d88dSmrg
95af69d88dSmrg   /*
96af69d88dSmrg    * tmp.y = fraction(pos.y / 2) >= 0.5 ? 1 : 0
97af69d88dSmrg    */
98af69d88dSmrg   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), pos, ureg_imm1f(shader, 0.5f));
99af69d88dSmrg   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp));
100af69d88dSmrg   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
101af69d88dSmrg
102af69d88dSmrg   return tmp;
103af69d88dSmrg}
104af69d88dSmrg
105af69d88dSmrgstatic void *
106af69d88dSmrgcreate_ref_vert_shader(struct vl_mc *r)
107af69d88dSmrg{
108af69d88dSmrg   struct ureg_program *shader;
109af69d88dSmrg   struct ureg_src mv_scale;
110af69d88dSmrg   struct ureg_src vmv[2];
111af69d88dSmrg   struct ureg_dst t_vpos;
112af69d88dSmrg   struct ureg_dst o_vmv[2];
113af69d88dSmrg   unsigned i;
114af69d88dSmrg
11501e04c3fSmrg   shader = ureg_create(PIPE_SHADER_VERTEX);
116af69d88dSmrg   if (!shader)
117af69d88dSmrg      return NULL;
118af69d88dSmrg
119af69d88dSmrg   vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP);
120af69d88dSmrg   vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM);
121af69d88dSmrg
122af69d88dSmrg   t_vpos = calc_position(r, shader, ureg_imm2f(shader,
123af69d88dSmrg      (float)VL_MACROBLOCK_WIDTH / r->buffer_width,
124af69d88dSmrg      (float)VL_MACROBLOCK_HEIGHT / r->buffer_height)
125af69d88dSmrg   );
126af69d88dSmrg
127af69d88dSmrg   o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
128af69d88dSmrg   o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);
129af69d88dSmrg
130af69d88dSmrg   /*
131af69d88dSmrg    * mv_scale.xy = 0.5 / (dst.width, dst.height);
132af69d88dSmrg    * mv_scale.z = 1.0f / 4.0f
133af69d88dSmrg    * mv_scale.w = 1.0f / 255.0f
134af69d88dSmrg    *
135af69d88dSmrg    * // Apply motion vectors
136af69d88dSmrg    * o_vmv[0..1].xy = vmv[0..1] * mv_scale + t_vpos
137af69d88dSmrg    * o_vmv[0..1].zw = vmv[0..1] * mv_scale
138af69d88dSmrg    *
139af69d88dSmrg    */
140af69d88dSmrg
141af69d88dSmrg   mv_scale = ureg_imm4f(shader,
142af69d88dSmrg      0.5f / r->buffer_width,
143af69d88dSmrg      0.5f / r->buffer_height,
144af69d88dSmrg      1.0f / 4.0f,
145af69d88dSmrg      1.0f / PIPE_VIDEO_MV_WEIGHT_MAX);
146af69d88dSmrg
147af69d88dSmrg   for (i = 0; i < 2; ++i) {
148af69d88dSmrg      ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos));
149af69d88dSmrg      ureg_MUL(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_ZW), mv_scale, vmv[i]);
150af69d88dSmrg   }
151af69d88dSmrg
152af69d88dSmrg   ureg_release_temporary(shader, t_vpos);
153af69d88dSmrg
154af69d88dSmrg   ureg_END(shader);
155af69d88dSmrg
156af69d88dSmrg   return ureg_create_shader_and_destroy(shader, r->pipe);
157af69d88dSmrg}
158af69d88dSmrg
159af69d88dSmrgstatic void *
160af69d88dSmrgcreate_ref_frag_shader(struct vl_mc *r)
161af69d88dSmrg{
162af69d88dSmrg   const float y_scale =
163af69d88dSmrg      r->buffer_height / 2 *
164af69d88dSmrg      r->macroblock_size / VL_MACROBLOCK_HEIGHT;
165af69d88dSmrg
166af69d88dSmrg   struct ureg_program *shader;
167af69d88dSmrg   struct ureg_src tc[2], sampler;
168af69d88dSmrg   struct ureg_dst ref, field;
169af69d88dSmrg   struct ureg_dst fragment;
170af69d88dSmrg   unsigned label;
171af69d88dSmrg
17201e04c3fSmrg   shader = ureg_create(PIPE_SHADER_FRAGMENT);
173af69d88dSmrg   if (!shader)
174af69d88dSmrg      return NULL;
175af69d88dSmrg
176af69d88dSmrg   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
177af69d88dSmrg   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR);
178af69d88dSmrg
179af69d88dSmrg   sampler = ureg_DECL_sampler(shader, 0);
180af69d88dSmrg   ref = ureg_DECL_temporary(shader);
181af69d88dSmrg
182af69d88dSmrg   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
183af69d88dSmrg
18401e04c3fSmrg   field = calc_line(r->pipe->screen, shader);
185af69d88dSmrg
186af69d88dSmrg   /*
187af69d88dSmrg    * ref = field.z ? tc[1] : tc[0]
188af69d88dSmrg    *
189af69d88dSmrg    * // Adjust tc acording to top/bottom field selection
190af69d88dSmrg    * if (|ref.z|) {
191af69d88dSmrg    *    ref.y *= y_scale
192af69d88dSmrg    *    ref.y = floor(ref.y)
193af69d88dSmrg    *    ref.y += ref.z
194af69d88dSmrg    *    ref.y /= y_scale
195af69d88dSmrg    * }
196af69d88dSmrg    * fragment.xyz = tex(ref, sampler[0])
197af69d88dSmrg    */
198af69d88dSmrg   ureg_CMP(shader, ureg_writemask(ref, TGSI_WRITEMASK_XYZ),
199af69d88dSmrg            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
200af69d88dSmrg            tc[1], tc[0]);
201af69d88dSmrg   ureg_CMP(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W),
202af69d88dSmrg            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
203af69d88dSmrg            tc[1], tc[0]);
204af69d88dSmrg
205af69d88dSmrg   ureg_IF(shader, ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z), &label);
206af69d88dSmrg
207af69d88dSmrg      ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
208af69d88dSmrg               ureg_src(ref), ureg_imm1f(shader, y_scale));
209af69d88dSmrg      ureg_FLR(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), ureg_src(ref));
210af69d88dSmrg      ureg_ADD(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
211af69d88dSmrg               ureg_src(ref), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z));
212af69d88dSmrg      ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
213af69d88dSmrg               ureg_src(ref), ureg_imm1f(shader, 1.0f / y_scale));
214af69d88dSmrg
215af69d88dSmrg   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
216af69d88dSmrg   ureg_ENDIF(shader);
217af69d88dSmrg
218af69d88dSmrg   ureg_TEX(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), TGSI_TEXTURE_2D, ureg_src(ref), sampler);
219af69d88dSmrg
220af69d88dSmrg   ureg_release_temporary(shader, ref);
221af69d88dSmrg
222af69d88dSmrg   ureg_release_temporary(shader, field);
223af69d88dSmrg   ureg_END(shader);
224af69d88dSmrg
225af69d88dSmrg   return ureg_create_shader_and_destroy(shader, r->pipe);
226af69d88dSmrg}
227af69d88dSmrg
228af69d88dSmrgstatic void *
229af69d88dSmrgcreate_ycbcr_vert_shader(struct vl_mc *r, vl_mc_ycbcr_vert_shader vs_callback, void *callback_priv)
230af69d88dSmrg{
231af69d88dSmrg   struct ureg_program *shader;
232af69d88dSmrg
233af69d88dSmrg   struct ureg_src vrect, vpos;
234af69d88dSmrg   struct ureg_dst t_vpos, t_vtex;
235af69d88dSmrg   struct ureg_dst o_vpos, o_flags;
236af69d88dSmrg
237af69d88dSmrg   struct vertex2f scale = {
238af69d88dSmrg      (float)VL_BLOCK_WIDTH / r->buffer_width * VL_MACROBLOCK_WIDTH / r->macroblock_size,
239af69d88dSmrg      (float)VL_BLOCK_HEIGHT / r->buffer_height * VL_MACROBLOCK_HEIGHT / r->macroblock_size
240af69d88dSmrg   };
241af69d88dSmrg
242af69d88dSmrg   unsigned label;
243af69d88dSmrg
24401e04c3fSmrg   shader = ureg_create(PIPE_SHADER_VERTEX);
245af69d88dSmrg   if (!shader)
246af69d88dSmrg      return NULL;
247af69d88dSmrg
248af69d88dSmrg   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
249af69d88dSmrg   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
250af69d88dSmrg
251af69d88dSmrg   t_vpos = calc_position(r, shader, ureg_imm2f(shader, scale.x, scale.y));
252af69d88dSmrg   t_vtex = ureg_DECL_temporary(shader);
253af69d88dSmrg
254af69d88dSmrg   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
255af69d88dSmrg   o_flags = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS);
256af69d88dSmrg
257af69d88dSmrg   /*
258af69d88dSmrg    * o_vtex.xy = t_vpos
259af69d88dSmrg    * o_flags.z = intra * 0.5
260af69d88dSmrg    *
261af69d88dSmrg    * if(interlaced) {
262af69d88dSmrg    *    t_vtex.xy = vrect.y ? { 0, scale.y } : { -scale.y : 0 }
263af69d88dSmrg    *    t_vtex.z = vpos.y % 2
264af69d88dSmrg    *    t_vtex.y = t_vtex.z ? t_vtex.x : t_vtex.y
265af69d88dSmrg    *    o_vpos.y = t_vtex.y + t_vpos.y
266af69d88dSmrg    *
267af69d88dSmrg    *    o_flags.w = t_vtex.z ? 0 : 1
268af69d88dSmrg    * }
269af69d88dSmrg    *
270af69d88dSmrg    */
271af69d88dSmrg
272af69d88dSmrg   vs_callback(callback_priv, r, shader, VS_O_VTEX, t_vpos);
273af69d88dSmrg
274af69d88dSmrg   ureg_MUL(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_Z),
275af69d88dSmrg            ureg_scalar(vpos, TGSI_SWIZZLE_Z), ureg_imm1f(shader, 0.5f));
276af69d88dSmrg   ureg_MOV(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W), ureg_imm1f(shader, -1.0f));
277af69d88dSmrg
278af69d88dSmrg   if (r->macroblock_size == VL_MACROBLOCK_HEIGHT) { //TODO
279af69d88dSmrg      ureg_IF(shader, ureg_scalar(vpos, TGSI_SWIZZLE_W), &label);
280af69d88dSmrg
281af69d88dSmrg         ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY),
282af69d88dSmrg                  ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_Y)),
283af69d88dSmrg                  ureg_imm2f(shader, 0.0f, scale.y),
284af69d88dSmrg                  ureg_imm2f(shader, -scale.y, 0.0f));
285af69d88dSmrg         ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z),
286af69d88dSmrg                  ureg_scalar(vpos, TGSI_SWIZZLE_Y), ureg_imm1f(shader, 0.5f));
287af69d88dSmrg
288af69d88dSmrg         ureg_FRC(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z), ureg_src(t_vtex));
289af69d88dSmrg
290af69d88dSmrg         ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y),
291af69d88dSmrg                  ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)),
292af69d88dSmrg                  ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_X),
293af69d88dSmrg                  ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Y));
294af69d88dSmrg         ureg_ADD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_Y),
295af69d88dSmrg                  ureg_src(t_vpos), ureg_src(t_vtex));
296af69d88dSmrg
297af69d88dSmrg         ureg_CMP(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W),
298af69d88dSmrg                  ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)),
299af69d88dSmrg                  ureg_imm1f(shader, 0.0f), ureg_imm1f(shader, 1.0f));
300af69d88dSmrg
301af69d88dSmrg      ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
302af69d88dSmrg      ureg_ENDIF(shader);
303af69d88dSmrg   }
304af69d88dSmrg
305af69d88dSmrg   ureg_release_temporary(shader, t_vtex);
306af69d88dSmrg   ureg_release_temporary(shader, t_vpos);
307af69d88dSmrg
308af69d88dSmrg   ureg_END(shader);
309af69d88dSmrg
310af69d88dSmrg   return ureg_create_shader_and_destroy(shader, r->pipe);
311af69d88dSmrg}
312af69d88dSmrg
313af69d88dSmrgstatic void *
314af69d88dSmrgcreate_ycbcr_frag_shader(struct vl_mc *r, float scale, bool invert,
315af69d88dSmrg                         vl_mc_ycbcr_frag_shader fs_callback, void *callback_priv)
316af69d88dSmrg{
317af69d88dSmrg   struct ureg_program *shader;
318af69d88dSmrg   struct ureg_src flags;
319af69d88dSmrg   struct ureg_dst tmp;
320af69d88dSmrg   struct ureg_dst fragment;
321af69d88dSmrg   unsigned label;
322af69d88dSmrg
32301e04c3fSmrg   shader = ureg_create(PIPE_SHADER_FRAGMENT);
324af69d88dSmrg   if (!shader)
325af69d88dSmrg      return NULL;
326af69d88dSmrg
327af69d88dSmrg   flags = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS, TGSI_INTERPOLATE_LINEAR);
328af69d88dSmrg
329af69d88dSmrg   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
330af69d88dSmrg
33101e04c3fSmrg   tmp = calc_line(r->pipe->screen, shader);
332af69d88dSmrg
333af69d88dSmrg   /*
334af69d88dSmrg    * if (field == tc.w)
335af69d88dSmrg    *    kill();
336af69d88dSmrg    * else {
337af69d88dSmrg    *    fragment.xyz  = tex(tc, sampler) * scale + tc.z
338af69d88dSmrg    *    fragment.w = 1.0f
339af69d88dSmrg    * }
340af69d88dSmrg    */
341af69d88dSmrg
342af69d88dSmrg   ureg_SEQ(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y),
343af69d88dSmrg            ureg_scalar(flags, TGSI_SWIZZLE_W), ureg_src(tmp));
344af69d88dSmrg
345af69d88dSmrg   ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
346af69d88dSmrg
347af69d88dSmrg      ureg_KILL(shader);
348af69d88dSmrg
349af69d88dSmrg   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
350af69d88dSmrg   ureg_ELSE(shader, &label);
351af69d88dSmrg
352af69d88dSmrg      fs_callback(callback_priv, r, shader, VS_O_VTEX, tmp);
353af69d88dSmrg
354af69d88dSmrg      if (scale != 1.0f)
355af69d88dSmrg         ureg_MAD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ),
356af69d88dSmrg                  ureg_src(tmp), ureg_imm1f(shader, scale),
357af69d88dSmrg                  ureg_scalar(flags, TGSI_SWIZZLE_Z));
358af69d88dSmrg      else
359af69d88dSmrg         ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ),
360af69d88dSmrg                  ureg_src(tmp), ureg_scalar(flags, TGSI_SWIZZLE_Z));
361af69d88dSmrg
362af69d88dSmrg      ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_imm1f(shader, invert ? -1.0f : 1.0f));
363af69d88dSmrg      ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
364af69d88dSmrg
365af69d88dSmrg   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
366af69d88dSmrg   ureg_ENDIF(shader);
367af69d88dSmrg
368af69d88dSmrg   ureg_release_temporary(shader, tmp);
369af69d88dSmrg
370af69d88dSmrg   ureg_END(shader);
371af69d88dSmrg
372af69d88dSmrg   return ureg_create_shader_and_destroy(shader, r->pipe);
373af69d88dSmrg}
374af69d88dSmrg
375af69d88dSmrgstatic bool
376af69d88dSmrginit_pipe_state(struct vl_mc *r)
377af69d88dSmrg{
378af69d88dSmrg   struct pipe_sampler_state sampler;
379af69d88dSmrg   struct pipe_blend_state blend;
380af69d88dSmrg   struct pipe_rasterizer_state rs_state;
381af69d88dSmrg   unsigned i;
382af69d88dSmrg
383af69d88dSmrg   assert(r);
384af69d88dSmrg
385af69d88dSmrg   memset(&sampler, 0, sizeof(sampler));
386af69d88dSmrg   sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
387af69d88dSmrg   sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
388af69d88dSmrg   sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
389af69d88dSmrg   sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
390af69d88dSmrg   sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
391af69d88dSmrg   sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
392af69d88dSmrg   sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
393af69d88dSmrg   sampler.compare_func = PIPE_FUNC_ALWAYS;
394af69d88dSmrg   sampler.normalized_coords = 1;
395af69d88dSmrg   r->sampler_ref = r->pipe->create_sampler_state(r->pipe, &sampler);
396af69d88dSmrg   if (!r->sampler_ref)
397af69d88dSmrg      goto error_sampler_ref;
398af69d88dSmrg
399af69d88dSmrg   for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) {
400af69d88dSmrg      memset(&blend, 0, sizeof blend);
401af69d88dSmrg      blend.independent_blend_enable = 0;
402af69d88dSmrg      blend.rt[0].blend_enable = 1;
403af69d88dSmrg      blend.rt[0].rgb_func = PIPE_BLEND_ADD;
404af69d88dSmrg      blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
405af69d88dSmrg      blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
406af69d88dSmrg      blend.rt[0].alpha_func = PIPE_BLEND_ADD;
407af69d88dSmrg      blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
408af69d88dSmrg      blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
409af69d88dSmrg      blend.logicop_enable = 0;
410af69d88dSmrg      blend.logicop_func = PIPE_LOGICOP_CLEAR;
411af69d88dSmrg      blend.rt[0].colormask = i;
412af69d88dSmrg      blend.dither = 0;
413af69d88dSmrg      r->blend_clear[i] = r->pipe->create_blend_state(r->pipe, &blend);
414af69d88dSmrg      if (!r->blend_clear[i])
415af69d88dSmrg         goto error_blend;
416af69d88dSmrg
417af69d88dSmrg      blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
418af69d88dSmrg      blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
419af69d88dSmrg      r->blend_add[i] = r->pipe->create_blend_state(r->pipe, &blend);
420af69d88dSmrg      if (!r->blend_add[i])
421af69d88dSmrg         goto error_blend;
422af69d88dSmrg
423af69d88dSmrg      blend.rt[0].rgb_func = PIPE_BLEND_REVERSE_SUBTRACT;
424af69d88dSmrg      blend.rt[0].alpha_dst_factor = PIPE_BLEND_REVERSE_SUBTRACT;
425af69d88dSmrg      r->blend_sub[i] = r->pipe->create_blend_state(r->pipe, &blend);
426af69d88dSmrg      if (!r->blend_sub[i])
427af69d88dSmrg         goto error_blend;
428af69d88dSmrg   }
429af69d88dSmrg
430af69d88dSmrg   memset(&rs_state, 0, sizeof(rs_state));
431af69d88dSmrg   /*rs_state.sprite_coord_enable */
432af69d88dSmrg   rs_state.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT;
433af69d88dSmrg   rs_state.point_quad_rasterization = true;
434af69d88dSmrg   rs_state.point_size = VL_BLOCK_WIDTH;
435af69d88dSmrg   rs_state.half_pixel_center = true;
436af69d88dSmrg   rs_state.bottom_edge_rule = true;
43701e04c3fSmrg   rs_state.depth_clip_near = 1;
43801e04c3fSmrg   rs_state.depth_clip_far = 1;
43901e04c3fSmrg
440af69d88dSmrg   r->rs_state = r->pipe->create_rasterizer_state(r->pipe, &rs_state);
441af69d88dSmrg   if (!r->rs_state)
442af69d88dSmrg      goto error_rs_state;
443af69d88dSmrg
444af69d88dSmrg   return true;
445af69d88dSmrg
446af69d88dSmrgerror_rs_state:
447af69d88dSmrgerror_blend:
448af69d88dSmrg   for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) {
449af69d88dSmrg      if (r->blend_sub[i])
450af69d88dSmrg         r->pipe->delete_blend_state(r->pipe, r->blend_sub[i]);
451af69d88dSmrg
452af69d88dSmrg      if (r->blend_add[i])
453af69d88dSmrg         r->pipe->delete_blend_state(r->pipe, r->blend_add[i]);
454af69d88dSmrg
455af69d88dSmrg      if (r->blend_clear[i])
456af69d88dSmrg         r->pipe->delete_blend_state(r->pipe, r->blend_clear[i]);
457af69d88dSmrg   }
458af69d88dSmrg
459af69d88dSmrg   r->pipe->delete_sampler_state(r->pipe, r->sampler_ref);
460af69d88dSmrg
461af69d88dSmrgerror_sampler_ref:
462af69d88dSmrg   return false;
463af69d88dSmrg}
464af69d88dSmrg
465af69d88dSmrgstatic void
466af69d88dSmrgcleanup_pipe_state(struct vl_mc *r)
467af69d88dSmrg{
468af69d88dSmrg   unsigned i;
469af69d88dSmrg
470af69d88dSmrg   assert(r);
471af69d88dSmrg
472af69d88dSmrg   r->pipe->delete_sampler_state(r->pipe, r->sampler_ref);
473af69d88dSmrg   for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) {
474af69d88dSmrg      r->pipe->delete_blend_state(r->pipe, r->blend_clear[i]);
475af69d88dSmrg      r->pipe->delete_blend_state(r->pipe, r->blend_add[i]);
476af69d88dSmrg      r->pipe->delete_blend_state(r->pipe, r->blend_sub[i]);
477af69d88dSmrg   }
478af69d88dSmrg   r->pipe->delete_rasterizer_state(r->pipe, r->rs_state);
479af69d88dSmrg}
480af69d88dSmrg
481af69d88dSmrgbool
482af69d88dSmrgvl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe,
483af69d88dSmrg           unsigned buffer_width, unsigned buffer_height,
484af69d88dSmrg           unsigned macroblock_size, float scale,
485af69d88dSmrg           vl_mc_ycbcr_vert_shader vs_callback,
486af69d88dSmrg           vl_mc_ycbcr_frag_shader fs_callback,
487af69d88dSmrg           void *callback_priv)
488af69d88dSmrg{
489af69d88dSmrg   assert(renderer);
490af69d88dSmrg   assert(pipe);
491af69d88dSmrg
492af69d88dSmrg   memset(renderer, 0, sizeof(struct vl_mc));
493af69d88dSmrg
494af69d88dSmrg   renderer->pipe = pipe;
495af69d88dSmrg   renderer->buffer_width = buffer_width;
496af69d88dSmrg   renderer->buffer_height = buffer_height;
497af69d88dSmrg   renderer->macroblock_size = macroblock_size;
498af69d88dSmrg
499af69d88dSmrg   if (!init_pipe_state(renderer))
500af69d88dSmrg      goto error_pipe_state;
501af69d88dSmrg
502af69d88dSmrg   renderer->vs_ref = create_ref_vert_shader(renderer);
503af69d88dSmrg   if (!renderer->vs_ref)
504af69d88dSmrg      goto error_vs_ref;
505af69d88dSmrg
506af69d88dSmrg   renderer->vs_ycbcr = create_ycbcr_vert_shader(renderer, vs_callback, callback_priv);
507af69d88dSmrg   if (!renderer->vs_ycbcr)
508af69d88dSmrg      goto error_vs_ycbcr;
509af69d88dSmrg
510af69d88dSmrg   renderer->fs_ref = create_ref_frag_shader(renderer);
511af69d88dSmrg   if (!renderer->fs_ref)
512af69d88dSmrg      goto error_fs_ref;
513af69d88dSmrg
514af69d88dSmrg   renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale, false, fs_callback, callback_priv);
515af69d88dSmrg   if (!renderer->fs_ycbcr)
516af69d88dSmrg      goto error_fs_ycbcr;
517af69d88dSmrg
518af69d88dSmrg   renderer->fs_ycbcr_sub = create_ycbcr_frag_shader(renderer, scale, true, fs_callback, callback_priv);
519af69d88dSmrg   if (!renderer->fs_ycbcr_sub)
520af69d88dSmrg      goto error_fs_ycbcr_sub;
521af69d88dSmrg
522af69d88dSmrg   return true;
523af69d88dSmrg
524af69d88dSmrgerror_fs_ycbcr_sub:
525af69d88dSmrg   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr);
526af69d88dSmrg
527af69d88dSmrgerror_fs_ycbcr:
528af69d88dSmrg   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref);
529af69d88dSmrg
530af69d88dSmrgerror_fs_ref:
531af69d88dSmrg   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ycbcr);
532af69d88dSmrg
533af69d88dSmrgerror_vs_ycbcr:
534af69d88dSmrg   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ref);
535af69d88dSmrg
536af69d88dSmrgerror_vs_ref:
537af69d88dSmrg   cleanup_pipe_state(renderer);
538af69d88dSmrg
539af69d88dSmrgerror_pipe_state:
540af69d88dSmrg   return false;
541af69d88dSmrg}
542af69d88dSmrg
543af69d88dSmrgvoid
544af69d88dSmrgvl_mc_cleanup(struct vl_mc *renderer)
545af69d88dSmrg{
546af69d88dSmrg   assert(renderer);
547af69d88dSmrg
548af69d88dSmrg   cleanup_pipe_state(renderer);
549af69d88dSmrg
550af69d88dSmrg   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ref);
551af69d88dSmrg   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ycbcr);
552af69d88dSmrg   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref);
553af69d88dSmrg   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr);
554af69d88dSmrg   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr_sub);
555af69d88dSmrg}
556af69d88dSmrg
557af69d88dSmrgbool
558af69d88dSmrgvl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer)
559af69d88dSmrg{
560af69d88dSmrg   assert(renderer && buffer);
561af69d88dSmrg
562af69d88dSmrg   buffer->viewport.scale[2] = 1;
563af69d88dSmrg   buffer->viewport.translate[0] = 0;
564af69d88dSmrg   buffer->viewport.translate[1] = 0;
565af69d88dSmrg   buffer->viewport.translate[2] = 0;
5667ec681f3Smrg   buffer->viewport.swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X;
5677ec681f3Smrg   buffer->viewport.swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y;
5687ec681f3Smrg   buffer->viewport.swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z;
5697ec681f3Smrg   buffer->viewport.swizzle_w = PIPE_VIEWPORT_SWIZZLE_POSITIVE_W;
570af69d88dSmrg
571af69d88dSmrg   buffer->fb_state.nr_cbufs = 1;
572af69d88dSmrg   buffer->fb_state.zsbuf = NULL;
573af69d88dSmrg
574af69d88dSmrg   return true;
575af69d88dSmrg}
576af69d88dSmrg
/*
 * Counterpart of vl_mc_init_buffer.
 *
 * vl_mc_init_buffer does not allocate anything, so there is nothing to
 * release here; the function only checks its argument.
 */
void
vl_mc_cleanup_buffer(struct vl_mc_buffer *buffer)
{
   assert(buffer);
}
582af69d88dSmrg
583af69d88dSmrgvoid
584af69d88dSmrgvl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface)
585af69d88dSmrg{
586af69d88dSmrg   assert(buffer && surface);
587af69d88dSmrg
588af69d88dSmrg   buffer->surface_cleared = false;
589af69d88dSmrg
590af69d88dSmrg   buffer->viewport.scale[0] = surface->width;
591af69d88dSmrg   buffer->viewport.scale[1] = surface->height;
592af69d88dSmrg
593af69d88dSmrg   buffer->fb_state.width = surface->width;
594af69d88dSmrg   buffer->fb_state.height = surface->height;
595af69d88dSmrg   buffer->fb_state.cbufs[0] = surface;
596af69d88dSmrg}
597af69d88dSmrg
598af69d88dSmrgstatic void
599af69d88dSmrgprepare_pipe_4_rendering(struct vl_mc *renderer, struct vl_mc_buffer *buffer, unsigned mask)
600af69d88dSmrg{
601af69d88dSmrg   assert(buffer);
602af69d88dSmrg
603af69d88dSmrg   renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
604af69d88dSmrg
605af69d88dSmrg   if (buffer->surface_cleared)
606af69d88dSmrg      renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_add[mask]);
607af69d88dSmrg   else
608af69d88dSmrg      renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_clear[mask]);
609af69d88dSmrg
610af69d88dSmrg   renderer->pipe->set_framebuffer_state(renderer->pipe, &buffer->fb_state);
611af69d88dSmrg   renderer->pipe->set_viewport_states(renderer->pipe, 0, 1, &buffer->viewport);
612af69d88dSmrg}
613af69d88dSmrg
614af69d88dSmrgvoid
615af69d88dSmrgvl_mc_render_ref(struct vl_mc *renderer, struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref)
616af69d88dSmrg{
617af69d88dSmrg   assert(buffer && ref);
618af69d88dSmrg
619af69d88dSmrg   prepare_pipe_4_rendering(renderer, buffer, PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B);
620af69d88dSmrg
621af69d88dSmrg   renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ref);
622af69d88dSmrg   renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ref);
623af69d88dSmrg
624af69d88dSmrg   renderer->pipe->set_sampler_views(renderer->pipe, PIPE_SHADER_FRAGMENT,
6257ec681f3Smrg                                     0, 1, 0, false, &ref);
626af69d88dSmrg   renderer->pipe->bind_sampler_states(renderer->pipe, PIPE_SHADER_FRAGMENT,
627af69d88dSmrg                                       0, 1, &renderer->sampler_ref);
628af69d88dSmrg
629af69d88dSmrg   util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0,
630af69d88dSmrg                              renderer->buffer_width / VL_MACROBLOCK_WIDTH *
631af69d88dSmrg                              renderer->buffer_height / VL_MACROBLOCK_HEIGHT);
632af69d88dSmrg
633af69d88dSmrg   buffer->surface_cleared = true;
634af69d88dSmrg}
635af69d88dSmrg
636af69d88dSmrgvoid
637af69d88dSmrgvl_mc_render_ycbcr(struct vl_mc *renderer, struct vl_mc_buffer *buffer, unsigned component, unsigned num_instances)
638af69d88dSmrg{
639af69d88dSmrg   unsigned mask = 1 << component;
640af69d88dSmrg
641af69d88dSmrg   assert(buffer);
642af69d88dSmrg
643af69d88dSmrg   if (num_instances == 0)
644af69d88dSmrg      return;
645af69d88dSmrg
646af69d88dSmrg   prepare_pipe_4_rendering(renderer, buffer, mask);
647af69d88dSmrg
648af69d88dSmrg   renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ycbcr);
649af69d88dSmrg   renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr);
650af69d88dSmrg
651af69d88dSmrg   util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
652af69d88dSmrg
653af69d88dSmrg   if (buffer->surface_cleared) {
654af69d88dSmrg      renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_sub[mask]);
655af69d88dSmrg      renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr_sub);
656af69d88dSmrg      util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
657af69d88dSmrg   }
658af69d88dSmrg}
659