1848b8605Smrg/**************************************************************************
2848b8605Smrg *
3848b8605Smrg * Copyright 2010 Christian König
4848b8605Smrg * All Rights Reserved.
5848b8605Smrg *
6848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a
7848b8605Smrg * copy of this software and associated documentation files (the
8848b8605Smrg * "Software"), to deal in the Software without restriction, including
9848b8605Smrg * without limitation the rights to use, copy, modify, merge, publish,
10848b8605Smrg * distribute, sub license, and/or sell copies of the Software, and to
11848b8605Smrg * permit persons to whom the Software is furnished to do so, subject to
12848b8605Smrg * the following conditions:
13848b8605Smrg *
14848b8605Smrg * The above copyright notice and this permission notice (including the
15848b8605Smrg * next paragraph) shall be included in all copies or substantial portions
16848b8605Smrg * of the Software.
17848b8605Smrg *
18848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19848b8605Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20848b8605Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21848b8605Smrg * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22848b8605Smrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23848b8605Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24848b8605Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25848b8605Smrg *
26848b8605Smrg **************************************************************************/
27848b8605Smrg
28848b8605Smrg#include <assert.h>
29848b8605Smrg
30848b8605Smrg#include "pipe/p_context.h"
31848b8605Smrg#include "pipe/p_screen.h"
32848b8605Smrg
33848b8605Smrg#include "util/u_draw.h"
34848b8605Smrg#include "util/u_sampler.h"
35848b8605Smrg#include "util/u_memory.h"
36848b8605Smrg
37848b8605Smrg#include "tgsi/tgsi_ureg.h"
38848b8605Smrg
39848b8605Smrg#include "vl_defines.h"
40848b8605Smrg#include "vl_types.h"
41848b8605Smrg#include "vl_vertex_buffers.h"
42848b8605Smrg#include "vl_idct.h"
43848b8605Smrg
/**
 * Output slot indices used by the IDCT vertex shaders in this file.
 *
 * VS_O_VPOS and VS_O_L_ADDR0 intentionally share the value 0: in this file
 * VS_O_VPOS is only used together with TGSI_SEMANTIC_POSITION, while the
 * address slots are used with TGSI_SEMANTIC_GENERIC.
 */
enum VS_OUTPUT
{
   VS_O_VPOS    = 0,
   VS_O_L_ADDR0 = 0,
   VS_O_L_ADDR1 = 1,
   VS_O_R_ADDR0 = 2,
   VS_O_R_ADDR1 = 3
};
52848b8605Smrg
/**
 * The 8x8 DCT matrix, stored as the hexadecimal representation of floats.
 *
 * The table corresponds to the following computation:
 *
 *    for (i = 0; i < 8; ++i)
 *       for (j = 0; j < 8; ++j)
 *          if (i == 0)
 *             const_matrix[i][j] = 1.0f / sqrtf(8.0f);
 *          else
 *             const_matrix[i][j] = sqrtf(2.0f / 8.0f) *
 *                                  cosf((2 * j + 1) * i * M_PI / (2.0f * 8.0f));
 */
static const uint32_t const_matrix[8][8] = {
   [0] = { 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3 },
   [1] = { 0x3efb14be, 0x3ed4db31, 0x3e8e39da, 0x3dc7c5c4, 0xbdc7c5c2, 0xbe8e39d9, 0xbed4db32, 0xbefb14bf },
   [2] = { 0x3eec835f, 0x3e43ef15, 0xbe43ef14, 0xbeec835e, 0xbeec835f, 0xbe43ef1a, 0x3e43ef1b, 0x3eec835f },
   [3] = { 0x3ed4db31, 0xbdc7c5c2, 0xbefb14bf, 0xbe8e39dd, 0x3e8e39d7, 0x3efb14bf, 0x3dc7c5d0, 0xbed4db34 },
   [4] = { 0x3eb504f3, 0xbeb504f3, 0xbeb504f4, 0x3eb504f1, 0x3eb504f3, 0xbeb504f0, 0xbeb504ef, 0x3eb504f4 },
   [5] = { 0x3e8e39da, 0xbefb14bf, 0x3dc7c5c8, 0x3ed4db32, 0xbed4db34, 0xbdc7c5bb, 0x3efb14bf, 0xbe8e39d7 },
   [6] = { 0x3e43ef15, 0xbeec835f, 0x3eec835f, 0xbe43ef07, 0xbe43ef23, 0x3eec8361, 0xbeec835c, 0x3e43ef25 },
   [7] = { 0x3dc7c5c4, 0xbe8e39dd, 0x3ed4db32, 0xbefb14c0, 0x3efb14be, 0xbed4db31, 0x3e8e39ce, 0xbdc7c596 },
};
70848b8605Smrg
71848b8605Smrgstatic void
72848b8605Smrgcalc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
73848b8605Smrg          struct ureg_src tc, struct ureg_src start, bool right_side,
74848b8605Smrg          bool transposed, float size)
75848b8605Smrg{
76848b8605Smrg   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
77848b8605Smrg   unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X;
78848b8605Smrg
79848b8605Smrg   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
80848b8605Smrg   unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y;
81848b8605Smrg
82848b8605Smrg   /*
83848b8605Smrg    * addr[0..1].(start) = right_side ? start.x : tc.x
84848b8605Smrg    * addr[0..1].(tc) = right_side ? tc.y : start.y
85848b8605Smrg    * addr[0..1].z = tc.z
86848b8605Smrg    * addr[1].(start) += 1.0f / scale
87848b8605Smrg    */
88848b8605Smrg   ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start));
89848b8605Smrg   ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc));
90848b8605Smrg
91848b8605Smrg   ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size));
92848b8605Smrg   ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc));
93848b8605Smrg}
94848b8605Smrg
95848b8605Smrgstatic void
96848b8605Smrgincrement_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
97848b8605Smrg               struct ureg_src saddr[2], bool right_side, bool transposed,
98848b8605Smrg               int pos, float size)
99848b8605Smrg{
100848b8605Smrg   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
101848b8605Smrg   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
102848b8605Smrg
103848b8605Smrg   /*
104848b8605Smrg    * daddr[0..1].(start) = saddr[0..1].(start)
105848b8605Smrg    * daddr[0..1].(tc) = saddr[0..1].(tc)
106848b8605Smrg    */
107848b8605Smrg
108848b8605Smrg   ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);
109848b8605Smrg   ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));
110848b8605Smrg   ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);
111848b8605Smrg   ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size));
112848b8605Smrg}
113848b8605Smrg
114848b8605Smrgstatic void
115848b8605Smrgfetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2],
116848b8605Smrg           struct ureg_src sampler, bool resource3d)
117848b8605Smrg{
118848b8605Smrg   ureg_TEX(shader, m[0], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[0], sampler);
119848b8605Smrg   ureg_TEX(shader, m[1], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[1], sampler);
120848b8605Smrg}
121848b8605Smrg
122848b8605Smrgstatic void
123848b8605Smrgmatrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
124848b8605Smrg{
125848b8605Smrg   struct ureg_dst tmp;
126848b8605Smrg
127848b8605Smrg   tmp = ureg_DECL_temporary(shader);
128848b8605Smrg
129848b8605Smrg   /*
130848b8605Smrg    * tmp.xy = dot4(m[0][0..1], m[1][0..1])
131848b8605Smrg    * dst = tmp.x + tmp.y
132848b8605Smrg    */
133848b8605Smrg   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
134848b8605Smrg   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1]));
135848b8605Smrg   ureg_ADD(shader, dst,
136848b8605Smrg      ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X),
137848b8605Smrg      ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
138848b8605Smrg
139848b8605Smrg   ureg_release_temporary(shader, tmp);
140848b8605Smrg}
141848b8605Smrg
142848b8605Smrgstatic void *
143848b8605Smrgcreate_mismatch_vert_shader(struct vl_idct *idct)
144848b8605Smrg{
145848b8605Smrg   struct ureg_program *shader;
146848b8605Smrg   struct ureg_src vpos;
147848b8605Smrg   struct ureg_src scale;
148848b8605Smrg   struct ureg_dst t_tex;
149848b8605Smrg   struct ureg_dst o_vpos, o_addr[2];
150848b8605Smrg
151b8e80941Smrg   shader = ureg_create(PIPE_SHADER_VERTEX);
152848b8605Smrg   if (!shader)
153848b8605Smrg      return NULL;
154848b8605Smrg
155848b8605Smrg   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
156848b8605Smrg
157848b8605Smrg   t_tex = ureg_DECL_temporary(shader);
158848b8605Smrg
159848b8605Smrg   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
160848b8605Smrg
161848b8605Smrg   o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
162848b8605Smrg   o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
163848b8605Smrg
164848b8605Smrg   /*
165848b8605Smrg    * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)
166848b8605Smrg    *
167848b8605Smrg    * t_vpos = vpos + 7 / VL_BLOCK_WIDTH
168848b8605Smrg    * o_vpos.xy = t_vpos * scale
169848b8605Smrg    *
170848b8605Smrg    * o_addr = calc_addr(...)
171848b8605Smrg    *
172848b8605Smrg    */
173848b8605Smrg
174848b8605Smrg   scale = ureg_imm2f(shader,
175848b8605Smrg      (float)VL_BLOCK_WIDTH / idct->buffer_width,
176848b8605Smrg      (float)VL_BLOCK_HEIGHT / idct->buffer_height);
177848b8605Smrg
178848b8605Smrg   ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale);
179848b8605Smrg   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
180848b8605Smrg
181848b8605Smrg   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale);
182848b8605Smrg   calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4);
183848b8605Smrg
184848b8605Smrg   ureg_release_temporary(shader, t_tex);
185848b8605Smrg
186848b8605Smrg   ureg_END(shader);
187848b8605Smrg
188848b8605Smrg   return ureg_create_shader_and_destroy(shader, idct->pipe);
189848b8605Smrg}
190848b8605Smrg
191848b8605Smrgstatic void *
192848b8605Smrgcreate_mismatch_frag_shader(struct vl_idct *idct)
193848b8605Smrg{
194848b8605Smrg   struct ureg_program *shader;
195848b8605Smrg
196848b8605Smrg   struct ureg_src addr[2];
197848b8605Smrg
198848b8605Smrg   struct ureg_dst m[8][2];
199848b8605Smrg   struct ureg_dst fragment;
200848b8605Smrg
201848b8605Smrg   unsigned i;
202848b8605Smrg
203b8e80941Smrg   shader = ureg_create(PIPE_SHADER_FRAGMENT);
204848b8605Smrg   if (!shader)
205848b8605Smrg      return NULL;
206848b8605Smrg
207848b8605Smrg   addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
208848b8605Smrg   addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
209848b8605Smrg
210848b8605Smrg   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
211848b8605Smrg
212848b8605Smrg   for (i = 0; i < 8; ++i) {
213848b8605Smrg      m[i][0] = ureg_DECL_temporary(shader);
214848b8605Smrg      m[i][1] = ureg_DECL_temporary(shader);
215848b8605Smrg   }
216848b8605Smrg
217848b8605Smrg   for (i = 0; i < 8; ++i) {
218848b8605Smrg      increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height);
219848b8605Smrg   }
220848b8605Smrg
221848b8605Smrg   for (i = 0; i < 8; ++i) {
222848b8605Smrg      struct ureg_src s_addr[2];
223848b8605Smrg      s_addr[0] = ureg_src(m[i][0]);
224848b8605Smrg      s_addr[1] = ureg_src(m[i][1]);
225848b8605Smrg      fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false);
226848b8605Smrg   }
227848b8605Smrg
228848b8605Smrg   for (i = 1; i < 8; ++i) {
229848b8605Smrg      ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0]));
230848b8605Smrg      ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1]));
231848b8605Smrg   }
232848b8605Smrg
233848b8605Smrg   ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1]));
234848b8605Smrg   ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14));
235848b8605Smrg
236848b8605Smrg   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14));
237848b8605Smrg   ureg_FRC(shader, m[0][0], ureg_src(m[0][0]));
238848b8605Smrg   ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0])));
239848b8605Smrg
240848b8605Smrg   ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])),
241848b8605Smrg            ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15)));
242848b8605Smrg   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]),
243848b8605Smrg            ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X));
244848b8605Smrg
245848b8605Smrg   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1]));
246848b8605Smrg   ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1]));
247848b8605Smrg
248848b8605Smrg   for (i = 0; i < 8; ++i) {
249848b8605Smrg      ureg_release_temporary(shader, m[i][0]);
250848b8605Smrg      ureg_release_temporary(shader, m[i][1]);
251848b8605Smrg   }
252848b8605Smrg
253848b8605Smrg   ureg_END(shader);
254848b8605Smrg
255848b8605Smrg   return ureg_create_shader_and_destroy(shader, idct->pipe);
256848b8605Smrg}
257848b8605Smrg
258848b8605Smrgstatic void *
259848b8605Smrgcreate_stage1_vert_shader(struct vl_idct *idct)
260848b8605Smrg{
261848b8605Smrg   struct ureg_program *shader;
262848b8605Smrg   struct ureg_src vrect, vpos;
263848b8605Smrg   struct ureg_src scale;
264848b8605Smrg   struct ureg_dst t_tex, t_start;
265848b8605Smrg   struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];
266848b8605Smrg
267b8e80941Smrg   shader = ureg_create(PIPE_SHADER_VERTEX);
268848b8605Smrg   if (!shader)
269848b8605Smrg      return NULL;
270848b8605Smrg
271848b8605Smrg   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
272848b8605Smrg   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
273848b8605Smrg
274848b8605Smrg   t_tex = ureg_DECL_temporary(shader);
275848b8605Smrg   t_start = ureg_DECL_temporary(shader);
276848b8605Smrg
277848b8605Smrg   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
278848b8605Smrg
279848b8605Smrg   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
280848b8605Smrg   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
281848b8605Smrg
282848b8605Smrg   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0);
283848b8605Smrg   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1);
284848b8605Smrg
285848b8605Smrg   /*
286848b8605Smrg    * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height)
287848b8605Smrg    *
288848b8605Smrg    * t_vpos = vpos + vrect
289848b8605Smrg    * o_vpos.xy = t_vpos * scale
290848b8605Smrg    * o_vpos.zw = vpos
291848b8605Smrg    *
292848b8605Smrg    * o_l_addr = calc_addr(...)
293848b8605Smrg    * o_r_addr = calc_addr(...)
294848b8605Smrg    *
295848b8605Smrg    */
296848b8605Smrg
297848b8605Smrg   scale = ureg_imm2f(shader,
298848b8605Smrg      (float)VL_BLOCK_WIDTH / idct->buffer_width,
299848b8605Smrg      (float)VL_BLOCK_HEIGHT / idct->buffer_height);
300848b8605Smrg
301848b8605Smrg   ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect);
302848b8605Smrg   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
303848b8605Smrg
304848b8605Smrg   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
305848b8605Smrg   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
306848b8605Smrg
307848b8605Smrg   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
308848b8605Smrg
309848b8605Smrg   calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
310848b8605Smrg   calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, VL_BLOCK_WIDTH / 4);
311848b8605Smrg
312848b8605Smrg   ureg_release_temporary(shader, t_tex);
313848b8605Smrg   ureg_release_temporary(shader, t_start);
314848b8605Smrg
315848b8605Smrg   ureg_END(shader);
316848b8605Smrg
317848b8605Smrg   return ureg_create_shader_and_destroy(shader, idct->pipe);
318848b8605Smrg}
319848b8605Smrg
320848b8605Smrgstatic void *
321848b8605Smrgcreate_stage1_frag_shader(struct vl_idct *idct)
322848b8605Smrg{
323848b8605Smrg   struct ureg_program *shader;
324848b8605Smrg   struct ureg_src l_addr[2], r_addr[2];
325848b8605Smrg   struct ureg_dst l[4][2], r[2];
326848b8605Smrg   struct ureg_dst *fragment;
327b8e80941Smrg   unsigned i;
328b8e80941Smrg   int j;
329848b8605Smrg
330b8e80941Smrg   shader = ureg_create(PIPE_SHADER_FRAGMENT);
331848b8605Smrg   if (!shader)
332848b8605Smrg      return NULL;
333848b8605Smrg
334848b8605Smrg   fragment = MALLOC(idct->nr_of_render_targets * sizeof(struct ureg_dst));
335848b8605Smrg
336848b8605Smrg   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
337848b8605Smrg   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
338848b8605Smrg
339848b8605Smrg   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
340848b8605Smrg   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
341848b8605Smrg
342848b8605Smrg   for (i = 0; i < idct->nr_of_render_targets; ++i)
343848b8605Smrg       fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
344848b8605Smrg
345848b8605Smrg   for (i = 0; i < 4; ++i) {
346848b8605Smrg      l[i][0] = ureg_DECL_temporary(shader);
347848b8605Smrg      l[i][1] = ureg_DECL_temporary(shader);
348848b8605Smrg   }
349848b8605Smrg
350848b8605Smrg   r[0] = ureg_DECL_temporary(shader);
351848b8605Smrg   r[1] = ureg_DECL_temporary(shader);
352848b8605Smrg
353848b8605Smrg   for (i = 0; i < 4; ++i) {
354848b8605Smrg      increment_addr(shader, l[i], l_addr, false, false, i - 2, idct->buffer_height);
355848b8605Smrg   }
356848b8605Smrg
357848b8605Smrg   for (i = 0; i < 4; ++i) {
358848b8605Smrg      struct ureg_src s_addr[2];
359848b8605Smrg      s_addr[0] = ureg_src(l[i][0]);
360848b8605Smrg      s_addr[1] = ureg_src(l[i][1]);
361848b8605Smrg      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false);
362848b8605Smrg   }
363848b8605Smrg
364848b8605Smrg   for (i = 0; i < idct->nr_of_render_targets; ++i) {
365848b8605Smrg      struct ureg_src s_addr[2];
366848b8605Smrg
367848b8605Smrg      increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, VL_BLOCK_HEIGHT);
368848b8605Smrg
369848b8605Smrg      s_addr[0] = ureg_src(r[0]);
370848b8605Smrg      s_addr[1] = ureg_src(r[1]);
371848b8605Smrg      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false);
372848b8605Smrg
373848b8605Smrg      for (j = 0; j < 4; ++j) {
374848b8605Smrg         matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
375848b8605Smrg      }
376848b8605Smrg   }
377848b8605Smrg
378848b8605Smrg   for (i = 0; i < 4; ++i) {
379848b8605Smrg      ureg_release_temporary(shader, l[i][0]);
380848b8605Smrg      ureg_release_temporary(shader, l[i][1]);
381848b8605Smrg   }
382848b8605Smrg   ureg_release_temporary(shader, r[0]);
383848b8605Smrg   ureg_release_temporary(shader, r[1]);
384848b8605Smrg
385848b8605Smrg   ureg_END(shader);
386848b8605Smrg
387848b8605Smrg   FREE(fragment);
388848b8605Smrg
389848b8605Smrg   return ureg_create_shader_and_destroy(shader, idct->pipe);
390848b8605Smrg}
391848b8605Smrg
392848b8605Smrgvoid
393848b8605Smrgvl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader,
394848b8605Smrg                           unsigned first_output, struct ureg_dst tex)
395848b8605Smrg{
396848b8605Smrg   struct ureg_src vrect, vpos;
397848b8605Smrg   struct ureg_src scale;
398848b8605Smrg   struct ureg_dst t_start;
399848b8605Smrg   struct ureg_dst o_l_addr[2], o_r_addr[2];
400848b8605Smrg
401848b8605Smrg   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
402848b8605Smrg   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
403848b8605Smrg
404848b8605Smrg   t_start = ureg_DECL_temporary(shader);
405848b8605Smrg
406848b8605Smrg   --first_output;
407848b8605Smrg
408848b8605Smrg   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0);
409848b8605Smrg   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1);
410848b8605Smrg
411848b8605Smrg   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0);
412848b8605Smrg   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1);
413848b8605Smrg
414848b8605Smrg   scale = ureg_imm2f(shader,
415848b8605Smrg      (float)VL_BLOCK_WIDTH / idct->buffer_width,
416848b8605Smrg      (float)VL_BLOCK_HEIGHT / idct->buffer_height);
417848b8605Smrg
418848b8605Smrg   ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z),
419848b8605Smrg      ureg_scalar(vrect, TGSI_SWIZZLE_X),
420848b8605Smrg      ureg_imm1f(shader, VL_BLOCK_WIDTH / idct->nr_of_render_targets));
421848b8605Smrg   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
422848b8605Smrg
423848b8605Smrg   calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, VL_BLOCK_WIDTH / 4);
424848b8605Smrg   calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
425848b8605Smrg
426848b8605Smrg   ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex));
427848b8605Smrg   ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex));
428848b8605Smrg}
429848b8605Smrg
430848b8605Smrgvoid
431848b8605Smrgvl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
432848b8605Smrg                           unsigned first_input, struct ureg_dst fragment)
433848b8605Smrg{
434848b8605Smrg   struct ureg_src l_addr[2], r_addr[2];
435848b8605Smrg
436848b8605Smrg   struct ureg_dst l[2], r[2];
437848b8605Smrg
438848b8605Smrg   --first_input;
439848b8605Smrg
440848b8605Smrg   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
441848b8605Smrg   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
442848b8605Smrg
443848b8605Smrg   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
444848b8605Smrg   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
445848b8605Smrg
446848b8605Smrg   l[0] = ureg_DECL_temporary(shader);
447848b8605Smrg   l[1] = ureg_DECL_temporary(shader);
448848b8605Smrg   r[0] = ureg_DECL_temporary(shader);
449848b8605Smrg   r[1] = ureg_DECL_temporary(shader);
450848b8605Smrg
451848b8605Smrg   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false);
452848b8605Smrg   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true);
453848b8605Smrg
454848b8605Smrg   matrix_mul(shader, fragment, l, r);
455848b8605Smrg
456848b8605Smrg   ureg_release_temporary(shader, l[0]);
457848b8605Smrg   ureg_release_temporary(shader, l[1]);
458848b8605Smrg   ureg_release_temporary(shader, r[0]);
459848b8605Smrg   ureg_release_temporary(shader, r[1]);
460848b8605Smrg}
461848b8605Smrg
462848b8605Smrgstatic bool
463848b8605Smrginit_shaders(struct vl_idct *idct)
464848b8605Smrg{
465848b8605Smrg   idct->vs_mismatch = create_mismatch_vert_shader(idct);
466848b8605Smrg   if (!idct->vs_mismatch)
467848b8605Smrg      goto error_vs_mismatch;
468848b8605Smrg
469848b8605Smrg   idct->fs_mismatch = create_mismatch_frag_shader(idct);
470848b8605Smrg   if (!idct->fs_mismatch)
471848b8605Smrg      goto error_fs_mismatch;
472848b8605Smrg
473848b8605Smrg   idct->vs = create_stage1_vert_shader(idct);
474848b8605Smrg   if (!idct->vs)
475848b8605Smrg      goto error_vs;
476848b8605Smrg
477848b8605Smrg   idct->fs = create_stage1_frag_shader(idct);
478848b8605Smrg   if (!idct->fs)
479848b8605Smrg      goto error_fs;
480848b8605Smrg
481848b8605Smrg   return true;
482848b8605Smrg
483848b8605Smrgerror_fs:
484848b8605Smrg   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
485848b8605Smrg
486848b8605Smrgerror_vs:
487848b8605Smrg   idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
488848b8605Smrg
489848b8605Smrgerror_fs_mismatch:
490848b8605Smrg   idct->pipe->delete_vs_state(idct->pipe, idct->fs);
491848b8605Smrg
492848b8605Smrgerror_vs_mismatch:
493848b8605Smrg   return false;
494848b8605Smrg}
495848b8605Smrg
496848b8605Smrgstatic void
497848b8605Smrgcleanup_shaders(struct vl_idct *idct)
498848b8605Smrg{
499848b8605Smrg   idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
500848b8605Smrg   idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch);
501848b8605Smrg   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
502848b8605Smrg   idct->pipe->delete_fs_state(idct->pipe, idct->fs);
503848b8605Smrg}
504848b8605Smrg
505848b8605Smrgstatic bool
506848b8605Smrginit_state(struct vl_idct *idct)
507848b8605Smrg{
508848b8605Smrg   struct pipe_blend_state blend;
509848b8605Smrg   struct pipe_rasterizer_state rs_state;
510848b8605Smrg   struct pipe_sampler_state sampler;
511848b8605Smrg   unsigned i;
512848b8605Smrg
513848b8605Smrg   assert(idct);
514848b8605Smrg
515848b8605Smrg   memset(&rs_state, 0, sizeof(rs_state));
516848b8605Smrg   rs_state.point_size = 1;
517848b8605Smrg   rs_state.half_pixel_center = true;
518848b8605Smrg   rs_state.bottom_edge_rule = true;
519b8e80941Smrg   rs_state.depth_clip_near = 1;
520b8e80941Smrg   rs_state.depth_clip_far = 1;
521b8e80941Smrg
522848b8605Smrg   idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
523848b8605Smrg   if (!idct->rs_state)
524848b8605Smrg      goto error_rs_state;
525848b8605Smrg
526848b8605Smrg   memset(&blend, 0, sizeof blend);
527848b8605Smrg
528848b8605Smrg   blend.independent_blend_enable = 0;
529848b8605Smrg   blend.rt[0].blend_enable = 0;
530848b8605Smrg   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
531848b8605Smrg   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
532848b8605Smrg   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
533848b8605Smrg   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
534848b8605Smrg   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
535848b8605Smrg   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
536848b8605Smrg   blend.logicop_enable = 0;
537848b8605Smrg   blend.logicop_func = PIPE_LOGICOP_CLEAR;
538848b8605Smrg   /* Needed to allow color writes to FB, even if blending disabled */
539848b8605Smrg   blend.rt[0].colormask = PIPE_MASK_RGBA;
540848b8605Smrg   blend.dither = 0;
541848b8605Smrg   idct->blend = idct->pipe->create_blend_state(idct->pipe, &blend);
542848b8605Smrg   if (!idct->blend)
543848b8605Smrg      goto error_blend;
544848b8605Smrg
545848b8605Smrg   for (i = 0; i < 2; ++i) {
546848b8605Smrg      memset(&sampler, 0, sizeof(sampler));
547848b8605Smrg      sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
548848b8605Smrg      sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
549848b8605Smrg      sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
550848b8605Smrg      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
551848b8605Smrg      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
552848b8605Smrg      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
553848b8605Smrg      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
554848b8605Smrg      sampler.compare_func = PIPE_FUNC_ALWAYS;
555848b8605Smrg      sampler.normalized_coords = 1;
556848b8605Smrg      idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
557848b8605Smrg      if (!idct->samplers[i])
558848b8605Smrg         goto error_samplers;
559848b8605Smrg   }
560848b8605Smrg
561848b8605Smrg   return true;
562848b8605Smrg
563848b8605Smrgerror_samplers:
564848b8605Smrg   for (i = 0; i < 2; ++i)
565848b8605Smrg      if (idct->samplers[i])
566848b8605Smrg         idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);
567848b8605Smrg
568848b8605Smrg   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
569848b8605Smrg
570848b8605Smrgerror_blend:
571848b8605Smrg   idct->pipe->delete_blend_state(idct->pipe, idct->blend);
572848b8605Smrg
573848b8605Smrgerror_rs_state:
574848b8605Smrg   return false;
575848b8605Smrg}
576848b8605Smrg
577848b8605Smrgstatic void
578848b8605Smrgcleanup_state(struct vl_idct *idct)
579848b8605Smrg{
580848b8605Smrg   unsigned i;
581848b8605Smrg
582848b8605Smrg   for (i = 0; i < 2; ++i)
583848b8605Smrg      idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);
584848b8605Smrg
585848b8605Smrg   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
586848b8605Smrg   idct->pipe->delete_blend_state(idct->pipe, idct->blend);
587848b8605Smrg}
588848b8605Smrg
589848b8605Smrgstatic bool
590848b8605Smrginit_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
591848b8605Smrg{
592848b8605Smrg   struct pipe_resource *tex;
593848b8605Smrg   struct pipe_surface surf_templ;
594848b8605Smrg
595848b8605Smrg   assert(idct && buffer);
596848b8605Smrg
597848b8605Smrg   tex = buffer->sampler_views.individual.source->texture;
598848b8605Smrg
599848b8605Smrg   buffer->fb_state_mismatch.width = tex->width0;
600848b8605Smrg   buffer->fb_state_mismatch.height = tex->height0;
601848b8605Smrg   buffer->fb_state_mismatch.nr_cbufs = 1;
602848b8605Smrg
603848b8605Smrg   memset(&surf_templ, 0, sizeof(surf_templ));
604848b8605Smrg   surf_templ.format = tex->format;
605848b8605Smrg   surf_templ.u.tex.first_layer = 0;
606848b8605Smrg   surf_templ.u.tex.last_layer = 0;
607848b8605Smrg   buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ);
608848b8605Smrg
609848b8605Smrg   buffer->viewport_mismatch.scale[0] = tex->width0;
610848b8605Smrg   buffer->viewport_mismatch.scale[1] = tex->height0;
611848b8605Smrg   buffer->viewport_mismatch.scale[2] = 1;
612848b8605Smrg
613848b8605Smrg   return true;
614848b8605Smrg}
615848b8605Smrg
616848b8605Smrgstatic void
617848b8605Smrgcleanup_source(struct vl_idct_buffer *buffer)
618848b8605Smrg{
619848b8605Smrg   assert(buffer);
620848b8605Smrg
621848b8605Smrg   pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL);
622848b8605Smrg
623848b8605Smrg   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, NULL);
624848b8605Smrg}
625848b8605Smrg
626848b8605Smrgstatic bool
627848b8605Smrginit_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
628848b8605Smrg{
629848b8605Smrg   struct pipe_resource *tex;
630848b8605Smrg   struct pipe_surface surf_templ;
631848b8605Smrg   unsigned i;
632848b8605Smrg
633848b8605Smrg   assert(idct && buffer);
634848b8605Smrg
635848b8605Smrg   tex = buffer->sampler_views.individual.intermediate->texture;
636848b8605Smrg
637848b8605Smrg   buffer->fb_state.width = tex->width0;
638848b8605Smrg   buffer->fb_state.height = tex->height0;
639848b8605Smrg   buffer->fb_state.nr_cbufs = idct->nr_of_render_targets;
640848b8605Smrg   for(i = 0; i < idct->nr_of_render_targets; ++i) {
641848b8605Smrg      memset(&surf_templ, 0, sizeof(surf_templ));
642848b8605Smrg      surf_templ.format = tex->format;
643848b8605Smrg      surf_templ.u.tex.first_layer = i;
644848b8605Smrg      surf_templ.u.tex.last_layer = i;
645848b8605Smrg      buffer->fb_state.cbufs[i] = idct->pipe->create_surface(
646848b8605Smrg         idct->pipe, tex, &surf_templ);
647848b8605Smrg
648848b8605Smrg      if (!buffer->fb_state.cbufs[i])
649848b8605Smrg         goto error_surfaces;
650848b8605Smrg   }
651848b8605Smrg
652848b8605Smrg   buffer->viewport.scale[0] = tex->width0;
653848b8605Smrg   buffer->viewport.scale[1] = tex->height0;
654848b8605Smrg   buffer->viewport.scale[2] = 1;
655848b8605Smrg
656848b8605Smrg   return true;
657848b8605Smrg
658848b8605Smrgerror_surfaces:
659848b8605Smrg   for(i = 0; i < idct->nr_of_render_targets; ++i)
660848b8605Smrg      pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);
661848b8605Smrg
662848b8605Smrg   return false;
663848b8605Smrg}
664848b8605Smrg
665848b8605Smrgstatic void
666848b8605Smrgcleanup_intermediate(struct vl_idct_buffer *buffer)
667848b8605Smrg{
668848b8605Smrg   unsigned i;
669848b8605Smrg
670848b8605Smrg   assert(buffer);
671848b8605Smrg
672848b8605Smrg   for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i)
673848b8605Smrg      pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);
674848b8605Smrg
675848b8605Smrg   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);
676848b8605Smrg}
677848b8605Smrg
678848b8605Smrgstruct pipe_sampler_view *
679848b8605Smrgvl_idct_upload_matrix(struct pipe_context *pipe, float scale)
680848b8605Smrg{
681848b8605Smrg   struct pipe_resource tex_templ, *matrix;
682848b8605Smrg   struct pipe_sampler_view sv_templ, *sv;
683848b8605Smrg   struct pipe_transfer *buf_transfer;
684848b8605Smrg   unsigned i, j, pitch;
685848b8605Smrg   float *f;
686848b8605Smrg
687848b8605Smrg   struct pipe_box rect =
688848b8605Smrg   {
689848b8605Smrg      0, 0, 0,
690848b8605Smrg      VL_BLOCK_WIDTH / 4,
691848b8605Smrg      VL_BLOCK_HEIGHT,
692848b8605Smrg      1
693848b8605Smrg   };
694848b8605Smrg
695848b8605Smrg   assert(pipe);
696848b8605Smrg
697848b8605Smrg   memset(&tex_templ, 0, sizeof(tex_templ));
698848b8605Smrg   tex_templ.target = PIPE_TEXTURE_2D;
699848b8605Smrg   tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
700848b8605Smrg   tex_templ.last_level = 0;
701848b8605Smrg   tex_templ.width0 = 2;
702848b8605Smrg   tex_templ.height0 = 8;
703848b8605Smrg   tex_templ.depth0 = 1;
704848b8605Smrg   tex_templ.array_size = 1;
705848b8605Smrg   tex_templ.usage = PIPE_USAGE_IMMUTABLE;
706848b8605Smrg   tex_templ.bind = PIPE_BIND_SAMPLER_VIEW;
707848b8605Smrg   tex_templ.flags = 0;
708848b8605Smrg
709848b8605Smrg   matrix = pipe->screen->resource_create(pipe->screen, &tex_templ);
710848b8605Smrg   if (!matrix)
711848b8605Smrg      goto error_matrix;
712848b8605Smrg
713848b8605Smrg   f = pipe->transfer_map(pipe, matrix, 0,
714848b8605Smrg                                     PIPE_TRANSFER_WRITE |
715848b8605Smrg                                     PIPE_TRANSFER_DISCARD_RANGE,
716848b8605Smrg                                     &rect, &buf_transfer);
717848b8605Smrg   if (!f)
718848b8605Smrg      goto error_map;
719848b8605Smrg
720848b8605Smrg   pitch = buf_transfer->stride / sizeof(float);
721848b8605Smrg
722848b8605Smrg   for(i = 0; i < VL_BLOCK_HEIGHT; ++i)
723848b8605Smrg      for(j = 0; j < VL_BLOCK_WIDTH; ++j)
724848b8605Smrg         // transpose and scale
725848b8605Smrg         f[i * pitch + j] = ((const float (*)[8])const_matrix)[j][i] * scale;
726848b8605Smrg
727848b8605Smrg   pipe->transfer_unmap(pipe, buf_transfer);
728848b8605Smrg
729848b8605Smrg   memset(&sv_templ, 0, sizeof(sv_templ));
730848b8605Smrg   u_sampler_view_default_template(&sv_templ, matrix, matrix->format);
731848b8605Smrg   sv = pipe->create_sampler_view(pipe, matrix, &sv_templ);
732848b8605Smrg   pipe_resource_reference(&matrix, NULL);
733848b8605Smrg   if (!sv)
734848b8605Smrg      goto error_map;
735848b8605Smrg
736848b8605Smrg   return sv;
737848b8605Smrg
738848b8605Smrgerror_map:
739848b8605Smrg   pipe_resource_reference(&matrix, NULL);
740848b8605Smrg
741848b8605Smrgerror_matrix:
742848b8605Smrg   return NULL;
743848b8605Smrg}
744848b8605Smrg
745848b8605Smrgbool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
746848b8605Smrg                  unsigned buffer_width, unsigned buffer_height,
747848b8605Smrg                  unsigned nr_of_render_targets,
748848b8605Smrg                  struct pipe_sampler_view *matrix,
749848b8605Smrg                  struct pipe_sampler_view *transpose)
750848b8605Smrg{
751848b8605Smrg   assert(idct && pipe);
752848b8605Smrg   assert(matrix && transpose);
753848b8605Smrg
754848b8605Smrg   idct->pipe = pipe;
755848b8605Smrg   idct->buffer_width = buffer_width;
756848b8605Smrg   idct->buffer_height = buffer_height;
757848b8605Smrg   idct->nr_of_render_targets = nr_of_render_targets;
758848b8605Smrg
759848b8605Smrg   pipe_sampler_view_reference(&idct->matrix, matrix);
760848b8605Smrg   pipe_sampler_view_reference(&idct->transpose, transpose);
761848b8605Smrg
762848b8605Smrg   if(!init_shaders(idct))
763848b8605Smrg      return false;
764848b8605Smrg
765848b8605Smrg   if(!init_state(idct)) {
766848b8605Smrg      cleanup_shaders(idct);
767848b8605Smrg      return false;
768848b8605Smrg   }
769848b8605Smrg
770848b8605Smrg   return true;
771848b8605Smrg}
772848b8605Smrg
773848b8605Smrgvoid
774848b8605Smrgvl_idct_cleanup(struct vl_idct *idct)
775848b8605Smrg{
776848b8605Smrg   cleanup_shaders(idct);
777848b8605Smrg   cleanup_state(idct);
778848b8605Smrg
779848b8605Smrg   pipe_sampler_view_reference(&idct->matrix, NULL);
780848b8605Smrg   pipe_sampler_view_reference(&idct->transpose, NULL);
781848b8605Smrg}
782848b8605Smrg
783848b8605Smrgbool
784848b8605Smrgvl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
785848b8605Smrg                    struct pipe_sampler_view *source,
786848b8605Smrg                    struct pipe_sampler_view *intermediate)
787848b8605Smrg{
788848b8605Smrg   assert(buffer && idct);
789848b8605Smrg   assert(source && intermediate);
790848b8605Smrg
791848b8605Smrg   memset(buffer, 0, sizeof(struct vl_idct_buffer));
792848b8605Smrg
793848b8605Smrg   pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix);
794848b8605Smrg   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);
795848b8605Smrg   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose);
796848b8605Smrg   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate);
797848b8605Smrg
798848b8605Smrg   if (!init_source(idct, buffer))
799848b8605Smrg      return false;
800848b8605Smrg
801848b8605Smrg   if (!init_intermediate(idct, buffer))
802848b8605Smrg      return false;
803848b8605Smrg
804848b8605Smrg   return true;
805848b8605Smrg}
806848b8605Smrg
807848b8605Smrgvoid
808848b8605Smrgvl_idct_cleanup_buffer(struct vl_idct_buffer *buffer)
809848b8605Smrg{
810848b8605Smrg   assert(buffer);
811848b8605Smrg
812848b8605Smrg   cleanup_source(buffer);
813848b8605Smrg   cleanup_intermediate(buffer);
814848b8605Smrg
815848b8605Smrg   pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL);
816848b8605Smrg   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL);
817848b8605Smrg}
818848b8605Smrg
819848b8605Smrgvoid
820848b8605Smrgvl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances)
821848b8605Smrg{
822848b8605Smrg   assert(buffer);
823848b8605Smrg
824848b8605Smrg   idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
825848b8605Smrg   idct->pipe->bind_blend_state(idct->pipe, idct->blend);
826848b8605Smrg
827848b8605Smrg   idct->pipe->bind_sampler_states(idct->pipe, PIPE_SHADER_FRAGMENT,
828848b8605Smrg                                   0, 2, idct->samplers);
829848b8605Smrg
830848b8605Smrg   idct->pipe->set_sampler_views(idct->pipe, PIPE_SHADER_FRAGMENT, 0, 2,
831848b8605Smrg                                 buffer->sampler_views.stage[0]);
832848b8605Smrg
833848b8605Smrg   /* mismatch control */
834848b8605Smrg   idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state_mismatch);
835848b8605Smrg   idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport_mismatch);
836848b8605Smrg   idct->pipe->bind_vs_state(idct->pipe, idct->vs_mismatch);
837848b8605Smrg   idct->pipe->bind_fs_state(idct->pipe, idct->fs_mismatch);
838848b8605Smrg   util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_POINTS, 0, 1, 0, num_instances);
839848b8605Smrg
840848b8605Smrg   /* first stage */
841848b8605Smrg   idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state);
842848b8605Smrg   idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport);
843848b8605Smrg   idct->pipe->bind_vs_state(idct->pipe, idct->vs);
844848b8605Smrg   idct->pipe->bind_fs_state(idct->pipe, idct->fs);
845848b8605Smrg   util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
846848b8605Smrg}
847848b8605Smrg
848848b8605Smrgvoid
849848b8605Smrgvl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer)
850848b8605Smrg{
851848b8605Smrg   assert(buffer);
852848b8605Smrg
853848b8605Smrg   /* second stage */
854848b8605Smrg   idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
855848b8605Smrg   idct->pipe->bind_sampler_states(idct->pipe, PIPE_SHADER_FRAGMENT,
856848b8605Smrg                                   0, 2, idct->samplers);
857848b8605Smrg   idct->pipe->set_sampler_views(idct->pipe, PIPE_SHADER_FRAGMENT,
858848b8605Smrg                                 0, 2, buffer->sampler_views.stage[1]);
859848b8605Smrg}
860848b8605Smrg
861