1af69d88dSmrg/**************************************************************************
2af69d88dSmrg *
3af69d88dSmrg * Copyright 2011 Christian König
4af69d88dSmrg * All Rights Reserved.
5af69d88dSmrg *
6af69d88dSmrg * Permission is hereby granted, free of charge, to any person obtaining a
7af69d88dSmrg * copy of this software and associated documentation files (the
8af69d88dSmrg * "Software"), to deal in the Software without restriction, including
9af69d88dSmrg * without limitation the rights to use, copy, modify, merge, publish,
10af69d88dSmrg * distribute, sub license, and/or sell copies of the Software, and to
11af69d88dSmrg * permit persons to whom the Software is furnished to do so, subject to
12af69d88dSmrg * the following conditions:
13af69d88dSmrg *
14af69d88dSmrg * The above copyright notice and this permission notice (including the
15af69d88dSmrg * next paragraph) shall be included in all copies or substantial portions
16af69d88dSmrg * of the Software.
17af69d88dSmrg *
18af69d88dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19af69d88dSmrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20af69d88dSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21af69d88dSmrg * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22af69d88dSmrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23af69d88dSmrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24af69d88dSmrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25af69d88dSmrg *
26af69d88dSmrg **************************************************************************/
27af69d88dSmrg
28af69d88dSmrg#include <assert.h>
29af69d88dSmrg
30af69d88dSmrg#include "pipe/p_screen.h"
31af69d88dSmrg#include "pipe/p_context.h"
32af69d88dSmrg
33af69d88dSmrg#include "util/u_draw.h"
34af69d88dSmrg#include "util/u_sampler.h"
35af69d88dSmrg#include "util/u_inlines.h"
36af69d88dSmrg#include "util/u_memory.h"
37af69d88dSmrg
38af69d88dSmrg#include "tgsi/tgsi_ureg.h"
39af69d88dSmrg
40af69d88dSmrg#include "vl_defines.h"
41af69d88dSmrg#include "vl_types.h"
42af69d88dSmrg
43af69d88dSmrg#include "vl_zscan.h"
44af69d88dSmrg#include "vl_vertex_buffers.h"
45af69d88dSmrg
/*
 * Output slots of the zscan vertex shader.
 *
 * Both enumerators are 0 on purpose: they are indices within two different
 * TGSI semantics (POSITION and GENERIC), so they do not collide.
 */
enum VS_OUTPUT
{
   VS_O_VPOS = 0, /* index of the TGSI_SEMANTIC_POSITION output */
   VS_O_VTEX = 0  /* first index of the TGSI_SEMANTIC_GENERIC outputs */
};
51af69d88dSmrg
/*
 * Zig-zag scan for a 4x4 block: entry k is the row-major index
 * (row * 4 + column) visited at step k of the scan.
 */
const int vl_zscan_normal_16[] =
{
    0,  1,  4,  8,
    5,  2,  3,  6,
    9, 12, 13, 10,
    7, 11, 14, 15
};
5801e04c3fSmrg
/*
 * Identity (linear) scan for an 8x8 block: entry k is simply k, i.e. the
 * coefficients are already stored in row-major order.
 */
const int vl_zscan_linear[] =
{
    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63
};
71af69d88dSmrg
/*
 * Zig-zag scan for an 8x8 block: entry k is the row-major index
 * (row * 8 + column) visited at step k of the scan.
 */
const int vl_zscan_normal[] =
{
    0,  1,  8, 16,  9,  2,  3, 10,
   17, 24, 32, 25, 18, 11,  4,  5,
   12, 19, 26, 33, 40, 48, 41, 34,
   27, 20, 13,  6,  7, 14, 21, 28,
   35, 42, 49, 56, 57, 50, 43, 36,
   29, 22, 15, 23, 30, 37, 44, 51,
   58, 59, 52, 45, 38, 31, 39, 46,
   53, 60, 61, 54, 47, 55, 62, 63
};
84af69d88dSmrg
/*
 * Alternate scan for an 8x8 block: entry k is the row-major index
 * (row * 8 + column) visited at step k of the scan.
 */
const int vl_zscan_alternate[] =
{
    0,  8, 16, 24,  1,  9,  2, 10,
   17, 25, 32, 40, 48, 56, 57, 49,
   41, 33, 26, 18,  3, 11,  4, 12,
   19, 27, 34, 42, 50, 58, 35, 43,
   51, 59, 20, 28,  5, 13,  6, 14,
   21, 29, 36, 44, 52, 60, 37, 45,
   53, 61, 22, 30,  7, 15, 23, 31,
   38, 46, 54, 62, 39, 47, 55, 63
};
97af69d88dSmrg
/*
 * Up-right diagonal scan order for 4x4 blocks - see H.265 section 6.5.3.
 * Entry k is the row-major index (row * 4 + column) visited at step k.
 */
const int vl_zscan_h265_up_right_diagonal_16[] =
{
    0,  4,  1,  8,
    5,  2, 12,  9,
    6,  3, 13, 10,
    7, 14, 11, 15
};
10401e04c3fSmrg
/*
 * Up-right diagonal scan order for 8x8 blocks - see H.265 section 6.5.3.
 * Entry k is the row-major index (row * 8 + column) visited at step k.
 */
const int vl_zscan_h265_up_right_diagonal[] =
{
    0,  8,  1, 16,  9,  2, 24, 17,
   10,  3, 32, 25, 18, 11,  4, 40,
   33, 26, 19, 12,  5, 48, 41, 34,
   27, 20, 13,  6, 56, 49, 42, 35,
   28, 21, 14,  7, 57, 50, 43, 36,
   29, 22, 15, 58, 51, 44, 37, 30,
   23, 59, 52, 45, 38, 31, 60, 53,
   46, 39, 61, 54, 47, 62, 55, 63
};
11701e04c3fSmrg
11801e04c3fSmrg
119af69d88dSmrgstatic void *
120af69d88dSmrgcreate_vert_shader(struct vl_zscan *zscan)
121af69d88dSmrg{
122af69d88dSmrg   struct ureg_program *shader;
123af69d88dSmrg   struct ureg_src scale;
124af69d88dSmrg   struct ureg_src vrect, vpos, block_num;
125af69d88dSmrg   struct ureg_dst tmp;
126af69d88dSmrg   struct ureg_dst o_vpos;
127af69d88dSmrg   struct ureg_dst *o_vtex;
12801e04c3fSmrg   unsigned i;
129af69d88dSmrg
13001e04c3fSmrg   shader = ureg_create(PIPE_SHADER_VERTEX);
131af69d88dSmrg   if (!shader)
132af69d88dSmrg      return NULL;
133af69d88dSmrg
134af69d88dSmrg   o_vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_dst));
135af69d88dSmrg
136af69d88dSmrg   scale = ureg_imm2f(shader,
137af69d88dSmrg      (float)VL_BLOCK_WIDTH / zscan->buffer_width,
138af69d88dSmrg      (float)VL_BLOCK_HEIGHT / zscan->buffer_height);
139af69d88dSmrg
140af69d88dSmrg   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
141af69d88dSmrg   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
142af69d88dSmrg   block_num = ureg_DECL_vs_input(shader, VS_I_BLOCK_NUM);
143af69d88dSmrg
144af69d88dSmrg   tmp = ureg_DECL_temporary(shader);
145af69d88dSmrg
146af69d88dSmrg   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
147af69d88dSmrg
148af69d88dSmrg   for (i = 0; i < zscan->num_channels; ++i)
149af69d88dSmrg      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i);
150af69d88dSmrg
151af69d88dSmrg   /*
152af69d88dSmrg    * o_vpos.xy = (vpos + vrect) * scale
153af69d88dSmrg    * o_vpos.zw = 1.0f
154af69d88dSmrg    *
155af69d88dSmrg    * tmp.xy = InstanceID / blocks_per_line
156af69d88dSmrg    * tmp.x = frac(tmp.x)
157af69d88dSmrg    * tmp.y = floor(tmp.y)
158af69d88dSmrg    *
159af69d88dSmrg    * o_vtex.x = vrect.x / blocks_per_line + tmp.x
160af69d88dSmrg    * o_vtex.y = vrect.y
161af69d88dSmrg    * o_vtex.z = tmp.z * blocks_per_line / blocks_total
162af69d88dSmrg    */
163af69d88dSmrg   ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect);
164af69d88dSmrg   ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale);
165af69d88dSmrg   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
166af69d88dSmrg
167af69d88dSmrg   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(block_num, TGSI_SWIZZLE_X),
168af69d88dSmrg            ureg_imm1f(shader, 1.0f / zscan->blocks_per_line));
169af69d88dSmrg
170af69d88dSmrg   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
171af69d88dSmrg   ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_src(tmp));
172af69d88dSmrg
173af69d88dSmrg   for (i = 0; i < zscan->num_channels; ++i) {
174af69d88dSmrg      ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y),
175af69d88dSmrg               ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * VL_BLOCK_WIDTH)
17601e04c3fSmrg                * ((signed)i - (signed)zscan->num_channels / 2)));
177af69d88dSmrg
178af69d88dSmrg      ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
179af69d88dSmrg               ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
180af69d88dSmrg      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect);
181af69d88dSmrg      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), vpos);
182af69d88dSmrg      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_W), ureg_src(tmp),
183af69d88dSmrg               ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total));
184af69d88dSmrg   }
185af69d88dSmrg
186af69d88dSmrg   ureg_release_temporary(shader, tmp);
187af69d88dSmrg   ureg_END(shader);
188af69d88dSmrg
189af69d88dSmrg   FREE(o_vtex);
190af69d88dSmrg
191af69d88dSmrg   return ureg_create_shader_and_destroy(shader, zscan->pipe);
192af69d88dSmrg}
193af69d88dSmrg
194af69d88dSmrgstatic void *
195af69d88dSmrgcreate_frag_shader(struct vl_zscan *zscan)
196af69d88dSmrg{
197af69d88dSmrg   struct ureg_program *shader;
198af69d88dSmrg   struct ureg_src *vtex;
199af69d88dSmrg
200af69d88dSmrg   struct ureg_src samp_src, samp_scan, samp_quant;
201af69d88dSmrg
202af69d88dSmrg   struct ureg_dst *tmp;
203af69d88dSmrg   struct ureg_dst quant, fragment;
204af69d88dSmrg
205af69d88dSmrg   unsigned i;
206af69d88dSmrg
20701e04c3fSmrg   shader = ureg_create(PIPE_SHADER_FRAGMENT);
208af69d88dSmrg   if (!shader)
209af69d88dSmrg      return NULL;
210af69d88dSmrg
211af69d88dSmrg   vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_src));
212af69d88dSmrg   tmp = MALLOC(zscan->num_channels * sizeof(struct ureg_dst));
213af69d88dSmrg
214af69d88dSmrg   for (i = 0; i < zscan->num_channels; ++i)
215af69d88dSmrg      vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR);
216af69d88dSmrg
217af69d88dSmrg   samp_src = ureg_DECL_sampler(shader, 0);
218af69d88dSmrg   samp_scan = ureg_DECL_sampler(shader, 1);
219af69d88dSmrg   samp_quant = ureg_DECL_sampler(shader, 2);
220af69d88dSmrg
221af69d88dSmrg   for (i = 0; i < zscan->num_channels; ++i)
222af69d88dSmrg      tmp[i] = ureg_DECL_temporary(shader);
223af69d88dSmrg   quant = ureg_DECL_temporary(shader);
224af69d88dSmrg
225af69d88dSmrg   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
226af69d88dSmrg
227af69d88dSmrg   /*
228af69d88dSmrg    * tmp.x = tex(vtex, 1)
229af69d88dSmrg    * tmp.y = vtex.z
230af69d88dSmrg    * fragment = tex(tmp, 0) * quant
231af69d88dSmrg    */
232af69d88dSmrg   for (i = 0; i < zscan->num_channels; ++i)
233af69d88dSmrg      ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], samp_scan);
234af69d88dSmrg
235af69d88dSmrg   for (i = 0; i < zscan->num_channels; ++i)
236af69d88dSmrg      ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_W));
237af69d88dSmrg
238af69d88dSmrg   for (i = 0; i < zscan->num_channels; ++i) {
239af69d88dSmrg      ureg_TEX(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, ureg_src(tmp[i]), samp_src);
240af69d88dSmrg      ureg_TEX(shader, ureg_writemask(quant, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, vtex[i], samp_quant);
241af69d88dSmrg   }
242af69d88dSmrg
243af69d88dSmrg   ureg_MUL(shader, quant, ureg_src(quant), ureg_imm1f(shader, 16.0f));
244af69d88dSmrg   ureg_MUL(shader, fragment, ureg_src(tmp[0]), ureg_src(quant));
245af69d88dSmrg
246af69d88dSmrg   for (i = 0; i < zscan->num_channels; ++i)
247af69d88dSmrg      ureg_release_temporary(shader, tmp[i]);
248af69d88dSmrg   ureg_END(shader);
249af69d88dSmrg
250af69d88dSmrg   FREE(vtex);
251af69d88dSmrg   FREE(tmp);
252af69d88dSmrg
253af69d88dSmrg   return ureg_create_shader_and_destroy(shader, zscan->pipe);
254af69d88dSmrg}
255af69d88dSmrg
256af69d88dSmrgstatic bool
257af69d88dSmrginit_shaders(struct vl_zscan *zscan)
258af69d88dSmrg{
259af69d88dSmrg   assert(zscan);
260af69d88dSmrg
261af69d88dSmrg   zscan->vs = create_vert_shader(zscan);
262af69d88dSmrg   if (!zscan->vs)
263af69d88dSmrg      goto error_vs;
264af69d88dSmrg
265af69d88dSmrg   zscan->fs = create_frag_shader(zscan);
266af69d88dSmrg   if (!zscan->fs)
267af69d88dSmrg      goto error_fs;
268af69d88dSmrg
269af69d88dSmrg   return true;
270af69d88dSmrg
271af69d88dSmrgerror_fs:
272af69d88dSmrg   zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs);
273af69d88dSmrg
274af69d88dSmrgerror_vs:
275af69d88dSmrg   return false;
276af69d88dSmrg}
277af69d88dSmrg
278af69d88dSmrgstatic void
279af69d88dSmrgcleanup_shaders(struct vl_zscan *zscan)
280af69d88dSmrg{
281af69d88dSmrg   assert(zscan);
282af69d88dSmrg
283af69d88dSmrg   zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs);
284af69d88dSmrg   zscan->pipe->delete_fs_state(zscan->pipe, zscan->fs);
285af69d88dSmrg}
286af69d88dSmrg
287af69d88dSmrgstatic bool
288af69d88dSmrginit_state(struct vl_zscan *zscan)
289af69d88dSmrg{
290af69d88dSmrg   struct pipe_blend_state blend;
291af69d88dSmrg   struct pipe_rasterizer_state rs_state;
292af69d88dSmrg   struct pipe_sampler_state sampler;
293af69d88dSmrg   unsigned i;
294af69d88dSmrg
295af69d88dSmrg   assert(zscan);
296af69d88dSmrg
297af69d88dSmrg   memset(&rs_state, 0, sizeof(rs_state));
298af69d88dSmrg   rs_state.half_pixel_center = true;
299af69d88dSmrg   rs_state.bottom_edge_rule = true;
30001e04c3fSmrg   rs_state.depth_clip_near = 1;
30101e04c3fSmrg   rs_state.depth_clip_far = 1;
30201e04c3fSmrg
303af69d88dSmrg   zscan->rs_state = zscan->pipe->create_rasterizer_state(zscan->pipe, &rs_state);
304af69d88dSmrg   if (!zscan->rs_state)
305af69d88dSmrg      goto error_rs_state;
306af69d88dSmrg
307af69d88dSmrg   memset(&blend, 0, sizeof blend);
308af69d88dSmrg
309af69d88dSmrg   blend.independent_blend_enable = 0;
310af69d88dSmrg   blend.rt[0].blend_enable = 0;
311af69d88dSmrg   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
312af69d88dSmrg   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
313af69d88dSmrg   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
314af69d88dSmrg   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
315af69d88dSmrg   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
316af69d88dSmrg   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
317af69d88dSmrg   blend.logicop_enable = 0;
318af69d88dSmrg   blend.logicop_func = PIPE_LOGICOP_CLEAR;
319af69d88dSmrg   /* Needed to allow color writes to FB, even if blending disabled */
320af69d88dSmrg   blend.rt[0].colormask = PIPE_MASK_RGBA;
321af69d88dSmrg   blend.dither = 0;
322af69d88dSmrg   zscan->blend = zscan->pipe->create_blend_state(zscan->pipe, &blend);
323af69d88dSmrg   if (!zscan->blend)
324af69d88dSmrg      goto error_blend;
325af69d88dSmrg
326af69d88dSmrg   for (i = 0; i < 3; ++i) {
327af69d88dSmrg      memset(&sampler, 0, sizeof(sampler));
328af69d88dSmrg      sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
329af69d88dSmrg      sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
330af69d88dSmrg      sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
331af69d88dSmrg      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
332af69d88dSmrg      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
333af69d88dSmrg      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
334af69d88dSmrg      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
335af69d88dSmrg      sampler.compare_func = PIPE_FUNC_ALWAYS;
336af69d88dSmrg      sampler.normalized_coords = 1;
337af69d88dSmrg      zscan->samplers[i] = zscan->pipe->create_sampler_state(zscan->pipe, &sampler);
338af69d88dSmrg      if (!zscan->samplers[i])
339af69d88dSmrg         goto error_samplers;
340af69d88dSmrg   }
341af69d88dSmrg
342af69d88dSmrg   return true;
343af69d88dSmrg
344af69d88dSmrgerror_samplers:
345af69d88dSmrg   for (i = 0; i < 2; ++i)
346af69d88dSmrg      if (zscan->samplers[i])
347af69d88dSmrg         zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]);
348af69d88dSmrg
349af69d88dSmrg   zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state);
350af69d88dSmrg
351af69d88dSmrgerror_blend:
352af69d88dSmrg   zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend);
353af69d88dSmrg
354af69d88dSmrgerror_rs_state:
355af69d88dSmrg   return false;
356af69d88dSmrg}
357af69d88dSmrg
358af69d88dSmrgstatic void
359af69d88dSmrgcleanup_state(struct vl_zscan *zscan)
360af69d88dSmrg{
361af69d88dSmrg   unsigned i;
362af69d88dSmrg
363af69d88dSmrg   assert(zscan);
364af69d88dSmrg
365af69d88dSmrg   for (i = 0; i < 3; ++i)
366af69d88dSmrg      zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]);
367af69d88dSmrg
368af69d88dSmrg   zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state);
369af69d88dSmrg   zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend);
370af69d88dSmrg}
371af69d88dSmrg
372af69d88dSmrgstruct pipe_sampler_view *
373af69d88dSmrgvl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks_per_line)
374af69d88dSmrg{
375af69d88dSmrg   const unsigned total_size = blocks_per_line * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;
376af69d88dSmrg
377af69d88dSmrg   int patched_layout[64];
378af69d88dSmrg
379af69d88dSmrg   struct pipe_resource res_tmpl, *res;
380af69d88dSmrg   struct pipe_sampler_view sv_tmpl, *sv;
381af69d88dSmrg   struct pipe_transfer *buf_transfer;
382af69d88dSmrg   unsigned x, y, i, pitch;
383af69d88dSmrg   float *f;
384af69d88dSmrg
385af69d88dSmrg   struct pipe_box rect =
386af69d88dSmrg   {
387af69d88dSmrg      0, 0, 0,
388af69d88dSmrg      VL_BLOCK_WIDTH * blocks_per_line,
389af69d88dSmrg      VL_BLOCK_HEIGHT,
390af69d88dSmrg      1
391af69d88dSmrg   };
392af69d88dSmrg
393af69d88dSmrg   assert(pipe && layout && blocks_per_line);
394af69d88dSmrg
395af69d88dSmrg   for (i = 0; i < 64; ++i)
396af69d88dSmrg      patched_layout[layout[i]] = i;
397af69d88dSmrg
398af69d88dSmrg   memset(&res_tmpl, 0, sizeof(res_tmpl));
399af69d88dSmrg   res_tmpl.target = PIPE_TEXTURE_2D;
400af69d88dSmrg   res_tmpl.format = PIPE_FORMAT_R32_FLOAT;
401af69d88dSmrg   res_tmpl.width0 = VL_BLOCK_WIDTH * blocks_per_line;
402af69d88dSmrg   res_tmpl.height0 = VL_BLOCK_HEIGHT;
403af69d88dSmrg   res_tmpl.depth0 = 1;
404af69d88dSmrg   res_tmpl.array_size = 1;
405af69d88dSmrg   res_tmpl.usage = PIPE_USAGE_IMMUTABLE;
406af69d88dSmrg   res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;
407af69d88dSmrg
408af69d88dSmrg   res = pipe->screen->resource_create(pipe->screen, &res_tmpl);
409af69d88dSmrg   if (!res)
410af69d88dSmrg      goto error_resource;
411af69d88dSmrg
4127ec681f3Smrg   f = pipe->texture_map(pipe, res,
4137ec681f3Smrg                          0, PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE,
414af69d88dSmrg                          &rect, &buf_transfer);
415af69d88dSmrg   if (!f)
416af69d88dSmrg      goto error_map;
417af69d88dSmrg
418af69d88dSmrg   pitch = buf_transfer->stride / sizeof(float);
419af69d88dSmrg
420af69d88dSmrg   for (i = 0; i < blocks_per_line; ++i)
421af69d88dSmrg      for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
422af69d88dSmrg         for (x = 0; x < VL_BLOCK_WIDTH; ++x) {
423af69d88dSmrg            float addr = patched_layout[x + y * VL_BLOCK_WIDTH] +
424af69d88dSmrg               i * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;
425af69d88dSmrg
426af69d88dSmrg            addr /= total_size;
427af69d88dSmrg
428af69d88dSmrg            f[i * VL_BLOCK_WIDTH + y * pitch + x] = addr;
429af69d88dSmrg         }
430af69d88dSmrg
4317ec681f3Smrg   pipe->texture_unmap(pipe, buf_transfer);
432af69d88dSmrg
433af69d88dSmrg   memset(&sv_tmpl, 0, sizeof(sv_tmpl));
434af69d88dSmrg   u_sampler_view_default_template(&sv_tmpl, res, res->format);
435af69d88dSmrg   sv = pipe->create_sampler_view(pipe, res, &sv_tmpl);
436af69d88dSmrg   pipe_resource_reference(&res, NULL);
437af69d88dSmrg   if (!sv)
438af69d88dSmrg      goto error_map;
439af69d88dSmrg
440af69d88dSmrg   return sv;
441af69d88dSmrg
442af69d88dSmrgerror_map:
443af69d88dSmrg   pipe_resource_reference(&res, NULL);
444af69d88dSmrg
445af69d88dSmrgerror_resource:
446af69d88dSmrg   return NULL;
447af69d88dSmrg}
448af69d88dSmrg
449af69d88dSmrgbool
450af69d88dSmrgvl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
451af69d88dSmrg              unsigned buffer_width, unsigned buffer_height,
452af69d88dSmrg              unsigned blocks_per_line, unsigned blocks_total,
453af69d88dSmrg              unsigned num_channels)
454af69d88dSmrg{
455af69d88dSmrg   assert(zscan && pipe);
456af69d88dSmrg
457af69d88dSmrg   zscan->pipe = pipe;
458af69d88dSmrg   zscan->buffer_width = buffer_width;
459af69d88dSmrg   zscan->buffer_height = buffer_height;
460af69d88dSmrg   zscan->num_channels = num_channels;
461af69d88dSmrg   zscan->blocks_per_line = blocks_per_line;
462af69d88dSmrg   zscan->blocks_total = blocks_total;
463af69d88dSmrg
464af69d88dSmrg   if(!init_shaders(zscan))
465af69d88dSmrg      return false;
466af69d88dSmrg
467af69d88dSmrg   if(!init_state(zscan)) {
468af69d88dSmrg      cleanup_shaders(zscan);
469af69d88dSmrg      return false;
470af69d88dSmrg   }
471af69d88dSmrg
472af69d88dSmrg   return true;
473af69d88dSmrg}
474af69d88dSmrg
/**
 * Release everything vl_zscan_init() created: first the shaders, then the
 * state objects.
 */
void
vl_zscan_cleanup(struct vl_zscan *zscan)
{
   assert(zscan);

   cleanup_shaders(zscan);

   cleanup_state(zscan);
}
483af69d88dSmrg
484af69d88dSmrgbool
485af69d88dSmrgvl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
486af69d88dSmrg                     struct pipe_sampler_view *src, struct pipe_surface *dst)
487af69d88dSmrg{
488af69d88dSmrg   struct pipe_resource res_tmpl, *res;
489af69d88dSmrg   struct pipe_sampler_view sv_tmpl;
490af69d88dSmrg
491af69d88dSmrg   assert(zscan && buffer);
492af69d88dSmrg
493af69d88dSmrg   memset(buffer, 0, sizeof(struct vl_zscan_buffer));
494af69d88dSmrg
495af69d88dSmrg   pipe_sampler_view_reference(&buffer->src, src);
496af69d88dSmrg
497af69d88dSmrg   buffer->viewport.scale[0] = dst->width;
498af69d88dSmrg   buffer->viewport.scale[1] = dst->height;
499af69d88dSmrg   buffer->viewport.scale[2] = 1;
500af69d88dSmrg   buffer->viewport.translate[0] = 0;
501af69d88dSmrg   buffer->viewport.translate[1] = 0;
502af69d88dSmrg   buffer->viewport.translate[2] = 0;
5037ec681f3Smrg   buffer->viewport.swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X;
5047ec681f3Smrg   buffer->viewport.swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y;
5057ec681f3Smrg   buffer->viewport.swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z;
5067ec681f3Smrg   buffer->viewport.swizzle_w = PIPE_VIEWPORT_SWIZZLE_POSITIVE_W;
507af69d88dSmrg
508af69d88dSmrg   buffer->fb_state.width = dst->width;
509af69d88dSmrg   buffer->fb_state.height = dst->height;
510af69d88dSmrg   buffer->fb_state.nr_cbufs = 1;
511af69d88dSmrg   pipe_surface_reference(&buffer->fb_state.cbufs[0], dst);
512af69d88dSmrg
513af69d88dSmrg   memset(&res_tmpl, 0, sizeof(res_tmpl));
514af69d88dSmrg   res_tmpl.target = PIPE_TEXTURE_3D;
515af69d88dSmrg   res_tmpl.format = PIPE_FORMAT_R8_UNORM;
516af69d88dSmrg   res_tmpl.width0 = VL_BLOCK_WIDTH * zscan->blocks_per_line;
517af69d88dSmrg   res_tmpl.height0 = VL_BLOCK_HEIGHT;
518af69d88dSmrg   res_tmpl.depth0 = 2;
519af69d88dSmrg   res_tmpl.array_size = 1;
520af69d88dSmrg   res_tmpl.usage = PIPE_USAGE_IMMUTABLE;
521af69d88dSmrg   res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;
522af69d88dSmrg
523af69d88dSmrg   res = zscan->pipe->screen->resource_create(zscan->pipe->screen, &res_tmpl);
524af69d88dSmrg   if (!res)
525af69d88dSmrg      return false;
526af69d88dSmrg
527af69d88dSmrg   memset(&sv_tmpl, 0, sizeof(sv_tmpl));
528af69d88dSmrg   u_sampler_view_default_template(&sv_tmpl, res, res->format);
529af69d88dSmrg   sv_tmpl.swizzle_r = sv_tmpl.swizzle_g = sv_tmpl.swizzle_b = sv_tmpl.swizzle_a = TGSI_SWIZZLE_X;
530af69d88dSmrg   buffer->quant = zscan->pipe->create_sampler_view(zscan->pipe, res, &sv_tmpl);
531af69d88dSmrg   pipe_resource_reference(&res, NULL);
532af69d88dSmrg   if (!buffer->quant)
533af69d88dSmrg      return false;
534af69d88dSmrg
535af69d88dSmrg   return true;
536af69d88dSmrg}
537af69d88dSmrg
538af69d88dSmrgvoid
539af69d88dSmrgvl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer)
540af69d88dSmrg{
541af69d88dSmrg   assert(buffer);
542af69d88dSmrg
543af69d88dSmrg   pipe_sampler_view_reference(&buffer->src, NULL);
544af69d88dSmrg   pipe_sampler_view_reference(&buffer->layout, NULL);
545af69d88dSmrg   pipe_sampler_view_reference(&buffer->quant, NULL);
546af69d88dSmrg   pipe_surface_reference(&buffer->fb_state.cbufs[0], NULL);
547af69d88dSmrg}
548af69d88dSmrg
549af69d88dSmrgvoid
550af69d88dSmrgvl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout)
551af69d88dSmrg{
552af69d88dSmrg   assert(buffer);
553af69d88dSmrg   assert(layout);
554af69d88dSmrg
555af69d88dSmrg   pipe_sampler_view_reference(&buffer->layout, layout);
556af69d88dSmrg}
557af69d88dSmrg
558af69d88dSmrgvoid
559af69d88dSmrgvl_zscan_upload_quant(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
560af69d88dSmrg                      const uint8_t matrix[64], bool intra)
561af69d88dSmrg{
562af69d88dSmrg   struct pipe_context *pipe;
563af69d88dSmrg   struct pipe_transfer *buf_transfer;
564af69d88dSmrg   unsigned x, y, i, pitch;
565af69d88dSmrg   uint8_t *data;
566af69d88dSmrg
567af69d88dSmrg   struct pipe_box rect =
568af69d88dSmrg   {
569af69d88dSmrg      0, 0, intra ? 1 : 0,
570af69d88dSmrg      VL_BLOCK_WIDTH,
571af69d88dSmrg      VL_BLOCK_HEIGHT,
572af69d88dSmrg      1
573af69d88dSmrg   };
574af69d88dSmrg
575af69d88dSmrg   assert(buffer);
576af69d88dSmrg   assert(matrix);
577af69d88dSmrg
578af69d88dSmrg   pipe = zscan->pipe;
579af69d88dSmrg
580af69d88dSmrg   rect.width *= zscan->blocks_per_line;
581af69d88dSmrg
5827ec681f3Smrg   data = pipe->texture_map(pipe, buffer->quant->texture,
5837ec681f3Smrg                             0, PIPE_MAP_WRITE |
5847ec681f3Smrg                             PIPE_MAP_DISCARD_RANGE,
585af69d88dSmrg                             &rect, &buf_transfer);
586af69d88dSmrg   if (!data)
587af69d88dSmrg      return;
588af69d88dSmrg
589af69d88dSmrg   pitch = buf_transfer->stride;
590af69d88dSmrg
591af69d88dSmrg   for (i = 0; i < zscan->blocks_per_line; ++i)
592af69d88dSmrg      for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
593af69d88dSmrg         for (x = 0; x < VL_BLOCK_WIDTH; ++x)
594af69d88dSmrg            data[i * VL_BLOCK_WIDTH + y * pitch + x] = matrix[x + y * VL_BLOCK_WIDTH];
595af69d88dSmrg
5967ec681f3Smrg   pipe->texture_unmap(pipe, buf_transfer);
597af69d88dSmrg}
598af69d88dSmrg
599af69d88dSmrgvoid
600af69d88dSmrgvl_zscan_render(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, unsigned num_instances)
601af69d88dSmrg{
602af69d88dSmrg   assert(buffer);
603af69d88dSmrg
604af69d88dSmrg   zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state);
605af69d88dSmrg   zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend);
606af69d88dSmrg   zscan->pipe->bind_sampler_states(zscan->pipe, PIPE_SHADER_FRAGMENT,
607af69d88dSmrg                                    0, 3, zscan->samplers);
608af69d88dSmrg   zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state);
609af69d88dSmrg   zscan->pipe->set_viewport_states(zscan->pipe, 0, 1, &buffer->viewport);
610af69d88dSmrg   zscan->pipe->set_sampler_views(zscan->pipe, PIPE_SHADER_FRAGMENT,
6117ec681f3Smrg                                  0, 3, 0, false, &buffer->src);
612af69d88dSmrg   zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs);
613af69d88dSmrg   zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs);
614af69d88dSmrg   util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
615af69d88dSmrg}
616