1af69d88dSmrg/************************************************************************** 2af69d88dSmrg * 3af69d88dSmrg * Copyright 2011 Christian König 4af69d88dSmrg * All Rights Reserved. 5af69d88dSmrg * 6af69d88dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 7af69d88dSmrg * copy of this software and associated documentation files (the 8af69d88dSmrg * "Software"), to deal in the Software without restriction, including 9af69d88dSmrg * without limitation the rights to use, copy, modify, merge, publish, 10af69d88dSmrg * distribute, sub license, and/or sell copies of the Software, and to 11af69d88dSmrg * permit persons to whom the Software is furnished to do so, subject to 12af69d88dSmrg * the following conditions: 13af69d88dSmrg * 14af69d88dSmrg * The above copyright notice and this permission notice (including the 15af69d88dSmrg * next paragraph) shall be included in all copies or substantial portions 16af69d88dSmrg * of the Software. 17af69d88dSmrg * 18af69d88dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19af69d88dSmrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20af69d88dSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21af69d88dSmrg * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22af69d88dSmrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23af69d88dSmrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24af69d88dSmrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25af69d88dSmrg * 26af69d88dSmrg **************************************************************************/ 27af69d88dSmrg 28af69d88dSmrg#include <assert.h> 29af69d88dSmrg 30af69d88dSmrg#include "pipe/p_screen.h" 31af69d88dSmrg#include "pipe/p_context.h" 32af69d88dSmrg 33af69d88dSmrg#include "util/u_draw.h" 34af69d88dSmrg#include "util/u_sampler.h" 35af69d88dSmrg#include "util/u_inlines.h" 36af69d88dSmrg#include "util/u_memory.h" 37af69d88dSmrg 38af69d88dSmrg#include "tgsi/tgsi_ureg.h" 39af69d88dSmrg 40af69d88dSmrg#include "vl_defines.h" 41af69d88dSmrg#include "vl_types.h" 42af69d88dSmrg 43af69d88dSmrg#include "vl_zscan.h" 44af69d88dSmrg#include "vl_vertex_buffers.h" 45af69d88dSmrg 46af69d88dSmrgenum VS_OUTPUT 47af69d88dSmrg{ 48af69d88dSmrg VS_O_VPOS = 0, 49af69d88dSmrg VS_O_VTEX = 0 50af69d88dSmrg}; 51af69d88dSmrg 5201e04c3fSmrgconst int vl_zscan_normal_16[] = 5301e04c3fSmrg{ 5401e04c3fSmrg /* Zig-Zag scan pattern */ 5501e04c3fSmrg 0, 1, 4, 8, 5, 2, 3, 6, 5601e04c3fSmrg 9,12,13,10, 7,11,14,15 5701e04c3fSmrg}; 5801e04c3fSmrg 59af69d88dSmrgconst int vl_zscan_linear[] = 60af69d88dSmrg{ 61af69d88dSmrg /* Linear scan pattern */ 62af69d88dSmrg 0, 1, 2, 3, 4, 5, 6, 7, 63af69d88dSmrg 8, 9,10,11,12,13,14,15, 64af69d88dSmrg 16,17,18,19,20,21,22,23, 65af69d88dSmrg 24,25,26,27,28,29,30,31, 66af69d88dSmrg 32,33,34,35,36,37,38,39, 67af69d88dSmrg 40,41,42,43,44,45,46,47, 68af69d88dSmrg 48,49,50,51,52,53,54,55, 69af69d88dSmrg 56,57,58,59,60,61,62,63 70af69d88dSmrg}; 71af69d88dSmrg 72af69d88dSmrgconst int vl_zscan_normal[] = 73af69d88dSmrg{ 74af69d88dSmrg /* Zig-Zag scan pattern */ 75af69d88dSmrg 0, 1, 8,16, 9, 2, 3,10, 76af69d88dSmrg 17,24,32,25,18,11, 4, 5, 77af69d88dSmrg 12,19,26,33,40,48,41,34, 78af69d88dSmrg 27,20,13, 6, 7,14,21,28, 79af69d88dSmrg 35,42,49,56,57,50,43,36, 80af69d88dSmrg 29,22,15,23,30,37,44,51, 81af69d88dSmrg 58,59,52,45,38,31,39,46, 82af69d88dSmrg 53,60,61,54,47,55,62,63 83af69d88dSmrg}; 84af69d88dSmrg 85af69d88dSmrgconst int vl_zscan_alternate[] = 86af69d88dSmrg{ 87af69d88dSmrg /* Alternate scan pattern */ 88af69d88dSmrg 0, 8,16,24, 1, 9, 2,10, 89af69d88dSmrg 17,25,32,40,48,56,57,49, 90af69d88dSmrg 41,33,26,18, 3,11, 4,12, 91af69d88dSmrg 19,27,34,42,50,58,35,43, 92af69d88dSmrg 51,59,20,28, 5,13, 6,14, 93af69d88dSmrg 21,29,36,44,52,60,37,45, 94af69d88dSmrg 53,61,22,30, 7,15,23,31, 95af69d88dSmrg 38,46,54,62,39,47,55,63 96af69d88dSmrg}; 97af69d88dSmrg 9801e04c3fSmrgconst int vl_zscan_h265_up_right_diagonal_16[] = 9901e04c3fSmrg{ 10001e04c3fSmrg /* Up-right diagonal scan order for 4x4 blocks - see H.265 section 6.5.3. */ 10101e04c3fSmrg 0, 4, 1, 8, 5, 2, 12, 9, 10201e04c3fSmrg 6, 3, 13, 10, 7, 14, 11, 15, 10301e04c3fSmrg}; 10401e04c3fSmrg 10501e04c3fSmrgconst int vl_zscan_h265_up_right_diagonal[] = 10601e04c3fSmrg{ 10701e04c3fSmrg /* Up-right diagonal scan order for 8x8 blocks - see H.265 section 6.5.3. */ 10801e04c3fSmrg 0, 8, 1, 16, 9, 2, 24, 17, 10901e04c3fSmrg 10, 3, 32, 25, 18, 11, 4, 40, 11001e04c3fSmrg 33, 26, 19, 12, 5, 48, 41, 34, 11101e04c3fSmrg 27, 20, 13, 6, 56, 49, 42, 35, 11201e04c3fSmrg 28, 21, 14, 7, 57, 50, 43, 36, 11301e04c3fSmrg 29, 22, 15, 58, 51, 44, 37, 30, 11401e04c3fSmrg 23, 59, 52, 45, 38, 31, 60, 53, 11501e04c3fSmrg 46, 39, 61, 54, 47, 62, 55, 63, 11601e04c3fSmrg}; 11701e04c3fSmrg 11801e04c3fSmrg 119af69d88dSmrgstatic void * 120af69d88dSmrgcreate_vert_shader(struct vl_zscan *zscan) 121af69d88dSmrg{ 122af69d88dSmrg struct ureg_program *shader; 123af69d88dSmrg struct ureg_src scale; 124af69d88dSmrg struct ureg_src vrect, vpos, block_num; 125af69d88dSmrg struct ureg_dst tmp; 126af69d88dSmrg struct ureg_dst o_vpos; 127af69d88dSmrg struct ureg_dst *o_vtex; 12801e04c3fSmrg unsigned i; 129af69d88dSmrg 13001e04c3fSmrg shader = ureg_create(PIPE_SHADER_VERTEX); 131af69d88dSmrg if (!shader) 132af69d88dSmrg return NULL; 133af69d88dSmrg 134af69d88dSmrg o_vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_dst)); 135af69d88dSmrg 136af69d88dSmrg scale = ureg_imm2f(shader, 137af69d88dSmrg (float)VL_BLOCK_WIDTH / zscan->buffer_width, 138af69d88dSmrg (float)VL_BLOCK_HEIGHT / zscan->buffer_height); 139af69d88dSmrg 140af69d88dSmrg vrect = ureg_DECL_vs_input(shader, VS_I_RECT); 141af69d88dSmrg vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); 142af69d88dSmrg block_num = ureg_DECL_vs_input(shader, VS_I_BLOCK_NUM); 143af69d88dSmrg 144af69d88dSmrg tmp = ureg_DECL_temporary(shader); 145af69d88dSmrg 146af69d88dSmrg o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); 147af69d88dSmrg 148af69d88dSmrg for (i = 0; i < zscan->num_channels; ++i) 149af69d88dSmrg o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i); 150af69d88dSmrg 151af69d88dSmrg /* 152af69d88dSmrg * o_vpos.xy = (vpos + vrect) * scale 153af69d88dSmrg * o_vpos.zw = 1.0f 154af69d88dSmrg * 155af69d88dSmrg * tmp.xy = InstanceID / blocks_per_line 156af69d88dSmrg * tmp.x = frac(tmp.x) 157af69d88dSmrg * tmp.y = floor(tmp.y) 158af69d88dSmrg * 159af69d88dSmrg * o_vtex.x = vrect.x / blocks_per_line + tmp.x 160af69d88dSmrg * o_vtex.y = vrect.y 161af69d88dSmrg * o_vtex.z = tmp.z * blocks_per_line / blocks_total 162af69d88dSmrg */ 163af69d88dSmrg ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect); 164af69d88dSmrg ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale); 165af69d88dSmrg ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); 166af69d88dSmrg 167af69d88dSmrg ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(block_num, TGSI_SWIZZLE_X), 168af69d88dSmrg ureg_imm1f(shader, 1.0f / zscan->blocks_per_line)); 169af69d88dSmrg 170af69d88dSmrg ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); 171af69d88dSmrg ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_src(tmp)); 172af69d88dSmrg 173af69d88dSmrg for (i = 0; i < zscan->num_channels; ++i) { 174af69d88dSmrg ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), 175af69d88dSmrg ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * VL_BLOCK_WIDTH) 17601e04c3fSmrg * ((signed)i - (signed)zscan->num_channels / 2))); 177af69d88dSmrg 178af69d88dSmrg ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect, 179af69d88dSmrg ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp)); 180af69d88dSmrg ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect); 181af69d88dSmrg ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), vpos); 182af69d88dSmrg ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_W), ureg_src(tmp), 183af69d88dSmrg ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total)); 184af69d88dSmrg } 185af69d88dSmrg 186af69d88dSmrg ureg_release_temporary(shader, tmp); 187af69d88dSmrg ureg_END(shader); 188af69d88dSmrg 189af69d88dSmrg FREE(o_vtex); 190af69d88dSmrg 191af69d88dSmrg return ureg_create_shader_and_destroy(shader, zscan->pipe); 192af69d88dSmrg} 193af69d88dSmrg 194af69d88dSmrgstatic void * 195af69d88dSmrgcreate_frag_shader(struct vl_zscan *zscan) 196af69d88dSmrg{ 197af69d88dSmrg struct ureg_program *shader; 198af69d88dSmrg struct ureg_src *vtex; 199af69d88dSmrg 200af69d88dSmrg struct ureg_src samp_src, samp_scan, samp_quant; 201af69d88dSmrg 202af69d88dSmrg struct ureg_dst *tmp; 203af69d88dSmrg struct ureg_dst quant, fragment; 204af69d88dSmrg 205af69d88dSmrg unsigned i; 206af69d88dSmrg 20701e04c3fSmrg shader = ureg_create(PIPE_SHADER_FRAGMENT); 208af69d88dSmrg if (!shader) 209af69d88dSmrg return NULL; 210af69d88dSmrg 211af69d88dSmrg vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_src)); 212af69d88dSmrg tmp = MALLOC(zscan->num_channels * sizeof(struct ureg_dst)); 213af69d88dSmrg 214af69d88dSmrg for (i = 0; i < zscan->num_channels; ++i) 215af69d88dSmrg vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR); 216af69d88dSmrg 217af69d88dSmrg samp_src = ureg_DECL_sampler(shader, 0); 218af69d88dSmrg samp_scan = ureg_DECL_sampler(shader, 1); 219af69d88dSmrg samp_quant = ureg_DECL_sampler(shader, 2); 220af69d88dSmrg 221af69d88dSmrg for (i = 0; i < zscan->num_channels; ++i) 222af69d88dSmrg tmp[i] = ureg_DECL_temporary(shader); 223af69d88dSmrg quant = ureg_DECL_temporary(shader); 224af69d88dSmrg 225af69d88dSmrg fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); 226af69d88dSmrg 227af69d88dSmrg /* 228af69d88dSmrg * tmp.x = tex(vtex, 1) 229af69d88dSmrg * tmp.y = vtex.z 230af69d88dSmrg * fragment = tex(tmp, 0) * quant 231af69d88dSmrg */ 232af69d88dSmrg for (i = 0; i < zscan->num_channels; ++i) 233af69d88dSmrg ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], samp_scan); 234af69d88dSmrg 235af69d88dSmrg for (i = 0; i < zscan->num_channels; ++i) 236af69d88dSmrg ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_W)); 237af69d88dSmrg 238af69d88dSmrg for (i = 0; i < zscan->num_channels; ++i) { 239af69d88dSmrg ureg_TEX(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, ureg_src(tmp[i]), samp_src); 240af69d88dSmrg ureg_TEX(shader, ureg_writemask(quant, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, vtex[i], samp_quant); 241af69d88dSmrg } 242af69d88dSmrg 243af69d88dSmrg ureg_MUL(shader, quant, ureg_src(quant), ureg_imm1f(shader, 16.0f)); 244af69d88dSmrg ureg_MUL(shader, fragment, ureg_src(tmp[0]), ureg_src(quant)); 245af69d88dSmrg 246af69d88dSmrg for (i = 0; i < zscan->num_channels; ++i) 247af69d88dSmrg ureg_release_temporary(shader, tmp[i]); 248af69d88dSmrg ureg_END(shader); 249af69d88dSmrg 250af69d88dSmrg FREE(vtex); 251af69d88dSmrg FREE(tmp); 252af69d88dSmrg 253af69d88dSmrg return ureg_create_shader_and_destroy(shader, zscan->pipe); 254af69d88dSmrg} 255af69d88dSmrg 256af69d88dSmrgstatic bool 257af69d88dSmrginit_shaders(struct vl_zscan *zscan) 258af69d88dSmrg{ 259af69d88dSmrg assert(zscan); 260af69d88dSmrg 261af69d88dSmrg zscan->vs = create_vert_shader(zscan); 262af69d88dSmrg if (!zscan->vs) 263af69d88dSmrg goto error_vs; 264af69d88dSmrg 265af69d88dSmrg zscan->fs = create_frag_shader(zscan); 266af69d88dSmrg if (!zscan->fs) 267af69d88dSmrg goto error_fs; 268af69d88dSmrg 269af69d88dSmrg return true; 270af69d88dSmrg 271af69d88dSmrgerror_fs: 272af69d88dSmrg zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs); 273af69d88dSmrg 274af69d88dSmrgerror_vs: 275af69d88dSmrg return false; 276af69d88dSmrg} 277af69d88dSmrg 278af69d88dSmrgstatic void 279af69d88dSmrgcleanup_shaders(struct vl_zscan *zscan) 280af69d88dSmrg{ 281af69d88dSmrg assert(zscan); 282af69d88dSmrg 283af69d88dSmrg zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs); 284af69d88dSmrg zscan->pipe->delete_fs_state(zscan->pipe, zscan->fs); 285af69d88dSmrg} 286af69d88dSmrg 287af69d88dSmrgstatic bool 288af69d88dSmrginit_state(struct vl_zscan *zscan) 289af69d88dSmrg{ 290af69d88dSmrg struct pipe_blend_state blend; 291af69d88dSmrg struct pipe_rasterizer_state rs_state; 292af69d88dSmrg struct pipe_sampler_state sampler; 293af69d88dSmrg unsigned i; 294af69d88dSmrg 295af69d88dSmrg assert(zscan); 296af69d88dSmrg 297af69d88dSmrg memset(&rs_state, 0, sizeof(rs_state)); 298af69d88dSmrg rs_state.half_pixel_center = true; 299af69d88dSmrg rs_state.bottom_edge_rule = true; 30001e04c3fSmrg rs_state.depth_clip_near = 1; 30101e04c3fSmrg rs_state.depth_clip_far = 1; 30201e04c3fSmrg 303af69d88dSmrg zscan->rs_state = zscan->pipe->create_rasterizer_state(zscan->pipe, &rs_state); 304af69d88dSmrg if (!zscan->rs_state) 305af69d88dSmrg goto error_rs_state; 306af69d88dSmrg 307af69d88dSmrg memset(&blend, 0, sizeof blend); 308af69d88dSmrg 309af69d88dSmrg blend.independent_blend_enable = 0; 310af69d88dSmrg blend.rt[0].blend_enable = 0; 311af69d88dSmrg blend.rt[0].rgb_func = PIPE_BLEND_ADD; 312af69d88dSmrg blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; 313af69d88dSmrg blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; 314af69d88dSmrg blend.rt[0].alpha_func = PIPE_BLEND_ADD; 315af69d88dSmrg blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; 316af69d88dSmrg blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; 317af69d88dSmrg blend.logicop_enable = 0; 318af69d88dSmrg blend.logicop_func = PIPE_LOGICOP_CLEAR; 319af69d88dSmrg /* Needed to allow color writes to FB, even if blending disabled */ 320af69d88dSmrg blend.rt[0].colormask = PIPE_MASK_RGBA; 321af69d88dSmrg blend.dither = 0; 322af69d88dSmrg zscan->blend = zscan->pipe->create_blend_state(zscan->pipe, &blend); 323af69d88dSmrg if (!zscan->blend) 324af69d88dSmrg goto error_blend; 325af69d88dSmrg 326af69d88dSmrg for (i = 0; i < 3; ++i) { 327af69d88dSmrg memset(&sampler, 0, sizeof(sampler)); 328af69d88dSmrg sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; 329af69d88dSmrg sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; 330af69d88dSmrg sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; 331af69d88dSmrg sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; 332af69d88dSmrg sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 333af69d88dSmrg sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; 334af69d88dSmrg sampler.compare_mode = PIPE_TEX_COMPARE_NONE; 335af69d88dSmrg sampler.compare_func = PIPE_FUNC_ALWAYS; 336af69d88dSmrg sampler.normalized_coords = 1; 337af69d88dSmrg zscan->samplers[i] = zscan->pipe->create_sampler_state(zscan->pipe, &sampler); 338af69d88dSmrg if (!zscan->samplers[i]) 339af69d88dSmrg goto error_samplers; 340af69d88dSmrg } 341af69d88dSmrg 342af69d88dSmrg return true; 343af69d88dSmrg 344af69d88dSmrgerror_samplers: 345af69d88dSmrg for (i = 0; i < 2; ++i) 346af69d88dSmrg if (zscan->samplers[i]) 347af69d88dSmrg zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]); 348af69d88dSmrg 349af69d88dSmrg zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state); 350af69d88dSmrg 351af69d88dSmrgerror_blend: 352af69d88dSmrg zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend); 353af69d88dSmrg 354af69d88dSmrgerror_rs_state: 355af69d88dSmrg return false; 356af69d88dSmrg} 357af69d88dSmrg 358af69d88dSmrgstatic void 359af69d88dSmrgcleanup_state(struct vl_zscan *zscan) 360af69d88dSmrg{ 361af69d88dSmrg unsigned i; 362af69d88dSmrg 363af69d88dSmrg assert(zscan); 364af69d88dSmrg 365af69d88dSmrg for (i = 0; i < 3; ++i) 366af69d88dSmrg zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]); 367af69d88dSmrg 368af69d88dSmrg zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state); 369af69d88dSmrg zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend); 370af69d88dSmrg} 371af69d88dSmrg 372af69d88dSmrgstruct pipe_sampler_view * 373af69d88dSmrgvl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks_per_line) 374af69d88dSmrg{ 375af69d88dSmrg const unsigned total_size = blocks_per_line * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; 376af69d88dSmrg 377af69d88dSmrg int patched_layout[64]; 378af69d88dSmrg 379af69d88dSmrg struct pipe_resource res_tmpl, *res; 380af69d88dSmrg struct pipe_sampler_view sv_tmpl, *sv; 381af69d88dSmrg struct pipe_transfer *buf_transfer; 382af69d88dSmrg unsigned x, y, i, pitch; 383af69d88dSmrg float *f; 384af69d88dSmrg 385af69d88dSmrg struct pipe_box rect = 386af69d88dSmrg { 387af69d88dSmrg 0, 0, 0, 388af69d88dSmrg VL_BLOCK_WIDTH * blocks_per_line, 389af69d88dSmrg VL_BLOCK_HEIGHT, 390af69d88dSmrg 1 391af69d88dSmrg }; 392af69d88dSmrg 393af69d88dSmrg assert(pipe && layout && blocks_per_line); 394af69d88dSmrg 395af69d88dSmrg for (i = 0; i < 64; ++i) 396af69d88dSmrg patched_layout[layout[i]] = i; 397af69d88dSmrg 398af69d88dSmrg memset(&res_tmpl, 0, sizeof(res_tmpl)); 399af69d88dSmrg res_tmpl.target = PIPE_TEXTURE_2D; 400af69d88dSmrg res_tmpl.format = PIPE_FORMAT_R32_FLOAT; 401af69d88dSmrg res_tmpl.width0 = VL_BLOCK_WIDTH * blocks_per_line; 402af69d88dSmrg res_tmpl.height0 = VL_BLOCK_HEIGHT; 403af69d88dSmrg res_tmpl.depth0 = 1; 404af69d88dSmrg res_tmpl.array_size = 1; 405af69d88dSmrg res_tmpl.usage = PIPE_USAGE_IMMUTABLE; 406af69d88dSmrg res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW; 407af69d88dSmrg 408af69d88dSmrg res = pipe->screen->resource_create(pipe->screen, &res_tmpl); 409af69d88dSmrg if (!res) 410af69d88dSmrg goto error_resource; 411af69d88dSmrg 4127ec681f3Smrg f = pipe->texture_map(pipe, res, 4137ec681f3Smrg 0, PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE, 414af69d88dSmrg &rect, &buf_transfer); 415af69d88dSmrg if (!f) 416af69d88dSmrg goto error_map; 417af69d88dSmrg 418af69d88dSmrg pitch = buf_transfer->stride / sizeof(float); 419af69d88dSmrg 420af69d88dSmrg for (i = 0; i < blocks_per_line; ++i) 421af69d88dSmrg for (y = 0; y < VL_BLOCK_HEIGHT; ++y) 422af69d88dSmrg for (x = 0; x < VL_BLOCK_WIDTH; ++x) { 423af69d88dSmrg float addr = patched_layout[x + y * VL_BLOCK_WIDTH] + 424af69d88dSmrg i * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; 425af69d88dSmrg 426af69d88dSmrg addr /= total_size; 427af69d88dSmrg 428af69d88dSmrg f[i * VL_BLOCK_WIDTH + y * pitch + x] = addr; 429af69d88dSmrg } 430af69d88dSmrg 4317ec681f3Smrg pipe->texture_unmap(pipe, buf_transfer); 432af69d88dSmrg 433af69d88dSmrg memset(&sv_tmpl, 0, sizeof(sv_tmpl)); 434af69d88dSmrg u_sampler_view_default_template(&sv_tmpl, res, res->format); 435af69d88dSmrg sv = pipe->create_sampler_view(pipe, res, &sv_tmpl); 436af69d88dSmrg pipe_resource_reference(&res, NULL); 437af69d88dSmrg if (!sv) 438af69d88dSmrg goto error_map; 439af69d88dSmrg 440af69d88dSmrg return sv; 441af69d88dSmrg 442af69d88dSmrgerror_map: 443af69d88dSmrg pipe_resource_reference(&res, NULL); 444af69d88dSmrg 445af69d88dSmrgerror_resource: 446af69d88dSmrg return NULL; 447af69d88dSmrg} 448af69d88dSmrg 449af69d88dSmrgbool 450af69d88dSmrgvl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe, 451af69d88dSmrg unsigned buffer_width, unsigned buffer_height, 452af69d88dSmrg unsigned blocks_per_line, unsigned blocks_total, 453af69d88dSmrg unsigned num_channels) 454af69d88dSmrg{ 455af69d88dSmrg assert(zscan && pipe); 456af69d88dSmrg 457af69d88dSmrg zscan->pipe = pipe; 458af69d88dSmrg zscan->buffer_width = buffer_width; 459af69d88dSmrg zscan->buffer_height = buffer_height; 460af69d88dSmrg zscan->num_channels = num_channels; 461af69d88dSmrg zscan->blocks_per_line = blocks_per_line; 462af69d88dSmrg zscan->blocks_total = blocks_total; 463af69d88dSmrg 464af69d88dSmrg if(!init_shaders(zscan)) 465af69d88dSmrg return false; 466af69d88dSmrg 467af69d88dSmrg if(!init_state(zscan)) { 468af69d88dSmrg cleanup_shaders(zscan); 469af69d88dSmrg return false; 470af69d88dSmrg } 471af69d88dSmrg 472af69d88dSmrg return true; 473af69d88dSmrg} 474af69d88dSmrg 475af69d88dSmrgvoid 476af69d88dSmrgvl_zscan_cleanup(struct vl_zscan *zscan) 477af69d88dSmrg{ 478af69d88dSmrg assert(zscan); 479af69d88dSmrg 480af69d88dSmrg cleanup_shaders(zscan); 481af69d88dSmrg cleanup_state(zscan); 482af69d88dSmrg} 483af69d88dSmrg 484af69d88dSmrgbool 485af69d88dSmrgvl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, 486af69d88dSmrg struct pipe_sampler_view *src, struct pipe_surface *dst) 487af69d88dSmrg{ 488af69d88dSmrg struct pipe_resource res_tmpl, *res; 489af69d88dSmrg struct pipe_sampler_view sv_tmpl; 490af69d88dSmrg 491af69d88dSmrg assert(zscan && buffer); 492af69d88dSmrg 493af69d88dSmrg memset(buffer, 0, sizeof(struct vl_zscan_buffer)); 494af69d88dSmrg 495af69d88dSmrg pipe_sampler_view_reference(&buffer->src, src); 496af69d88dSmrg 497af69d88dSmrg buffer->viewport.scale[0] = dst->width; 498af69d88dSmrg buffer->viewport.scale[1] = dst->height; 499af69d88dSmrg buffer->viewport.scale[2] = 1; 500af69d88dSmrg buffer->viewport.translate[0] = 0; 501af69d88dSmrg buffer->viewport.translate[1] = 0; 502af69d88dSmrg buffer->viewport.translate[2] = 0; 5037ec681f3Smrg buffer->viewport.swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X; 5047ec681f3Smrg buffer->viewport.swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y; 5057ec681f3Smrg buffer->viewport.swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z; 5067ec681f3Smrg buffer->viewport.swizzle_w = PIPE_VIEWPORT_SWIZZLE_POSITIVE_W; 507af69d88dSmrg 508af69d88dSmrg buffer->fb_state.width = dst->width; 509af69d88dSmrg buffer->fb_state.height = dst->height; 510af69d88dSmrg buffer->fb_state.nr_cbufs = 1; 511af69d88dSmrg pipe_surface_reference(&buffer->fb_state.cbufs[0], dst); 512af69d88dSmrg 513af69d88dSmrg memset(&res_tmpl, 0, sizeof(res_tmpl)); 514af69d88dSmrg res_tmpl.target = PIPE_TEXTURE_3D; 515af69d88dSmrg res_tmpl.format = PIPE_FORMAT_R8_UNORM; 516af69d88dSmrg res_tmpl.width0 = VL_BLOCK_WIDTH * zscan->blocks_per_line; 517af69d88dSmrg res_tmpl.height0 = VL_BLOCK_HEIGHT; 518af69d88dSmrg res_tmpl.depth0 = 2; 519af69d88dSmrg res_tmpl.array_size = 1; 520af69d88dSmrg res_tmpl.usage = PIPE_USAGE_IMMUTABLE; 521af69d88dSmrg res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW; 522af69d88dSmrg 523af69d88dSmrg res = zscan->pipe->screen->resource_create(zscan->pipe->screen, &res_tmpl); 524af69d88dSmrg if (!res) 525af69d88dSmrg return false; 526af69d88dSmrg 527af69d88dSmrg memset(&sv_tmpl, 0, sizeof(sv_tmpl)); 528af69d88dSmrg u_sampler_view_default_template(&sv_tmpl, res, res->format); 529af69d88dSmrg sv_tmpl.swizzle_r = sv_tmpl.swizzle_g = sv_tmpl.swizzle_b = sv_tmpl.swizzle_a = TGSI_SWIZZLE_X; 530af69d88dSmrg buffer->quant = zscan->pipe->create_sampler_view(zscan->pipe, res, &sv_tmpl); 531af69d88dSmrg pipe_resource_reference(&res, NULL); 532af69d88dSmrg if (!buffer->quant) 533af69d88dSmrg return false; 534af69d88dSmrg 535af69d88dSmrg return true; 536af69d88dSmrg} 537af69d88dSmrg 538af69d88dSmrgvoid 539af69d88dSmrgvl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer) 540af69d88dSmrg{ 541af69d88dSmrg assert(buffer); 542af69d88dSmrg 543af69d88dSmrg pipe_sampler_view_reference(&buffer->src, NULL); 544af69d88dSmrg pipe_sampler_view_reference(&buffer->layout, NULL); 545af69d88dSmrg pipe_sampler_view_reference(&buffer->quant, NULL); 546af69d88dSmrg pipe_surface_reference(&buffer->fb_state.cbufs[0], NULL); 547af69d88dSmrg} 548af69d88dSmrg 549af69d88dSmrgvoid 550af69d88dSmrgvl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout) 551af69d88dSmrg{ 552af69d88dSmrg assert(buffer); 553af69d88dSmrg assert(layout); 554af69d88dSmrg 555af69d88dSmrg pipe_sampler_view_reference(&buffer->layout, layout); 556af69d88dSmrg} 557af69d88dSmrg 558af69d88dSmrgvoid 559af69d88dSmrgvl_zscan_upload_quant(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, 560af69d88dSmrg const uint8_t matrix[64], bool intra) 561af69d88dSmrg{ 562af69d88dSmrg struct pipe_context *pipe; 563af69d88dSmrg struct pipe_transfer *buf_transfer; 564af69d88dSmrg unsigned x, y, i, pitch; 565af69d88dSmrg uint8_t *data; 566af69d88dSmrg 567af69d88dSmrg struct pipe_box rect = 568af69d88dSmrg { 569af69d88dSmrg 0, 0, intra ? 1 : 0, 570af69d88dSmrg VL_BLOCK_WIDTH, 571af69d88dSmrg VL_BLOCK_HEIGHT, 572af69d88dSmrg 1 573af69d88dSmrg }; 574af69d88dSmrg 575af69d88dSmrg assert(buffer); 576af69d88dSmrg assert(matrix); 577af69d88dSmrg 578af69d88dSmrg pipe = zscan->pipe; 579af69d88dSmrg 580af69d88dSmrg rect.width *= zscan->blocks_per_line; 581af69d88dSmrg 5827ec681f3Smrg data = pipe->texture_map(pipe, buffer->quant->texture, 5837ec681f3Smrg 0, PIPE_MAP_WRITE | 5847ec681f3Smrg PIPE_MAP_DISCARD_RANGE, 585af69d88dSmrg &rect, &buf_transfer); 586af69d88dSmrg if (!data) 587af69d88dSmrg return; 588af69d88dSmrg 589af69d88dSmrg pitch = buf_transfer->stride; 590af69d88dSmrg 591af69d88dSmrg for (i = 0; i < zscan->blocks_per_line; ++i) 592af69d88dSmrg for (y = 0; y < VL_BLOCK_HEIGHT; ++y) 593af69d88dSmrg for (x = 0; x < VL_BLOCK_WIDTH; ++x) 594af69d88dSmrg data[i * VL_BLOCK_WIDTH + y * pitch + x] = matrix[x + y * VL_BLOCK_WIDTH]; 595af69d88dSmrg 5967ec681f3Smrg pipe->texture_unmap(pipe, buf_transfer); 597af69d88dSmrg} 598af69d88dSmrg 599af69d88dSmrgvoid 600af69d88dSmrgvl_zscan_render(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, unsigned num_instances) 601af69d88dSmrg{ 602af69d88dSmrg assert(buffer); 603af69d88dSmrg 604af69d88dSmrg zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state); 605af69d88dSmrg zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend); 606af69d88dSmrg zscan->pipe->bind_sampler_states(zscan->pipe, PIPE_SHADER_FRAGMENT, 607af69d88dSmrg 0, 3, zscan->samplers); 608af69d88dSmrg zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state); 609af69d88dSmrg zscan->pipe->set_viewport_states(zscan->pipe, 0, 1, &buffer->viewport); 610af69d88dSmrg zscan->pipe->set_sampler_views(zscan->pipe, PIPE_SHADER_FRAGMENT, 6117ec681f3Smrg 0, 3, 0, false, &buffer->src); 612af69d88dSmrg zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs); 613af69d88dSmrg zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs); 614af69d88dSmrg util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); 615af69d88dSmrg} 616