1848b8605Smrg/************************************************************************** 2848b8605Smrg * 3848b8605Smrg * Copyright 2011 Christian König 4848b8605Smrg * All Rights Reserved. 5848b8605Smrg * 6848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 7848b8605Smrg * copy of this software and associated documentation files (the 8848b8605Smrg * "Software"), to deal in the Software without restriction, including 9848b8605Smrg * without limitation the rights to use, copy, modify, merge, publish, 10848b8605Smrg * distribute, sub license, and/or sell copies of the Software, and to 11848b8605Smrg * permit persons to whom the Software is furnished to do so, subject to 12848b8605Smrg * the following conditions: 13848b8605Smrg * 14848b8605Smrg * The above copyright notice and this permission notice (including the 15848b8605Smrg * next paragraph) shall be included in all copies or substantial portions 16848b8605Smrg * of the Software. 17848b8605Smrg * 18848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19848b8605Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20848b8605Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21848b8605Smrg * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22848b8605Smrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23848b8605Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24848b8605Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25848b8605Smrg * 26848b8605Smrg **************************************************************************/ 27848b8605Smrg 28848b8605Smrg#include <assert.h> 29848b8605Smrg 30848b8605Smrg#include "pipe/p_screen.h" 31848b8605Smrg#include "pipe/p_context.h" 32848b8605Smrg 33848b8605Smrg#include "util/u_draw.h" 34848b8605Smrg#include "util/u_sampler.h" 35848b8605Smrg#include "util/u_inlines.h" 36848b8605Smrg#include "util/u_memory.h" 37848b8605Smrg 38848b8605Smrg#include "tgsi/tgsi_ureg.h" 39848b8605Smrg 40848b8605Smrg#include "vl_defines.h" 41848b8605Smrg#include "vl_types.h" 42848b8605Smrg 43848b8605Smrg#include "vl_zscan.h" 44848b8605Smrg#include "vl_vertex_buffers.h" 45848b8605Smrg 46848b8605Smrgenum VS_OUTPUT 47848b8605Smrg{ 48848b8605Smrg VS_O_VPOS = 0, 49848b8605Smrg VS_O_VTEX = 0 50848b8605Smrg}; 51848b8605Smrg 52b8e80941Smrgconst int vl_zscan_normal_16[] = 53b8e80941Smrg{ 54b8e80941Smrg /* Zig-Zag scan pattern */ 55b8e80941Smrg 0, 1, 4, 8, 5, 2, 3, 6, 56b8e80941Smrg 9,12,13,10, 7,11,14,15 57b8e80941Smrg}; 58b8e80941Smrg 59848b8605Smrgconst int vl_zscan_linear[] = 60848b8605Smrg{ 61848b8605Smrg /* Linear scan pattern */ 62848b8605Smrg 0, 1, 2, 3, 4, 5, 6, 7, 63848b8605Smrg 8, 9,10,11,12,13,14,15, 64848b8605Smrg 16,17,18,19,20,21,22,23, 65848b8605Smrg 24,25,26,27,28,29,30,31, 66848b8605Smrg 32,33,34,35,36,37,38,39, 67848b8605Smrg 40,41,42,43,44,45,46,47, 68848b8605Smrg 48,49,50,51,52,53,54,55, 69848b8605Smrg 56,57,58,59,60,61,62,63 70848b8605Smrg}; 71848b8605Smrg 72848b8605Smrgconst int vl_zscan_normal[] = 73848b8605Smrg{ 74848b8605Smrg /* Zig-Zag scan pattern */ 75848b8605Smrg 0, 1, 8,16, 9, 2, 3,10, 76848b8605Smrg 17,24,32,25,18,11, 4, 5, 77848b8605Smrg 12,19,26,33,40,48,41,34, 78848b8605Smrg 27,20,13, 6, 7,14,21,28, 79848b8605Smrg 35,42,49,56,57,50,43,36, 80848b8605Smrg 29,22,15,23,30,37,44,51, 81848b8605Smrg 58,59,52,45,38,31,39,46, 82848b8605Smrg 53,60,61,54,47,55,62,63 83848b8605Smrg}; 84848b8605Smrg 85848b8605Smrgconst int vl_zscan_alternate[] = 86848b8605Smrg{ 87848b8605Smrg /* Alternate scan pattern */ 88848b8605Smrg 0, 8,16,24, 1, 9, 2,10, 89848b8605Smrg 17,25,32,40,48,56,57,49, 90848b8605Smrg 41,33,26,18, 3,11, 4,12, 91848b8605Smrg 19,27,34,42,50,58,35,43, 92848b8605Smrg 51,59,20,28, 5,13, 6,14, 93848b8605Smrg 21,29,36,44,52,60,37,45, 94848b8605Smrg 53,61,22,30, 7,15,23,31, 95848b8605Smrg 38,46,54,62,39,47,55,63 96848b8605Smrg}; 97848b8605Smrg 98b8e80941Smrgconst int vl_zscan_h265_up_right_diagonal_16[] = 99b8e80941Smrg{ 100b8e80941Smrg /* Up-right diagonal scan order for 4x4 blocks - see H.265 section 6.5.3. */ 101b8e80941Smrg 0, 4, 1, 8, 5, 2, 12, 9, 102b8e80941Smrg 6, 3, 13, 10, 7, 14, 11, 15, 103b8e80941Smrg}; 104b8e80941Smrg 105b8e80941Smrgconst int vl_zscan_h265_up_right_diagonal[] = 106b8e80941Smrg{ 107b8e80941Smrg /* Up-right diagonal scan order for 8x8 blocks - see H.265 section 6.5.3. */ 108b8e80941Smrg 0, 8, 1, 16, 9, 2, 24, 17, 109b8e80941Smrg 10, 3, 32, 25, 18, 11, 4, 40, 110b8e80941Smrg 33, 26, 19, 12, 5, 48, 41, 34, 111b8e80941Smrg 27, 20, 13, 6, 56, 49, 42, 35, 112b8e80941Smrg 28, 21, 14, 7, 57, 50, 43, 36, 113b8e80941Smrg 29, 22, 15, 58, 51, 44, 37, 30, 114b8e80941Smrg 23, 59, 52, 45, 38, 31, 60, 53, 115b8e80941Smrg 46, 39, 61, 54, 47, 62, 55, 63, 116b8e80941Smrg}; 117b8e80941Smrg 118b8e80941Smrg 119848b8605Smrgstatic void * 120848b8605Smrgcreate_vert_shader(struct vl_zscan *zscan) 121848b8605Smrg{ 122848b8605Smrg struct ureg_program *shader; 123848b8605Smrg struct ureg_src scale; 124848b8605Smrg struct ureg_src vrect, vpos, block_num; 125848b8605Smrg struct ureg_dst tmp; 126848b8605Smrg struct ureg_dst o_vpos; 127848b8605Smrg struct ureg_dst *o_vtex; 128b8e80941Smrg unsigned i; 129848b8605Smrg 130b8e80941Smrg shader = ureg_create(PIPE_SHADER_VERTEX); 131848b8605Smrg if (!shader) 132848b8605Smrg return NULL; 133848b8605Smrg 134848b8605Smrg o_vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_dst)); 135848b8605Smrg 136848b8605Smrg scale = ureg_imm2f(shader, 137848b8605Smrg (float)VL_BLOCK_WIDTH / zscan->buffer_width, 138848b8605Smrg (float)VL_BLOCK_HEIGHT / zscan->buffer_height); 139848b8605Smrg 140848b8605Smrg vrect = ureg_DECL_vs_input(shader, VS_I_RECT); 141848b8605Smrg vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); 142848b8605Smrg block_num = ureg_DECL_vs_input(shader, VS_I_BLOCK_NUM); 143848b8605Smrg 144848b8605Smrg tmp = ureg_DECL_temporary(shader); 145848b8605Smrg 146848b8605Smrg o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); 147848b8605Smrg 148848b8605Smrg for (i = 0; i < zscan->num_channels; ++i) 149848b8605Smrg o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i); 150848b8605Smrg 151848b8605Smrg /* 152848b8605Smrg * o_vpos.xy = (vpos + vrect) * scale 153848b8605Smrg * o_vpos.zw = 1.0f 154848b8605Smrg * 155848b8605Smrg * tmp.xy = InstanceID / blocks_per_line 156848b8605Smrg * tmp.x = frac(tmp.x) 157848b8605Smrg * tmp.y = floor(tmp.y) 158848b8605Smrg * 159848b8605Smrg * o_vtex.x = vrect.x / blocks_per_line + tmp.x 160848b8605Smrg * o_vtex.y = vrect.y 161848b8605Smrg * o_vtex.z = tmp.z * blocks_per_line / blocks_total 162848b8605Smrg */ 163848b8605Smrg ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect); 164848b8605Smrg ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale); 165848b8605Smrg ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); 166848b8605Smrg 167848b8605Smrg ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(block_num, TGSI_SWIZZLE_X), 168848b8605Smrg ureg_imm1f(shader, 1.0f / zscan->blocks_per_line)); 169848b8605Smrg 170848b8605Smrg ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); 171848b8605Smrg ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_src(tmp)); 172848b8605Smrg 173848b8605Smrg for (i = 0; i < zscan->num_channels; ++i) { 174848b8605Smrg ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), 175848b8605Smrg ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * VL_BLOCK_WIDTH) 176b8e80941Smrg * ((signed)i - (signed)zscan->num_channels / 2))); 177848b8605Smrg 178848b8605Smrg ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect, 179848b8605Smrg ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp)); 180848b8605Smrg ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect); 181848b8605Smrg ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), vpos); 182848b8605Smrg ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_W), ureg_src(tmp), 183848b8605Smrg ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total)); 184848b8605Smrg } 185848b8605Smrg 186848b8605Smrg ureg_release_temporary(shader, tmp); 187848b8605Smrg ureg_END(shader); 188848b8605Smrg 189848b8605Smrg FREE(o_vtex); 190848b8605Smrg 191848b8605Smrg return ureg_create_shader_and_destroy(shader, zscan->pipe); 192848b8605Smrg} 193848b8605Smrg 194848b8605Smrgstatic void * 195848b8605Smrgcreate_frag_shader(struct vl_zscan *zscan) 196848b8605Smrg{ 197848b8605Smrg struct ureg_program *shader; 198848b8605Smrg struct ureg_src *vtex; 199848b8605Smrg 200848b8605Smrg struct ureg_src samp_src, samp_scan, samp_quant; 201848b8605Smrg 202848b8605Smrg struct ureg_dst *tmp; 203848b8605Smrg struct ureg_dst quant, fragment; 204848b8605Smrg 205848b8605Smrg unsigned i; 206848b8605Smrg 207b8e80941Smrg shader = ureg_create(PIPE_SHADER_FRAGMENT); 208848b8605Smrg if (!shader) 209848b8605Smrg return NULL; 210848b8605Smrg 211848b8605Smrg vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_src)); 212848b8605Smrg tmp = MALLOC(zscan->num_channels * sizeof(struct ureg_dst)); 213848b8605Smrg 214848b8605Smrg for (i = 0; i < zscan->num_channels; ++i) 215848b8605Smrg vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR); 216848b8605Smrg 217848b8605Smrg samp_src = ureg_DECL_sampler(shader, 0); 218848b8605Smrg samp_scan = ureg_DECL_sampler(shader, 1); 219848b8605Smrg samp_quant = ureg_DECL_sampler(shader, 2); 220848b8605Smrg 221848b8605Smrg for (i = 0; i < zscan->num_channels; ++i) 222848b8605Smrg tmp[i] = ureg_DECL_temporary(shader); 223848b8605Smrg quant = ureg_DECL_temporary(shader); 224848b8605Smrg 225848b8605Smrg fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); 226848b8605Smrg 227848b8605Smrg /* 228848b8605Smrg * tmp.x = tex(vtex, 1) 229848b8605Smrg * tmp.y = vtex.z 230848b8605Smrg * fragment = tex(tmp, 0) * quant 231848b8605Smrg */ 232848b8605Smrg for (i = 0; i < zscan->num_channels; ++i) 233848b8605Smrg ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], samp_scan); 234848b8605Smrg 235848b8605Smrg for (i = 0; i < zscan->num_channels; ++i) 236848b8605Smrg ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_W)); 237848b8605Smrg 238848b8605Smrg for (i = 0; i < zscan->num_channels; ++i) { 239848b8605Smrg ureg_TEX(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, ureg_src(tmp[i]), samp_src); 240848b8605Smrg ureg_TEX(shader, ureg_writemask(quant, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, vtex[i], samp_quant); 241848b8605Smrg } 242848b8605Smrg 243848b8605Smrg ureg_MUL(shader, quant, ureg_src(quant), ureg_imm1f(shader, 16.0f)); 244848b8605Smrg ureg_MUL(shader, fragment, ureg_src(tmp[0]), ureg_src(quant)); 245848b8605Smrg 246848b8605Smrg for (i = 0; i < zscan->num_channels; ++i) 247848b8605Smrg ureg_release_temporary(shader, tmp[i]); 248848b8605Smrg ureg_END(shader); 249848b8605Smrg 250848b8605Smrg FREE(vtex); 251848b8605Smrg FREE(tmp); 252848b8605Smrg 253848b8605Smrg return ureg_create_shader_and_destroy(shader, zscan->pipe); 254848b8605Smrg} 255848b8605Smrg 256848b8605Smrgstatic bool 257848b8605Smrginit_shaders(struct vl_zscan *zscan) 258848b8605Smrg{ 259848b8605Smrg assert(zscan); 260848b8605Smrg 261848b8605Smrg zscan->vs = create_vert_shader(zscan); 262848b8605Smrg if (!zscan->vs) 263848b8605Smrg goto error_vs; 264848b8605Smrg 265848b8605Smrg zscan->fs = create_frag_shader(zscan); 266848b8605Smrg if (!zscan->fs) 267848b8605Smrg goto error_fs; 268848b8605Smrg 269848b8605Smrg return true; 270848b8605Smrg 271848b8605Smrgerror_fs: 272848b8605Smrg zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs); 273848b8605Smrg 274848b8605Smrgerror_vs: 275848b8605Smrg return false; 276848b8605Smrg} 277848b8605Smrg 278848b8605Smrgstatic void 279848b8605Smrgcleanup_shaders(struct vl_zscan *zscan) 280848b8605Smrg{ 281848b8605Smrg assert(zscan); 282848b8605Smrg 283848b8605Smrg zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs); 284848b8605Smrg zscan->pipe->delete_fs_state(zscan->pipe, zscan->fs); 285848b8605Smrg} 286848b8605Smrg 287848b8605Smrgstatic bool 288848b8605Smrginit_state(struct vl_zscan *zscan) 289848b8605Smrg{ 290848b8605Smrg struct pipe_blend_state blend; 291848b8605Smrg struct pipe_rasterizer_state rs_state; 292848b8605Smrg struct pipe_sampler_state sampler; 293848b8605Smrg unsigned i; 294848b8605Smrg 295848b8605Smrg assert(zscan); 296848b8605Smrg 297848b8605Smrg memset(&rs_state, 0, sizeof(rs_state)); 298848b8605Smrg rs_state.half_pixel_center = true; 299848b8605Smrg rs_state.bottom_edge_rule = true; 300b8e80941Smrg rs_state.depth_clip_near = 1; 301b8e80941Smrg rs_state.depth_clip_far = 1; 302b8e80941Smrg 303848b8605Smrg zscan->rs_state = zscan->pipe->create_rasterizer_state(zscan->pipe, &rs_state); 304848b8605Smrg if (!zscan->rs_state) 305848b8605Smrg goto error_rs_state; 306848b8605Smrg 307848b8605Smrg memset(&blend, 0, sizeof blend); 308848b8605Smrg 309848b8605Smrg blend.independent_blend_enable = 0; 310848b8605Smrg blend.rt[0].blend_enable = 0; 311848b8605Smrg blend.rt[0].rgb_func = PIPE_BLEND_ADD; 312848b8605Smrg blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; 313848b8605Smrg blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; 314848b8605Smrg blend.rt[0].alpha_func = PIPE_BLEND_ADD; 315848b8605Smrg blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; 316848b8605Smrg blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; 317848b8605Smrg blend.logicop_enable = 0; 318848b8605Smrg blend.logicop_func = PIPE_LOGICOP_CLEAR; 319848b8605Smrg /* Needed to allow color writes to FB, even if blending disabled */ 320848b8605Smrg blend.rt[0].colormask = PIPE_MASK_RGBA; 321848b8605Smrg blend.dither = 0; 322848b8605Smrg zscan->blend = zscan->pipe->create_blend_state(zscan->pipe, &blend); 323848b8605Smrg if (!zscan->blend) 324848b8605Smrg goto error_blend; 325848b8605Smrg 326848b8605Smrg for (i = 0; i < 3; ++i) { 327848b8605Smrg memset(&sampler, 0, sizeof(sampler)); 328848b8605Smrg sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; 329848b8605Smrg sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; 330848b8605Smrg sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; 331848b8605Smrg sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; 332848b8605Smrg sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 333848b8605Smrg sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; 334848b8605Smrg sampler.compare_mode = PIPE_TEX_COMPARE_NONE; 335848b8605Smrg sampler.compare_func = PIPE_FUNC_ALWAYS; 336848b8605Smrg sampler.normalized_coords = 1; 337848b8605Smrg zscan->samplers[i] = zscan->pipe->create_sampler_state(zscan->pipe, &sampler); 338848b8605Smrg if (!zscan->samplers[i]) 339848b8605Smrg goto error_samplers; 340848b8605Smrg } 341848b8605Smrg 342848b8605Smrg return true; 343848b8605Smrg 344848b8605Smrgerror_samplers: 345848b8605Smrg for (i = 0; i < 2; ++i) 346848b8605Smrg if (zscan->samplers[i]) 347848b8605Smrg zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]); 348848b8605Smrg 349848b8605Smrg zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state); 350848b8605Smrg 351848b8605Smrgerror_blend: 352848b8605Smrg zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend); 353848b8605Smrg 354848b8605Smrgerror_rs_state: 355848b8605Smrg return false; 356848b8605Smrg} 357848b8605Smrg 358848b8605Smrgstatic void 359848b8605Smrgcleanup_state(struct vl_zscan *zscan) 360848b8605Smrg{ 361848b8605Smrg unsigned i; 362848b8605Smrg 363848b8605Smrg assert(zscan); 364848b8605Smrg 365848b8605Smrg for (i = 0; i < 3; ++i) 366848b8605Smrg zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]); 367848b8605Smrg 368848b8605Smrg zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state); 369848b8605Smrg zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend); 370848b8605Smrg} 371848b8605Smrg 372848b8605Smrgstruct pipe_sampler_view * 373848b8605Smrgvl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks_per_line) 374848b8605Smrg{ 375848b8605Smrg const unsigned total_size = blocks_per_line * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; 376848b8605Smrg 377848b8605Smrg int patched_layout[64]; 378848b8605Smrg 379848b8605Smrg struct pipe_resource res_tmpl, *res; 380848b8605Smrg struct pipe_sampler_view sv_tmpl, *sv; 381848b8605Smrg struct pipe_transfer *buf_transfer; 382848b8605Smrg unsigned x, y, i, pitch; 383848b8605Smrg float *f; 384848b8605Smrg 385848b8605Smrg struct pipe_box rect = 386848b8605Smrg { 387848b8605Smrg 0, 0, 0, 388848b8605Smrg VL_BLOCK_WIDTH * blocks_per_line, 389848b8605Smrg VL_BLOCK_HEIGHT, 390848b8605Smrg 1 391848b8605Smrg }; 392848b8605Smrg 393848b8605Smrg assert(pipe && layout && blocks_per_line); 394848b8605Smrg 395848b8605Smrg for (i = 0; i < 64; ++i) 396848b8605Smrg patched_layout[layout[i]] = i; 397848b8605Smrg 398848b8605Smrg memset(&res_tmpl, 0, sizeof(res_tmpl)); 399848b8605Smrg res_tmpl.target = PIPE_TEXTURE_2D; 400848b8605Smrg res_tmpl.format = PIPE_FORMAT_R32_FLOAT; 401848b8605Smrg res_tmpl.width0 = VL_BLOCK_WIDTH * blocks_per_line; 402848b8605Smrg res_tmpl.height0 = VL_BLOCK_HEIGHT; 403848b8605Smrg res_tmpl.depth0 = 1; 404848b8605Smrg res_tmpl.array_size = 1; 405848b8605Smrg res_tmpl.usage = PIPE_USAGE_IMMUTABLE; 406848b8605Smrg res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW; 407848b8605Smrg 408848b8605Smrg res = pipe->screen->resource_create(pipe->screen, &res_tmpl); 409848b8605Smrg if (!res) 410848b8605Smrg goto error_resource; 411848b8605Smrg 412848b8605Smrg f = pipe->transfer_map(pipe, res, 413848b8605Smrg 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE, 414848b8605Smrg &rect, &buf_transfer); 415848b8605Smrg if (!f) 416848b8605Smrg goto error_map; 417848b8605Smrg 418848b8605Smrg pitch = buf_transfer->stride / sizeof(float); 419848b8605Smrg 420848b8605Smrg for (i = 0; i < blocks_per_line; ++i) 421848b8605Smrg for (y = 0; y < VL_BLOCK_HEIGHT; ++y) 422848b8605Smrg for (x = 0; x < VL_BLOCK_WIDTH; ++x) { 423848b8605Smrg float addr = patched_layout[x + y * VL_BLOCK_WIDTH] + 424848b8605Smrg i * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; 425848b8605Smrg 426848b8605Smrg addr /= total_size; 427848b8605Smrg 428848b8605Smrg f[i * VL_BLOCK_WIDTH + y * pitch + x] = addr; 429848b8605Smrg } 430848b8605Smrg 431848b8605Smrg pipe->transfer_unmap(pipe, buf_transfer); 432848b8605Smrg 433848b8605Smrg memset(&sv_tmpl, 0, sizeof(sv_tmpl)); 434848b8605Smrg u_sampler_view_default_template(&sv_tmpl, res, res->format); 435848b8605Smrg sv = pipe->create_sampler_view(pipe, res, &sv_tmpl); 436848b8605Smrg pipe_resource_reference(&res, NULL); 437848b8605Smrg if (!sv) 438848b8605Smrg goto error_map; 439848b8605Smrg 440848b8605Smrg return sv; 441848b8605Smrg 442848b8605Smrgerror_map: 443848b8605Smrg pipe_resource_reference(&res, NULL); 444848b8605Smrg 445848b8605Smrgerror_resource: 446848b8605Smrg return NULL; 447848b8605Smrg} 448848b8605Smrg 449848b8605Smrgbool 450848b8605Smrgvl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe, 451848b8605Smrg unsigned buffer_width, unsigned buffer_height, 452848b8605Smrg unsigned blocks_per_line, unsigned blocks_total, 453848b8605Smrg unsigned num_channels) 454848b8605Smrg{ 455848b8605Smrg assert(zscan && pipe); 456848b8605Smrg 457848b8605Smrg zscan->pipe = pipe; 458848b8605Smrg zscan->buffer_width = buffer_width; 459848b8605Smrg zscan->buffer_height = buffer_height; 460848b8605Smrg zscan->num_channels = num_channels; 461848b8605Smrg zscan->blocks_per_line = blocks_per_line; 462848b8605Smrg zscan->blocks_total = blocks_total; 463848b8605Smrg 464848b8605Smrg if(!init_shaders(zscan)) 465848b8605Smrg return false; 466848b8605Smrg 467848b8605Smrg if(!init_state(zscan)) { 468848b8605Smrg cleanup_shaders(zscan); 469848b8605Smrg return false; 470848b8605Smrg } 471848b8605Smrg 472848b8605Smrg return true; 473848b8605Smrg} 474848b8605Smrg 475848b8605Smrgvoid 476848b8605Smrgvl_zscan_cleanup(struct vl_zscan *zscan) 477848b8605Smrg{ 478848b8605Smrg assert(zscan); 479848b8605Smrg 480848b8605Smrg cleanup_shaders(zscan); 481848b8605Smrg cleanup_state(zscan); 482848b8605Smrg} 483848b8605Smrg 484848b8605Smrgbool 485848b8605Smrgvl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, 486848b8605Smrg struct pipe_sampler_view *src, struct pipe_surface *dst) 487848b8605Smrg{ 488848b8605Smrg struct pipe_resource res_tmpl, *res; 489848b8605Smrg struct pipe_sampler_view sv_tmpl; 490848b8605Smrg 491848b8605Smrg assert(zscan && buffer); 492848b8605Smrg 493848b8605Smrg memset(buffer, 0, sizeof(struct vl_zscan_buffer)); 494848b8605Smrg 495848b8605Smrg pipe_sampler_view_reference(&buffer->src, src); 496848b8605Smrg 497848b8605Smrg buffer->viewport.scale[0] = dst->width; 498848b8605Smrg buffer->viewport.scale[1] = dst->height; 499848b8605Smrg buffer->viewport.scale[2] = 1; 500848b8605Smrg buffer->viewport.translate[0] = 0; 501848b8605Smrg buffer->viewport.translate[1] = 0; 502848b8605Smrg buffer->viewport.translate[2] = 0; 503848b8605Smrg 504848b8605Smrg buffer->fb_state.width = dst->width; 505848b8605Smrg buffer->fb_state.height = dst->height; 506848b8605Smrg buffer->fb_state.nr_cbufs = 1; 507848b8605Smrg pipe_surface_reference(&buffer->fb_state.cbufs[0], dst); 508848b8605Smrg 509848b8605Smrg memset(&res_tmpl, 0, sizeof(res_tmpl)); 510848b8605Smrg res_tmpl.target = PIPE_TEXTURE_3D; 511848b8605Smrg res_tmpl.format = PIPE_FORMAT_R8_UNORM; 512848b8605Smrg res_tmpl.width0 = VL_BLOCK_WIDTH * zscan->blocks_per_line; 513848b8605Smrg res_tmpl.height0 = VL_BLOCK_HEIGHT; 514848b8605Smrg res_tmpl.depth0 = 2; 515848b8605Smrg res_tmpl.array_size = 1; 516848b8605Smrg res_tmpl.usage = PIPE_USAGE_IMMUTABLE; 517848b8605Smrg res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW; 518848b8605Smrg 519848b8605Smrg res = zscan->pipe->screen->resource_create(zscan->pipe->screen, &res_tmpl); 520848b8605Smrg if (!res) 521848b8605Smrg return false; 522848b8605Smrg 523848b8605Smrg memset(&sv_tmpl, 0, sizeof(sv_tmpl)); 524848b8605Smrg u_sampler_view_default_template(&sv_tmpl, res, res->format); 525848b8605Smrg sv_tmpl.swizzle_r = sv_tmpl.swizzle_g = sv_tmpl.swizzle_b = sv_tmpl.swizzle_a = TGSI_SWIZZLE_X; 526848b8605Smrg buffer->quant = zscan->pipe->create_sampler_view(zscan->pipe, res, &sv_tmpl); 527848b8605Smrg pipe_resource_reference(&res, NULL); 528848b8605Smrg if (!buffer->quant) 529848b8605Smrg return false; 530848b8605Smrg 531848b8605Smrg return true; 532848b8605Smrg} 533848b8605Smrg 534848b8605Smrgvoid 535848b8605Smrgvl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer) 536848b8605Smrg{ 537848b8605Smrg assert(buffer); 538848b8605Smrg 539848b8605Smrg pipe_sampler_view_reference(&buffer->src, NULL); 540848b8605Smrg pipe_sampler_view_reference(&buffer->layout, NULL); 541848b8605Smrg pipe_sampler_view_reference(&buffer->quant, NULL); 542848b8605Smrg pipe_surface_reference(&buffer->fb_state.cbufs[0], NULL); 543848b8605Smrg} 544848b8605Smrg 545848b8605Smrgvoid 546848b8605Smrgvl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout) 547848b8605Smrg{ 548848b8605Smrg assert(buffer); 549848b8605Smrg assert(layout); 550848b8605Smrg 551848b8605Smrg pipe_sampler_view_reference(&buffer->layout, layout); 552848b8605Smrg} 553848b8605Smrg 554848b8605Smrgvoid 555848b8605Smrgvl_zscan_upload_quant(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, 556848b8605Smrg const uint8_t matrix[64], bool intra) 557848b8605Smrg{ 558848b8605Smrg struct pipe_context *pipe; 559848b8605Smrg struct pipe_transfer *buf_transfer; 560848b8605Smrg unsigned x, y, i, pitch; 561848b8605Smrg uint8_t *data; 562848b8605Smrg 563848b8605Smrg struct pipe_box rect = 564848b8605Smrg { 565848b8605Smrg 0, 0, intra ? 1 : 0, 566848b8605Smrg VL_BLOCK_WIDTH, 567848b8605Smrg VL_BLOCK_HEIGHT, 568848b8605Smrg 1 569848b8605Smrg }; 570848b8605Smrg 571848b8605Smrg assert(buffer); 572848b8605Smrg assert(matrix); 573848b8605Smrg 574848b8605Smrg pipe = zscan->pipe; 575848b8605Smrg 576848b8605Smrg rect.width *= zscan->blocks_per_line; 577848b8605Smrg 578848b8605Smrg data = pipe->transfer_map(pipe, buffer->quant->texture, 579848b8605Smrg 0, PIPE_TRANSFER_WRITE | 580848b8605Smrg PIPE_TRANSFER_DISCARD_RANGE, 581848b8605Smrg &rect, &buf_transfer); 582848b8605Smrg if (!data) 583848b8605Smrg return; 584848b8605Smrg 585848b8605Smrg pitch = buf_transfer->stride; 586848b8605Smrg 587848b8605Smrg for (i = 0; i < zscan->blocks_per_line; ++i) 588848b8605Smrg for (y = 0; y < VL_BLOCK_HEIGHT; ++y) 589848b8605Smrg for (x = 0; x < VL_BLOCK_WIDTH; ++x) 590848b8605Smrg data[i * VL_BLOCK_WIDTH + y * pitch + x] = matrix[x + y * VL_BLOCK_WIDTH]; 591848b8605Smrg 592848b8605Smrg pipe->transfer_unmap(pipe, buf_transfer); 593848b8605Smrg} 594848b8605Smrg 595848b8605Smrgvoid 596848b8605Smrgvl_zscan_render(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, unsigned num_instances) 597848b8605Smrg{ 598848b8605Smrg assert(buffer); 599848b8605Smrg 600848b8605Smrg zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state); 601848b8605Smrg zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend); 602848b8605Smrg zscan->pipe->bind_sampler_states(zscan->pipe, PIPE_SHADER_FRAGMENT, 603848b8605Smrg 0, 3, zscan->samplers); 604848b8605Smrg zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state); 605848b8605Smrg zscan->pipe->set_viewport_states(zscan->pipe, 0, 1, &buffer->viewport); 606848b8605Smrg zscan->pipe->set_sampler_views(zscan->pipe, PIPE_SHADER_FRAGMENT, 607848b8605Smrg 0, 3, &buffer->src); 608848b8605Smrg zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs); 609848b8605Smrg zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs); 610848b8605Smrg util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); 611848b8605Smrg} 612