1848b8605Smrg/************************************************************************** 2848b8605Smrg * 3848b8605Smrg * Copyright 2010 Christian König 4848b8605Smrg * All Rights Reserved. 5848b8605Smrg * 6848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 7848b8605Smrg * copy of this software and associated documentation files (the 8848b8605Smrg * "Software"), to deal in the Software without restriction, including 9848b8605Smrg * without limitation the rights to use, copy, modify, merge, publish, 10848b8605Smrg * distribute, sub license, and/or sell copies of the Software, and to 11848b8605Smrg * permit persons to whom the Software is furnished to do so, subject to 12848b8605Smrg * the following conditions: 13848b8605Smrg * 14848b8605Smrg * The above copyright notice and this permission notice (including the 15848b8605Smrg * next paragraph) shall be included in all copies or substantial portions 16848b8605Smrg * of the Software. 17848b8605Smrg * 18848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19848b8605Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20848b8605Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21848b8605Smrg * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22848b8605Smrg * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23848b8605Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24848b8605Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25848b8605Smrg * 26848b8605Smrg **************************************************************************/ 27848b8605Smrg 28848b8605Smrg#include <assert.h> 29848b8605Smrg 30848b8605Smrg#include "pipe/p_context.h" 31848b8605Smrg#include "pipe/p_screen.h" 32848b8605Smrg 33848b8605Smrg#include "util/u_draw.h" 34848b8605Smrg#include "util/u_sampler.h" 35848b8605Smrg#include "util/u_memory.h" 36848b8605Smrg 37848b8605Smrg#include "tgsi/tgsi_ureg.h" 38848b8605Smrg 39848b8605Smrg#include "vl_defines.h" 40848b8605Smrg#include "vl_types.h" 41848b8605Smrg#include "vl_vertex_buffers.h" 42848b8605Smrg#include "vl_idct.h" 43848b8605Smrg 44848b8605Smrgenum VS_OUTPUT 45848b8605Smrg{ 46848b8605Smrg VS_O_VPOS = 0, 47848b8605Smrg VS_O_L_ADDR0 = 0, 48848b8605Smrg VS_O_L_ADDR1, 49848b8605Smrg VS_O_R_ADDR0, 50848b8605Smrg VS_O_R_ADDR1 51848b8605Smrg}; 52848b8605Smrg 53848b8605Smrg/** 54848b8605Smrg * The DCT matrix stored as hex representation of floats. Equal to the following equation: 55848b8605Smrg * for (i = 0; i < 8; ++i) 56848b8605Smrg * for (j = 0; j < 8; ++j) 57848b8605Smrg * if (i == 0) const_matrix[i][j] = 1.0f / sqrtf(8.0f); 58848b8605Smrg * else const_matrix[i][j] = sqrtf(2.0f / 8.0f) * cosf((2 * j + 1) * i * M_PI / (2.0f * 8.0f)); 59848b8605Smrg */ 60848b8605Smrgstatic const uint32_t const_matrix[8][8] = { 61848b8605Smrg { 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3 }, 62848b8605Smrg { 0x3efb14be, 0x3ed4db31, 0x3e8e39da, 0x3dc7c5c4, 0xbdc7c5c2, 0xbe8e39d9, 0xbed4db32, 0xbefb14bf }, 63848b8605Smrg { 0x3eec835f, 0x3e43ef15, 0xbe43ef14, 0xbeec835e, 0xbeec835f, 0xbe43ef1a, 0x3e43ef1b, 0x3eec835f }, 64848b8605Smrg { 0x3ed4db31, 0xbdc7c5c2, 0xbefb14bf, 0xbe8e39dd, 0x3e8e39d7, 0x3efb14bf, 0x3dc7c5d0, 0xbed4db34 }, 65848b8605Smrg { 0x3eb504f3, 0xbeb504f3, 0xbeb504f4, 0x3eb504f1, 0x3eb504f3, 0xbeb504f0, 0xbeb504ef, 0x3eb504f4 }, 66848b8605Smrg { 0x3e8e39da, 0xbefb14bf, 0x3dc7c5c8, 0x3ed4db32, 0xbed4db34, 0xbdc7c5bb, 0x3efb14bf, 0xbe8e39d7 }, 67848b8605Smrg { 0x3e43ef15, 0xbeec835f, 0x3eec835f, 0xbe43ef07, 0xbe43ef23, 0x3eec8361, 0xbeec835c, 0x3e43ef25 }, 68848b8605Smrg { 0x3dc7c5c4, 0xbe8e39dd, 0x3ed4db32, 0xbefb14c0, 0x3efb14be, 0xbed4db31, 0x3e8e39ce, 0xbdc7c596 }, 69848b8605Smrg}; 70848b8605Smrg 71848b8605Smrgstatic void 72848b8605Smrgcalc_addr(struct ureg_program *shader, struct ureg_dst addr[2], 73848b8605Smrg struct ureg_src tc, struct ureg_src start, bool right_side, 74848b8605Smrg bool transposed, float size) 75848b8605Smrg{ 76848b8605Smrg unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; 77848b8605Smrg unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X; 78848b8605Smrg 79848b8605Smrg unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; 80848b8605Smrg unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y; 81848b8605Smrg 82848b8605Smrg /* 83848b8605Smrg * addr[0..1].(start) = right_side ? start.x : tc.x 84848b8605Smrg * addr[0..1].(tc) = right_side ? tc.y : start.y 85848b8605Smrg * addr[0..1].z = tc.z 86848b8605Smrg * addr[1].(start) += 1.0f / scale 87848b8605Smrg */ 88848b8605Smrg ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start)); 89848b8605Smrg ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc)); 90848b8605Smrg 91848b8605Smrg ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size)); 92848b8605Smrg ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc)); 93848b8605Smrg} 94848b8605Smrg 95848b8605Smrgstatic void 96848b8605Smrgincrement_addr(struct ureg_program *shader, struct ureg_dst daddr[2], 97848b8605Smrg struct ureg_src saddr[2], bool right_side, bool transposed, 98848b8605Smrg int pos, float size) 99848b8605Smrg{ 100848b8605Smrg unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y; 101848b8605Smrg unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X; 102848b8605Smrg 103848b8605Smrg /* 104848b8605Smrg * daddr[0..1].(start) = saddr[0..1].(start) 105848b8605Smrg * daddr[0..1].(tc) = saddr[0..1].(tc) 106848b8605Smrg */ 107848b8605Smrg 108848b8605Smrg ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]); 109848b8605Smrg ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size)); 110848b8605Smrg ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]); 111848b8605Smrg ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size)); 112848b8605Smrg} 113848b8605Smrg 114848b8605Smrgstatic void 115848b8605Smrgfetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], 116848b8605Smrg struct ureg_src sampler, bool resource3d) 117848b8605Smrg{ 118848b8605Smrg ureg_TEX(shader, m[0], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[0], sampler); 119848b8605Smrg ureg_TEX(shader, m[1], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[1], sampler); 120848b8605Smrg} 121848b8605Smrg 122848b8605Smrgstatic void 123848b8605Smrgmatrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2]) 124848b8605Smrg{ 125848b8605Smrg struct ureg_dst tmp; 126848b8605Smrg 127848b8605Smrg tmp = ureg_DECL_temporary(shader); 128848b8605Smrg 129848b8605Smrg /* 130848b8605Smrg * tmp.xy = dot4(m[0][0..1], m[1][0..1]) 131848b8605Smrg * dst = tmp.x + tmp.y 132848b8605Smrg */ 133848b8605Smrg ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0])); 134848b8605Smrg ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1])); 135848b8605Smrg ureg_ADD(shader, dst, 136848b8605Smrg ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), 137848b8605Smrg ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 138848b8605Smrg 139848b8605Smrg ureg_release_temporary(shader, tmp); 140848b8605Smrg} 141848b8605Smrg 142848b8605Smrgstatic void * 143848b8605Smrgcreate_mismatch_vert_shader(struct vl_idct *idct) 144848b8605Smrg{ 145848b8605Smrg struct ureg_program *shader; 146848b8605Smrg struct ureg_src vpos; 147848b8605Smrg struct ureg_src scale; 148848b8605Smrg struct ureg_dst t_tex; 149848b8605Smrg struct ureg_dst o_vpos, o_addr[2]; 150848b8605Smrg 151b8e80941Smrg shader = ureg_create(PIPE_SHADER_VERTEX); 152848b8605Smrg if (!shader) 153848b8605Smrg return NULL; 154848b8605Smrg 155848b8605Smrg vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); 156848b8605Smrg 157848b8605Smrg t_tex = ureg_DECL_temporary(shader); 158848b8605Smrg 159848b8605Smrg o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); 160848b8605Smrg 161848b8605Smrg o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); 162848b8605Smrg o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); 163848b8605Smrg 164848b8605Smrg /* 165848b8605Smrg * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height) 166848b8605Smrg * 167848b8605Smrg * t_vpos = vpos + 7 / VL_BLOCK_WIDTH 168848b8605Smrg * o_vpos.xy = t_vpos * scale 169848b8605Smrg * 170848b8605Smrg * o_addr = calc_addr(...) 171848b8605Smrg * 172848b8605Smrg */ 173848b8605Smrg 174848b8605Smrg scale = ureg_imm2f(shader, 175848b8605Smrg (float)VL_BLOCK_WIDTH / idct->buffer_width, 176848b8605Smrg (float)VL_BLOCK_HEIGHT / idct->buffer_height); 177848b8605Smrg 178848b8605Smrg ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale); 179848b8605Smrg ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); 180848b8605Smrg 181848b8605Smrg ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale); 182848b8605Smrg calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4); 183848b8605Smrg 184848b8605Smrg ureg_release_temporary(shader, t_tex); 185848b8605Smrg 186848b8605Smrg ureg_END(shader); 187848b8605Smrg 188848b8605Smrg return ureg_create_shader_and_destroy(shader, idct->pipe); 189848b8605Smrg} 190848b8605Smrg 191848b8605Smrgstatic void * 192848b8605Smrgcreate_mismatch_frag_shader(struct vl_idct *idct) 193848b8605Smrg{ 194848b8605Smrg struct ureg_program *shader; 195848b8605Smrg 196848b8605Smrg struct ureg_src addr[2]; 197848b8605Smrg 198848b8605Smrg struct ureg_dst m[8][2]; 199848b8605Smrg struct ureg_dst fragment; 200848b8605Smrg 201848b8605Smrg unsigned i; 202848b8605Smrg 203b8e80941Smrg shader = ureg_create(PIPE_SHADER_FRAGMENT); 204848b8605Smrg if (!shader) 205848b8605Smrg return NULL; 206848b8605Smrg 207848b8605Smrg addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); 208848b8605Smrg addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); 209848b8605Smrg 210848b8605Smrg fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); 211848b8605Smrg 212848b8605Smrg for (i = 0; i < 8; ++i) { 213848b8605Smrg m[i][0] = ureg_DECL_temporary(shader); 214848b8605Smrg m[i][1] = ureg_DECL_temporary(shader); 215848b8605Smrg } 216848b8605Smrg 217848b8605Smrg for (i = 0; i < 8; ++i) { 218848b8605Smrg increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height); 219848b8605Smrg } 220848b8605Smrg 221848b8605Smrg for (i = 0; i < 8; ++i) { 222848b8605Smrg struct ureg_src s_addr[2]; 223848b8605Smrg s_addr[0] = ureg_src(m[i][0]); 224848b8605Smrg s_addr[1] = ureg_src(m[i][1]); 225848b8605Smrg fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false); 226848b8605Smrg } 227848b8605Smrg 228848b8605Smrg for (i = 1; i < 8; ++i) { 229848b8605Smrg ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0])); 230848b8605Smrg ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1])); 231848b8605Smrg } 232848b8605Smrg 233848b8605Smrg ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1])); 234848b8605Smrg ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14)); 235848b8605Smrg 236848b8605Smrg ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14)); 237848b8605Smrg ureg_FRC(shader, m[0][0], ureg_src(m[0][0])); 238848b8605Smrg ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0]))); 239848b8605Smrg 240848b8605Smrg ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])), 241848b8605Smrg ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15))); 242848b8605Smrg ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]), 243848b8605Smrg ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X)); 244848b8605Smrg 245848b8605Smrg ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1])); 246848b8605Smrg ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1])); 247848b8605Smrg 248848b8605Smrg for (i = 0; i < 8; ++i) { 249848b8605Smrg ureg_release_temporary(shader, m[i][0]); 250848b8605Smrg ureg_release_temporary(shader, m[i][1]); 251848b8605Smrg } 252848b8605Smrg 253848b8605Smrg ureg_END(shader); 254848b8605Smrg 255848b8605Smrg return ureg_create_shader_and_destroy(shader, idct->pipe); 256848b8605Smrg} 257848b8605Smrg 258848b8605Smrgstatic void * 259848b8605Smrgcreate_stage1_vert_shader(struct vl_idct *idct) 260848b8605Smrg{ 261848b8605Smrg struct ureg_program *shader; 262848b8605Smrg struct ureg_src vrect, vpos; 263848b8605Smrg struct ureg_src scale; 264848b8605Smrg struct ureg_dst t_tex, t_start; 265848b8605Smrg struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2]; 266848b8605Smrg 267b8e80941Smrg shader = ureg_create(PIPE_SHADER_VERTEX); 268848b8605Smrg if (!shader) 269848b8605Smrg return NULL; 270848b8605Smrg 271848b8605Smrg vrect = ureg_DECL_vs_input(shader, VS_I_RECT); 272848b8605Smrg vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); 273848b8605Smrg 274848b8605Smrg t_tex = ureg_DECL_temporary(shader); 275848b8605Smrg t_start = ureg_DECL_temporary(shader); 276848b8605Smrg 277848b8605Smrg o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); 278848b8605Smrg 279848b8605Smrg o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); 280848b8605Smrg o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); 281848b8605Smrg 282848b8605Smrg o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0); 283848b8605Smrg o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1); 284848b8605Smrg 285848b8605Smrg /* 286848b8605Smrg * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height) 287848b8605Smrg * 288848b8605Smrg * t_vpos = vpos + vrect 289848b8605Smrg * o_vpos.xy = t_vpos * scale 290848b8605Smrg * o_vpos.zw = vpos 291848b8605Smrg * 292848b8605Smrg * o_l_addr = calc_addr(...) 293848b8605Smrg * o_r_addr = calc_addr(...) 294848b8605Smrg * 295848b8605Smrg */ 296848b8605Smrg 297848b8605Smrg scale = ureg_imm2f(shader, 298848b8605Smrg (float)VL_BLOCK_WIDTH / idct->buffer_width, 299848b8605Smrg (float)VL_BLOCK_HEIGHT / idct->buffer_height); 300848b8605Smrg 301848b8605Smrg ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect); 302848b8605Smrg ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); 303848b8605Smrg 304848b8605Smrg ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex)); 305848b8605Smrg ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); 306848b8605Smrg 307848b8605Smrg ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); 308848b8605Smrg 309848b8605Smrg calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); 310848b8605Smrg calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, VL_BLOCK_WIDTH / 4); 311848b8605Smrg 312848b8605Smrg ureg_release_temporary(shader, t_tex); 313848b8605Smrg ureg_release_temporary(shader, t_start); 314848b8605Smrg 315848b8605Smrg ureg_END(shader); 316848b8605Smrg 317848b8605Smrg return ureg_create_shader_and_destroy(shader, idct->pipe); 318848b8605Smrg} 319848b8605Smrg 320848b8605Smrgstatic void * 321848b8605Smrgcreate_stage1_frag_shader(struct vl_idct *idct) 322848b8605Smrg{ 323848b8605Smrg struct ureg_program *shader; 324848b8605Smrg struct ureg_src l_addr[2], r_addr[2]; 325848b8605Smrg struct ureg_dst l[4][2], r[2]; 326848b8605Smrg struct ureg_dst *fragment; 327b8e80941Smrg unsigned i; 328b8e80941Smrg int j; 329848b8605Smrg 330b8e80941Smrg shader = ureg_create(PIPE_SHADER_FRAGMENT); 331848b8605Smrg if (!shader) 332848b8605Smrg return NULL; 333848b8605Smrg 334848b8605Smrg fragment = MALLOC(idct->nr_of_render_targets * sizeof(struct ureg_dst)); 335848b8605Smrg 336848b8605Smrg l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); 337848b8605Smrg l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); 338848b8605Smrg 339848b8605Smrg r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); 340848b8605Smrg r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); 341848b8605Smrg 342848b8605Smrg for (i = 0; i < idct->nr_of_render_targets; ++i) 343848b8605Smrg fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i); 344848b8605Smrg 345848b8605Smrg for (i = 0; i < 4; ++i) { 346848b8605Smrg l[i][0] = ureg_DECL_temporary(shader); 347848b8605Smrg l[i][1] = ureg_DECL_temporary(shader); 348848b8605Smrg } 349848b8605Smrg 350848b8605Smrg r[0] = ureg_DECL_temporary(shader); 351848b8605Smrg r[1] = ureg_DECL_temporary(shader); 352848b8605Smrg 353848b8605Smrg for (i = 0; i < 4; ++i) { 354848b8605Smrg increment_addr(shader, l[i], l_addr, false, false, i - 2, idct->buffer_height); 355848b8605Smrg } 356848b8605Smrg 357848b8605Smrg for (i = 0; i < 4; ++i) { 358848b8605Smrg struct ureg_src s_addr[2]; 359848b8605Smrg s_addr[0] = ureg_src(l[i][0]); 360848b8605Smrg s_addr[1] = ureg_src(l[i][1]); 361848b8605Smrg fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false); 362848b8605Smrg } 363848b8605Smrg 364848b8605Smrg for (i = 0; i < idct->nr_of_render_targets; ++i) { 365848b8605Smrg struct ureg_src s_addr[2]; 366848b8605Smrg 367848b8605Smrg increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, VL_BLOCK_HEIGHT); 368848b8605Smrg 369848b8605Smrg s_addr[0] = ureg_src(r[0]); 370848b8605Smrg s_addr[1] = ureg_src(r[1]); 371848b8605Smrg fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false); 372848b8605Smrg 373848b8605Smrg for (j = 0; j < 4; ++j) { 374848b8605Smrg matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r); 375848b8605Smrg } 376848b8605Smrg } 377848b8605Smrg 378848b8605Smrg for (i = 0; i < 4; ++i) { 379848b8605Smrg ureg_release_temporary(shader, l[i][0]); 380848b8605Smrg ureg_release_temporary(shader, l[i][1]); 381848b8605Smrg } 382848b8605Smrg ureg_release_temporary(shader, r[0]); 383848b8605Smrg ureg_release_temporary(shader, r[1]); 384848b8605Smrg 385848b8605Smrg ureg_END(shader); 386848b8605Smrg 387848b8605Smrg FREE(fragment); 388848b8605Smrg 389848b8605Smrg return ureg_create_shader_and_destroy(shader, idct->pipe); 390848b8605Smrg} 391848b8605Smrg 392848b8605Smrgvoid 393848b8605Smrgvl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader, 394848b8605Smrg unsigned first_output, struct ureg_dst tex) 395848b8605Smrg{ 396848b8605Smrg struct ureg_src vrect, vpos; 397848b8605Smrg struct ureg_src scale; 398848b8605Smrg struct ureg_dst t_start; 399848b8605Smrg struct ureg_dst o_l_addr[2], o_r_addr[2]; 400848b8605Smrg 401848b8605Smrg vrect = ureg_DECL_vs_input(shader, VS_I_RECT); 402848b8605Smrg vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); 403848b8605Smrg 404848b8605Smrg t_start = ureg_DECL_temporary(shader); 405848b8605Smrg 406848b8605Smrg --first_output; 407848b8605Smrg 408848b8605Smrg o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0); 409848b8605Smrg o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1); 410848b8605Smrg 411848b8605Smrg o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0); 412848b8605Smrg o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1); 413848b8605Smrg 414848b8605Smrg scale = ureg_imm2f(shader, 415848b8605Smrg (float)VL_BLOCK_WIDTH / idct->buffer_width, 416848b8605Smrg (float)VL_BLOCK_HEIGHT / idct->buffer_height); 417848b8605Smrg 418848b8605Smrg ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z), 419848b8605Smrg ureg_scalar(vrect, TGSI_SWIZZLE_X), 420848b8605Smrg ureg_imm1f(shader, VL_BLOCK_WIDTH / idct->nr_of_render_targets)); 421848b8605Smrg ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); 422848b8605Smrg 423848b8605Smrg calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, VL_BLOCK_WIDTH / 4); 424848b8605Smrg calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4); 425848b8605Smrg 426848b8605Smrg ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex)); 427848b8605Smrg ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex)); 428848b8605Smrg} 429848b8605Smrg 430848b8605Smrgvoid 431848b8605Smrgvl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader, 432848b8605Smrg unsigned first_input, struct ureg_dst fragment) 433848b8605Smrg{ 434848b8605Smrg struct ureg_src l_addr[2], r_addr[2]; 435848b8605Smrg 436848b8605Smrg struct ureg_dst l[2], r[2]; 437848b8605Smrg 438848b8605Smrg --first_input; 439848b8605Smrg 440848b8605Smrg l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); 441848b8605Smrg l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); 442848b8605Smrg 443848b8605Smrg r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); 444848b8605Smrg r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); 445848b8605Smrg 446848b8605Smrg l[0] = ureg_DECL_temporary(shader); 447848b8605Smrg l[1] = ureg_DECL_temporary(shader); 448848b8605Smrg r[0] = ureg_DECL_temporary(shader); 449848b8605Smrg r[1] = ureg_DECL_temporary(shader); 450848b8605Smrg 451848b8605Smrg fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false); 452848b8605Smrg fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true); 453848b8605Smrg 454848b8605Smrg matrix_mul(shader, fragment, l, r); 455848b8605Smrg 456848b8605Smrg ureg_release_temporary(shader, l[0]); 457848b8605Smrg ureg_release_temporary(shader, l[1]); 458848b8605Smrg ureg_release_temporary(shader, r[0]); 459848b8605Smrg ureg_release_temporary(shader, r[1]); 460848b8605Smrg} 461848b8605Smrg 462848b8605Smrgstatic bool 463848b8605Smrginit_shaders(struct vl_idct *idct) 464848b8605Smrg{ 465848b8605Smrg idct->vs_mismatch = create_mismatch_vert_shader(idct); 466848b8605Smrg if (!idct->vs_mismatch) 467848b8605Smrg goto error_vs_mismatch; 468848b8605Smrg 469848b8605Smrg idct->fs_mismatch = create_mismatch_frag_shader(idct); 470848b8605Smrg if (!idct->fs_mismatch) 471848b8605Smrg goto error_fs_mismatch; 472848b8605Smrg 473848b8605Smrg idct->vs = create_stage1_vert_shader(idct); 474848b8605Smrg if (!idct->vs) 475848b8605Smrg goto error_vs; 476848b8605Smrg 477848b8605Smrg idct->fs = create_stage1_frag_shader(idct); 478848b8605Smrg if (!idct->fs) 479848b8605Smrg goto error_fs; 480848b8605Smrg 481848b8605Smrg return true; 482848b8605Smrg 483848b8605Smrgerror_fs: 484848b8605Smrg idct->pipe->delete_vs_state(idct->pipe, idct->vs); 485848b8605Smrg 486848b8605Smrgerror_vs: 487848b8605Smrg idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch); 488848b8605Smrg 489848b8605Smrgerror_fs_mismatch: 490848b8605Smrg idct->pipe->delete_vs_state(idct->pipe, idct->fs); 491848b8605Smrg 492848b8605Smrgerror_vs_mismatch: 493848b8605Smrg return false; 494848b8605Smrg} 495848b8605Smrg 496848b8605Smrgstatic void 497848b8605Smrgcleanup_shaders(struct vl_idct *idct) 498848b8605Smrg{ 499848b8605Smrg idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch); 500848b8605Smrg idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch); 501848b8605Smrg idct->pipe->delete_vs_state(idct->pipe, idct->vs); 502848b8605Smrg idct->pipe->delete_fs_state(idct->pipe, idct->fs); 503848b8605Smrg} 504848b8605Smrg 505848b8605Smrgstatic bool 506848b8605Smrginit_state(struct vl_idct *idct) 507848b8605Smrg{ 508848b8605Smrg struct pipe_blend_state blend; 509848b8605Smrg struct pipe_rasterizer_state rs_state; 510848b8605Smrg struct pipe_sampler_state sampler; 511848b8605Smrg unsigned i; 512848b8605Smrg 513848b8605Smrg assert(idct); 514848b8605Smrg 515848b8605Smrg memset(&rs_state, 0, sizeof(rs_state)); 516848b8605Smrg rs_state.point_size = 1; 517848b8605Smrg rs_state.half_pixel_center = true; 518848b8605Smrg rs_state.bottom_edge_rule = true; 519b8e80941Smrg rs_state.depth_clip_near = 1; 520b8e80941Smrg rs_state.depth_clip_far = 1; 521b8e80941Smrg 522848b8605Smrg idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state); 523848b8605Smrg if (!idct->rs_state) 524848b8605Smrg goto error_rs_state; 525848b8605Smrg 526848b8605Smrg memset(&blend, 0, sizeof blend); 527848b8605Smrg 528848b8605Smrg blend.independent_blend_enable = 0; 529848b8605Smrg blend.rt[0].blend_enable = 0; 530848b8605Smrg blend.rt[0].rgb_func = PIPE_BLEND_ADD; 531848b8605Smrg blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; 532848b8605Smrg blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; 533848b8605Smrg blend.rt[0].alpha_func = PIPE_BLEND_ADD; 534848b8605Smrg blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; 535848b8605Smrg blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; 536848b8605Smrg blend.logicop_enable = 0; 537848b8605Smrg blend.logicop_func = PIPE_LOGICOP_CLEAR; 538848b8605Smrg /* Needed to allow color writes to FB, even if blending disabled */ 539848b8605Smrg blend.rt[0].colormask = PIPE_MASK_RGBA; 540848b8605Smrg blend.dither = 0; 541848b8605Smrg idct->blend = idct->pipe->create_blend_state(idct->pipe, &blend); 542848b8605Smrg if (!idct->blend) 543848b8605Smrg goto error_blend; 544848b8605Smrg 545848b8605Smrg for (i = 0; i < 2; ++i) { 546848b8605Smrg memset(&sampler, 0, sizeof(sampler)); 547848b8605Smrg sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; 548848b8605Smrg sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; 549848b8605Smrg sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; 550848b8605Smrg sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; 551848b8605Smrg sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 552848b8605Smrg sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; 553848b8605Smrg sampler.compare_mode = PIPE_TEX_COMPARE_NONE; 554848b8605Smrg sampler.compare_func = PIPE_FUNC_ALWAYS; 555848b8605Smrg sampler.normalized_coords = 1; 556848b8605Smrg idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler); 557848b8605Smrg if (!idct->samplers[i]) 558848b8605Smrg goto error_samplers; 559848b8605Smrg } 560848b8605Smrg 561848b8605Smrg return true; 562848b8605Smrg 563848b8605Smrgerror_samplers: 564848b8605Smrg for (i = 0; i < 2; ++i) 565848b8605Smrg if (idct->samplers[i]) 566848b8605Smrg idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]); 567848b8605Smrg 568848b8605Smrg idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state); 569848b8605Smrg 570848b8605Smrgerror_blend: 571848b8605Smrg idct->pipe->delete_blend_state(idct->pipe, idct->blend); 572848b8605Smrg 573848b8605Smrgerror_rs_state: 574848b8605Smrg return false; 575848b8605Smrg} 576848b8605Smrg 577848b8605Smrgstatic void 578848b8605Smrgcleanup_state(struct vl_idct *idct) 579848b8605Smrg{ 580848b8605Smrg unsigned i; 581848b8605Smrg 582848b8605Smrg for (i = 0; i < 2; ++i) 583848b8605Smrg idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]); 584848b8605Smrg 585848b8605Smrg idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state); 586848b8605Smrg idct->pipe->delete_blend_state(idct->pipe, idct->blend); 587848b8605Smrg} 588848b8605Smrg 589848b8605Smrgstatic bool 590848b8605Smrginit_source(struct vl_idct *idct, struct vl_idct_buffer *buffer) 591848b8605Smrg{ 592848b8605Smrg struct pipe_resource *tex; 593848b8605Smrg struct pipe_surface surf_templ; 594848b8605Smrg 595848b8605Smrg assert(idct && buffer); 596848b8605Smrg 597848b8605Smrg tex = buffer->sampler_views.individual.source->texture; 598848b8605Smrg 599848b8605Smrg buffer->fb_state_mismatch.width = tex->width0; 600848b8605Smrg buffer->fb_state_mismatch.height = tex->height0; 601848b8605Smrg buffer->fb_state_mismatch.nr_cbufs = 1; 602848b8605Smrg 603848b8605Smrg memset(&surf_templ, 0, sizeof(surf_templ)); 604848b8605Smrg surf_templ.format = tex->format; 605848b8605Smrg surf_templ.u.tex.first_layer = 0; 606848b8605Smrg surf_templ.u.tex.last_layer = 0; 607848b8605Smrg buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ); 608848b8605Smrg 609848b8605Smrg buffer->viewport_mismatch.scale[0] = tex->width0; 610848b8605Smrg buffer->viewport_mismatch.scale[1] = tex->height0; 611848b8605Smrg buffer->viewport_mismatch.scale[2] = 1; 612848b8605Smrg 613848b8605Smrg return true; 614848b8605Smrg} 615848b8605Smrg 616848b8605Smrgstatic void 617848b8605Smrgcleanup_source(struct vl_idct_buffer *buffer) 618848b8605Smrg{ 619848b8605Smrg assert(buffer); 620848b8605Smrg 621848b8605Smrg pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL); 622848b8605Smrg 623848b8605Smrg pipe_sampler_view_reference(&buffer->sampler_views.individual.source, NULL); 624848b8605Smrg} 625848b8605Smrg 626848b8605Smrgstatic bool 627848b8605Smrginit_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer) 628848b8605Smrg{ 629848b8605Smrg struct pipe_resource *tex; 630848b8605Smrg struct pipe_surface surf_templ; 631848b8605Smrg unsigned i; 632848b8605Smrg 633848b8605Smrg assert(idct && buffer); 634848b8605Smrg 635848b8605Smrg tex = buffer->sampler_views.individual.intermediate->texture; 636848b8605Smrg 637848b8605Smrg buffer->fb_state.width = tex->width0; 638848b8605Smrg buffer->fb_state.height = tex->height0; 639848b8605Smrg buffer->fb_state.nr_cbufs = idct->nr_of_render_targets; 640848b8605Smrg for(i = 0; i < idct->nr_of_render_targets; ++i) { 641848b8605Smrg memset(&surf_templ, 0, sizeof(surf_templ)); 642848b8605Smrg surf_templ.format = tex->format; 643848b8605Smrg surf_templ.u.tex.first_layer = i; 644848b8605Smrg surf_templ.u.tex.last_layer = i; 645848b8605Smrg buffer->fb_state.cbufs[i] = idct->pipe->create_surface( 646848b8605Smrg idct->pipe, tex, &surf_templ); 647848b8605Smrg 648848b8605Smrg if (!buffer->fb_state.cbufs[i]) 649848b8605Smrg goto error_surfaces; 650848b8605Smrg } 651848b8605Smrg 652848b8605Smrg buffer->viewport.scale[0] = tex->width0; 653848b8605Smrg buffer->viewport.scale[1] = tex->height0; 654848b8605Smrg buffer->viewport.scale[2] = 1; 655848b8605Smrg 656848b8605Smrg return true; 657848b8605Smrg 658848b8605Smrgerror_surfaces: 659848b8605Smrg for(i = 0; i < idct->nr_of_render_targets; ++i) 660848b8605Smrg pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); 661848b8605Smrg 662848b8605Smrg return false; 663848b8605Smrg} 664848b8605Smrg 665848b8605Smrgstatic void 666848b8605Smrgcleanup_intermediate(struct vl_idct_buffer *buffer) 667848b8605Smrg{ 668848b8605Smrg unsigned i; 669848b8605Smrg 670848b8605Smrg assert(buffer); 671848b8605Smrg 672848b8605Smrg for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) 673848b8605Smrg pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); 674848b8605Smrg 675848b8605Smrg pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL); 676848b8605Smrg} 677848b8605Smrg 678848b8605Smrgstruct pipe_sampler_view * 679848b8605Smrgvl_idct_upload_matrix(struct pipe_context *pipe, float scale) 680848b8605Smrg{ 681848b8605Smrg struct pipe_resource tex_templ, *matrix; 682848b8605Smrg struct pipe_sampler_view sv_templ, *sv; 683848b8605Smrg struct pipe_transfer *buf_transfer; 684848b8605Smrg unsigned i, j, pitch; 685848b8605Smrg float *f; 686848b8605Smrg 687848b8605Smrg struct pipe_box rect = 688848b8605Smrg { 689848b8605Smrg 0, 0, 0, 690848b8605Smrg VL_BLOCK_WIDTH / 4, 691848b8605Smrg VL_BLOCK_HEIGHT, 692848b8605Smrg 1 693848b8605Smrg }; 694848b8605Smrg 695848b8605Smrg assert(pipe); 696848b8605Smrg 697848b8605Smrg memset(&tex_templ, 0, sizeof(tex_templ)); 698848b8605Smrg tex_templ.target = PIPE_TEXTURE_2D; 699848b8605Smrg tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT; 700848b8605Smrg tex_templ.last_level = 0; 701848b8605Smrg tex_templ.width0 = 2; 702848b8605Smrg tex_templ.height0 = 8; 703848b8605Smrg tex_templ.depth0 = 1; 704848b8605Smrg tex_templ.array_size = 1; 705848b8605Smrg tex_templ.usage = PIPE_USAGE_IMMUTABLE; 706848b8605Smrg tex_templ.bind = PIPE_BIND_SAMPLER_VIEW; 707848b8605Smrg tex_templ.flags = 0; 708848b8605Smrg 709848b8605Smrg matrix = pipe->screen->resource_create(pipe->screen, &tex_templ); 710848b8605Smrg if (!matrix) 711848b8605Smrg goto error_matrix; 712848b8605Smrg 713848b8605Smrg f = pipe->transfer_map(pipe, matrix, 0, 714848b8605Smrg PIPE_TRANSFER_WRITE | 715848b8605Smrg PIPE_TRANSFER_DISCARD_RANGE, 716848b8605Smrg &rect, &buf_transfer); 717848b8605Smrg if (!f) 718848b8605Smrg goto error_map; 719848b8605Smrg 720848b8605Smrg pitch = buf_transfer->stride / sizeof(float); 721848b8605Smrg 722848b8605Smrg for(i = 0; i < VL_BLOCK_HEIGHT; ++i) 723848b8605Smrg for(j = 0; j < VL_BLOCK_WIDTH; ++j) 724848b8605Smrg // transpose and scale 725848b8605Smrg f[i * pitch + j] = ((const float (*)[8])const_matrix)[j][i] * scale; 726848b8605Smrg 727848b8605Smrg pipe->transfer_unmap(pipe, buf_transfer); 728848b8605Smrg 729848b8605Smrg memset(&sv_templ, 0, sizeof(sv_templ)); 730848b8605Smrg u_sampler_view_default_template(&sv_templ, matrix, matrix->format); 731848b8605Smrg sv = pipe->create_sampler_view(pipe, matrix, &sv_templ); 732848b8605Smrg pipe_resource_reference(&matrix, NULL); 733848b8605Smrg if (!sv) 734848b8605Smrg goto error_map; 735848b8605Smrg 736848b8605Smrg return sv; 737848b8605Smrg 738848b8605Smrgerror_map: 739848b8605Smrg pipe_resource_reference(&matrix, NULL); 740848b8605Smrg 741848b8605Smrgerror_matrix: 742848b8605Smrg return NULL; 743848b8605Smrg} 744848b8605Smrg 745848b8605Smrgbool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, 746848b8605Smrg unsigned buffer_width, unsigned buffer_height, 747848b8605Smrg unsigned nr_of_render_targets, 748848b8605Smrg struct pipe_sampler_view *matrix, 749848b8605Smrg struct pipe_sampler_view *transpose) 750848b8605Smrg{ 751848b8605Smrg assert(idct && pipe); 752848b8605Smrg assert(matrix && transpose); 753848b8605Smrg 754848b8605Smrg idct->pipe = pipe; 755848b8605Smrg idct->buffer_width = buffer_width; 756848b8605Smrg idct->buffer_height = buffer_height; 757848b8605Smrg idct->nr_of_render_targets = nr_of_render_targets; 758848b8605Smrg 759848b8605Smrg pipe_sampler_view_reference(&idct->matrix, matrix); 760848b8605Smrg pipe_sampler_view_reference(&idct->transpose, transpose); 761848b8605Smrg 762848b8605Smrg if(!init_shaders(idct)) 763848b8605Smrg return false; 764848b8605Smrg 765848b8605Smrg if(!init_state(idct)) { 766848b8605Smrg cleanup_shaders(idct); 767848b8605Smrg return false; 768848b8605Smrg } 769848b8605Smrg 770848b8605Smrg return true; 771848b8605Smrg} 772848b8605Smrg 773848b8605Smrgvoid 774848b8605Smrgvl_idct_cleanup(struct vl_idct *idct) 775848b8605Smrg{ 776848b8605Smrg cleanup_shaders(idct); 777848b8605Smrg cleanup_state(idct); 778848b8605Smrg 779848b8605Smrg pipe_sampler_view_reference(&idct->matrix, NULL); 780848b8605Smrg pipe_sampler_view_reference(&idct->transpose, NULL); 781848b8605Smrg} 782848b8605Smrg 783848b8605Smrgbool 784848b8605Smrgvl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, 785848b8605Smrg struct pipe_sampler_view *source, 786848b8605Smrg struct pipe_sampler_view *intermediate) 787848b8605Smrg{ 788848b8605Smrg assert(buffer && idct); 789848b8605Smrg assert(source && intermediate); 790848b8605Smrg 791848b8605Smrg memset(buffer, 0, sizeof(struct vl_idct_buffer)); 792848b8605Smrg 793848b8605Smrg pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix); 794848b8605Smrg pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source); 795848b8605Smrg pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose); 796848b8605Smrg pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate); 797848b8605Smrg 798848b8605Smrg if (!init_source(idct, buffer)) 799848b8605Smrg return false; 800848b8605Smrg 801848b8605Smrg if (!init_intermediate(idct, buffer)) 802848b8605Smrg return false; 803848b8605Smrg 804848b8605Smrg return true; 805848b8605Smrg} 806848b8605Smrg 807848b8605Smrgvoid 808848b8605Smrgvl_idct_cleanup_buffer(struct vl_idct_buffer *buffer) 809848b8605Smrg{ 810848b8605Smrg assert(buffer); 811848b8605Smrg 812848b8605Smrg cleanup_source(buffer); 813848b8605Smrg cleanup_intermediate(buffer); 814848b8605Smrg 815848b8605Smrg pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL); 816848b8605Smrg pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL); 817848b8605Smrg} 818848b8605Smrg 819848b8605Smrgvoid 820848b8605Smrgvl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances) 821848b8605Smrg{ 822848b8605Smrg assert(buffer); 823848b8605Smrg 824848b8605Smrg idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); 825848b8605Smrg idct->pipe->bind_blend_state(idct->pipe, idct->blend); 826848b8605Smrg 827848b8605Smrg idct->pipe->bind_sampler_states(idct->pipe, PIPE_SHADER_FRAGMENT, 828848b8605Smrg 0, 2, idct->samplers); 829848b8605Smrg 830848b8605Smrg idct->pipe->set_sampler_views(idct->pipe, PIPE_SHADER_FRAGMENT, 0, 2, 831848b8605Smrg buffer->sampler_views.stage[0]); 832848b8605Smrg 833848b8605Smrg /* mismatch control */ 834848b8605Smrg idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state_mismatch); 835848b8605Smrg idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport_mismatch); 836848b8605Smrg idct->pipe->bind_vs_state(idct->pipe, idct->vs_mismatch); 837848b8605Smrg idct->pipe->bind_fs_state(idct->pipe, idct->fs_mismatch); 838848b8605Smrg util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_POINTS, 0, 1, 0, num_instances); 839848b8605Smrg 840848b8605Smrg /* first stage */ 841848b8605Smrg idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state); 842848b8605Smrg idct->pipe->set_viewport_states(idct->pipe, 0, 1, &buffer->viewport); 843848b8605Smrg idct->pipe->bind_vs_state(idct->pipe, idct->vs); 844848b8605Smrg idct->pipe->bind_fs_state(idct->pipe, idct->fs); 845848b8605Smrg util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); 846848b8605Smrg} 847848b8605Smrg 848848b8605Smrgvoid 849848b8605Smrgvl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer) 850848b8605Smrg{ 851848b8605Smrg assert(buffer); 852848b8605Smrg 853848b8605Smrg /* second stage */ 854848b8605Smrg idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state); 855848b8605Smrg idct->pipe->bind_sampler_states(idct->pipe, PIPE_SHADER_FRAGMENT, 856848b8605Smrg 0, 2, idct->samplers); 857848b8605Smrg idct->pipe->set_sampler_views(idct->pipe, PIPE_SHADER_FRAGMENT, 858848b8605Smrg 0, 2, buffer->sampler_views.stage[1]); 859848b8605Smrg} 860848b8605Smrg 861