r600_textured_videofuncs.c revision b7e1c893
1b7e1c893Smrg/*
2b7e1c893Smrg * Copyright 2008 Advanced Micro Devices, Inc.
3b7e1c893Smrg *
4b7e1c893Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b7e1c893Smrg * copy of this software and associated documentation files (the "Software"),
6b7e1c893Smrg * to deal in the Software without restriction, including without limitation
7b7e1c893Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b7e1c893Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b7e1c893Smrg * Software is furnished to do so, subject to the following conditions:
10b7e1c893Smrg *
11b7e1c893Smrg * The above copyright notice and this permission notice (including the next
12b7e1c893Smrg * paragraph) shall be included in all copies or substantial portions of the
13b7e1c893Smrg * Software.
14b7e1c893Smrg *
15b7e1c893Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b7e1c893Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b7e1c893Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b7e1c893Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b7e1c893Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b7e1c893Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b7e1c893Smrg * SOFTWARE.
22b7e1c893Smrg *
23b7e1c893Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24b7e1c893Smrg *
25b7e1c893Smrg */
26b7e1c893Smrg
27b7e1c893Smrg#ifdef HAVE_CONFIG_H
28b7e1c893Smrg#include "config.h"
29b7e1c893Smrg#endif
30b7e1c893Smrg
31b7e1c893Smrg#include "xf86.h"
32b7e1c893Smrg
33b7e1c893Smrg#include "exa.h"
34b7e1c893Smrg
35b7e1c893Smrg#include "radeon.h"
36b7e1c893Smrg#include "radeon_reg.h"
37b7e1c893Smrg#include "r600_shader.h"
38b7e1c893Smrg#include "r600_reg.h"
39b7e1c893Smrg#include "r600_state.h"
40b7e1c893Smrg
41b7e1c893Smrg#include "radeon_video.h"
42b7e1c893Smrg
43b7e1c893Smrg#include <X11/extensions/Xv.h>
44b7e1c893Smrg#include "fourcc.h"
45b7e1c893Smrg
46b7e1c893Smrg#include "damage.h"
47b7e1c893Smrg
48b7e1c893Smrgstatic void
49b7e1c893SmrgR600DoneTexturedVideo(ScrnInfoPtr pScrn)
50b7e1c893Smrg{
51b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
52b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
53b7e1c893Smrg    draw_config_t   draw_conf;
54b7e1c893Smrg    vtx_resource_t  vtx_res;
55b7e1c893Smrg
56b7e1c893Smrg    CLEAR (draw_conf);
57b7e1c893Smrg    CLEAR (vtx_res);
58b7e1c893Smrg
59b7e1c893Smrg    if (accel_state->vb_index == 0) {
60b7e1c893Smrg	R600IBDiscard(pScrn, accel_state->ib);
61b7e1c893Smrg	return;
62b7e1c893Smrg    }
63b7e1c893Smrg
64b7e1c893Smrg    accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart +
65b7e1c893Smrg	(accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2);
66b7e1c893Smrg    accel_state->vb_size = accel_state->vb_index * 16;
67b7e1c893Smrg
68b7e1c893Smrg    /* flush vertex cache */
69b7e1c893Smrg    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
70b7e1c893Smrg	(info->ChipFamily == CHIP_FAMILY_RV620) ||
71b7e1c893Smrg	(info->ChipFamily == CHIP_FAMILY_RS780) ||
72b7e1c893Smrg	(info->ChipFamily == CHIP_FAMILY_RV710))
73b7e1c893Smrg	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
74b7e1c893Smrg			    accel_state->vb_size, accel_state->vb_mc_addr);
75b7e1c893Smrg    else
76b7e1c893Smrg	cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit,
77b7e1c893Smrg			    accel_state->vb_size, accel_state->vb_mc_addr);
78b7e1c893Smrg
79b7e1c893Smrg    /* Vertex buffer setup */
80b7e1c893Smrg    vtx_res.id              = SQ_VTX_RESOURCE_vs;
81b7e1c893Smrg    vtx_res.vtx_size_dw     = 16 / 4;
82b7e1c893Smrg    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
83b7e1c893Smrg    vtx_res.mem_req_size    = 1;
84b7e1c893Smrg    vtx_res.vb_addr         = accel_state->vb_mc_addr;
85b7e1c893Smrg    set_vtx_resource        (pScrn, accel_state->ib, &vtx_res);
86b7e1c893Smrg
87b7e1c893Smrg    draw_conf.prim_type          = DI_PT_RECTLIST;
88b7e1c893Smrg    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
89b7e1c893Smrg    draw_conf.num_instances      = 1;
90b7e1c893Smrg    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
91b7e1c893Smrg    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
92b7e1c893Smrg
93b7e1c893Smrg    draw_auto(pScrn, accel_state->ib, &draw_conf);
94b7e1c893Smrg
95b7e1c893Smrg    wait_3d_idle_clean(pScrn, accel_state->ib);
96b7e1c893Smrg
97b7e1c893Smrg    /* sync destination surface */
98b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
99b7e1c893Smrg			accel_state->dst_size, accel_state->dst_mc_addr);
100b7e1c893Smrg
101b7e1c893Smrg    R600CPFlushIndirect(pScrn, accel_state->ib);
102b7e1c893Smrg}
103b7e1c893Smrg
104b7e1c893Smrgvoid
105b7e1c893SmrgR600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
106b7e1c893Smrg{
107b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
108b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
109b7e1c893Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
110b7e1c893Smrg    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
111b7e1c893Smrg    int nBox = REGION_NUM_RECTS(&pPriv->clip);
112b7e1c893Smrg    int dstxoff, dstyoff;
113b7e1c893Smrg    cb_config_t     cb_conf;
114b7e1c893Smrg    tex_resource_t  tex_res;
115b7e1c893Smrg    tex_sampler_t   tex_samp;
116b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
117b7e1c893Smrg    int uv_offset;
118b7e1c893Smrg    static float ps_alu_consts[] = {
119b7e1c893Smrg        1.0,  0.0,      1.4020,   0,  /* r - c[0] */
120b7e1c893Smrg        1.0, -0.34414, -0.71414,  0,  /* g - c[1] */
121b7e1c893Smrg        1.0,  1.7720,   0.0,      0,  /* b - c[2] */
122b7e1c893Smrg	/* Constants for undoing Y'CbCr scaling
123b7e1c893Smrg	 *  - Y' is scaled from 16:235
124b7e1c893Smrg	 *  - Cb/Cr are scaled from 16:240
125b7e1c893Smrg	 * Unscaled value N' = N * N_mul + N_shift (N' in range [-0.5, 0.5])
126b7e1c893Smrg	 * Vector is [Y_mul, Y_shfit, C_mul, C_shift]
127b7e1c893Smrg	 */
128b7e1c893Smrg        256.0/219.0, -16.0/219.0, 256.0/224.0, -128.0/224.0,
129b7e1c893Smrg    };
130b7e1c893Smrg
131b7e1c893Smrg    CLEAR (cb_conf);
132b7e1c893Smrg    CLEAR (tex_res);
133b7e1c893Smrg    CLEAR (tex_samp);
134b7e1c893Smrg    CLEAR (vs_conf);
135b7e1c893Smrg    CLEAR (ps_conf);
136b7e1c893Smrg
137b7e1c893Smrg    accel_state->dst_pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8);
138b7e1c893Smrg    accel_state->src_pitch[0] = pPriv->src_pitch;
139b7e1c893Smrg
140b7e1c893Smrg    /* bad pitch */
141b7e1c893Smrg    if (accel_state->src_pitch[0] & 7)
142b7e1c893Smrg	return;
143b7e1c893Smrg    if (accel_state->dst_pitch & 7)
144b7e1c893Smrg	return;
145b7e1c893Smrg
146b7e1c893Smrg#ifdef COMPOSITE
147b7e1c893Smrg    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
148b7e1c893Smrg    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
149b7e1c893Smrg#else
150b7e1c893Smrg    dstxoff = 0;
151b7e1c893Smrg    dstyoff = 0;
152b7e1c893Smrg#endif
153b7e1c893Smrg
154b7e1c893Smrg    accel_state->ib = RADEONCPGetBuffer(pScrn);
155b7e1c893Smrg
156b7e1c893Smrg    /* Init */
157b7e1c893Smrg    start_3d(pScrn, accel_state->ib);
158b7e1c893Smrg
159b7e1c893Smrg    set_default_state(pScrn, accel_state->ib);
160b7e1c893Smrg
161b7e1c893Smrg    /* Scissor / viewport */
162b7e1c893Smrg    EREG(accel_state->ib, PA_CL_VTE_CNTL,                      VTX_XY_FMT_bit);
163b7e1c893Smrg    EREG(accel_state->ib, PA_CL_CLIP_CNTL,                     CLIP_DISABLE_bit);
164b7e1c893Smrg
165b7e1c893Smrg    accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
166b7e1c893Smrg	accel_state->xv_vs_offset;
167b7e1c893Smrg
168b7e1c893Smrg    accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
169b7e1c893Smrg	accel_state->xv_ps_offset;
170b7e1c893Smrg
171b7e1c893Smrg    /* PS bool constant */
172b7e1c893Smrg    switch(pPriv->id) {
173b7e1c893Smrg    case FOURCC_YV12:
174b7e1c893Smrg    case FOURCC_I420:
175b7e1c893Smrg	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
176b7e1c893Smrg	break;
177b7e1c893Smrg    case FOURCC_UYVY:
178b7e1c893Smrg    case FOURCC_YUY2:
179b7e1c893Smrg    default:
180b7e1c893Smrg	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
181b7e1c893Smrg	break;
182b7e1c893Smrg    }
183b7e1c893Smrg
184b7e1c893Smrg    accel_state->vs_size = 512;
185b7e1c893Smrg    accel_state->ps_size = 512;
186b7e1c893Smrg
187b7e1c893Smrg    /* Shader */
188b7e1c893Smrg
189b7e1c893Smrg    /* flush SQ cache */
190b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
191b7e1c893Smrg			accel_state->vs_size, accel_state->vs_mc_addr);
192b7e1c893Smrg
193b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
194b7e1c893Smrg    vs_conf.num_gprs            = 2;
195b7e1c893Smrg    vs_conf.stack_size          = 0;
196b7e1c893Smrg    vs_setup                    (pScrn, accel_state->ib, &vs_conf);
197b7e1c893Smrg
198b7e1c893Smrg    /* flush SQ cache */
199b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
200b7e1c893Smrg			accel_state->ps_size, accel_state->ps_mc_addr);
201b7e1c893Smrg
202b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
203b7e1c893Smrg    ps_conf.num_gprs            = 3;
204b7e1c893Smrg    ps_conf.stack_size          = 1;
205b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
206b7e1c893Smrg    ps_conf.clamp_consts        = 0;
207b7e1c893Smrg    ps_conf.export_mode         = 2;
208b7e1c893Smrg    ps_setup                    (pScrn, accel_state->ib, &ps_conf);
209b7e1c893Smrg
210b7e1c893Smrg    /* PS alu constants */
211b7e1c893Smrg    set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
212b7e1c893Smrg		   sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
213b7e1c893Smrg
214b7e1c893Smrg    /* Texture */
215b7e1c893Smrg    switch(pPriv->id) {
216b7e1c893Smrg    case FOURCC_YV12:
217b7e1c893Smrg    case FOURCC_I420:
218b7e1c893Smrg	accel_state->src_mc_addr[0] = pPriv->src_offset;
219b7e1c893Smrg	accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h;
220b7e1c893Smrg
221b7e1c893Smrg	/* flush texture cache */
222b7e1c893Smrg	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0],
223b7e1c893Smrg			    accel_state->src_mc_addr[0]);
224b7e1c893Smrg
225b7e1c893Smrg	/* Y texture */
226b7e1c893Smrg	tex_res.id                  = 0;
227b7e1c893Smrg	tex_res.w                   = pPriv->w;
228b7e1c893Smrg	tex_res.h                   = pPriv->h;
229b7e1c893Smrg	tex_res.pitch               = accel_state->src_pitch[0];
230b7e1c893Smrg	tex_res.depth               = 0;
231b7e1c893Smrg	tex_res.dim                 = SQ_TEX_DIM_2D;
232b7e1c893Smrg	tex_res.base                = accel_state->src_mc_addr[0];
233b7e1c893Smrg	tex_res.mip_base            = accel_state->src_mc_addr[0];
234b7e1c893Smrg
235b7e1c893Smrg	tex_res.format              = FMT_8;
236b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
237b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
238b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
239b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
240b7e1c893Smrg
241b7e1c893Smrg	tex_res.request_size        = 1;
242b7e1c893Smrg	tex_res.base_level          = 0;
243b7e1c893Smrg	tex_res.last_level          = 0;
244b7e1c893Smrg	tex_res.perf_modulation     = 0;
245b7e1c893Smrg	tex_res.interlaced          = 0;
246b7e1c893Smrg	set_tex_resource            (pScrn, accel_state->ib, &tex_res);
247b7e1c893Smrg
248b7e1c893Smrg	/* Y sampler */
249b7e1c893Smrg	tex_samp.id                 = 0;
250b7e1c893Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
251b7e1c893Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
252b7e1c893Smrg	tex_samp.clamp_z            = SQ_TEX_WRAP;
253b7e1c893Smrg
254b7e1c893Smrg	/* xxx: switch to bicubic */
255b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
256b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
257b7e1c893Smrg
258b7e1c893Smrg	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
259b7e1c893Smrg	tex_samp.mip_filter         = 0;			/* no mipmap */
260b7e1c893Smrg	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
261b7e1c893Smrg
262b7e1c893Smrg	/* U or V texture */
263b7e1c893Smrg	uv_offset = accel_state->src_pitch[0] * pPriv->h;
264b7e1c893Smrg	uv_offset = (uv_offset + 255) & ~255;
265b7e1c893Smrg
266b7e1c893Smrg	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
267b7e1c893Smrg			    accel_state->src_size[0] / 4,
268b7e1c893Smrg			    accel_state->src_mc_addr[0] + uv_offset);
269b7e1c893Smrg
270b7e1c893Smrg	tex_res.id                  = 1;
271b7e1c893Smrg	tex_res.format              = FMT_8;
272b7e1c893Smrg	tex_res.w                   = pPriv->w >> 1;
273b7e1c893Smrg	tex_res.h                   = pPriv->h >> 1;
274b7e1c893Smrg	tex_res.pitch               = accel_state->src_pitch[0] >> 1;
275b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
276b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
277b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
278b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
279b7e1c893Smrg	tex_res.interlaced          = 0;
280b7e1c893Smrg
281b7e1c893Smrg	tex_res.base                = accel_state->src_mc_addr[0] + uv_offset;
282b7e1c893Smrg	tex_res.mip_base            = accel_state->src_mc_addr[0] + uv_offset;
283b7e1c893Smrg	set_tex_resource            (pScrn, accel_state->ib, &tex_res);
284b7e1c893Smrg
285b7e1c893Smrg	/* U or V sampler */
286b7e1c893Smrg	tex_samp.id                 = 1;
287b7e1c893Smrg	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
288b7e1c893Smrg
289b7e1c893Smrg	/* U or V texture */
290b7e1c893Smrg	uv_offset += ((accel_state->src_pitch[0] >> 1) * (pPriv->h >> 1));
291b7e1c893Smrg	uv_offset = (uv_offset + 255) & ~255;
292b7e1c893Smrg
293b7e1c893Smrg	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
294b7e1c893Smrg			    accel_state->src_size[0] / 4,
295b7e1c893Smrg			    accel_state->src_mc_addr[0] + uv_offset);
296b7e1c893Smrg
297b7e1c893Smrg	tex_res.id                  = 2;
298b7e1c893Smrg	tex_res.format              = FMT_8;
299b7e1c893Smrg	tex_res.w                   = pPriv->w >> 1;
300b7e1c893Smrg	tex_res.h                   = pPriv->h >> 1;
301b7e1c893Smrg	tex_res.pitch               = accel_state->src_pitch[0] >> 1;
302b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
303b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
304b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
305b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
306b7e1c893Smrg	tex_res.interlaced          = 0;
307b7e1c893Smrg
308b7e1c893Smrg	tex_res.base                = accel_state->src_mc_addr[0] + uv_offset;
309b7e1c893Smrg	tex_res.mip_base            = accel_state->src_mc_addr[0] + uv_offset;
310b7e1c893Smrg	set_tex_resource            (pScrn, accel_state->ib, &tex_res);
311b7e1c893Smrg
312b7e1c893Smrg	/* UV sampler */
313b7e1c893Smrg	tex_samp.id                 = 2;
314b7e1c893Smrg	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
315b7e1c893Smrg	break;
316b7e1c893Smrg    case FOURCC_UYVY:
317b7e1c893Smrg    case FOURCC_YUY2:
318b7e1c893Smrg    default:
319b7e1c893Smrg	accel_state->src_mc_addr[0] = pPriv->src_offset;
320b7e1c893Smrg	accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h;
321b7e1c893Smrg
322b7e1c893Smrg	/* flush texture cache */
323b7e1c893Smrg	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0],
324b7e1c893Smrg			    accel_state->src_mc_addr[0]);
325b7e1c893Smrg
326b7e1c893Smrg	/* Y texture */
327b7e1c893Smrg	tex_res.id                  = 0;
328b7e1c893Smrg	tex_res.w                   = pPriv->w;
329b7e1c893Smrg	tex_res.h                   = pPriv->h;
330b7e1c893Smrg	tex_res.pitch               = accel_state->src_pitch[0] >> 1;
331b7e1c893Smrg	tex_res.depth               = 0;
332b7e1c893Smrg	tex_res.dim                 = SQ_TEX_DIM_2D;
333b7e1c893Smrg	tex_res.base                = accel_state->src_mc_addr[0];
334b7e1c893Smrg	tex_res.mip_base            = accel_state->src_mc_addr[0];
335b7e1c893Smrg
336b7e1c893Smrg	tex_res.format              = FMT_8_8;
337b7e1c893Smrg	if (pPriv->id == FOURCC_UYVY)
338b7e1c893Smrg	    tex_res.dst_sel_x           = SQ_SEL_Y; /* Y */
339b7e1c893Smrg	else
340b7e1c893Smrg	    tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
341b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
342b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
343b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
344b7e1c893Smrg
345b7e1c893Smrg	tex_res.request_size        = 1;
346b7e1c893Smrg	tex_res.base_level          = 0;
347b7e1c893Smrg	tex_res.last_level          = 0;
348b7e1c893Smrg	tex_res.perf_modulation     = 0;
349b7e1c893Smrg	tex_res.interlaced          = 0;
350b7e1c893Smrg	set_tex_resource            (pScrn, accel_state->ib, &tex_res);
351b7e1c893Smrg
352b7e1c893Smrg	/* Y sampler */
353b7e1c893Smrg	tex_samp.id                 = 0;
354b7e1c893Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
355b7e1c893Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
356b7e1c893Smrg	tex_samp.clamp_z            = SQ_TEX_WRAP;
357b7e1c893Smrg
358b7e1c893Smrg	/* xxx: switch to bicubic */
359b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
360b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
361b7e1c893Smrg
362b7e1c893Smrg	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
363b7e1c893Smrg	tex_samp.mip_filter         = 0;			/* no mipmap */
364b7e1c893Smrg	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
365b7e1c893Smrg
366b7e1c893Smrg	/* UV texture */
367b7e1c893Smrg	tex_res.id                  = 1;
368b7e1c893Smrg	tex_res.format              = FMT_8_8_8_8;
369b7e1c893Smrg	tex_res.w                   = pPriv->w >> 1;
370b7e1c893Smrg	tex_res.h                   = pPriv->h;
371b7e1c893Smrg	tex_res.pitch               = accel_state->src_pitch[0] >> 2;
372b7e1c893Smrg	if (pPriv->id == FOURCC_UYVY) {
373b7e1c893Smrg	    tex_res.dst_sel_x           = SQ_SEL_X; /* V */
374b7e1c893Smrg	    tex_res.dst_sel_y           = SQ_SEL_Z; /* U */
375b7e1c893Smrg	} else {
376b7e1c893Smrg	    tex_res.dst_sel_x           = SQ_SEL_Y; /* V */
377b7e1c893Smrg	    tex_res.dst_sel_y           = SQ_SEL_W; /* U */
378b7e1c893Smrg	}
379b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
380b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
381b7e1c893Smrg	tex_res.interlaced          = 0;
382b7e1c893Smrg
383b7e1c893Smrg	tex_res.base                = accel_state->src_mc_addr[0];
384b7e1c893Smrg	tex_res.mip_base            = accel_state->src_mc_addr[0];
385b7e1c893Smrg	set_tex_resource            (pScrn, accel_state->ib, &tex_res);
386b7e1c893Smrg
387b7e1c893Smrg	/* UV sampler */
388b7e1c893Smrg	tex_samp.id                 = 1;
389b7e1c893Smrg	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
390b7e1c893Smrg	break;
391b7e1c893Smrg    }
392b7e1c893Smrg
393b7e1c893Smrg    /* Render setup */
394b7e1c893Smrg    EREG(accel_state->ib, CB_SHADER_MASK,                      (0x0f << OUTPUT0_ENABLE_shift));
395b7e1c893Smrg    EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));
396b7e1c893Smrg    EREG(accel_state->ib, CB_COLOR_CONTROL,                    (0xcc << ROP3_shift)); /* copy */
397b7e1c893Smrg
398b7e1c893Smrg    cb_conf.id = 0;
399b7e1c893Smrg
400b7e1c893Smrg    accel_state->dst_mc_addr = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
401b7e1c893Smrg
402b7e1c893Smrg    cb_conf.w = accel_state->dst_pitch;
403b7e1c893Smrg    cb_conf.h = pPixmap->drawable.height;
404b7e1c893Smrg    cb_conf.base = accel_state->dst_mc_addr;
405b7e1c893Smrg
406b7e1c893Smrg    switch (pPixmap->drawable.bitsPerPixel) {
407b7e1c893Smrg    case 16:
408b7e1c893Smrg	if (pPixmap->drawable.depth == 15) {
409b7e1c893Smrg	    cb_conf.format = COLOR_1_5_5_5;
410b7e1c893Smrg	    cb_conf.comp_swap = 1; /* ARGB */
411b7e1c893Smrg	} else {
412b7e1c893Smrg	    cb_conf.format = COLOR_5_6_5;
413b7e1c893Smrg	    cb_conf.comp_swap = 2; /* RGB */
414b7e1c893Smrg	}
415b7e1c893Smrg	break;
416b7e1c893Smrg    case 32:
417b7e1c893Smrg	cb_conf.format = COLOR_8_8_8_8;
418b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
419b7e1c893Smrg	break;
420b7e1c893Smrg    default:
421b7e1c893Smrg	return;
422b7e1c893Smrg    }
423b7e1c893Smrg
424b7e1c893Smrg    cb_conf.source_format = 1;
425b7e1c893Smrg    cb_conf.blend_clamp = 1;
426b7e1c893Smrg    set_render_target(pScrn, accel_state->ib, &cb_conf);
427b7e1c893Smrg
428b7e1c893Smrg    EREG(accel_state->ib, PA_SU_SC_MODE_CNTL,                  (FACE_bit			|
429b7e1c893Smrg								(POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift)	|
430b7e1c893Smrg								(POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
431b7e1c893Smrg    EREG(accel_state->ib, DB_SHADER_CONTROL,                   ((1 << Z_ORDER_shift)		| /* EARLY_Z_THEN_LATE_Z */
432b7e1c893Smrg								DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
433b7e1c893Smrg
434b7e1c893Smrg    /* Interpolator setup */
435b7e1c893Smrg    /* export tex coords from VS */
436b7e1c893Smrg    EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
437b7e1c893Smrg    EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
438b7e1c893Smrg
439b7e1c893Smrg    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
440b7e1c893Smrg     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
441b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_IN_CONTROL_0,                 ((1 << NUM_INTERP_shift)));
442b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
443b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)	|
444b7e1c893Smrg								(0x03 << DEFAULT_VAL_shift)	|
445b7e1c893Smrg								SEL_CENTROID_bit));
446b7e1c893Smrg    EREG(accel_state->ib, SPI_INTERP_CONTROL_0,                0);
447b7e1c893Smrg
448b7e1c893Smrg
449b7e1c893Smrg    if (pPriv->vsync) {
450b7e1c893Smrg	xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn,
451b7e1c893Smrg						    pPriv->drw_x,
452b7e1c893Smrg						    pPriv->drw_x + pPriv->dst_w,
453b7e1c893Smrg						    pPriv->drw_y,
454b7e1c893Smrg						    pPriv->drw_y + pPriv->dst_h);
455b7e1c893Smrg	if (crtc) {
456b7e1c893Smrg	    RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private;
457b7e1c893Smrg
458b7e1c893Smrg	    cp_wait_vline_sync(pScrn, accel_state->ib, pPixmap,
459b7e1c893Smrg			       radeon_crtc->crtc_id,
460b7e1c893Smrg			       pPriv->drw_y - crtc->y,
461b7e1c893Smrg			       (pPriv->drw_y - crtc->y) + pPriv->dst_h);
462b7e1c893Smrg	}
463b7e1c893Smrg    }
464b7e1c893Smrg
465b7e1c893Smrg    accel_state->vb_index = 0;
466b7e1c893Smrg
467b7e1c893Smrg    while (nBox--) {
468b7e1c893Smrg	int srcX, srcY, srcw, srch;
469b7e1c893Smrg	int dstX, dstY, dstw, dsth;
470b7e1c893Smrg	float *vb;
471b7e1c893Smrg
472b7e1c893Smrg	if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) {
473b7e1c893Smrg	    R600DoneTexturedVideo(pScrn);
474b7e1c893Smrg	    accel_state->vb_index = 0;
475b7e1c893Smrg	    accel_state->ib = RADEONCPGetBuffer(pScrn);
476b7e1c893Smrg	}
477b7e1c893Smrg
478b7e1c893Smrg	vb = (pointer)((char*)accel_state->ib->address +
479b7e1c893Smrg		       (accel_state->ib->total / 2) +
480b7e1c893Smrg		       accel_state->vb_index * 16);
481b7e1c893Smrg
482b7e1c893Smrg	dstX = pBox->x1 + dstxoff;
483b7e1c893Smrg	dstY = pBox->y1 + dstyoff;
484b7e1c893Smrg	dstw = pBox->x2 - pBox->x1;
485b7e1c893Smrg	dsth = pBox->y2 - pBox->y1;
486b7e1c893Smrg
487b7e1c893Smrg	srcX = ((pBox->x1 - pPriv->drw_x) *
488b7e1c893Smrg		pPriv->src_w) / pPriv->dst_w;
489b7e1c893Smrg	srcY = ((pBox->y1 - pPriv->drw_y) *
490b7e1c893Smrg		pPriv->src_h) / pPriv->dst_h;
491b7e1c893Smrg
492b7e1c893Smrg	srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
493b7e1c893Smrg	srch = (pPriv->src_h * dsth) / pPriv->dst_h;
494b7e1c893Smrg
495b7e1c893Smrg	vb[0] = (float)dstX;
496b7e1c893Smrg	vb[1] = (float)dstY;
497b7e1c893Smrg	vb[2] = (float)srcX / pPriv->w;
498b7e1c893Smrg	vb[3] = (float)srcY / pPriv->h;
499b7e1c893Smrg
500b7e1c893Smrg	vb[4] = (float)dstX;
501b7e1c893Smrg	vb[5] = (float)(dstY + dsth);
502b7e1c893Smrg	vb[6] = (float)srcX / pPriv->w;
503b7e1c893Smrg	vb[7] = (float)(srcY + srch) / pPriv->h;
504b7e1c893Smrg
505b7e1c893Smrg	vb[8] = (float)(dstX + dstw);
506b7e1c893Smrg	vb[9] = (float)(dstY + dsth);
507b7e1c893Smrg	vb[10] = (float)(srcX + srcw) / pPriv->w;
508b7e1c893Smrg	vb[11] = (float)(srcY + srch) / pPriv->h;
509b7e1c893Smrg
510b7e1c893Smrg	accel_state->vb_index += 3;
511b7e1c893Smrg
512b7e1c893Smrg	pBox++;
513b7e1c893Smrg    }
514b7e1c893Smrg
515b7e1c893Smrg    R600DoneTexturedVideo(pScrn);
516b7e1c893Smrg
517b7e1c893Smrg    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
518b7e1c893Smrg}
519