r600_textured_videofuncs.c revision 40732134
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Author: Alex Deucher <alexander.deucher@amd.com>
 *
 */
26b7e1c893Smrg
27b7e1c893Smrg#ifdef HAVE_CONFIG_H
28b7e1c893Smrg#include "config.h"
29b7e1c893Smrg#endif
30b7e1c893Smrg
31b7e1c893Smrg#include "xf86.h"
32b7e1c893Smrg
33b7e1c893Smrg#include "exa.h"
34b7e1c893Smrg
35b7e1c893Smrg#include "radeon.h"
36b7e1c893Smrg#include "radeon_reg.h"
37b7e1c893Smrg#include "r600_shader.h"
38b7e1c893Smrg#include "r600_reg.h"
39b7e1c893Smrg#include "r600_state.h"
40b7e1c893Smrg
41b7e1c893Smrg#include "radeon_video.h"
42b7e1c893Smrg
43b7e1c893Smrg#include <X11/extensions/Xv.h>
44b7e1c893Smrg#include "fourcc.h"
45b7e1c893Smrg
46b7e1c893Smrg#include "damage.h"
47b7e1c893Smrg
480974d292Smrg#include "radeon_exa_shared.h"
49ad43ddacSmrg#include "radeon_vbo.h"
50b7e1c893Smrg
51ad43ddacSmrg/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces
52ad43ddacSmrg   note the difference to the parameters used in overlay are due
53ad43ddacSmrg   to 10bit vs. float calcs */
54ad43ddacSmrgstatic REF_TRANSFORM trans[2] =
55ad43ddacSmrg{
56ad43ddacSmrg    {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */
57ad43ddacSmrg    {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0}  /* BT.709 */
58ad43ddacSmrg};
59b7e1c893Smrg
60b7e1c893Smrgvoid
61b7e1c893SmrgR600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
62b7e1c893Smrg{
63b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
64b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
65b7e1c893Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
66b7e1c893Smrg    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
67b7e1c893Smrg    int nBox = REGION_NUM_RECTS(&pPriv->clip);
68b7e1c893Smrg    int dstxoff, dstyoff;
69ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
70b7e1c893Smrg    cb_config_t     cb_conf;
71b7e1c893Smrg    tex_resource_t  tex_res;
72b7e1c893Smrg    tex_sampler_t   tex_samp;
73b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
74ad43ddacSmrg    /*
75ad43ddacSmrg     * y' = y - .0625
76ad43ddacSmrg     * u' = u - .5
77ad43ddacSmrg     * v' = v - .5;
78ad43ddacSmrg     *
79ad43ddacSmrg     * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
80ad43ddacSmrg     * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
81ad43ddacSmrg     * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
82ad43ddacSmrg     *
83ad43ddacSmrg     * DP3 might look like the straightforward solution
84ad43ddacSmrg     * but we'd need to move the texture yuv values in
85ad43ddacSmrg     * the same reg for this to work. Therefore use MADs.
86ad43ddacSmrg     * Brightness just adds to the off constant.
87ad43ddacSmrg     * Contrast is multiplication of luminance.
88ad43ddacSmrg     * Saturation and hue change the u and v coeffs.
89ad43ddacSmrg     * Default values (before adjustments - depend on colorspace):
90ad43ddacSmrg     * yco = 1.1643
91ad43ddacSmrg     * uco = 0, -0.39173, 2.017
92ad43ddacSmrg     * vco = 1.5958, -0.8129, 0
93ad43ddacSmrg     * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
94ad43ddacSmrg     *       -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
95ad43ddacSmrg     *       -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
96ad43ddacSmrg     *
97ad43ddacSmrg     * temp = MAD(yco, yuv.yyyy, off)
98ad43ddacSmrg     * temp = MAD(uco, yuv.uuuu, temp)
99ad43ddacSmrg     * result = MAD(vco, yuv.vvvv, temp)
100ad43ddacSmrg     */
101ad43ddacSmrg    /* TODO: calc consts in the shader */
102ad43ddacSmrg    const float Loff = -0.0627;
103ad43ddacSmrg    const float Coff = -0.502;
104ad43ddacSmrg    float uvcosf, uvsinf;
105ad43ddacSmrg    float yco;
106ad43ddacSmrg    float uco[3], vco[3], off[3];
107ad43ddacSmrg    float bright, cont, gamma;
108ad43ddacSmrg    int ref = pPriv->transform_index;
109ad43ddacSmrg    Bool needgamma = FALSE;
110ad43ddacSmrg    float ps_alu_consts[12];
111ad43ddacSmrg    float vs_alu_consts[4];
112ad43ddacSmrg
113ad43ddacSmrg    cont = RTFContrast(pPriv->contrast);
114ad43ddacSmrg    bright = RTFBrightness(pPriv->brightness);
115ad43ddacSmrg    gamma = (float)pPriv->gamma / 1000.0;
116ad43ddacSmrg    uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
117ad43ddacSmrg    uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
118ad43ddacSmrg    /* overlay video also does pre-gamma contrast/sat adjust, should we? */
119ad43ddacSmrg
120ad43ddacSmrg    yco = trans[ref].RefLuma * cont;
121ad43ddacSmrg    uco[0] = -trans[ref].RefRCr * uvsinf;
122ad43ddacSmrg    uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
123ad43ddacSmrg    uco[2] = trans[ref].RefBCb * uvcosf;
124ad43ddacSmrg    vco[0] = trans[ref].RefRCr * uvcosf;
125ad43ddacSmrg    vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
126ad43ddacSmrg    vco[2] = trans[ref].RefBCb * uvsinf;
127ad43ddacSmrg    off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
128ad43ddacSmrg    off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
129ad43ddacSmrg    off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
130ad43ddacSmrg
131ad43ddacSmrg    // XXX
132ad43ddacSmrg    gamma = 1.0;
133ad43ddacSmrg
134ad43ddacSmrg    if (gamma != 1.0) {
135ad43ddacSmrg	needgamma = TRUE;
136ad43ddacSmrg	/* note: gamma correction is out = in ^ gamma;
137ad43ddacSmrg	   gpu can only do LG2/EX2 therefore we transform into
138ad43ddacSmrg	   in ^ gamma = 2 ^ (log2(in) * gamma).
139ad43ddacSmrg	   Lots of scalar ops, unfortunately (better solution?) -
140ad43ddacSmrg	   without gamma that's 3 inst, with gamma it's 10...
141ad43ddacSmrg	   could use different gamma factors per channel,
142ad43ddacSmrg	   if that's of any use. */
143ad43ddacSmrg    }
144ad43ddacSmrg
145ad43ddacSmrg    /* setup the ps consts */
146ad43ddacSmrg    ps_alu_consts[0] = off[0];
147ad43ddacSmrg    ps_alu_consts[1] = off[1];
148ad43ddacSmrg    ps_alu_consts[2] = off[2];
149ad43ddacSmrg    ps_alu_consts[3] = yco;
150ad43ddacSmrg
151ad43ddacSmrg    ps_alu_consts[4] = uco[0];
152ad43ddacSmrg    ps_alu_consts[5] = uco[1];
153ad43ddacSmrg    ps_alu_consts[6] = uco[2];
154ad43ddacSmrg    ps_alu_consts[7] = gamma;
155ad43ddacSmrg
156ad43ddacSmrg    ps_alu_consts[8] = vco[0];
157ad43ddacSmrg    ps_alu_consts[9] = vco[1];
158ad43ddacSmrg    ps_alu_consts[10] = vco[2];
159ad43ddacSmrg    ps_alu_consts[11] = 0.0;
160b7e1c893Smrg
161b7e1c893Smrg    CLEAR (cb_conf);
162b7e1c893Smrg    CLEAR (tex_res);
163b7e1c893Smrg    CLEAR (tex_samp);
164b7e1c893Smrg    CLEAR (vs_conf);
165b7e1c893Smrg    CLEAR (ps_conf);
166b7e1c893Smrg
167ad43ddacSmrg#if defined(XF86DRM_MODE)
168ad43ddacSmrg    if (info->cs) {
169ad43ddacSmrg	dst_obj.offset = 0;
170ad43ddacSmrg	src_obj.offset = 0;
171ad43ddacSmrg	dst_obj.bo = radeon_get_pixmap_bo(pPixmap);
172c4ae5be6Smrg	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pPixmap);
17340732134Srjs	dst_obj.surface = radeon_get_pixmap_surface(pPixmap);
174ad43ddacSmrg    } else
175ad43ddacSmrg#endif
176ad43ddacSmrg    {
177ad43ddacSmrg	dst_obj.offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
178ad43ddacSmrg	src_obj.offset = pPriv->src_offset + info->fbLocation + pScrn->fbOffset;
179ad43ddacSmrg	dst_obj.bo = src_obj.bo = NULL;
180ad43ddacSmrg    }
181ad43ddacSmrg    dst_obj.pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8);
182ad43ddacSmrg
183ad43ddacSmrg    src_obj.pitch = pPriv->src_pitch;
184ad43ddacSmrg    src_obj.width = pPriv->w;
185ad43ddacSmrg    src_obj.height = pPriv->h;
186ad43ddacSmrg    src_obj.bpp = 16;
187ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
188ad43ddacSmrg    src_obj.bo = pPriv->src_bo[pPriv->currentBuffer];
189c4ae5be6Smrg    src_obj.tiling_flags = 0;
19040732134Srjs#ifdef XF86DRM_MODE
19140732134Srjs    src_obj.surface = NULL;
19240732134Srjs#endif
193c4ae5be6Smrg
194ad43ddacSmrg    dst_obj.width = pPixmap->drawable.width;
195ad43ddacSmrg    dst_obj.height = pPixmap->drawable.height;
196ad43ddacSmrg    dst_obj.bpp = pPixmap->drawable.bitsPerPixel;
197ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
198ad43ddacSmrg
199ad43ddacSmrg    if (!R600SetAccelState(pScrn,
200ad43ddacSmrg			   &src_obj,
201ad43ddacSmrg			   NULL,
202ad43ddacSmrg			   &dst_obj,
203ad43ddacSmrg			   accel_state->xv_vs_offset, accel_state->xv_ps_offset,
204ad43ddacSmrg			   3, 0xffffffff))
205b7e1c893Smrg	return;
206b7e1c893Smrg
207b7e1c893Smrg#ifdef COMPOSITE
208b7e1c893Smrg    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
209b7e1c893Smrg    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
210b7e1c893Smrg#else
211b7e1c893Smrg    dstxoff = 0;
212b7e1c893Smrg    dstyoff = 0;
213b7e1c893Smrg#endif
214b7e1c893Smrg
215921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
2160974d292Smrg    radeon_cp_start(pScrn);
217b7e1c893Smrg
218921a55d8Smrg    r600_set_default_state(pScrn, accel_state->ib);
219b7e1c893Smrg
220921a55d8Smrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
221921a55d8Smrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
222921a55d8Smrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
223b7e1c893Smrg
224b7e1c893Smrg    /* PS bool constant */
225b7e1c893Smrg    switch(pPriv->id) {
226b7e1c893Smrg    case FOURCC_YV12:
227b7e1c893Smrg    case FOURCC_I420:
228921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
229b7e1c893Smrg	break;
230b7e1c893Smrg    case FOURCC_UYVY:
231b7e1c893Smrg    case FOURCC_YUY2:
232b7e1c893Smrg    default:
233921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
234b7e1c893Smrg	break;
235b7e1c893Smrg    }
236b7e1c893Smrg
237b7e1c893Smrg    /* Shader */
238b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
2390974d292Smrg    vs_conf.shader_size         = accel_state->vs_size;
240b7e1c893Smrg    vs_conf.num_gprs            = 2;
241b7e1c893Smrg    vs_conf.stack_size          = 0;
242ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
243921a55d8Smrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
244b7e1c893Smrg
245b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
2460974d292Smrg    ps_conf.shader_size         = accel_state->ps_size;
247b7e1c893Smrg    ps_conf.num_gprs            = 3;
248b7e1c893Smrg    ps_conf.stack_size          = 1;
249b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
250b7e1c893Smrg    ps_conf.clamp_consts        = 0;
251b7e1c893Smrg    ps_conf.export_mode         = 2;
252ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
253921a55d8Smrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
254b7e1c893Smrg
255b7e1c893Smrg    /* PS alu constants */
256921a55d8Smrg    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
257921a55d8Smrg			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
258b7e1c893Smrg
259b7e1c893Smrg    /* Texture */
260b7e1c893Smrg    switch(pPriv->id) {
261b7e1c893Smrg    case FOURCC_YV12:
262b7e1c893Smrg    case FOURCC_I420:
263ad43ddacSmrg	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
264b7e1c893Smrg
265b7e1c893Smrg	/* Y texture */
266b7e1c893Smrg	tex_res.id                  = 0;
267ad43ddacSmrg	tex_res.w                   = accel_state->src_obj[0].width;
268ad43ddacSmrg	tex_res.h                   = accel_state->src_obj[0].height;
269ad43ddacSmrg	tex_res.pitch               = accel_state->src_obj[0].pitch;
270b7e1c893Smrg	tex_res.depth               = 0;
271b7e1c893Smrg	tex_res.dim                 = SQ_TEX_DIM_2D;
272ad43ddacSmrg	tex_res.base                = accel_state->src_obj[0].offset;
273ad43ddacSmrg	tex_res.mip_base            = accel_state->src_obj[0].offset;
2740974d292Smrg	tex_res.size                = accel_state->src_size[0];
275ad43ddacSmrg	tex_res.bo                  = accel_state->src_obj[0].bo;
276ad43ddacSmrg	tex_res.mip_bo              = accel_state->src_obj[0].bo;
27740732134Srjs#ifdef XF86DRM_MODE
27840732134Srjs	tex_res.surface             = NULL;
27940732134Srjs#endif
280b7e1c893Smrg
281b7e1c893Smrg	tex_res.format              = FMT_8;
282b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
283b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
284b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
285b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
286b7e1c893Smrg
287b7e1c893Smrg	tex_res.request_size        = 1;
288b7e1c893Smrg	tex_res.base_level          = 0;
289b7e1c893Smrg	tex_res.last_level          = 0;
290b7e1c893Smrg	tex_res.perf_modulation     = 0;
291b7e1c893Smrg	tex_res.interlaced          = 0;
292b13dfe66Smrg	if (accel_state->src_obj[0].tiling_flags == 0)
293b13dfe66Smrg	    tex_res.tile_mode           = 1;
294921a55d8Smrg	r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
295b7e1c893Smrg
296b7e1c893Smrg	/* Y sampler */
297b7e1c893Smrg	tex_samp.id                 = 0;
298b7e1c893Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
299b7e1c893Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
300b7e1c893Smrg	tex_samp.clamp_z            = SQ_TEX_WRAP;
301b7e1c893Smrg
302b7e1c893Smrg	/* xxx: switch to bicubic */
303b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
304b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
305b7e1c893Smrg
306b7e1c893Smrg	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
307b7e1c893Smrg	tex_samp.mip_filter         = 0;			/* no mipmap */
308921a55d8Smrg	r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
309b7e1c893Smrg
310b7e1c893Smrg	/* U or V texture */
311b7e1c893Smrg	tex_res.id                  = 1;
312b7e1c893Smrg	tex_res.format              = FMT_8;
313ad43ddacSmrg	tex_res.w                   = accel_state->src_obj[0].width >> 1;
314ad43ddacSmrg	tex_res.h                   = accel_state->src_obj[0].height >> 1;
315b13dfe66Smrg	tex_res.pitch               = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, pPriv->hw_align);
316b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
317b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
318b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
319b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
320b7e1c893Smrg	tex_res.interlaced          = 0;
321b7e1c893Smrg
322ad43ddacSmrg	tex_res.base                = accel_state->src_obj[0].offset + pPriv->planev_offset;
323ad43ddacSmrg	tex_res.mip_base            = accel_state->src_obj[0].offset + pPriv->planev_offset;
324921a55d8Smrg	tex_res.size                = tex_res.pitch * (pPriv->h >> 1);
325b13dfe66Smrg	if (accel_state->src_obj[0].tiling_flags == 0)
326b13dfe66Smrg	    tex_res.tile_mode           = 1;
327921a55d8Smrg	r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
328b7e1c893Smrg
329b7e1c893Smrg	/* U or V sampler */
330b7e1c893Smrg	tex_samp.id                 = 1;
331921a55d8Smrg	r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
332b7e1c893Smrg
333b7e1c893Smrg	/* U or V texture */
334b7e1c893Smrg	tex_res.id                  = 2;
335b7e1c893Smrg	tex_res.format              = FMT_8;
336ad43ddacSmrg	tex_res.w                   = accel_state->src_obj[0].width >> 1;
337ad43ddacSmrg	tex_res.h                   = accel_state->src_obj[0].height >> 1;
338b13dfe66Smrg	tex_res.pitch               = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, pPriv->hw_align);
339b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
340b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
341b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
342b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
343b7e1c893Smrg	tex_res.interlaced          = 0;
344b7e1c893Smrg
345ad43ddacSmrg	tex_res.base                = accel_state->src_obj[0].offset + pPriv->planeu_offset;
346ad43ddacSmrg	tex_res.mip_base            = accel_state->src_obj[0].offset + pPriv->planeu_offset;
347921a55d8Smrg	tex_res.size                = tex_res.pitch * (pPriv->h >> 1);
348b13dfe66Smrg	if (accel_state->src_obj[0].tiling_flags == 0)
349b13dfe66Smrg	    tex_res.tile_mode           = 1;
350921a55d8Smrg	r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
351b7e1c893Smrg
352b7e1c893Smrg	/* UV sampler */
353b7e1c893Smrg	tex_samp.id                 = 2;
354921a55d8Smrg	r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
355b7e1c893Smrg	break;
356b7e1c893Smrg    case FOURCC_UYVY:
357b7e1c893Smrg    case FOURCC_YUY2:
358b7e1c893Smrg    default:
359ad43ddacSmrg	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
360b7e1c893Smrg
361b7e1c893Smrg	/* Y texture */
362b7e1c893Smrg	tex_res.id                  = 0;
363ad43ddacSmrg	tex_res.w                   = accel_state->src_obj[0].width;
364ad43ddacSmrg	tex_res.h                   = accel_state->src_obj[0].height;
365ad43ddacSmrg	tex_res.pitch               = accel_state->src_obj[0].pitch >> 1;
366b7e1c893Smrg	tex_res.depth               = 0;
367b7e1c893Smrg	tex_res.dim                 = SQ_TEX_DIM_2D;
368ad43ddacSmrg	tex_res.base                = accel_state->src_obj[0].offset;
369ad43ddacSmrg	tex_res.mip_base            = accel_state->src_obj[0].offset;
3700974d292Smrg	tex_res.size                = accel_state->src_size[0];
371ad43ddacSmrg	tex_res.bo                  = accel_state->src_obj[0].bo;
372ad43ddacSmrg	tex_res.mip_bo              = accel_state->src_obj[0].bo;
373b7e1c893Smrg
374b7e1c893Smrg	tex_res.format              = FMT_8_8;
375b7e1c893Smrg	if (pPriv->id == FOURCC_UYVY)
376b7e1c893Smrg	    tex_res.dst_sel_x           = SQ_SEL_Y; /* Y */
377b7e1c893Smrg	else
378b7e1c893Smrg	    tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
379b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
380b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
381b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
382b7e1c893Smrg
383b7e1c893Smrg	tex_res.request_size        = 1;
384b7e1c893Smrg	tex_res.base_level          = 0;
385b7e1c893Smrg	tex_res.last_level          = 0;
386b7e1c893Smrg	tex_res.perf_modulation     = 0;
387b7e1c893Smrg	tex_res.interlaced          = 0;
388b13dfe66Smrg	if (accel_state->src_obj[0].tiling_flags == 0)
389b13dfe66Smrg	    tex_res.tile_mode           = 1;
390921a55d8Smrg	r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
391b7e1c893Smrg
392b7e1c893Smrg	/* Y sampler */
393b7e1c893Smrg	tex_samp.id                 = 0;
394b7e1c893Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
395b7e1c893Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
396b7e1c893Smrg	tex_samp.clamp_z            = SQ_TEX_WRAP;
397b7e1c893Smrg
398b7e1c893Smrg	/* xxx: switch to bicubic */
399b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
400b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
401b7e1c893Smrg
402b7e1c893Smrg	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
403b7e1c893Smrg	tex_samp.mip_filter         = 0;			/* no mipmap */
404921a55d8Smrg	r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
405b7e1c893Smrg
406b7e1c893Smrg	/* UV texture */
407b7e1c893Smrg	tex_res.id                  = 1;
408b7e1c893Smrg	tex_res.format              = FMT_8_8_8_8;
409ad43ddacSmrg	tex_res.w                   = accel_state->src_obj[0].width >> 1;
410ad43ddacSmrg	tex_res.h                   = accel_state->src_obj[0].height;
411ad43ddacSmrg	tex_res.pitch               = accel_state->src_obj[0].pitch >> 2;
412b7e1c893Smrg	if (pPriv->id == FOURCC_UYVY) {
413b7e1c893Smrg	    tex_res.dst_sel_x           = SQ_SEL_X; /* V */
414b7e1c893Smrg	    tex_res.dst_sel_y           = SQ_SEL_Z; /* U */
415b7e1c893Smrg	} else {
416b7e1c893Smrg	    tex_res.dst_sel_x           = SQ_SEL_Y; /* V */
417b7e1c893Smrg	    tex_res.dst_sel_y           = SQ_SEL_W; /* U */
418b7e1c893Smrg	}
419b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
420b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
421b7e1c893Smrg	tex_res.interlaced          = 0;
422b7e1c893Smrg
423ad43ddacSmrg	tex_res.base                = accel_state->src_obj[0].offset;
424ad43ddacSmrg	tex_res.mip_base            = accel_state->src_obj[0].offset;
4250974d292Smrg	tex_res.size                = accel_state->src_size[0];
426b13dfe66Smrg	if (accel_state->src_obj[0].tiling_flags == 0)
427b13dfe66Smrg	    tex_res.tile_mode           = 1;
428921a55d8Smrg	r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
429b7e1c893Smrg
430b7e1c893Smrg	/* UV sampler */
431b7e1c893Smrg	tex_samp.id                 = 1;
432921a55d8Smrg	r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
433b7e1c893Smrg	break;
434b7e1c893Smrg    }
435b7e1c893Smrg
436b7e1c893Smrg    cb_conf.id = 0;
437ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
438ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
439ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
440ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
44140732134Srjs#ifdef XF86DRM_MODE
44240732134Srjs    cb_conf.surface = accel_state->dst_obj.surface;
44340732134Srjs#endif
444b7e1c893Smrg
445ad43ddacSmrg    switch (accel_state->dst_obj.bpp) {
446b7e1c893Smrg    case 16:
447b7e1c893Smrg	if (pPixmap->drawable.depth == 15) {
448b7e1c893Smrg	    cb_conf.format = COLOR_1_5_5_5;
449b7e1c893Smrg	    cb_conf.comp_swap = 1; /* ARGB */
450b7e1c893Smrg	} else {
451b7e1c893Smrg	    cb_conf.format = COLOR_5_6_5;
452b7e1c893Smrg	    cb_conf.comp_swap = 2; /* RGB */
453b7e1c893Smrg	}
454b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
455b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN16;
456b13dfe66Smrg#endif
457b7e1c893Smrg	break;
458b7e1c893Smrg    case 32:
459b7e1c893Smrg	cb_conf.format = COLOR_8_8_8_8;
460b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
461b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
462b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN32;
463b13dfe66Smrg#endif
464b7e1c893Smrg	break;
465b7e1c893Smrg    default:
466b7e1c893Smrg	return;
467b7e1c893Smrg    }
468b7e1c893Smrg
469b7e1c893Smrg    cb_conf.source_format = 1;
470b7e1c893Smrg    cb_conf.blend_clamp = 1;
471b13dfe66Smrg    cb_conf.pmask = 0xf;
472b13dfe66Smrg    cb_conf.rop = 3;
473b13dfe66Smrg    if (accel_state->dst_obj.tiling_flags == 0)
474b13dfe66Smrg	cb_conf.array_mode = 1;
475921a55d8Smrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
476b7e1c893Smrg
477b13dfe66Smrg    r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
478b7e1c893Smrg
479ad43ddacSmrg    vs_alu_consts[0] = 1.0 / pPriv->w;
480ad43ddacSmrg    vs_alu_consts[1] = 1.0 / pPriv->h;
481ad43ddacSmrg    vs_alu_consts[2] = 0.0;
482ad43ddacSmrg    vs_alu_consts[3] = 0.0;
483b7e1c893Smrg
484ad43ddacSmrg    /* VS alu constants */
485921a55d8Smrg    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs,
486921a55d8Smrg			sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
487b7e1c893Smrg
488ad43ddacSmrg    if (pPriv->vsync) {
489ad43ddacSmrg	xf86CrtcPtr crtc;
490ad43ddacSmrg	if (pPriv->desired_crtc)
491ad43ddacSmrg	    crtc = pPriv->desired_crtc;
492ad43ddacSmrg	else
493ad43ddacSmrg	    crtc = radeon_pick_best_crtc(pScrn,
494ad43ddacSmrg					 pPriv->drw_x,
495ad43ddacSmrg					 pPriv->drw_x + pPriv->dst_w,
496ad43ddacSmrg					 pPriv->drw_y,
497ad43ddacSmrg					 pPriv->drw_y + pPriv->dst_h);
498ad43ddacSmrg	if (crtc)
499921a55d8Smrg	    r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPixmap,
500921a55d8Smrg				    crtc,
501921a55d8Smrg				    pPriv->drw_y - crtc->y,
502921a55d8Smrg				    (pPriv->drw_y - crtc->y) + pPriv->dst_h);
503b7e1c893Smrg    }
504b7e1c893Smrg
505b7e1c893Smrg    while (nBox--) {
50640732134Srjs	float srcX, srcY, srcw, srch;
507b7e1c893Smrg	int dstX, dstY, dstw, dsth;
508b7e1c893Smrg	float *vb;
509b7e1c893Smrg
510b7e1c893Smrg
511b7e1c893Smrg	dstX = pBox->x1 + dstxoff;
512b7e1c893Smrg	dstY = pBox->y1 + dstyoff;
513b7e1c893Smrg	dstw = pBox->x2 - pBox->x1;
514b7e1c893Smrg	dsth = pBox->y2 - pBox->y1;
515b7e1c893Smrg
516ad43ddacSmrg	srcX = pPriv->src_x;
517ad43ddacSmrg	srcX += ((pBox->x1 - pPriv->drw_x) *
51840732134Srjs		 pPriv->src_w) / (float)pPriv->dst_w;
519ad43ddacSmrg	srcY = pPriv->src_y;
520ad43ddacSmrg	srcY += ((pBox->y1 - pPriv->drw_y) *
52140732134Srjs		 pPriv->src_h) / (float)pPriv->dst_h;
522b7e1c893Smrg
52340732134Srjs	srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w;
52440732134Srjs	srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h;
525b7e1c893Smrg
526921a55d8Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
527ad43ddacSmrg
528b7e1c893Smrg	vb[0] = (float)dstX;
529b7e1c893Smrg	vb[1] = (float)dstY;
530ad43ddacSmrg	vb[2] = (float)srcX;
531ad43ddacSmrg	vb[3] = (float)srcY;
532b7e1c893Smrg
533b7e1c893Smrg	vb[4] = (float)dstX;
534b7e1c893Smrg	vb[5] = (float)(dstY + dsth);
535ad43ddacSmrg	vb[6] = (float)srcX;
536ad43ddacSmrg	vb[7] = (float)(srcY + srch);
537b7e1c893Smrg
538b7e1c893Smrg	vb[8] = (float)(dstX + dstw);
539b7e1c893Smrg	vb[9] = (float)(dstY + dsth);
540ad43ddacSmrg	vb[10] = (float)(srcX + srcw);
541ad43ddacSmrg	vb[11] = (float)(srcY + srch);
542b7e1c893Smrg
543921a55d8Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
544b7e1c893Smrg
545b7e1c893Smrg	pBox++;
546b7e1c893Smrg    }
547b7e1c893Smrg
548ad43ddacSmrg    r600_finish_op(pScrn, 16);
549b7e1c893Smrg
550b7e1c893Smrg    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
551b7e1c893Smrg}
552