evergreen_textured_videofuncs.c revision b13dfe66
1921a55d8Smrg/*
2921a55d8Smrg * Copyright 2010 Advanced Micro Devices, Inc.
3921a55d8Smrg *
4921a55d8Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5921a55d8Smrg * copy of this software and associated documentation files (the "Software"),
6921a55d8Smrg * to deal in the Software without restriction, including without limitation
7921a55d8Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8921a55d8Smrg * and/or sell copies of the Software, and to permit persons to whom the
9921a55d8Smrg * Software is furnished to do so, subject to the following conditions:
10921a55d8Smrg *
11921a55d8Smrg * The above copyright notice and this permission notice (including the next
12921a55d8Smrg * paragraph) shall be included in all copies or substantial portions of the
13921a55d8Smrg * Software.
14921a55d8Smrg *
15921a55d8Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16921a55d8Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17921a55d8Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18921a55d8Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19921a55d8Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20921a55d8Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21921a55d8Smrg * SOFTWARE.
22921a55d8Smrg *
23921a55d8Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24921a55d8Smrg *
25921a55d8Smrg */
26921a55d8Smrg
27921a55d8Smrg#ifdef HAVE_CONFIG_H
28921a55d8Smrg#include "config.h"
29921a55d8Smrg#endif
30921a55d8Smrg
31921a55d8Smrg#ifdef XF86DRM_MODE
32921a55d8Smrg
33921a55d8Smrg#include "xf86.h"
34921a55d8Smrg
35921a55d8Smrg#include "exa.h"
36921a55d8Smrg
37921a55d8Smrg#include "radeon.h"
38921a55d8Smrg#include "radeon_reg.h"
39921a55d8Smrg#include "evergreen_shader.h"
40921a55d8Smrg#include "evergreen_reg.h"
41921a55d8Smrg#include "evergreen_state.h"
42921a55d8Smrg
43921a55d8Smrg#include "radeon_video.h"
44921a55d8Smrg
45921a55d8Smrg#include <X11/extensions/Xv.h>
46921a55d8Smrg#include "fourcc.h"
47921a55d8Smrg
48921a55d8Smrg#include "damage.h"
49921a55d8Smrg
50921a55d8Smrg#include "radeon_exa_shared.h"
51921a55d8Smrg#include "radeon_vbo.h"
52921a55d8Smrg
53921a55d8Smrg/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces
54921a55d8Smrg   note the difference to the parameters used in overlay are due
55921a55d8Smrg   to 10bit vs. float calcs */
56921a55d8Smrgstatic REF_TRANSFORM trans[2] =
57921a55d8Smrg{
58921a55d8Smrg    {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */
59921a55d8Smrg    {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0}  /* BT.709 */
60921a55d8Smrg};
61921a55d8Smrg
62921a55d8Smrgvoid
63921a55d8SmrgEVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
64921a55d8Smrg{
65921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
66921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
67921a55d8Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
68921a55d8Smrg    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
69921a55d8Smrg    int nBox = REGION_NUM_RECTS(&pPriv->clip);
70921a55d8Smrg    int dstxoff, dstyoff;
71921a55d8Smrg    struct r600_accel_object src_obj, dst_obj;
72921a55d8Smrg    cb_config_t     cb_conf;
73921a55d8Smrg    tex_resource_t  tex_res;
74921a55d8Smrg    tex_sampler_t   tex_samp;
75921a55d8Smrg    shader_config_t vs_conf, ps_conf;
76921a55d8Smrg    /*
77921a55d8Smrg     * y' = y - .0625
78921a55d8Smrg     * u' = u - .5
79921a55d8Smrg     * v' = v - .5;
80921a55d8Smrg     *
81921a55d8Smrg     * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
82921a55d8Smrg     * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
83921a55d8Smrg     * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
84921a55d8Smrg     *
85921a55d8Smrg     * DP3 might look like the straightforward solution
86921a55d8Smrg     * but we'd need to move the texture yuv values in
87921a55d8Smrg     * the same reg for this to work. Therefore use MADs.
88921a55d8Smrg     * Brightness just adds to the off constant.
89921a55d8Smrg     * Contrast is multiplication of luminance.
90921a55d8Smrg     * Saturation and hue change the u and v coeffs.
91921a55d8Smrg     * Default values (before adjustments - depend on colorspace):
92921a55d8Smrg     * yco = 1.1643
93921a55d8Smrg     * uco = 0, -0.39173, 2.017
94921a55d8Smrg     * vco = 1.5958, -0.8129, 0
95921a55d8Smrg     * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
96921a55d8Smrg     *       -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
97921a55d8Smrg     *       -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
98921a55d8Smrg     *
99921a55d8Smrg     * temp = MAD(yco, yuv.yyyy, off)
100921a55d8Smrg     * temp = MAD(uco, yuv.uuuu, temp)
101921a55d8Smrg     * result = MAD(vco, yuv.vvvv, temp)
102921a55d8Smrg     */
103921a55d8Smrg    /* TODO: calc consts in the shader */
104921a55d8Smrg    const float Loff = -0.0627;
105921a55d8Smrg    const float Coff = -0.502;
106921a55d8Smrg    float uvcosf, uvsinf;
107921a55d8Smrg    float yco;
108921a55d8Smrg    float uco[3], vco[3], off[3];
109921a55d8Smrg    float bright, cont, gamma;
110921a55d8Smrg    int ref = pPriv->transform_index;
111921a55d8Smrg    Bool needgamma = FALSE;
112921a55d8Smrg    float *ps_alu_consts;
113921a55d8Smrg    const_config_t ps_const_conf;
114921a55d8Smrg    float *vs_alu_consts;
115921a55d8Smrg    const_config_t vs_const_conf;
116921a55d8Smrg
117921a55d8Smrg    cont = RTFContrast(pPriv->contrast);
118921a55d8Smrg    bright = RTFBrightness(pPriv->brightness);
119921a55d8Smrg    gamma = (float)pPriv->gamma / 1000.0;
120921a55d8Smrg    uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
121921a55d8Smrg    uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
122921a55d8Smrg    /* overlay video also does pre-gamma contrast/sat adjust, should we? */
123921a55d8Smrg
124921a55d8Smrg    yco = trans[ref].RefLuma * cont;
125921a55d8Smrg    uco[0] = -trans[ref].RefRCr * uvsinf;
126921a55d8Smrg    uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
127921a55d8Smrg    uco[2] = trans[ref].RefBCb * uvcosf;
128921a55d8Smrg    vco[0] = trans[ref].RefRCr * uvcosf;
129921a55d8Smrg    vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
130921a55d8Smrg    vco[2] = trans[ref].RefBCb * uvsinf;
131921a55d8Smrg    off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
132921a55d8Smrg    off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
133921a55d8Smrg    off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
134921a55d8Smrg
135921a55d8Smrg    // XXX
136921a55d8Smrg    gamma = 1.0;
137921a55d8Smrg
138921a55d8Smrg    if (gamma != 1.0) {
139921a55d8Smrg	needgamma = TRUE;
140921a55d8Smrg	/* note: gamma correction is out = in ^ gamma;
141921a55d8Smrg	   gpu can only do LG2/EX2 therefore we transform into
142921a55d8Smrg	   in ^ gamma = 2 ^ (log2(in) * gamma).
143921a55d8Smrg	   Lots of scalar ops, unfortunately (better solution?) -
144921a55d8Smrg	   without gamma that's 3 inst, with gamma it's 10...
145921a55d8Smrg	   could use different gamma factors per channel,
146921a55d8Smrg	   if that's of any use. */
147921a55d8Smrg    }
148921a55d8Smrg
149921a55d8Smrg    CLEAR (cb_conf);
150921a55d8Smrg    CLEAR (tex_res);
151921a55d8Smrg    CLEAR (tex_samp);
152921a55d8Smrg    CLEAR (vs_conf);
153921a55d8Smrg    CLEAR (ps_conf);
154921a55d8Smrg    CLEAR (vs_const_conf);
155921a55d8Smrg    CLEAR (ps_const_conf);
156921a55d8Smrg
157921a55d8Smrg#if defined(XF86DRM_MODE)
158921a55d8Smrg    if (info->cs) {
159921a55d8Smrg	dst_obj.offset = 0;
160921a55d8Smrg	src_obj.offset = 0;
161921a55d8Smrg	dst_obj.bo = radeon_get_pixmap_bo(pPixmap);
162921a55d8Smrg    } else
163921a55d8Smrg#endif
164921a55d8Smrg    {
165921a55d8Smrg	dst_obj.offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
166921a55d8Smrg	src_obj.offset = pPriv->src_offset + info->fbLocation + pScrn->fbOffset;
167921a55d8Smrg	dst_obj.bo = src_obj.bo = NULL;
168921a55d8Smrg    }
169921a55d8Smrg    dst_obj.pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8);
170921a55d8Smrg
171921a55d8Smrg    src_obj.pitch = pPriv->src_pitch;
172921a55d8Smrg    src_obj.width = pPriv->w;
173921a55d8Smrg    src_obj.height = pPriv->h;
174921a55d8Smrg    src_obj.bpp = 16;
175921a55d8Smrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
176921a55d8Smrg    src_obj.bo = pPriv->src_bo[pPriv->currentBuffer];
177921a55d8Smrg
178921a55d8Smrg    dst_obj.width = pPixmap->drawable.width;
179921a55d8Smrg    dst_obj.height = pPixmap->drawable.height;
180921a55d8Smrg    dst_obj.bpp = pPixmap->drawable.bitsPerPixel;
181921a55d8Smrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
182921a55d8Smrg
183b13dfe66Smrg    if (!R600SetAccelState(pScrn,
184b13dfe66Smrg			   &src_obj,
185b13dfe66Smrg			   NULL,
186b13dfe66Smrg			   &dst_obj,
187b13dfe66Smrg			   accel_state->xv_vs_offset, accel_state->xv_ps_offset,
188b13dfe66Smrg			   3, 0xffffffff))
189921a55d8Smrg	return;
190921a55d8Smrg
191921a55d8Smrg#ifdef COMPOSITE
192921a55d8Smrg    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
193921a55d8Smrg    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
194921a55d8Smrg#else
195921a55d8Smrg    dstxoff = 0;
196921a55d8Smrg    dstyoff = 0;
197921a55d8Smrg#endif
198921a55d8Smrg
199921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
200921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->cbuf, 512);
201921a55d8Smrg    radeon_cp_start(pScrn);
202921a55d8Smrg
203921a55d8Smrg    evergreen_set_default_state(pScrn);
204921a55d8Smrg
205921a55d8Smrg    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
206921a55d8Smrg    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
207921a55d8Smrg    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
208921a55d8Smrg
209921a55d8Smrg    /* PS bool constant */
210921a55d8Smrg    switch(pPriv->id) {
211921a55d8Smrg    case FOURCC_YV12:
212921a55d8Smrg    case FOURCC_I420:
213921a55d8Smrg	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
214921a55d8Smrg	break;
215921a55d8Smrg    case FOURCC_UYVY:
216921a55d8Smrg    case FOURCC_YUY2:
217921a55d8Smrg    default:
218921a55d8Smrg	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
219921a55d8Smrg	break;
220921a55d8Smrg    }
221921a55d8Smrg
222921a55d8Smrg    /* Shader */
223921a55d8Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
224921a55d8Smrg    vs_conf.shader_size         = accel_state->vs_size;
225921a55d8Smrg    vs_conf.num_gprs            = 2;
226921a55d8Smrg    vs_conf.stack_size          = 0;
227921a55d8Smrg    vs_conf.bo                  = accel_state->shaders_bo;
228921a55d8Smrg    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
229921a55d8Smrg
230921a55d8Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
231921a55d8Smrg    ps_conf.shader_size         = accel_state->ps_size;
232921a55d8Smrg    ps_conf.num_gprs            = 3;
233921a55d8Smrg    ps_conf.stack_size          = 1;
234921a55d8Smrg    ps_conf.clamp_consts        = 0;
235921a55d8Smrg    ps_conf.export_mode         = 2;
236921a55d8Smrg    ps_conf.bo                  = accel_state->shaders_bo;
237921a55d8Smrg    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
238921a55d8Smrg
239921a55d8Smrg    /* Texture */
240921a55d8Smrg    switch(pPriv->id) {
241921a55d8Smrg    case FOURCC_YV12:
242921a55d8Smrg    case FOURCC_I420:
243921a55d8Smrg	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
244921a55d8Smrg
245921a55d8Smrg	/* Y texture */
246921a55d8Smrg	tex_res.id                  = 0;
247921a55d8Smrg	tex_res.w                   = accel_state->src_obj[0].width;
248921a55d8Smrg	tex_res.h                   = accel_state->src_obj[0].height;
249921a55d8Smrg	tex_res.pitch               = accel_state->src_obj[0].pitch;
250921a55d8Smrg	tex_res.depth               = 0;
251921a55d8Smrg	tex_res.dim                 = SQ_TEX_DIM_2D;
252921a55d8Smrg	tex_res.base                = accel_state->src_obj[0].offset;
253921a55d8Smrg	tex_res.mip_base            = accel_state->src_obj[0].offset;
254921a55d8Smrg	tex_res.size                = accel_state->src_size[0];
255921a55d8Smrg	tex_res.bo                  = accel_state->src_obj[0].bo;
256921a55d8Smrg	tex_res.mip_bo              = accel_state->src_obj[0].bo;
257921a55d8Smrg
258921a55d8Smrg	tex_res.format              = FMT_8;
259921a55d8Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
260921a55d8Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
261921a55d8Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
262921a55d8Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
263921a55d8Smrg
264921a55d8Smrg	tex_res.base_level          = 0;
265921a55d8Smrg	tex_res.last_level          = 0;
266921a55d8Smrg	tex_res.perf_modulation     = 0;
267921a55d8Smrg	tex_res.interlaced          = 0;
268b13dfe66Smrg	if (accel_state->src_obj[0].tiling_flags == 0)
269b13dfe66Smrg	    tex_res.array_mode          = 1;
270921a55d8Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
271921a55d8Smrg
272921a55d8Smrg	/* Y sampler */
273921a55d8Smrg	tex_samp.id                 = 0;
274921a55d8Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
275921a55d8Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
276921a55d8Smrg	tex_samp.clamp_z            = SQ_TEX_WRAP;
277921a55d8Smrg
278921a55d8Smrg	/* xxx: switch to bicubic */
279921a55d8Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
280921a55d8Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
281921a55d8Smrg
282921a55d8Smrg	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
283921a55d8Smrg	tex_samp.mip_filter         = 0;			/* no mipmap */
284921a55d8Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
285921a55d8Smrg
286921a55d8Smrg	/* U or V texture */
287921a55d8Smrg	tex_res.id                  = 1;
288921a55d8Smrg	tex_res.format              = FMT_8;
289921a55d8Smrg	tex_res.w                   = accel_state->src_obj[0].width >> 1;
290921a55d8Smrg	tex_res.h                   = accel_state->src_obj[0].height >> 1;
291b13dfe66Smrg	tex_res.pitch               = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, pPriv->hw_align);
292921a55d8Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
293921a55d8Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
294921a55d8Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
295921a55d8Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
296921a55d8Smrg	tex_res.interlaced          = 0;
297921a55d8Smrg
298921a55d8Smrg	tex_res.base                = accel_state->src_obj[0].offset + pPriv->planev_offset;
299921a55d8Smrg	tex_res.mip_base            = accel_state->src_obj[0].offset + pPriv->planev_offset;
300921a55d8Smrg	tex_res.size                = tex_res.pitch * (pPriv->h >> 1);
301b13dfe66Smrg	if (accel_state->src_obj[0].tiling_flags == 0)
302b13dfe66Smrg	    tex_res.array_mode          = 1;
303921a55d8Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
304921a55d8Smrg
305921a55d8Smrg	/* U or V sampler */
306921a55d8Smrg	tex_samp.id                 = 1;
307921a55d8Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
308921a55d8Smrg
309921a55d8Smrg	/* U or V texture */
310921a55d8Smrg	tex_res.id                  = 2;
311921a55d8Smrg	tex_res.format              = FMT_8;
312921a55d8Smrg	tex_res.w                   = accel_state->src_obj[0].width >> 1;
313921a55d8Smrg	tex_res.h                   = accel_state->src_obj[0].height >> 1;
314b13dfe66Smrg	tex_res.pitch               = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, pPriv->hw_align);
315921a55d8Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
316921a55d8Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
317921a55d8Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
318921a55d8Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
319921a55d8Smrg	tex_res.interlaced          = 0;
320921a55d8Smrg
321921a55d8Smrg	tex_res.base                = accel_state->src_obj[0].offset + pPriv->planeu_offset;
322921a55d8Smrg	tex_res.mip_base            = accel_state->src_obj[0].offset + pPriv->planeu_offset;
323921a55d8Smrg	tex_res.size                = tex_res.pitch * (pPriv->h >> 1);
324b13dfe66Smrg	if (accel_state->src_obj[0].tiling_flags == 0)
325b13dfe66Smrg	    tex_res.array_mode          = 1;
326921a55d8Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
327921a55d8Smrg
328921a55d8Smrg	/* UV sampler */
329921a55d8Smrg	tex_samp.id                 = 2;
330921a55d8Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
331921a55d8Smrg	break;
332921a55d8Smrg    case FOURCC_UYVY:
333921a55d8Smrg    case FOURCC_YUY2:
334921a55d8Smrg    default:
335921a55d8Smrg	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
336921a55d8Smrg
337921a55d8Smrg	/* Y texture */
338921a55d8Smrg	tex_res.id                  = 0;
339921a55d8Smrg	tex_res.w                   = accel_state->src_obj[0].width;
340921a55d8Smrg	tex_res.h                   = accel_state->src_obj[0].height;
341921a55d8Smrg	tex_res.pitch               = accel_state->src_obj[0].pitch >> 1;
342921a55d8Smrg	tex_res.depth               = 0;
343921a55d8Smrg	tex_res.dim                 = SQ_TEX_DIM_2D;
344921a55d8Smrg	tex_res.base                = accel_state->src_obj[0].offset;
345921a55d8Smrg	tex_res.mip_base            = accel_state->src_obj[0].offset;
346921a55d8Smrg	tex_res.size                = accel_state->src_size[0];
347921a55d8Smrg	tex_res.bo                  = accel_state->src_obj[0].bo;
348921a55d8Smrg	tex_res.mip_bo              = accel_state->src_obj[0].bo;
349921a55d8Smrg
350921a55d8Smrg	tex_res.format              = FMT_8_8;
351921a55d8Smrg	if (pPriv->id == FOURCC_UYVY)
352921a55d8Smrg	    tex_res.dst_sel_x           = SQ_SEL_Y; /* Y */
353921a55d8Smrg	else
354921a55d8Smrg	    tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
355921a55d8Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
356921a55d8Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
357921a55d8Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
358921a55d8Smrg
359921a55d8Smrg	tex_res.base_level          = 0;
360921a55d8Smrg	tex_res.last_level          = 0;
361921a55d8Smrg	tex_res.perf_modulation     = 0;
362921a55d8Smrg	tex_res.interlaced          = 0;
363b13dfe66Smrg	if (accel_state->src_obj[0].tiling_flags == 0)
364b13dfe66Smrg	    tex_res.array_mode          = 1;
365921a55d8Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
366921a55d8Smrg
367921a55d8Smrg	/* Y sampler */
368921a55d8Smrg	tex_samp.id                 = 0;
369921a55d8Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
370921a55d8Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
371921a55d8Smrg	tex_samp.clamp_z            = SQ_TEX_WRAP;
372921a55d8Smrg
373921a55d8Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
374921a55d8Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
375921a55d8Smrg
376921a55d8Smrg	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
377921a55d8Smrg	tex_samp.mip_filter         = 0;			/* no mipmap */
378921a55d8Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
379921a55d8Smrg
380921a55d8Smrg	/* UV texture */
381921a55d8Smrg	tex_res.id                  = 1;
382921a55d8Smrg	tex_res.format              = FMT_8_8_8_8;
383921a55d8Smrg	tex_res.w                   = accel_state->src_obj[0].width >> 1;
384921a55d8Smrg	tex_res.h                   = accel_state->src_obj[0].height;
385921a55d8Smrg	tex_res.pitch               = accel_state->src_obj[0].pitch >> 2;
386921a55d8Smrg	if (pPriv->id == FOURCC_UYVY) {
387921a55d8Smrg	    tex_res.dst_sel_x           = SQ_SEL_X; /* V */
388921a55d8Smrg	    tex_res.dst_sel_y           = SQ_SEL_Z; /* U */
389921a55d8Smrg	} else {
390921a55d8Smrg	    tex_res.dst_sel_x           = SQ_SEL_Y; /* V */
391921a55d8Smrg	    tex_res.dst_sel_y           = SQ_SEL_W; /* U */
392921a55d8Smrg	}
393921a55d8Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
394921a55d8Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
395921a55d8Smrg	tex_res.interlaced          = 0;
396921a55d8Smrg
397921a55d8Smrg	tex_res.base                = accel_state->src_obj[0].offset;
398921a55d8Smrg	tex_res.mip_base            = accel_state->src_obj[0].offset;
399921a55d8Smrg	tex_res.size                = accel_state->src_size[0];
400b13dfe66Smrg	if (accel_state->src_obj[0].tiling_flags == 0)
401b13dfe66Smrg	    tex_res.array_mode          = 1;
402921a55d8Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
403921a55d8Smrg
404921a55d8Smrg	/* UV sampler */
405921a55d8Smrg	tex_samp.id                 = 1;
406921a55d8Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
407921a55d8Smrg	break;
408921a55d8Smrg    }
409921a55d8Smrg
410921a55d8Smrg    cb_conf.id = 0;
411921a55d8Smrg    cb_conf.w = accel_state->dst_obj.pitch;
412921a55d8Smrg    cb_conf.h = accel_state->dst_obj.height;
413921a55d8Smrg    cb_conf.base = accel_state->dst_obj.offset;
414921a55d8Smrg    cb_conf.bo = accel_state->dst_obj.bo;
415921a55d8Smrg
416921a55d8Smrg    switch (accel_state->dst_obj.bpp) {
417921a55d8Smrg    case 16:
418921a55d8Smrg	if (pPixmap->drawable.depth == 15) {
419921a55d8Smrg	    cb_conf.format = COLOR_1_5_5_5;
420921a55d8Smrg	    cb_conf.comp_swap = 1; /* ARGB */
421921a55d8Smrg	} else {
422921a55d8Smrg	    cb_conf.format = COLOR_5_6_5;
423921a55d8Smrg	    cb_conf.comp_swap = 2; /* RGB */
424921a55d8Smrg	}
425b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
426b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN16;
427b13dfe66Smrg#endif
428921a55d8Smrg	break;
429921a55d8Smrg    case 32:
430921a55d8Smrg	cb_conf.format = COLOR_8_8_8_8;
431921a55d8Smrg	cb_conf.comp_swap = 1; /* ARGB */
432b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
433b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN32;
434b13dfe66Smrg#endif
435921a55d8Smrg	break;
436921a55d8Smrg    default:
437921a55d8Smrg	return;
438921a55d8Smrg    }
439921a55d8Smrg
440921a55d8Smrg    cb_conf.source_format = EXPORT_4C_16BPC;
441921a55d8Smrg    cb_conf.blend_clamp = 1;
442b13dfe66Smrg    cb_conf.pmask = 0xf;
443b13dfe66Smrg    cb_conf.rop = 3;
444b13dfe66Smrg    if (accel_state->dst_obj.tiling_flags == 0) {
445b13dfe66Smrg	cb_conf.array_mode = 1;
446b13dfe66Smrg	cb_conf.non_disp_tiling = 1;
447b13dfe66Smrg    }
448921a55d8Smrg    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
449921a55d8Smrg
450b13dfe66Smrg    evergreen_set_spi(pScrn, (1 - 1), 1);
451921a55d8Smrg
452921a55d8Smrg    /* PS alu constants */
453921a55d8Smrg    ps_const_conf.size_bytes = 256;
454921a55d8Smrg    ps_const_conf.type = SHADER_TYPE_PS;
455921a55d8Smrg    ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
456921a55d8Smrg    ps_const_conf.bo = accel_state->cbuf.vb_bo;
457921a55d8Smrg    ps_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_offset;
458921a55d8Smrg
459921a55d8Smrg    ps_alu_consts[0] = off[0];
460921a55d8Smrg    ps_alu_consts[1] = off[1];
461921a55d8Smrg    ps_alu_consts[2] = off[2];
462921a55d8Smrg    ps_alu_consts[3] = yco;
463921a55d8Smrg
464921a55d8Smrg    ps_alu_consts[4] = uco[0];
465921a55d8Smrg    ps_alu_consts[5] = uco[1];
466921a55d8Smrg    ps_alu_consts[6] = uco[2];
467921a55d8Smrg    ps_alu_consts[7] = gamma;
468921a55d8Smrg
469921a55d8Smrg    ps_alu_consts[8] = vco[0];
470921a55d8Smrg    ps_alu_consts[9] = vco[1];
471921a55d8Smrg    ps_alu_consts[10] = vco[2];
472921a55d8Smrg    ps_alu_consts[11] = 0.0;
473921a55d8Smrg
474921a55d8Smrg    radeon_vbo_commit(pScrn, &accel_state->cbuf);
475921a55d8Smrg    evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);
476921a55d8Smrg
477921a55d8Smrg    /* VS alu constants */
478921a55d8Smrg    vs_const_conf.size_bytes = 256;
479921a55d8Smrg    vs_const_conf.type = SHADER_TYPE_VS;
480921a55d8Smrg    vs_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
481921a55d8Smrg    vs_const_conf.bo = accel_state->cbuf.vb_bo;
482921a55d8Smrg    vs_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_offset;
483921a55d8Smrg
484921a55d8Smrg    vs_alu_consts[0] = 1.0 / pPriv->w;
485921a55d8Smrg    vs_alu_consts[1] = 1.0 / pPriv->h;
486921a55d8Smrg    vs_alu_consts[2] = 0.0;
487921a55d8Smrg    vs_alu_consts[3] = 0.0;
488921a55d8Smrg
489921a55d8Smrg    radeon_vbo_commit(pScrn, &accel_state->cbuf);
490921a55d8Smrg    evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);
491921a55d8Smrg
492921a55d8Smrg    if (pPriv->vsync) {
493921a55d8Smrg	xf86CrtcPtr crtc;
494921a55d8Smrg	if (pPriv->desired_crtc)
495921a55d8Smrg	    crtc = pPriv->desired_crtc;
496921a55d8Smrg	else
497921a55d8Smrg	    crtc = radeon_pick_best_crtc(pScrn,
498921a55d8Smrg					 pPriv->drw_x,
499921a55d8Smrg					 pPriv->drw_x + pPriv->dst_w,
500921a55d8Smrg					 pPriv->drw_y,
501921a55d8Smrg					 pPriv->drw_y + pPriv->dst_h);
502921a55d8Smrg	if (crtc)
503921a55d8Smrg	    evergreen_cp_wait_vline_sync(pScrn, pPixmap,
504921a55d8Smrg					 crtc,
505921a55d8Smrg					 pPriv->drw_y - crtc->y,
506921a55d8Smrg					 (pPriv->drw_y - crtc->y) + pPriv->dst_h);
507921a55d8Smrg    }
508921a55d8Smrg
509921a55d8Smrg    while (nBox--) {
510921a55d8Smrg	int srcX, srcY, srcw, srch;
511921a55d8Smrg	int dstX, dstY, dstw, dsth;
512921a55d8Smrg	float *vb;
513921a55d8Smrg
514921a55d8Smrg
515921a55d8Smrg	dstX = pBox->x1 + dstxoff;
516921a55d8Smrg	dstY = pBox->y1 + dstyoff;
517921a55d8Smrg	dstw = pBox->x2 - pBox->x1;
518921a55d8Smrg	dsth = pBox->y2 - pBox->y1;
519921a55d8Smrg
520921a55d8Smrg	srcX = pPriv->src_x;
521921a55d8Smrg	srcX += ((pBox->x1 - pPriv->drw_x) *
522921a55d8Smrg		 pPriv->src_w) / pPriv->dst_w;
523921a55d8Smrg	srcY = pPriv->src_y;
524921a55d8Smrg	srcY += ((pBox->y1 - pPriv->drw_y) *
525921a55d8Smrg		 pPriv->src_h) / pPriv->dst_h;
526921a55d8Smrg
527921a55d8Smrg	srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
528921a55d8Smrg	srch = (pPriv->src_h * dsth) / pPriv->dst_h;
529921a55d8Smrg
530921a55d8Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
531921a55d8Smrg
532921a55d8Smrg	vb[0] = (float)dstX;
533921a55d8Smrg	vb[1] = (float)dstY;
534921a55d8Smrg	vb[2] = (float)srcX;
535921a55d8Smrg	vb[3] = (float)srcY;
536921a55d8Smrg
537921a55d8Smrg	vb[4] = (float)dstX;
538921a55d8Smrg	vb[5] = (float)(dstY + dsth);
539921a55d8Smrg	vb[6] = (float)srcX;
540921a55d8Smrg	vb[7] = (float)(srcY + srch);
541921a55d8Smrg
542921a55d8Smrg	vb[8] = (float)(dstX + dstw);
543921a55d8Smrg	vb[9] = (float)(dstY + dsth);
544921a55d8Smrg	vb[10] = (float)(srcX + srcw);
545921a55d8Smrg	vb[11] = (float)(srcY + srch);
546921a55d8Smrg
547921a55d8Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
548921a55d8Smrg
549921a55d8Smrg	pBox++;
550921a55d8Smrg    }
551921a55d8Smrg
552921a55d8Smrg    evergreen_finish_op(pScrn, 16);
553921a55d8Smrg
554921a55d8Smrg    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
555921a55d8Smrg}
556921a55d8Smrg
557921a55d8Smrg#endif
558