1921a55d8Smrg/*
2921a55d8Smrg * Copyright 2010 Advanced Micro Devices, Inc.
3921a55d8Smrg *
4921a55d8Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5921a55d8Smrg * copy of this software and associated documentation files (the "Software"),
6921a55d8Smrg * to deal in the Software without restriction, including without limitation
7921a55d8Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8921a55d8Smrg * and/or sell copies of the Software, and to permit persons to whom the
9921a55d8Smrg * Software is furnished to do so, subject to the following conditions:
10921a55d8Smrg *
11921a55d8Smrg * The above copyright notice and this permission notice (including the next
12921a55d8Smrg * paragraph) shall be included in all copies or substantial portions of the
13921a55d8Smrg * Software.
14921a55d8Smrg *
15921a55d8Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16921a55d8Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17921a55d8Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18921a55d8Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19921a55d8Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20921a55d8Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21921a55d8Smrg * SOFTWARE.
22921a55d8Smrg *
23921a55d8Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24921a55d8Smrg *
25921a55d8Smrg */
26921a55d8Smrg
27921a55d8Smrg#ifdef HAVE_CONFIG_H
28921a55d8Smrg#include "config.h"
29921a55d8Smrg#endif
30921a55d8Smrg
3143df4709Smrg#ifdef XF86DRM_MODE
3243df4709Smrg
33921a55d8Smrg#include "xf86.h"
34921a55d8Smrg
35921a55d8Smrg#include "exa.h"
36921a55d8Smrg
37921a55d8Smrg#include "radeon.h"
38921a55d8Smrg#include "radeon_reg.h"
39921a55d8Smrg#include "evergreen_shader.h"
40921a55d8Smrg#include "evergreen_reg.h"
41921a55d8Smrg#include "evergreen_state.h"
42921a55d8Smrg
43921a55d8Smrg#include "radeon_video.h"
44921a55d8Smrg
45921a55d8Smrg#include <X11/extensions/Xv.h>
46921a55d8Smrg#include "fourcc.h"
47921a55d8Smrg
48921a55d8Smrg#include "damage.h"
49921a55d8Smrg
50921a55d8Smrg#include "radeon_exa_shared.h"
51921a55d8Smrg#include "radeon_vbo.h"
52921a55d8Smrg
53921a55d8Smrg/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces
54921a55d8Smrg   note the difference to the parameters used in overlay are due
55921a55d8Smrg   to 10bit vs. float calcs */
56921a55d8Smrgstatic REF_TRANSFORM trans[2] =
57921a55d8Smrg{
58921a55d8Smrg    {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */
59921a55d8Smrg    {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0}  /* BT.709 */
60921a55d8Smrg};
61921a55d8Smrg
62921a55d8Smrgvoid
63921a55d8SmrgEVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
64921a55d8Smrg{
65921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
66921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
67921a55d8Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
68921a55d8Smrg    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
69921a55d8Smrg    int nBox = REGION_NUM_RECTS(&pPriv->clip);
70921a55d8Smrg    int dstxoff, dstyoff;
71921a55d8Smrg    struct r600_accel_object src_obj, dst_obj;
72921a55d8Smrg    cb_config_t     cb_conf;
73921a55d8Smrg    tex_resource_t  tex_res;
74921a55d8Smrg    tex_sampler_t   tex_samp;
75921a55d8Smrg    shader_config_t vs_conf, ps_conf;
76921a55d8Smrg    /*
77921a55d8Smrg     * y' = y - .0625
78921a55d8Smrg     * u' = u - .5
79921a55d8Smrg     * v' = v - .5;
80921a55d8Smrg     *
81921a55d8Smrg     * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
82921a55d8Smrg     * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
83921a55d8Smrg     * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
84921a55d8Smrg     *
85921a55d8Smrg     * DP3 might look like the straightforward solution
86921a55d8Smrg     * but we'd need to move the texture yuv values in
87921a55d8Smrg     * the same reg for this to work. Therefore use MADs.
88921a55d8Smrg     * Brightness just adds to the off constant.
89921a55d8Smrg     * Contrast is multiplication of luminance.
90921a55d8Smrg     * Saturation and hue change the u and v coeffs.
91921a55d8Smrg     * Default values (before adjustments - depend on colorspace):
92921a55d8Smrg     * yco = 1.1643
93921a55d8Smrg     * uco = 0, -0.39173, 2.017
94921a55d8Smrg     * vco = 1.5958, -0.8129, 0
95921a55d8Smrg     * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
96921a55d8Smrg     *       -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
97921a55d8Smrg     *       -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
98921a55d8Smrg     *
99921a55d8Smrg     * temp = MAD(yco, yuv.yyyy, off)
100921a55d8Smrg     * temp = MAD(uco, yuv.uuuu, temp)
101921a55d8Smrg     * result = MAD(vco, yuv.vvvv, temp)
102921a55d8Smrg     */
103921a55d8Smrg    /* TODO: calc consts in the shader */
104921a55d8Smrg    const float Loff = -0.0627;
105921a55d8Smrg    const float Coff = -0.502;
106921a55d8Smrg    float uvcosf, uvsinf;
107921a55d8Smrg    float yco;
108921a55d8Smrg    float uco[3], vco[3], off[3];
109921a55d8Smrg    float bright, cont, gamma;
110921a55d8Smrg    int ref = pPriv->transform_index;
11143df4709Smrg    Bool needgamma = FALSE;
112921a55d8Smrg    float *ps_alu_consts;
113921a55d8Smrg    const_config_t ps_const_conf;
114921a55d8Smrg    float *vs_alu_consts;
115921a55d8Smrg    const_config_t vs_const_conf;
116921a55d8Smrg
117921a55d8Smrg    cont = RTFContrast(pPriv->contrast);
118921a55d8Smrg    bright = RTFBrightness(pPriv->brightness);
119921a55d8Smrg    gamma = (float)pPriv->gamma / 1000.0;
120921a55d8Smrg    uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
121921a55d8Smrg    uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
122921a55d8Smrg    /* overlay video also does pre-gamma contrast/sat adjust, should we? */
123921a55d8Smrg
124921a55d8Smrg    yco = trans[ref].RefLuma * cont;
125921a55d8Smrg    uco[0] = -trans[ref].RefRCr * uvsinf;
126921a55d8Smrg    uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
127921a55d8Smrg    uco[2] = trans[ref].RefBCb * uvcosf;
128921a55d8Smrg    vco[0] = trans[ref].RefRCr * uvcosf;
129921a55d8Smrg    vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
130921a55d8Smrg    vco[2] = trans[ref].RefBCb * uvsinf;
131921a55d8Smrg    off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
132921a55d8Smrg    off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
133921a55d8Smrg    off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
134921a55d8Smrg
135921a55d8Smrg    // XXX
136921a55d8Smrg    gamma = 1.0;
137921a55d8Smrg
13843df4709Smrg    if (gamma != 1.0) {
13943df4709Smrg	needgamma = TRUE;
14043df4709Smrg	/* note: gamma correction is out = in ^ gamma;
14143df4709Smrg	   gpu can only do LG2/EX2 therefore we transform into
14243df4709Smrg	   in ^ gamma = 2 ^ (log2(in) * gamma).
14343df4709Smrg	   Lots of scalar ops, unfortunately (better solution?) -
14443df4709Smrg	   without gamma that's 3 inst, with gamma it's 10...
14543df4709Smrg	   could use different gamma factors per channel,
14643df4709Smrg	   if that's of any use. */
14743df4709Smrg    }
14843df4709Smrg
149921a55d8Smrg    CLEAR (cb_conf);
150921a55d8Smrg    CLEAR (tex_res);
151921a55d8Smrg    CLEAR (tex_samp);
152921a55d8Smrg    CLEAR (vs_conf);
153921a55d8Smrg    CLEAR (ps_conf);
154921a55d8Smrg    CLEAR (vs_const_conf);
155921a55d8Smrg    CLEAR (ps_const_conf);
156921a55d8Smrg
15743df4709Smrg    dst_obj.offset = 0;
15843df4709Smrg    src_obj.offset = 0;
159c4ae5be6Smrg    dst_obj.bo = radeon_get_pixmap_bo(pPixmap);
160c4ae5be6Smrg    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pPixmap);
16140732134Srjs    dst_obj.surface = radeon_get_pixmap_surface(pPixmap);
162c4ae5be6Smrg
163921a55d8Smrg    dst_obj.pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8);
164921a55d8Smrg
165921a55d8Smrg    src_obj.pitch = pPriv->src_pitch;
166921a55d8Smrg    src_obj.width = pPriv->w;
167921a55d8Smrg    src_obj.height = pPriv->h;
168921a55d8Smrg    src_obj.bpp = 16;
169921a55d8Smrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
170921a55d8Smrg    src_obj.bo = pPriv->src_bo[pPriv->currentBuffer];
171c4ae5be6Smrg    src_obj.tiling_flags = 0;
17240732134Srjs    src_obj.surface = NULL;
173921a55d8Smrg
174921a55d8Smrg    dst_obj.width = pPixmap->drawable.width;
175921a55d8Smrg    dst_obj.height = pPixmap->drawable.height;
176921a55d8Smrg    dst_obj.bpp = pPixmap->drawable.bitsPerPixel;
177921a55d8Smrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
178921a55d8Smrg
179b13dfe66Smrg    if (!R600SetAccelState(pScrn,
180b13dfe66Smrg			   &src_obj,
181b13dfe66Smrg			   NULL,
182b13dfe66Smrg			   &dst_obj,
183b13dfe66Smrg			   accel_state->xv_vs_offset, accel_state->xv_ps_offset,
184b13dfe66Smrg			   3, 0xffffffff))
185921a55d8Smrg	return;
186921a55d8Smrg
187921a55d8Smrg#ifdef COMPOSITE
188921a55d8Smrg    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
189921a55d8Smrg    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
190921a55d8Smrg#else
191921a55d8Smrg    dstxoff = 0;
192921a55d8Smrg    dstyoff = 0;
193921a55d8Smrg#endif
194921a55d8Smrg
195921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
196921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->cbuf, 512);
197921a55d8Smrg    radeon_cp_start(pScrn);
198921a55d8Smrg
199921a55d8Smrg    evergreen_set_default_state(pScrn);
200921a55d8Smrg
201921a55d8Smrg    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
202921a55d8Smrg    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
203921a55d8Smrg    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
204921a55d8Smrg
205921a55d8Smrg    /* PS bool constant */
206921a55d8Smrg    switch(pPriv->id) {
207921a55d8Smrg    case FOURCC_YV12:
208921a55d8Smrg    case FOURCC_I420:
209921a55d8Smrg	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
210921a55d8Smrg	break;
211921a55d8Smrg    case FOURCC_UYVY:
212921a55d8Smrg    case FOURCC_YUY2:
213921a55d8Smrg    default:
214921a55d8Smrg	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
215921a55d8Smrg	break;
216921a55d8Smrg    }
217921a55d8Smrg
218921a55d8Smrg    /* Shader */
219921a55d8Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
220921a55d8Smrg    vs_conf.shader_size         = accel_state->vs_size;
221921a55d8Smrg    vs_conf.num_gprs            = 2;
222921a55d8Smrg    vs_conf.stack_size          = 0;
223921a55d8Smrg    vs_conf.bo                  = accel_state->shaders_bo;
224921a55d8Smrg    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
225921a55d8Smrg
226921a55d8Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
227921a55d8Smrg    ps_conf.shader_size         = accel_state->ps_size;
228921a55d8Smrg    ps_conf.num_gprs            = 3;
229921a55d8Smrg    ps_conf.stack_size          = 1;
230921a55d8Smrg    ps_conf.clamp_consts        = 0;
231921a55d8Smrg    ps_conf.export_mode         = 2;
232921a55d8Smrg    ps_conf.bo                  = accel_state->shaders_bo;
233921a55d8Smrg    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
234921a55d8Smrg
235921a55d8Smrg    /* Texture */
236921a55d8Smrg    switch(pPriv->id) {
237921a55d8Smrg    case FOURCC_YV12:
238921a55d8Smrg    case FOURCC_I420:
239921a55d8Smrg	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
240921a55d8Smrg
241921a55d8Smrg	/* Y texture */
242921a55d8Smrg	tex_res.id                  = 0;
243921a55d8Smrg	tex_res.w                   = accel_state->src_obj[0].width;
244921a55d8Smrg	tex_res.h                   = accel_state->src_obj[0].height;
245921a55d8Smrg	tex_res.pitch               = accel_state->src_obj[0].pitch;
246921a55d8Smrg	tex_res.depth               = 0;
247921a55d8Smrg	tex_res.dim                 = SQ_TEX_DIM_2D;
24843df4709Smrg	tex_res.base                = accel_state->src_obj[0].offset;
24943df4709Smrg	tex_res.mip_base            = accel_state->src_obj[0].offset;
250921a55d8Smrg	tex_res.size                = accel_state->src_size[0];
251921a55d8Smrg	tex_res.bo                  = accel_state->src_obj[0].bo;
252921a55d8Smrg	tex_res.mip_bo              = accel_state->src_obj[0].bo;
25340732134Srjs	tex_res.surface             = NULL;
254921a55d8Smrg
255921a55d8Smrg	tex_res.format              = FMT_8;
256921a55d8Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
257921a55d8Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
258921a55d8Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
259921a55d8Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
260921a55d8Smrg
261921a55d8Smrg	tex_res.base_level          = 0;
262921a55d8Smrg	tex_res.last_level          = 0;
263921a55d8Smrg	tex_res.perf_modulation     = 0;
264921a55d8Smrg	tex_res.interlaced          = 0;
26543df4709Smrg	if (accel_state->src_obj[0].tiling_flags == 0)
266b13dfe66Smrg	    tex_res.array_mode          = 1;
267921a55d8Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
268921a55d8Smrg
269921a55d8Smrg	/* Y sampler */
270921a55d8Smrg	tex_samp.id                 = 0;
271921a55d8Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
272921a55d8Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
273921a55d8Smrg	tex_samp.clamp_z            = SQ_TEX_WRAP;
274921a55d8Smrg
275921a55d8Smrg	/* xxx: switch to bicubic */
276921a55d8Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
277921a55d8Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
278921a55d8Smrg
279921a55d8Smrg	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
280921a55d8Smrg	tex_samp.mip_filter         = 0;			/* no mipmap */
281921a55d8Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
282921a55d8Smrg
283921a55d8Smrg	/* U or V texture */
284921a55d8Smrg	tex_res.id                  = 1;
285921a55d8Smrg	tex_res.format              = FMT_8;
286921a55d8Smrg	tex_res.w                   = accel_state->src_obj[0].width >> 1;
287921a55d8Smrg	tex_res.h                   = accel_state->src_obj[0].height >> 1;
288b13dfe66Smrg	tex_res.pitch               = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, pPriv->hw_align);
289921a55d8Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
290921a55d8Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
291921a55d8Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
292921a55d8Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
293921a55d8Smrg	tex_res.interlaced          = 0;
294921a55d8Smrg
29543df4709Smrg	tex_res.base                = accel_state->src_obj[0].offset + pPriv->planev_offset;
29643df4709Smrg	tex_res.mip_base            = accel_state->src_obj[0].offset + pPriv->planev_offset;
297921a55d8Smrg	tex_res.size                = tex_res.pitch * (pPriv->h >> 1);
29843df4709Smrg	if (accel_state->src_obj[0].tiling_flags == 0)
299b13dfe66Smrg	    tex_res.array_mode          = 1;
300921a55d8Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
301921a55d8Smrg
302921a55d8Smrg	/* U or V sampler */
303921a55d8Smrg	tex_samp.id                 = 1;
304921a55d8Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
305921a55d8Smrg
306921a55d8Smrg	/* U or V texture */
307921a55d8Smrg	tex_res.id                  = 2;
308921a55d8Smrg	tex_res.format              = FMT_8;
309921a55d8Smrg	tex_res.w                   = accel_state->src_obj[0].width >> 1;
310921a55d8Smrg	tex_res.h                   = accel_state->src_obj[0].height >> 1;
311b13dfe66Smrg	tex_res.pitch               = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, pPriv->hw_align);
312921a55d8Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
313921a55d8Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
314921a55d8Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
315921a55d8Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
316921a55d8Smrg	tex_res.interlaced          = 0;
317921a55d8Smrg
31843df4709Smrg	tex_res.base                = accel_state->src_obj[0].offset + pPriv->planeu_offset;
31943df4709Smrg	tex_res.mip_base            = accel_state->src_obj[0].offset + pPriv->planeu_offset;
320921a55d8Smrg	tex_res.size                = tex_res.pitch * (pPriv->h >> 1);
32143df4709Smrg	if (accel_state->src_obj[0].tiling_flags == 0)
322b13dfe66Smrg	    tex_res.array_mode          = 1;
323921a55d8Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
324921a55d8Smrg
325921a55d8Smrg	/* UV sampler */
326921a55d8Smrg	tex_samp.id                 = 2;
327921a55d8Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
328921a55d8Smrg	break;
329921a55d8Smrg    case FOURCC_UYVY:
330921a55d8Smrg    case FOURCC_YUY2:
331921a55d8Smrg    default:
332921a55d8Smrg	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
333921a55d8Smrg
33468105dcbSveego	/* YUV texture */
335921a55d8Smrg	tex_res.id                  = 0;
336921a55d8Smrg	tex_res.w                   = accel_state->src_obj[0].width;
337921a55d8Smrg	tex_res.h                   = accel_state->src_obj[0].height;
338921a55d8Smrg	tex_res.pitch               = accel_state->src_obj[0].pitch >> 1;
339921a55d8Smrg	tex_res.depth               = 0;
340921a55d8Smrg	tex_res.dim                 = SQ_TEX_DIM_2D;
34143df4709Smrg	tex_res.base                = accel_state->src_obj[0].offset;
34243df4709Smrg	tex_res.mip_base            = accel_state->src_obj[0].offset;
343921a55d8Smrg	tex_res.size                = accel_state->src_size[0];
344921a55d8Smrg	tex_res.bo                  = accel_state->src_obj[0].bo;
345921a55d8Smrg	tex_res.mip_bo              = accel_state->src_obj[0].bo;
34640732134Srjs	tex_res.surface             = NULL;
347921a55d8Smrg
348921a55d8Smrg	if (pPriv->id == FOURCC_UYVY)
34968105dcbSveego	    tex_res.format              = FMT_GB_GR;
350921a55d8Smrg	else
35168105dcbSveego	    tex_res.format              = FMT_BG_RG;
35268105dcbSveego	tex_res.dst_sel_x           = SQ_SEL_Y;
35368105dcbSveego	tex_res.dst_sel_y           = SQ_SEL_X;
35468105dcbSveego	tex_res.dst_sel_z           = SQ_SEL_Z;
355921a55d8Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
356921a55d8Smrg
357921a55d8Smrg	tex_res.base_level          = 0;
358921a55d8Smrg	tex_res.last_level          = 0;
359921a55d8Smrg	tex_res.perf_modulation     = 0;
360921a55d8Smrg	tex_res.interlaced          = 0;
36143df4709Smrg	if (accel_state->src_obj[0].tiling_flags == 0)
362b13dfe66Smrg	    tex_res.array_mode          = 1;
363921a55d8Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
364921a55d8Smrg
36568105dcbSveego	/* YUV sampler */
366921a55d8Smrg	tex_samp.id                 = 0;
367921a55d8Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
368921a55d8Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
369921a55d8Smrg	tex_samp.clamp_z            = SQ_TEX_WRAP;
370921a55d8Smrg
371921a55d8Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
372921a55d8Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
373921a55d8Smrg
374921a55d8Smrg	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
375921a55d8Smrg	tex_samp.mip_filter         = 0;			/* no mipmap */
376921a55d8Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
377921a55d8Smrg
378921a55d8Smrg	break;
379921a55d8Smrg    }
380921a55d8Smrg
381921a55d8Smrg    cb_conf.id = 0;
382921a55d8Smrg    cb_conf.w = accel_state->dst_obj.pitch;
383921a55d8Smrg    cb_conf.h = accel_state->dst_obj.height;
38443df4709Smrg    cb_conf.base = accel_state->dst_obj.offset;
385921a55d8Smrg    cb_conf.bo = accel_state->dst_obj.bo;
38640732134Srjs    cb_conf.surface = accel_state->dst_obj.surface;
387921a55d8Smrg
388921a55d8Smrg    switch (accel_state->dst_obj.bpp) {
389921a55d8Smrg    case 16:
390921a55d8Smrg	if (pPixmap->drawable.depth == 15) {
391921a55d8Smrg	    cb_conf.format = COLOR_1_5_5_5;
392921a55d8Smrg	    cb_conf.comp_swap = 1; /* ARGB */
393921a55d8Smrg	} else {
394921a55d8Smrg	    cb_conf.format = COLOR_5_6_5;
395921a55d8Smrg	    cb_conf.comp_swap = 2; /* RGB */
396921a55d8Smrg	}
397b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
398b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN16;
399b13dfe66Smrg#endif
400921a55d8Smrg	break;
401921a55d8Smrg    case 32:
402921a55d8Smrg	cb_conf.format = COLOR_8_8_8_8;
403921a55d8Smrg	cb_conf.comp_swap = 1; /* ARGB */
404b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
405b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN32;
406b13dfe66Smrg#endif
407921a55d8Smrg	break;
408921a55d8Smrg    default:
409921a55d8Smrg	return;
410921a55d8Smrg    }
411921a55d8Smrg
412921a55d8Smrg    cb_conf.source_format = EXPORT_4C_16BPC;
413921a55d8Smrg    cb_conf.blend_clamp = 1;
414b13dfe66Smrg    cb_conf.pmask = 0xf;
415b13dfe66Smrg    cb_conf.rop = 3;
41643df4709Smrg    if (accel_state->dst_obj.tiling_flags == 0) {
417b13dfe66Smrg	cb_conf.array_mode = 1;
418b13dfe66Smrg	cb_conf.non_disp_tiling = 1;
419b13dfe66Smrg    }
420921a55d8Smrg    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
421921a55d8Smrg
422b13dfe66Smrg    evergreen_set_spi(pScrn, (1 - 1), 1);
423921a55d8Smrg
424921a55d8Smrg    /* PS alu constants */
425921a55d8Smrg    ps_const_conf.size_bytes = 256;
426921a55d8Smrg    ps_const_conf.type = SHADER_TYPE_PS;
427921a55d8Smrg    ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
428921a55d8Smrg    ps_const_conf.bo = accel_state->cbuf.vb_bo;
42943df4709Smrg    ps_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_offset;
43040732134Srjs    ps_const_conf.cpu_ptr = (uint32_t *)(char *)ps_alu_consts;
431921a55d8Smrg
432921a55d8Smrg    ps_alu_consts[0] = off[0];
433921a55d8Smrg    ps_alu_consts[1] = off[1];
434921a55d8Smrg    ps_alu_consts[2] = off[2];
435921a55d8Smrg    ps_alu_consts[3] = yco;
436921a55d8Smrg
437921a55d8Smrg    ps_alu_consts[4] = uco[0];
438921a55d8Smrg    ps_alu_consts[5] = uco[1];
439921a55d8Smrg    ps_alu_consts[6] = uco[2];
440921a55d8Smrg    ps_alu_consts[7] = gamma;
441921a55d8Smrg
442921a55d8Smrg    ps_alu_consts[8] = vco[0];
443921a55d8Smrg    ps_alu_consts[9] = vco[1];
444921a55d8Smrg    ps_alu_consts[10] = vco[2];
445921a55d8Smrg    ps_alu_consts[11] = 0.0;
446921a55d8Smrg
447921a55d8Smrg    radeon_vbo_commit(pScrn, &accel_state->cbuf);
448921a55d8Smrg    evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);
449921a55d8Smrg
450921a55d8Smrg    /* VS alu constants */
451921a55d8Smrg    vs_const_conf.size_bytes = 256;
452921a55d8Smrg    vs_const_conf.type = SHADER_TYPE_VS;
453921a55d8Smrg    vs_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
454921a55d8Smrg    vs_const_conf.bo = accel_state->cbuf.vb_bo;
45543df4709Smrg    vs_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_offset;
45640732134Srjs    vs_const_conf.cpu_ptr = (uint32_t *)(char *)vs_alu_consts;
457921a55d8Smrg
458921a55d8Smrg    vs_alu_consts[0] = 1.0 / pPriv->w;
459921a55d8Smrg    vs_alu_consts[1] = 1.0 / pPriv->h;
460921a55d8Smrg    vs_alu_consts[2] = 0.0;
461921a55d8Smrg    vs_alu_consts[3] = 0.0;
462921a55d8Smrg
463921a55d8Smrg    radeon_vbo_commit(pScrn, &accel_state->cbuf);
464921a55d8Smrg    evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);
465921a55d8Smrg
466921a55d8Smrg    if (pPriv->vsync) {
467921a55d8Smrg	xf86CrtcPtr crtc;
468921a55d8Smrg	if (pPriv->desired_crtc)
469921a55d8Smrg	    crtc = pPriv->desired_crtc;
470921a55d8Smrg	else
47143df4709Smrg	    crtc = radeon_pick_best_crtc(pScrn,
472921a55d8Smrg					 pPriv->drw_x,
473921a55d8Smrg					 pPriv->drw_x + pPriv->dst_w,
474921a55d8Smrg					 pPriv->drw_y,
475921a55d8Smrg					 pPriv->drw_y + pPriv->dst_h);
476921a55d8Smrg	if (crtc)
477921a55d8Smrg	    evergreen_cp_wait_vline_sync(pScrn, pPixmap,
478921a55d8Smrg					 crtc,
479921a55d8Smrg					 pPriv->drw_y - crtc->y,
480921a55d8Smrg					 (pPriv->drw_y - crtc->y) + pPriv->dst_h);
481921a55d8Smrg    }
482921a55d8Smrg
483921a55d8Smrg    while (nBox--) {
48468105dcbSveego	float srcX, srcY, srcw, srch;
485921a55d8Smrg	int dstX, dstY, dstw, dsth;
486921a55d8Smrg	float *vb;
487921a55d8Smrg
488921a55d8Smrg
489921a55d8Smrg	dstX = pBox->x1 + dstxoff;
490921a55d8Smrg	dstY = pBox->y1 + dstyoff;
491921a55d8Smrg	dstw = pBox->x2 - pBox->x1;
492921a55d8Smrg	dsth = pBox->y2 - pBox->y1;
493921a55d8Smrg
494921a55d8Smrg	srcX = pPriv->src_x;
495921a55d8Smrg	srcX += ((pBox->x1 - pPriv->drw_x) *
49668105dcbSveego		 pPriv->src_w) / (float)pPriv->dst_w;
497921a55d8Smrg	srcY = pPriv->src_y;
498921a55d8Smrg	srcY += ((pBox->y1 - pPriv->drw_y) *
49968105dcbSveego		 pPriv->src_h) / (float)pPriv->dst_h;
500921a55d8Smrg
50168105dcbSveego	srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w;
50268105dcbSveego	srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h;
503921a55d8Smrg
504921a55d8Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
505921a55d8Smrg
506921a55d8Smrg	vb[0] = (float)dstX;
507921a55d8Smrg	vb[1] = (float)dstY;
508921a55d8Smrg	vb[2] = (float)srcX;
509921a55d8Smrg	vb[3] = (float)srcY;
510921a55d8Smrg
511921a55d8Smrg	vb[4] = (float)dstX;
512921a55d8Smrg	vb[5] = (float)(dstY + dsth);
513921a55d8Smrg	vb[6] = (float)srcX;
514921a55d8Smrg	vb[7] = (float)(srcY + srch);
515921a55d8Smrg
516921a55d8Smrg	vb[8] = (float)(dstX + dstw);
517921a55d8Smrg	vb[9] = (float)(dstY + dsth);
518921a55d8Smrg	vb[10] = (float)(srcX + srcw);
519921a55d8Smrg	vb[11] = (float)(srcY + srch);
520921a55d8Smrg
521921a55d8Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
522921a55d8Smrg
523921a55d8Smrg	pBox++;
524921a55d8Smrg    }
525921a55d8Smrg
526921a55d8Smrg    evergreen_finish_op(pScrn, 16);
527921a55d8Smrg
528921a55d8Smrg    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
529921a55d8Smrg}
53043df4709Smrg
53143df4709Smrg#endif
532