/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Author: Alex Deucher <alexander.deucher@amd.com>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <math.h>

#include "xf86.h"

#include "exa.h"

#include "radeon.h"
#include "radeon_reg.h"
#include "evergreen_shader.h"
#include "evergreen_reg.h"
#include "evergreen_state.h"

#include "radeon_video.h"

#include <X11/extensions/Xv.h>
#include "fourcc.h"

#include "damage.h"

#include "radeon_exa_shared.h"
#include "radeon_vbo.h"

51de2362d3Smrg/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces
52de2362d3Smrg   note the difference to the parameters used in overlay are due
53de2362d3Smrg   to 10bit vs. float calcs */
54de2362d3Smrgstatic REF_TRANSFORM trans[2] =
55de2362d3Smrg{
56de2362d3Smrg    {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */
57de2362d3Smrg    {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0}  /* BT.709 */
58de2362d3Smrg};
59de2362d3Smrg
60de2362d3Smrgvoid
61de2362d3SmrgEVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
62de2362d3Smrg{
63de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
64de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
65de2362d3Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
66de2362d3Smrg    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
67de2362d3Smrg    int nBox = REGION_NUM_RECTS(&pPriv->clip);
68de2362d3Smrg    int dstxoff, dstyoff;
69de2362d3Smrg    struct r600_accel_object src_obj, dst_obj;
70de2362d3Smrg    cb_config_t     cb_conf;
71de2362d3Smrg    tex_resource_t  tex_res;
72de2362d3Smrg    tex_sampler_t   tex_samp;
73de2362d3Smrg    shader_config_t vs_conf, ps_conf;
74de2362d3Smrg    /*
75de2362d3Smrg     * y' = y - .0625
76de2362d3Smrg     * u' = u - .5
77de2362d3Smrg     * v' = v - .5;
78de2362d3Smrg     *
79de2362d3Smrg     * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
80de2362d3Smrg     * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
81de2362d3Smrg     * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
82de2362d3Smrg     *
83de2362d3Smrg     * DP3 might look like the straightforward solution
84de2362d3Smrg     * but we'd need to move the texture yuv values in
85de2362d3Smrg     * the same reg for this to work. Therefore use MADs.
86de2362d3Smrg     * Brightness just adds to the off constant.
87de2362d3Smrg     * Contrast is multiplication of luminance.
88de2362d3Smrg     * Saturation and hue change the u and v coeffs.
89de2362d3Smrg     * Default values (before adjustments - depend on colorspace):
90de2362d3Smrg     * yco = 1.1643
91de2362d3Smrg     * uco = 0, -0.39173, 2.017
92de2362d3Smrg     * vco = 1.5958, -0.8129, 0
93de2362d3Smrg     * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
94de2362d3Smrg     *       -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
95de2362d3Smrg     *       -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
96de2362d3Smrg     *
97de2362d3Smrg     * temp = MAD(yco, yuv.yyyy, off)
98de2362d3Smrg     * temp = MAD(uco, yuv.uuuu, temp)
99de2362d3Smrg     * result = MAD(vco, yuv.vvvv, temp)
100de2362d3Smrg     */
101de2362d3Smrg    /* TODO: calc consts in the shader */
102de2362d3Smrg    const float Loff = -0.0627;
103de2362d3Smrg    const float Coff = -0.502;
104de2362d3Smrg    float uvcosf, uvsinf;
105de2362d3Smrg    float yco;
106de2362d3Smrg    float uco[3], vco[3], off[3];
107de2362d3Smrg    float bright, cont, gamma;
108de2362d3Smrg    int ref = pPriv->transform_index;
109de2362d3Smrg    float *ps_alu_consts;
110de2362d3Smrg    const_config_t ps_const_conf;
111de2362d3Smrg    float *vs_alu_consts;
112de2362d3Smrg    const_config_t vs_const_conf;
113de2362d3Smrg
114de2362d3Smrg    cont = RTFContrast(pPriv->contrast);
115de2362d3Smrg    bright = RTFBrightness(pPriv->brightness);
116de2362d3Smrg    gamma = (float)pPriv->gamma / 1000.0;
117de2362d3Smrg    uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
118de2362d3Smrg    uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
119de2362d3Smrg    /* overlay video also does pre-gamma contrast/sat adjust, should we? */
120de2362d3Smrg
121de2362d3Smrg    yco = trans[ref].RefLuma * cont;
122de2362d3Smrg    uco[0] = -trans[ref].RefRCr * uvsinf;
123de2362d3Smrg    uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
124de2362d3Smrg    uco[2] = trans[ref].RefBCb * uvcosf;
125de2362d3Smrg    vco[0] = trans[ref].RefRCr * uvcosf;
126de2362d3Smrg    vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
127de2362d3Smrg    vco[2] = trans[ref].RefBCb * uvsinf;
128de2362d3Smrg    off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
129de2362d3Smrg    off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
130de2362d3Smrg    off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
131de2362d3Smrg
132de2362d3Smrg    // XXX
133de2362d3Smrg    gamma = 1.0;
134de2362d3Smrg
135de2362d3Smrg    CLEAR (cb_conf);
136de2362d3Smrg    CLEAR (tex_res);
137de2362d3Smrg    CLEAR (tex_samp);
138de2362d3Smrg    CLEAR (vs_conf);
139de2362d3Smrg    CLEAR (ps_conf);
140de2362d3Smrg    CLEAR (vs_const_conf);
141de2362d3Smrg    CLEAR (ps_const_conf);
142de2362d3Smrg
14339413783Smrg    dst_obj.bo = radeon_get_pixmap_bo(pPixmap)->bo.radeon;
144de2362d3Smrg    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pPixmap);
145de2362d3Smrg    dst_obj.surface = radeon_get_pixmap_surface(pPixmap);
146de2362d3Smrg
147de2362d3Smrg    dst_obj.pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8);
148de2362d3Smrg
149de2362d3Smrg    src_obj.pitch = pPriv->src_pitch;
150de2362d3Smrg    src_obj.width = pPriv->w;
151de2362d3Smrg    src_obj.height = pPriv->h;
152de2362d3Smrg    src_obj.bpp = 16;
153de2362d3Smrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
154de2362d3Smrg    src_obj.bo = pPriv->src_bo[pPriv->currentBuffer];
155de2362d3Smrg    src_obj.tiling_flags = 0;
156de2362d3Smrg    src_obj.surface = NULL;
157de2362d3Smrg
158de2362d3Smrg    dst_obj.width = pPixmap->drawable.width;
159de2362d3Smrg    dst_obj.height = pPixmap->drawable.height;
160de2362d3Smrg    dst_obj.bpp = pPixmap->drawable.bitsPerPixel;
161de2362d3Smrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
162de2362d3Smrg
163de2362d3Smrg    if (!R600SetAccelState(pScrn,
164de2362d3Smrg			   &src_obj,
165de2362d3Smrg			   NULL,
166de2362d3Smrg			   &dst_obj,
167de2362d3Smrg			   accel_state->xv_vs_offset, accel_state->xv_ps_offset,
168de2362d3Smrg			   3, 0xffffffff))
169de2362d3Smrg	return;
170de2362d3Smrg
171de2362d3Smrg#ifdef COMPOSITE
172de2362d3Smrg    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
173de2362d3Smrg    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
174de2362d3Smrg#else
175de2362d3Smrg    dstxoff = 0;
176de2362d3Smrg    dstyoff = 0;
177de2362d3Smrg#endif
178de2362d3Smrg
179de2362d3Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
180de2362d3Smrg    radeon_vbo_check(pScrn, &accel_state->cbuf, 512);
181de2362d3Smrg    radeon_cp_start(pScrn);
182de2362d3Smrg
183de2362d3Smrg    evergreen_set_default_state(pScrn);
184de2362d3Smrg
185de2362d3Smrg    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
186de2362d3Smrg    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
187de2362d3Smrg    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
188de2362d3Smrg
189de2362d3Smrg    /* PS bool constant */
190de2362d3Smrg    switch(pPriv->id) {
191de2362d3Smrg    case FOURCC_YV12:
192de2362d3Smrg    case FOURCC_I420:
193de2362d3Smrg	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
194de2362d3Smrg	break;
195de2362d3Smrg    case FOURCC_UYVY:
196de2362d3Smrg    case FOURCC_YUY2:
197de2362d3Smrg    default:
198de2362d3Smrg	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
199de2362d3Smrg	break;
200de2362d3Smrg    }
201de2362d3Smrg
202de2362d3Smrg    /* Shader */
203de2362d3Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
204de2362d3Smrg    vs_conf.shader_size         = accel_state->vs_size;
205de2362d3Smrg    vs_conf.num_gprs            = 2;
206de2362d3Smrg    vs_conf.stack_size          = 0;
207de2362d3Smrg    vs_conf.bo                  = accel_state->shaders_bo;
208de2362d3Smrg    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
209de2362d3Smrg
210de2362d3Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
211de2362d3Smrg    ps_conf.shader_size         = accel_state->ps_size;
212de2362d3Smrg    ps_conf.num_gprs            = 3;
213de2362d3Smrg    ps_conf.stack_size          = 1;
214de2362d3Smrg    ps_conf.clamp_consts        = 0;
215de2362d3Smrg    ps_conf.export_mode         = 2;
216de2362d3Smrg    ps_conf.bo                  = accel_state->shaders_bo;
217de2362d3Smrg    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
218de2362d3Smrg
219de2362d3Smrg    /* Texture */
220de2362d3Smrg    switch(pPriv->id) {
221de2362d3Smrg    case FOURCC_YV12:
222de2362d3Smrg    case FOURCC_I420:
223de2362d3Smrg	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
224de2362d3Smrg
225de2362d3Smrg	/* Y texture */
226de2362d3Smrg	tex_res.id                  = 0;
227de2362d3Smrg	tex_res.w                   = accel_state->src_obj[0].width;
228de2362d3Smrg	tex_res.h                   = accel_state->src_obj[0].height;
229de2362d3Smrg	tex_res.pitch               = accel_state->src_obj[0].pitch;
230de2362d3Smrg	tex_res.depth               = 0;
231de2362d3Smrg	tex_res.dim                 = SQ_TEX_DIM_2D;
23218781e08Smrg	tex_res.base                = 0;
23318781e08Smrg	tex_res.mip_base            = 0;
234de2362d3Smrg	tex_res.size                = accel_state->src_size[0];
235de2362d3Smrg	tex_res.bo                  = accel_state->src_obj[0].bo;
236de2362d3Smrg	tex_res.mip_bo              = accel_state->src_obj[0].bo;
237de2362d3Smrg	tex_res.surface             = NULL;
238de2362d3Smrg
239de2362d3Smrg	tex_res.format              = FMT_8;
240de2362d3Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
241de2362d3Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
242de2362d3Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
243de2362d3Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
244de2362d3Smrg
245de2362d3Smrg	tex_res.base_level          = 0;
246de2362d3Smrg	tex_res.last_level          = 0;
247de2362d3Smrg	tex_res.perf_modulation     = 0;
248de2362d3Smrg	tex_res.interlaced          = 0;
24918781e08Smrg	if ((accel_state->src_obj[0].tiling_flags & RADEON_TILING_MASK) ==
25018781e08Smrg	    RADEON_TILING_LINEAR)
251de2362d3Smrg	    tex_res.array_mode          = 1;
252de2362d3Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
253de2362d3Smrg
254de2362d3Smrg	/* Y sampler */
255de2362d3Smrg	tex_samp.id                 = 0;
256de2362d3Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
257de2362d3Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
258de2362d3Smrg	tex_samp.clamp_z            = SQ_TEX_WRAP;
259de2362d3Smrg
260de2362d3Smrg	/* xxx: switch to bicubic */
261de2362d3Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
262de2362d3Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
263de2362d3Smrg
264de2362d3Smrg	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
265de2362d3Smrg	tex_samp.mip_filter         = 0;			/* no mipmap */
266de2362d3Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
267de2362d3Smrg
268de2362d3Smrg	/* U or V texture */
269de2362d3Smrg	tex_res.id                  = 1;
270de2362d3Smrg	tex_res.format              = FMT_8;
271de2362d3Smrg	tex_res.w                   = accel_state->src_obj[0].width >> 1;
272de2362d3Smrg	tex_res.h                   = accel_state->src_obj[0].height >> 1;
273de2362d3Smrg	tex_res.pitch               = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, pPriv->hw_align);
274de2362d3Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
275de2362d3Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
276de2362d3Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
277de2362d3Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
278de2362d3Smrg	tex_res.interlaced          = 0;
279de2362d3Smrg
28018781e08Smrg	tex_res.base                = pPriv->planev_offset;
28118781e08Smrg	tex_res.mip_base            = pPriv->planev_offset;
282de2362d3Smrg	tex_res.size                = tex_res.pitch * (pPriv->h >> 1);
28318781e08Smrg	if ((accel_state->src_obj[0].tiling_flags & RADEON_TILING_MASK) ==
28418781e08Smrg	    RADEON_TILING_LINEAR)
285de2362d3Smrg	    tex_res.array_mode          = 1;
286de2362d3Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
287de2362d3Smrg
288de2362d3Smrg	/* U or V sampler */
289de2362d3Smrg	tex_samp.id                 = 1;
290de2362d3Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
291de2362d3Smrg
292de2362d3Smrg	/* U or V texture */
293de2362d3Smrg	tex_res.id                  = 2;
294de2362d3Smrg	tex_res.format              = FMT_8;
295de2362d3Smrg	tex_res.w                   = accel_state->src_obj[0].width >> 1;
296de2362d3Smrg	tex_res.h                   = accel_state->src_obj[0].height >> 1;
297de2362d3Smrg	tex_res.pitch               = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, pPriv->hw_align);
298de2362d3Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
299de2362d3Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
300de2362d3Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
301de2362d3Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
302de2362d3Smrg	tex_res.interlaced          = 0;
303de2362d3Smrg
30418781e08Smrg	tex_res.base                = pPriv->planeu_offset;
30518781e08Smrg	tex_res.mip_base            = pPriv->planeu_offset;
306de2362d3Smrg	tex_res.size                = tex_res.pitch * (pPriv->h >> 1);
30718781e08Smrg	if ((accel_state->src_obj[0].tiling_flags & RADEON_TILING_MASK) ==
30818781e08Smrg	    RADEON_TILING_LINEAR)
309de2362d3Smrg	    tex_res.array_mode          = 1;
310de2362d3Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
311de2362d3Smrg
312de2362d3Smrg	/* UV sampler */
313de2362d3Smrg	tex_samp.id                 = 2;
314de2362d3Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
315de2362d3Smrg	break;
316de2362d3Smrg    case FOURCC_UYVY:
317de2362d3Smrg    case FOURCC_YUY2:
318de2362d3Smrg    default:
319de2362d3Smrg	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
320de2362d3Smrg
321de2362d3Smrg	/* YUV texture */
322de2362d3Smrg	tex_res.id                  = 0;
323de2362d3Smrg	tex_res.w                   = accel_state->src_obj[0].width;
324de2362d3Smrg	tex_res.h                   = accel_state->src_obj[0].height;
325de2362d3Smrg	tex_res.pitch               = accel_state->src_obj[0].pitch >> 1;
326de2362d3Smrg	tex_res.depth               = 0;
327de2362d3Smrg	tex_res.dim                 = SQ_TEX_DIM_2D;
32818781e08Smrg	tex_res.base                = 0;
32918781e08Smrg	tex_res.mip_base            = 0;
330de2362d3Smrg	tex_res.size                = accel_state->src_size[0];
331de2362d3Smrg	tex_res.bo                  = accel_state->src_obj[0].bo;
332de2362d3Smrg	tex_res.mip_bo              = accel_state->src_obj[0].bo;
333de2362d3Smrg	tex_res.surface             = NULL;
334de2362d3Smrg
335de2362d3Smrg	if (pPriv->id == FOURCC_UYVY)
336de2362d3Smrg	    tex_res.format              = FMT_GB_GR;
337de2362d3Smrg	else
338de2362d3Smrg	    tex_res.format              = FMT_BG_RG;
339de2362d3Smrg	tex_res.dst_sel_x           = SQ_SEL_Y;
340de2362d3Smrg	tex_res.dst_sel_y           = SQ_SEL_X;
341de2362d3Smrg	tex_res.dst_sel_z           = SQ_SEL_Z;
342de2362d3Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
343de2362d3Smrg
344de2362d3Smrg	tex_res.base_level          = 0;
345de2362d3Smrg	tex_res.last_level          = 0;
346de2362d3Smrg	tex_res.perf_modulation     = 0;
347de2362d3Smrg	tex_res.interlaced          = 0;
34818781e08Smrg	if ((accel_state->src_obj[0].tiling_flags & RADEON_TILING_MASK) ==
34918781e08Smrg	    RADEON_TILING_LINEAR)
350de2362d3Smrg	    tex_res.array_mode          = 1;
351de2362d3Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
352de2362d3Smrg
353de2362d3Smrg	/* YUV sampler */
354de2362d3Smrg	tex_samp.id                 = 0;
355de2362d3Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
356de2362d3Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
357de2362d3Smrg	tex_samp.clamp_z            = SQ_TEX_WRAP;
358de2362d3Smrg
359de2362d3Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
360de2362d3Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
361de2362d3Smrg
362de2362d3Smrg	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
363de2362d3Smrg	tex_samp.mip_filter         = 0;			/* no mipmap */
364de2362d3Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
365de2362d3Smrg
366de2362d3Smrg	break;
367de2362d3Smrg    }
368de2362d3Smrg
369de2362d3Smrg    cb_conf.id = 0;
370de2362d3Smrg    cb_conf.w = accel_state->dst_obj.pitch;
371de2362d3Smrg    cb_conf.h = accel_state->dst_obj.height;
37218781e08Smrg    cb_conf.base = 0;
373de2362d3Smrg    cb_conf.bo = accel_state->dst_obj.bo;
374de2362d3Smrg    cb_conf.surface = accel_state->dst_obj.surface;
375de2362d3Smrg
376de2362d3Smrg    switch (accel_state->dst_obj.bpp) {
377de2362d3Smrg    case 16:
378de2362d3Smrg	if (pPixmap->drawable.depth == 15) {
379de2362d3Smrg	    cb_conf.format = COLOR_1_5_5_5;
380de2362d3Smrg	    cb_conf.comp_swap = 1; /* ARGB */
381de2362d3Smrg	} else {
382de2362d3Smrg	    cb_conf.format = COLOR_5_6_5;
383de2362d3Smrg	    cb_conf.comp_swap = 2; /* RGB */
384de2362d3Smrg	}
385de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
386de2362d3Smrg	cb_conf.endian = ENDIAN_8IN16;
387de2362d3Smrg#endif
388de2362d3Smrg	break;
389de2362d3Smrg    case 32:
390de2362d3Smrg	cb_conf.format = COLOR_8_8_8_8;
391de2362d3Smrg	cb_conf.comp_swap = 1; /* ARGB */
392de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
393de2362d3Smrg	cb_conf.endian = ENDIAN_8IN32;
394de2362d3Smrg#endif
395de2362d3Smrg	break;
396de2362d3Smrg    default:
397de2362d3Smrg	return;
398de2362d3Smrg    }
399de2362d3Smrg
400de2362d3Smrg    cb_conf.source_format = EXPORT_4C_16BPC;
401de2362d3Smrg    cb_conf.blend_clamp = 1;
402de2362d3Smrg    cb_conf.pmask = 0xf;
403de2362d3Smrg    cb_conf.rop = 3;
40418781e08Smrg    if ((accel_state->dst_obj.tiling_flags & RADEON_TILING_MASK) ==
40518781e08Smrg	RADEON_TILING_LINEAR) {
406de2362d3Smrg	cb_conf.array_mode = 1;
407de2362d3Smrg	cb_conf.non_disp_tiling = 1;
408de2362d3Smrg    }
409de2362d3Smrg    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
410de2362d3Smrg
411de2362d3Smrg    evergreen_set_spi(pScrn, (1 - 1), 1);
412de2362d3Smrg
413de2362d3Smrg    /* PS alu constants */
414de2362d3Smrg    ps_const_conf.size_bytes = 256;
415de2362d3Smrg    ps_const_conf.type = SHADER_TYPE_PS;
416de2362d3Smrg    ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
417de2362d3Smrg    ps_const_conf.bo = accel_state->cbuf.vb_bo;
41818781e08Smrg    ps_const_conf.const_addr = accel_state->cbuf.vb_offset;
419de2362d3Smrg    ps_const_conf.cpu_ptr = (uint32_t *)(char *)ps_alu_consts;
420de2362d3Smrg
421de2362d3Smrg    ps_alu_consts[0] = off[0];
422de2362d3Smrg    ps_alu_consts[1] = off[1];
423de2362d3Smrg    ps_alu_consts[2] = off[2];
424de2362d3Smrg    ps_alu_consts[3] = yco;
425de2362d3Smrg
426de2362d3Smrg    ps_alu_consts[4] = uco[0];
427de2362d3Smrg    ps_alu_consts[5] = uco[1];
428de2362d3Smrg    ps_alu_consts[6] = uco[2];
429de2362d3Smrg    ps_alu_consts[7] = gamma;
430de2362d3Smrg
431de2362d3Smrg    ps_alu_consts[8] = vco[0];
432de2362d3Smrg    ps_alu_consts[9] = vco[1];
433de2362d3Smrg    ps_alu_consts[10] = vco[2];
434de2362d3Smrg    ps_alu_consts[11] = 0.0;
435de2362d3Smrg
436de2362d3Smrg    radeon_vbo_commit(pScrn, &accel_state->cbuf);
437de2362d3Smrg    evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);
438de2362d3Smrg
439de2362d3Smrg    /* VS alu constants */
440de2362d3Smrg    vs_const_conf.size_bytes = 256;
441de2362d3Smrg    vs_const_conf.type = SHADER_TYPE_VS;
442de2362d3Smrg    vs_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
443de2362d3Smrg    vs_const_conf.bo = accel_state->cbuf.vb_bo;
44418781e08Smrg    vs_const_conf.const_addr = accel_state->cbuf.vb_offset;
445de2362d3Smrg    vs_const_conf.cpu_ptr = (uint32_t *)(char *)vs_alu_consts;
446de2362d3Smrg
447de2362d3Smrg    vs_alu_consts[0] = 1.0 / pPriv->w;
448de2362d3Smrg    vs_alu_consts[1] = 1.0 / pPriv->h;
449de2362d3Smrg    vs_alu_consts[2] = 0.0;
450de2362d3Smrg    vs_alu_consts[3] = 0.0;
451de2362d3Smrg
452de2362d3Smrg    radeon_vbo_commit(pScrn, &accel_state->cbuf);
453de2362d3Smrg    evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);
454de2362d3Smrg
455de2362d3Smrg    if (pPriv->vsync) {
456de2362d3Smrg	xf86CrtcPtr crtc;
457de2362d3Smrg	if (pPriv->desired_crtc)
458de2362d3Smrg	    crtc = pPriv->desired_crtc;
459de2362d3Smrg	else
46018781e08Smrg	    crtc = radeon_pick_best_crtc(pScrn, FALSE,
461de2362d3Smrg					 pPriv->drw_x,
462de2362d3Smrg					 pPriv->drw_x + pPriv->dst_w,
463de2362d3Smrg					 pPriv->drw_y,
464de2362d3Smrg					 pPriv->drw_y + pPriv->dst_h);
465de2362d3Smrg	if (crtc)
466de2362d3Smrg	    evergreen_cp_wait_vline_sync(pScrn, pPixmap,
467de2362d3Smrg					 crtc,
468de2362d3Smrg					 pPriv->drw_y - crtc->y,
469de2362d3Smrg					 (pPriv->drw_y - crtc->y) + pPriv->dst_h);
470de2362d3Smrg    }
471de2362d3Smrg
472de2362d3Smrg    while (nBox--) {
473de2362d3Smrg	float srcX, srcY, srcw, srch;
474de2362d3Smrg	int dstX, dstY, dstw, dsth;
475de2362d3Smrg	float *vb;
476de2362d3Smrg
477de2362d3Smrg
478de2362d3Smrg	dstX = pBox->x1 + dstxoff;
479de2362d3Smrg	dstY = pBox->y1 + dstyoff;
480de2362d3Smrg	dstw = pBox->x2 - pBox->x1;
481de2362d3Smrg	dsth = pBox->y2 - pBox->y1;
482de2362d3Smrg
483de2362d3Smrg	srcX = pPriv->src_x;
484de2362d3Smrg	srcX += ((pBox->x1 - pPriv->drw_x) *
485de2362d3Smrg		 pPriv->src_w) / (float)pPriv->dst_w;
486de2362d3Smrg	srcY = pPriv->src_y;
487de2362d3Smrg	srcY += ((pBox->y1 - pPriv->drw_y) *
488de2362d3Smrg		 pPriv->src_h) / (float)pPriv->dst_h;
489de2362d3Smrg
490de2362d3Smrg	srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w;
491de2362d3Smrg	srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h;
492de2362d3Smrg
493de2362d3Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
494de2362d3Smrg
495de2362d3Smrg	vb[0] = (float)dstX;
496de2362d3Smrg	vb[1] = (float)dstY;
497de2362d3Smrg	vb[2] = (float)srcX;
498de2362d3Smrg	vb[3] = (float)srcY;
499de2362d3Smrg
500de2362d3Smrg	vb[4] = (float)dstX;
501de2362d3Smrg	vb[5] = (float)(dstY + dsth);
502de2362d3Smrg	vb[6] = (float)srcX;
503de2362d3Smrg	vb[7] = (float)(srcY + srch);
504de2362d3Smrg
505de2362d3Smrg	vb[8] = (float)(dstX + dstw);
506de2362d3Smrg	vb[9] = (float)(dstY + dsth);
507de2362d3Smrg	vb[10] = (float)(srcX + srcw);
508de2362d3Smrg	vb[11] = (float)(srcY + srch);
509de2362d3Smrg
510de2362d3Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
511de2362d3Smrg
512de2362d3Smrg	pBox++;
513de2362d3Smrg    }
514de2362d3Smrg
515de2362d3Smrg    evergreen_finish_op(pScrn, 16);
516de2362d3Smrg
517de2362d3Smrg    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
518de2362d3Smrg}
519