evergreen_textured_videofuncs.c revision 921a55d8
1921a55d8Smrg/*
2921a55d8Smrg * Copyright 2010 Advanced Micro Devices, Inc.
3921a55d8Smrg *
4921a55d8Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5921a55d8Smrg * copy of this software and associated documentation files (the "Software"),
6921a55d8Smrg * to deal in the Software without restriction, including without limitation
7921a55d8Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8921a55d8Smrg * and/or sell copies of the Software, and to permit persons to whom the
9921a55d8Smrg * Software is furnished to do so, subject to the following conditions:
10921a55d8Smrg *
11921a55d8Smrg * The above copyright notice and this permission notice (including the next
12921a55d8Smrg * paragraph) shall be included in all copies or substantial portions of the
13921a55d8Smrg * Software.
14921a55d8Smrg *
15921a55d8Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16921a55d8Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17921a55d8Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18921a55d8Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19921a55d8Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20921a55d8Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21921a55d8Smrg * SOFTWARE.
22921a55d8Smrg *
23921a55d8Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24921a55d8Smrg *
25921a55d8Smrg */
26921a55d8Smrg
27921a55d8Smrg#ifdef HAVE_CONFIG_H
28921a55d8Smrg#include "config.h"
29921a55d8Smrg#endif
30921a55d8Smrg
31921a55d8Smrg#ifdef XF86DRM_MODE
32921a55d8Smrg
33921a55d8Smrg#include "xf86.h"
34921a55d8Smrg
35921a55d8Smrg#include "exa.h"
36921a55d8Smrg
37921a55d8Smrg#include "radeon.h"
38921a55d8Smrg#include "radeon_reg.h"
39921a55d8Smrg#include "evergreen_shader.h"
40921a55d8Smrg#include "evergreen_reg.h"
41921a55d8Smrg#include "evergreen_state.h"
42921a55d8Smrg
43921a55d8Smrg#include "radeon_video.h"
44921a55d8Smrg
45921a55d8Smrg#include <X11/extensions/Xv.h>
46921a55d8Smrg#include "fourcc.h"
47921a55d8Smrg
48921a55d8Smrg#include "damage.h"
49921a55d8Smrg
50921a55d8Smrg#include "radeon_exa_shared.h"
51921a55d8Smrg#include "radeon_vbo.h"
52921a55d8Smrg
53921a55d8Smrg/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces
54921a55d8Smrg   note the difference to the parameters used in overlay are due
55921a55d8Smrg   to 10bit vs. float calcs */
56921a55d8Smrgstatic REF_TRANSFORM trans[2] =
57921a55d8Smrg{
58921a55d8Smrg    {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */
59921a55d8Smrg    {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0}  /* BT.709 */
60921a55d8Smrg};
61921a55d8Smrg
62921a55d8Smrgvoid
63921a55d8SmrgEVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
64921a55d8Smrg{
65921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
66921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
67921a55d8Smrg    PixmapPtr pPixmap = pPriv->pPixmap;
68921a55d8Smrg    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
69921a55d8Smrg    int nBox = REGION_NUM_RECTS(&pPriv->clip);
70921a55d8Smrg    int dstxoff, dstyoff;
71921a55d8Smrg    struct r600_accel_object src_obj, dst_obj;
72921a55d8Smrg    cb_config_t     cb_conf;
73921a55d8Smrg    tex_resource_t  tex_res;
74921a55d8Smrg    tex_sampler_t   tex_samp;
75921a55d8Smrg    shader_config_t vs_conf, ps_conf;
76921a55d8Smrg    /*
77921a55d8Smrg     * y' = y - .0625
78921a55d8Smrg     * u' = u - .5
79921a55d8Smrg     * v' = v - .5;
80921a55d8Smrg     *
81921a55d8Smrg     * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
82921a55d8Smrg     * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
83921a55d8Smrg     * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
84921a55d8Smrg     *
85921a55d8Smrg     * DP3 might look like the straightforward solution
86921a55d8Smrg     * but we'd need to move the texture yuv values in
87921a55d8Smrg     * the same reg for this to work. Therefore use MADs.
88921a55d8Smrg     * Brightness just adds to the off constant.
89921a55d8Smrg     * Contrast is multiplication of luminance.
90921a55d8Smrg     * Saturation and hue change the u and v coeffs.
91921a55d8Smrg     * Default values (before adjustments - depend on colorspace):
92921a55d8Smrg     * yco = 1.1643
93921a55d8Smrg     * uco = 0, -0.39173, 2.017
94921a55d8Smrg     * vco = 1.5958, -0.8129, 0
95921a55d8Smrg     * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
96921a55d8Smrg     *       -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
97921a55d8Smrg     *       -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
98921a55d8Smrg     *
99921a55d8Smrg     * temp = MAD(yco, yuv.yyyy, off)
100921a55d8Smrg     * temp = MAD(uco, yuv.uuuu, temp)
101921a55d8Smrg     * result = MAD(vco, yuv.vvvv, temp)
102921a55d8Smrg     */
103921a55d8Smrg    /* TODO: calc consts in the shader */
104921a55d8Smrg    const float Loff = -0.0627;
105921a55d8Smrg    const float Coff = -0.502;
106921a55d8Smrg    float uvcosf, uvsinf;
107921a55d8Smrg    float yco;
108921a55d8Smrg    float uco[3], vco[3], off[3];
109921a55d8Smrg    float bright, cont, gamma;
110921a55d8Smrg    int ref = pPriv->transform_index;
111921a55d8Smrg    Bool needgamma = FALSE;
112921a55d8Smrg    float *ps_alu_consts;
113921a55d8Smrg    const_config_t ps_const_conf;
114921a55d8Smrg    float *vs_alu_consts;
115921a55d8Smrg    const_config_t vs_const_conf;
116921a55d8Smrg
117921a55d8Smrg    cont = RTFContrast(pPriv->contrast);
118921a55d8Smrg    bright = RTFBrightness(pPriv->brightness);
119921a55d8Smrg    gamma = (float)pPriv->gamma / 1000.0;
120921a55d8Smrg    uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
121921a55d8Smrg    uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
122921a55d8Smrg    /* overlay video also does pre-gamma contrast/sat adjust, should we? */
123921a55d8Smrg
124921a55d8Smrg    yco = trans[ref].RefLuma * cont;
125921a55d8Smrg    uco[0] = -trans[ref].RefRCr * uvsinf;
126921a55d8Smrg    uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
127921a55d8Smrg    uco[2] = trans[ref].RefBCb * uvcosf;
128921a55d8Smrg    vco[0] = trans[ref].RefRCr * uvcosf;
129921a55d8Smrg    vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
130921a55d8Smrg    vco[2] = trans[ref].RefBCb * uvsinf;
131921a55d8Smrg    off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
132921a55d8Smrg    off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
133921a55d8Smrg    off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
134921a55d8Smrg
135921a55d8Smrg    // XXX
136921a55d8Smrg    gamma = 1.0;
137921a55d8Smrg
138921a55d8Smrg    if (gamma != 1.0) {
139921a55d8Smrg	needgamma = TRUE;
140921a55d8Smrg	/* note: gamma correction is out = in ^ gamma;
141921a55d8Smrg	   gpu can only do LG2/EX2 therefore we transform into
142921a55d8Smrg	   in ^ gamma = 2 ^ (log2(in) * gamma).
143921a55d8Smrg	   Lots of scalar ops, unfortunately (better solution?) -
144921a55d8Smrg	   without gamma that's 3 inst, with gamma it's 10...
145921a55d8Smrg	   could use different gamma factors per channel,
146921a55d8Smrg	   if that's of any use. */
147921a55d8Smrg    }
148921a55d8Smrg
149921a55d8Smrg    CLEAR (cb_conf);
150921a55d8Smrg    CLEAR (tex_res);
151921a55d8Smrg    CLEAR (tex_samp);
152921a55d8Smrg    CLEAR (vs_conf);
153921a55d8Smrg    CLEAR (ps_conf);
154921a55d8Smrg    CLEAR (vs_const_conf);
155921a55d8Smrg    CLEAR (ps_const_conf);
156921a55d8Smrg
157921a55d8Smrg#if defined(XF86DRM_MODE)
158921a55d8Smrg    if (info->cs) {
159921a55d8Smrg	dst_obj.offset = 0;
160921a55d8Smrg	src_obj.offset = 0;
161921a55d8Smrg	dst_obj.bo = radeon_get_pixmap_bo(pPixmap);
162921a55d8Smrg    } else
163921a55d8Smrg#endif
164921a55d8Smrg    {
165921a55d8Smrg	dst_obj.offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
166921a55d8Smrg	src_obj.offset = pPriv->src_offset + info->fbLocation + pScrn->fbOffset;
167921a55d8Smrg	dst_obj.bo = src_obj.bo = NULL;
168921a55d8Smrg    }
169921a55d8Smrg    dst_obj.pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8);
170921a55d8Smrg
171921a55d8Smrg    src_obj.pitch = pPriv->src_pitch;
172921a55d8Smrg    src_obj.width = pPriv->w;
173921a55d8Smrg    src_obj.height = pPriv->h;
174921a55d8Smrg    src_obj.bpp = 16;
175921a55d8Smrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
176921a55d8Smrg    src_obj.bo = pPriv->src_bo[pPriv->currentBuffer];
177921a55d8Smrg
178921a55d8Smrg    dst_obj.width = pPixmap->drawable.width;
179921a55d8Smrg    dst_obj.height = pPixmap->drawable.height;
180921a55d8Smrg    dst_obj.bpp = pPixmap->drawable.bitsPerPixel;
181921a55d8Smrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
182921a55d8Smrg
183921a55d8Smrg    if (!EVERGREENSetAccelState(pScrn,
184921a55d8Smrg				&src_obj,
185921a55d8Smrg				NULL,
186921a55d8Smrg				&dst_obj,
187921a55d8Smrg				accel_state->xv_vs_offset, accel_state->xv_ps_offset,
188921a55d8Smrg				3, 0xffffffff))
189921a55d8Smrg	return;
190921a55d8Smrg
191921a55d8Smrg#ifdef COMPOSITE
192921a55d8Smrg    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
193921a55d8Smrg    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
194921a55d8Smrg#else
195921a55d8Smrg    dstxoff = 0;
196921a55d8Smrg    dstyoff = 0;
197921a55d8Smrg#endif
198921a55d8Smrg
199921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
200921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->cbuf, 512);
201921a55d8Smrg    radeon_cp_start(pScrn);
202921a55d8Smrg
203921a55d8Smrg    evergreen_set_default_state(pScrn);
204921a55d8Smrg
205921a55d8Smrg    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
206921a55d8Smrg    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
207921a55d8Smrg    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
208921a55d8Smrg
209921a55d8Smrg    /* PS bool constant */
210921a55d8Smrg    switch(pPriv->id) {
211921a55d8Smrg    case FOURCC_YV12:
212921a55d8Smrg    case FOURCC_I420:
213921a55d8Smrg	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
214921a55d8Smrg	break;
215921a55d8Smrg    case FOURCC_UYVY:
216921a55d8Smrg    case FOURCC_YUY2:
217921a55d8Smrg    default:
218921a55d8Smrg	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
219921a55d8Smrg	break;
220921a55d8Smrg    }
221921a55d8Smrg
222921a55d8Smrg    /* Shader */
223921a55d8Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
224921a55d8Smrg    vs_conf.shader_size         = accel_state->vs_size;
225921a55d8Smrg    vs_conf.num_gprs            = 2;
226921a55d8Smrg    vs_conf.stack_size          = 0;
227921a55d8Smrg    vs_conf.bo                  = accel_state->shaders_bo;
228921a55d8Smrg    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
229921a55d8Smrg
230921a55d8Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
231921a55d8Smrg    ps_conf.shader_size         = accel_state->ps_size;
232921a55d8Smrg    ps_conf.num_gprs            = 3;
233921a55d8Smrg    ps_conf.stack_size          = 1;
234921a55d8Smrg    ps_conf.clamp_consts        = 0;
235921a55d8Smrg    ps_conf.export_mode         = 2;
236921a55d8Smrg    ps_conf.bo                  = accel_state->shaders_bo;
237921a55d8Smrg    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
238921a55d8Smrg
239921a55d8Smrg    /* Texture */
240921a55d8Smrg    switch(pPriv->id) {
241921a55d8Smrg    case FOURCC_YV12:
242921a55d8Smrg    case FOURCC_I420:
243921a55d8Smrg	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
244921a55d8Smrg
245921a55d8Smrg	/* Y texture */
246921a55d8Smrg	tex_res.id                  = 0;
247921a55d8Smrg	tex_res.w                   = accel_state->src_obj[0].width;
248921a55d8Smrg	tex_res.h                   = accel_state->src_obj[0].height;
249921a55d8Smrg	tex_res.pitch               = accel_state->src_obj[0].pitch;
250921a55d8Smrg	tex_res.depth               = 0;
251921a55d8Smrg	tex_res.dim                 = SQ_TEX_DIM_2D;
252921a55d8Smrg	tex_res.base                = accel_state->src_obj[0].offset;
253921a55d8Smrg	tex_res.mip_base            = accel_state->src_obj[0].offset;
254921a55d8Smrg	tex_res.size                = accel_state->src_size[0];
255921a55d8Smrg	tex_res.bo                  = accel_state->src_obj[0].bo;
256921a55d8Smrg	tex_res.mip_bo              = accel_state->src_obj[0].bo;
257921a55d8Smrg
258921a55d8Smrg	tex_res.format              = FMT_8;
259921a55d8Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
260921a55d8Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
261921a55d8Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
262921a55d8Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
263921a55d8Smrg
264921a55d8Smrg	tex_res.base_level          = 0;
265921a55d8Smrg	tex_res.last_level          = 0;
266921a55d8Smrg	tex_res.perf_modulation     = 0;
267921a55d8Smrg	tex_res.interlaced          = 0;
268921a55d8Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
269921a55d8Smrg
270921a55d8Smrg	/* Y sampler */
271921a55d8Smrg	tex_samp.id                 = 0;
272921a55d8Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
273921a55d8Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
274921a55d8Smrg	tex_samp.clamp_z            = SQ_TEX_WRAP;
275921a55d8Smrg
276921a55d8Smrg	/* xxx: switch to bicubic */
277921a55d8Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
278921a55d8Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
279921a55d8Smrg
280921a55d8Smrg	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
281921a55d8Smrg	tex_samp.mip_filter         = 0;			/* no mipmap */
282921a55d8Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
283921a55d8Smrg
284921a55d8Smrg	/* U or V texture */
285921a55d8Smrg	tex_res.id                  = 1;
286921a55d8Smrg	tex_res.format              = FMT_8;
287921a55d8Smrg	tex_res.w                   = accel_state->src_obj[0].width >> 1;
288921a55d8Smrg	tex_res.h                   = accel_state->src_obj[0].height >> 1;
289921a55d8Smrg	tex_res.pitch               = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, 256);
290921a55d8Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
291921a55d8Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
292921a55d8Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
293921a55d8Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
294921a55d8Smrg	tex_res.interlaced          = 0;
295921a55d8Smrg
296921a55d8Smrg	tex_res.base                = accel_state->src_obj[0].offset + pPriv->planev_offset;
297921a55d8Smrg	tex_res.mip_base            = accel_state->src_obj[0].offset + pPriv->planev_offset;
298921a55d8Smrg	tex_res.size                = tex_res.pitch * (pPriv->h >> 1);
299921a55d8Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
300921a55d8Smrg
301921a55d8Smrg	/* U or V sampler */
302921a55d8Smrg	tex_samp.id                 = 1;
303921a55d8Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
304921a55d8Smrg
305921a55d8Smrg	/* U or V texture */
306921a55d8Smrg	tex_res.id                  = 2;
307921a55d8Smrg	tex_res.format              = FMT_8;
308921a55d8Smrg	tex_res.w                   = accel_state->src_obj[0].width >> 1;
309921a55d8Smrg	tex_res.h                   = accel_state->src_obj[0].height >> 1;
310921a55d8Smrg	tex_res.pitch               = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, 256);
311921a55d8Smrg	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
312921a55d8Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
313921a55d8Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
314921a55d8Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
315921a55d8Smrg	tex_res.interlaced          = 0;
316921a55d8Smrg
317921a55d8Smrg	tex_res.base                = accel_state->src_obj[0].offset + pPriv->planeu_offset;
318921a55d8Smrg	tex_res.mip_base            = accel_state->src_obj[0].offset + pPriv->planeu_offset;
319921a55d8Smrg	tex_res.size                = tex_res.pitch * (pPriv->h >> 1);
320921a55d8Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
321921a55d8Smrg
322921a55d8Smrg	/* UV sampler */
323921a55d8Smrg	tex_samp.id                 = 2;
324921a55d8Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
325921a55d8Smrg	break;
326921a55d8Smrg    case FOURCC_UYVY:
327921a55d8Smrg    case FOURCC_YUY2:
328921a55d8Smrg    default:
329921a55d8Smrg	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
330921a55d8Smrg
331921a55d8Smrg	/* Y texture */
332921a55d8Smrg	tex_res.id                  = 0;
333921a55d8Smrg	tex_res.w                   = accel_state->src_obj[0].width;
334921a55d8Smrg	tex_res.h                   = accel_state->src_obj[0].height;
335921a55d8Smrg	tex_res.pitch               = accel_state->src_obj[0].pitch >> 1;
336921a55d8Smrg	tex_res.depth               = 0;
337921a55d8Smrg	tex_res.dim                 = SQ_TEX_DIM_2D;
338921a55d8Smrg	tex_res.base                = accel_state->src_obj[0].offset;
339921a55d8Smrg	tex_res.mip_base            = accel_state->src_obj[0].offset;
340921a55d8Smrg	tex_res.size                = accel_state->src_size[0];
341921a55d8Smrg	tex_res.bo                  = accel_state->src_obj[0].bo;
342921a55d8Smrg	tex_res.mip_bo              = accel_state->src_obj[0].bo;
343921a55d8Smrg
344921a55d8Smrg	tex_res.format              = FMT_8_8;
345921a55d8Smrg	if (pPriv->id == FOURCC_UYVY)
346921a55d8Smrg	    tex_res.dst_sel_x           = SQ_SEL_Y; /* Y */
347921a55d8Smrg	else
348921a55d8Smrg	    tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
349921a55d8Smrg	tex_res.dst_sel_y           = SQ_SEL_1;
350921a55d8Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
351921a55d8Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
352921a55d8Smrg
353921a55d8Smrg	tex_res.base_level          = 0;
354921a55d8Smrg	tex_res.last_level          = 0;
355921a55d8Smrg	tex_res.perf_modulation     = 0;
356921a55d8Smrg	tex_res.interlaced          = 0;
357921a55d8Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
358921a55d8Smrg
359921a55d8Smrg	/* Y sampler */
360921a55d8Smrg	tex_samp.id                 = 0;
361921a55d8Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
362921a55d8Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
363921a55d8Smrg	tex_samp.clamp_z            = SQ_TEX_WRAP;
364921a55d8Smrg
365921a55d8Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
366921a55d8Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
367921a55d8Smrg
368921a55d8Smrg	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
369921a55d8Smrg	tex_samp.mip_filter         = 0;			/* no mipmap */
370921a55d8Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
371921a55d8Smrg
372921a55d8Smrg	/* UV texture */
373921a55d8Smrg	tex_res.id                  = 1;
374921a55d8Smrg	tex_res.format              = FMT_8_8_8_8;
375921a55d8Smrg	tex_res.w                   = accel_state->src_obj[0].width >> 1;
376921a55d8Smrg	tex_res.h                   = accel_state->src_obj[0].height;
377921a55d8Smrg	tex_res.pitch               = accel_state->src_obj[0].pitch >> 2;
378921a55d8Smrg	if (pPriv->id == FOURCC_UYVY) {
379921a55d8Smrg	    tex_res.dst_sel_x           = SQ_SEL_X; /* V */
380921a55d8Smrg	    tex_res.dst_sel_y           = SQ_SEL_Z; /* U */
381921a55d8Smrg	} else {
382921a55d8Smrg	    tex_res.dst_sel_x           = SQ_SEL_Y; /* V */
383921a55d8Smrg	    tex_res.dst_sel_y           = SQ_SEL_W; /* U */
384921a55d8Smrg	}
385921a55d8Smrg	tex_res.dst_sel_z           = SQ_SEL_1;
386921a55d8Smrg	tex_res.dst_sel_w           = SQ_SEL_1;
387921a55d8Smrg	tex_res.interlaced          = 0;
388921a55d8Smrg
389921a55d8Smrg	tex_res.base                = accel_state->src_obj[0].offset;
390921a55d8Smrg	tex_res.mip_base            = accel_state->src_obj[0].offset;
391921a55d8Smrg	tex_res.size                = accel_state->src_size[0];
392921a55d8Smrg	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
393921a55d8Smrg
394921a55d8Smrg	/* UV sampler */
395921a55d8Smrg	tex_samp.id                 = 1;
396921a55d8Smrg	evergreen_set_tex_sampler(pScrn, &tex_samp);
397921a55d8Smrg	break;
398921a55d8Smrg    }
399921a55d8Smrg
400921a55d8Smrg    cb_conf.id = 0;
401921a55d8Smrg    cb_conf.w = accel_state->dst_obj.pitch;
402921a55d8Smrg    cb_conf.h = accel_state->dst_obj.height;
403921a55d8Smrg    cb_conf.base = accel_state->dst_obj.offset;
404921a55d8Smrg    cb_conf.bo = accel_state->dst_obj.bo;
405921a55d8Smrg
406921a55d8Smrg    switch (accel_state->dst_obj.bpp) {
407921a55d8Smrg    case 16:
408921a55d8Smrg	if (pPixmap->drawable.depth == 15) {
409921a55d8Smrg	    cb_conf.format = COLOR_1_5_5_5;
410921a55d8Smrg	    cb_conf.comp_swap = 1; /* ARGB */
411921a55d8Smrg	} else {
412921a55d8Smrg	    cb_conf.format = COLOR_5_6_5;
413921a55d8Smrg	    cb_conf.comp_swap = 2; /* RGB */
414921a55d8Smrg	}
415921a55d8Smrg	break;
416921a55d8Smrg    case 32:
417921a55d8Smrg	cb_conf.format = COLOR_8_8_8_8;
418921a55d8Smrg	cb_conf.comp_swap = 1; /* ARGB */
419921a55d8Smrg	break;
420921a55d8Smrg    default:
421921a55d8Smrg	return;
422921a55d8Smrg    }
423921a55d8Smrg
424921a55d8Smrg    cb_conf.source_format = EXPORT_4C_16BPC;
425921a55d8Smrg    cb_conf.blend_clamp = 1;
426921a55d8Smrg    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
427921a55d8Smrg
428921a55d8Smrg    /* Render setup */
429921a55d8Smrg    BEGIN_BATCH(23);
430921a55d8Smrg    EREG(CB_TARGET_MASK,                      (0x0f << TARGET0_ENABLE_shift));
431921a55d8Smrg    EREG(CB_COLOR_CONTROL,                    ((0xcc << ROP3_shift) |
432921a55d8Smrg					       (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
433921a55d8Smrg    EREG(CB_BLEND0_CONTROL,                   0);
434921a55d8Smrg
435921a55d8Smrg    /* Interpolator setup */
436921a55d8Smrg    /* export tex coords from VS */
437921a55d8Smrg    EREG(SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
438921a55d8Smrg    EREG(SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
439921a55d8Smrg    EREG(SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)	|
440921a55d8Smrg					       (0x03 << DEFAULT_VAL_shift)));
441921a55d8Smrg
442921a55d8Smrg    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
443921a55d8Smrg     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
444921a55d8Smrg    PACK0(SPI_PS_IN_CONTROL_0, 3);
445921a55d8Smrg    E32(((1 << NUM_INTERP_shift) |
446921a55d8Smrg	 LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
447921a55d8Smrg    E32(0); // SPI_PS_IN_CONTROL_1
448921a55d8Smrg    E32(0); // SPI_INTERP_CONTROL_0
449921a55d8Smrg    END_BATCH();
450921a55d8Smrg
451921a55d8Smrg    /* PS alu constants */
452921a55d8Smrg    ps_const_conf.size_bytes = 256;
453921a55d8Smrg    ps_const_conf.type = SHADER_TYPE_PS;
454921a55d8Smrg    ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
455921a55d8Smrg    ps_const_conf.bo = accel_state->cbuf.vb_bo;
456921a55d8Smrg    ps_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_offset;
457921a55d8Smrg
458921a55d8Smrg    ps_alu_consts[0] = off[0];
459921a55d8Smrg    ps_alu_consts[1] = off[1];
460921a55d8Smrg    ps_alu_consts[2] = off[2];
461921a55d8Smrg    ps_alu_consts[3] = yco;
462921a55d8Smrg
463921a55d8Smrg    ps_alu_consts[4] = uco[0];
464921a55d8Smrg    ps_alu_consts[5] = uco[1];
465921a55d8Smrg    ps_alu_consts[6] = uco[2];
466921a55d8Smrg    ps_alu_consts[7] = gamma;
467921a55d8Smrg
468921a55d8Smrg    ps_alu_consts[8] = vco[0];
469921a55d8Smrg    ps_alu_consts[9] = vco[1];
470921a55d8Smrg    ps_alu_consts[10] = vco[2];
471921a55d8Smrg    ps_alu_consts[11] = 0.0;
472921a55d8Smrg
473921a55d8Smrg    radeon_vbo_commit(pScrn, &accel_state->cbuf);
474921a55d8Smrg    evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);
475921a55d8Smrg
476921a55d8Smrg    /* VS alu constants */
477921a55d8Smrg    vs_const_conf.size_bytes = 256;
478921a55d8Smrg    vs_const_conf.type = SHADER_TYPE_VS;
479921a55d8Smrg    vs_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
480921a55d8Smrg    vs_const_conf.bo = accel_state->cbuf.vb_bo;
481921a55d8Smrg    vs_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_offset;
482921a55d8Smrg
483921a55d8Smrg    vs_alu_consts[0] = 1.0 / pPriv->w;
484921a55d8Smrg    vs_alu_consts[1] = 1.0 / pPriv->h;
485921a55d8Smrg    vs_alu_consts[2] = 0.0;
486921a55d8Smrg    vs_alu_consts[3] = 0.0;
487921a55d8Smrg
488921a55d8Smrg    radeon_vbo_commit(pScrn, &accel_state->cbuf);
489921a55d8Smrg    evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);
490921a55d8Smrg
491921a55d8Smrg    if (pPriv->vsync) {
492921a55d8Smrg	xf86CrtcPtr crtc;
493921a55d8Smrg	if (pPriv->desired_crtc)
494921a55d8Smrg	    crtc = pPriv->desired_crtc;
495921a55d8Smrg	else
496921a55d8Smrg	    crtc = radeon_pick_best_crtc(pScrn,
497921a55d8Smrg					 pPriv->drw_x,
498921a55d8Smrg					 pPriv->drw_x + pPriv->dst_w,
499921a55d8Smrg					 pPriv->drw_y,
500921a55d8Smrg					 pPriv->drw_y + pPriv->dst_h);
501921a55d8Smrg	if (crtc)
502921a55d8Smrg	    evergreen_cp_wait_vline_sync(pScrn, pPixmap,
503921a55d8Smrg					 crtc,
504921a55d8Smrg					 pPriv->drw_y - crtc->y,
505921a55d8Smrg					 (pPriv->drw_y - crtc->y) + pPriv->dst_h);
506921a55d8Smrg    }
507921a55d8Smrg
508921a55d8Smrg    while (nBox--) {
509921a55d8Smrg	int srcX, srcY, srcw, srch;
510921a55d8Smrg	int dstX, dstY, dstw, dsth;
511921a55d8Smrg	float *vb;
512921a55d8Smrg
513921a55d8Smrg
514921a55d8Smrg	dstX = pBox->x1 + dstxoff;
515921a55d8Smrg	dstY = pBox->y1 + dstyoff;
516921a55d8Smrg	dstw = pBox->x2 - pBox->x1;
517921a55d8Smrg	dsth = pBox->y2 - pBox->y1;
518921a55d8Smrg
519921a55d8Smrg	srcX = pPriv->src_x;
520921a55d8Smrg	srcX += ((pBox->x1 - pPriv->drw_x) *
521921a55d8Smrg		 pPriv->src_w) / pPriv->dst_w;
522921a55d8Smrg	srcY = pPriv->src_y;
523921a55d8Smrg	srcY += ((pBox->y1 - pPriv->drw_y) *
524921a55d8Smrg		 pPriv->src_h) / pPriv->dst_h;
525921a55d8Smrg
526921a55d8Smrg	srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
527921a55d8Smrg	srch = (pPriv->src_h * dsth) / pPriv->dst_h;
528921a55d8Smrg
529921a55d8Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
530921a55d8Smrg
531921a55d8Smrg	vb[0] = (float)dstX;
532921a55d8Smrg	vb[1] = (float)dstY;
533921a55d8Smrg	vb[2] = (float)srcX;
534921a55d8Smrg	vb[3] = (float)srcY;
535921a55d8Smrg
536921a55d8Smrg	vb[4] = (float)dstX;
537921a55d8Smrg	vb[5] = (float)(dstY + dsth);
538921a55d8Smrg	vb[6] = (float)srcX;
539921a55d8Smrg	vb[7] = (float)(srcY + srch);
540921a55d8Smrg
541921a55d8Smrg	vb[8] = (float)(dstX + dstw);
542921a55d8Smrg	vb[9] = (float)(dstY + dsth);
543921a55d8Smrg	vb[10] = (float)(srcX + srcw);
544921a55d8Smrg	vb[11] = (float)(srcY + srch);
545921a55d8Smrg
546921a55d8Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
547921a55d8Smrg
548921a55d8Smrg	pBox++;
549921a55d8Smrg    }
550921a55d8Smrg
551921a55d8Smrg    evergreen_finish_op(pScrn, 16);
552921a55d8Smrg
553921a55d8Smrg    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
554921a55d8Smrg}
555921a55d8Smrg
556921a55d8Smrg#endif
557