1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "exa.h"
34
35#include "radeon.h"
36#include "radeon_reg.h"
37#include "evergreen_shader.h"
38#include "evergreen_reg.h"
39#include "evergreen_state.h"
40
41#include "radeon_video.h"
42
43#include <X11/extensions/Xv.h>
44#include "fourcc.h"
45
46#include "damage.h"
47
48#include "radeon_exa_shared.h"
49#include "radeon_vbo.h"
50
51/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces
52   note the difference to the parameters used in overlay are due
53   to 10bit vs. float calcs */
54static REF_TRANSFORM trans[2] =
55{
56    {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */
57    {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0}  /* BT.709 */
58};
59
60void
61EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
62{
63    RADEONInfoPtr info = RADEONPTR(pScrn);
64    struct radeon_accel_state *accel_state = info->accel_state;
65    PixmapPtr pPixmap = pPriv->pPixmap;
66    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
67    int nBox = REGION_NUM_RECTS(&pPriv->clip);
68    int dstxoff, dstyoff;
69    struct r600_accel_object src_obj, dst_obj;
70    cb_config_t     cb_conf;
71    tex_resource_t  tex_res;
72    tex_sampler_t   tex_samp;
73    shader_config_t vs_conf, ps_conf;
74    /*
75     * y' = y - .0625
76     * u' = u - .5
77     * v' = v - .5;
78     *
79     * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
80     * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
81     * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
82     *
83     * DP3 might look like the straightforward solution
84     * but we'd need to move the texture yuv values in
85     * the same reg for this to work. Therefore use MADs.
86     * Brightness just adds to the off constant.
87     * Contrast is multiplication of luminance.
88     * Saturation and hue change the u and v coeffs.
89     * Default values (before adjustments - depend on colorspace):
90     * yco = 1.1643
91     * uco = 0, -0.39173, 2.017
92     * vco = 1.5958, -0.8129, 0
93     * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
94     *       -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
95     *       -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
96     *
97     * temp = MAD(yco, yuv.yyyy, off)
98     * temp = MAD(uco, yuv.uuuu, temp)
99     * result = MAD(vco, yuv.vvvv, temp)
100     */
101    /* TODO: calc consts in the shader */
102    const float Loff = -0.0627;
103    const float Coff = -0.502;
104    float uvcosf, uvsinf;
105    float yco;
106    float uco[3], vco[3], off[3];
107    float bright, cont, gamma;
108    int ref = pPriv->transform_index;
109    float *ps_alu_consts;
110    const_config_t ps_const_conf;
111    float *vs_alu_consts;
112    const_config_t vs_const_conf;
113
114    cont = RTFContrast(pPriv->contrast);
115    bright = RTFBrightness(pPriv->brightness);
116    gamma = (float)pPriv->gamma / 1000.0;
117    uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
118    uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
119    /* overlay video also does pre-gamma contrast/sat adjust, should we? */
120
121    yco = trans[ref].RefLuma * cont;
122    uco[0] = -trans[ref].RefRCr * uvsinf;
123    uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
124    uco[2] = trans[ref].RefBCb * uvcosf;
125    vco[0] = trans[ref].RefRCr * uvcosf;
126    vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
127    vco[2] = trans[ref].RefBCb * uvsinf;
128    off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
129    off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
130    off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
131
132    // XXX
133    gamma = 1.0;
134
135    CLEAR (cb_conf);
136    CLEAR (tex_res);
137    CLEAR (tex_samp);
138    CLEAR (vs_conf);
139    CLEAR (ps_conf);
140    CLEAR (vs_const_conf);
141    CLEAR (ps_const_conf);
142
143    dst_obj.bo = radeon_get_pixmap_bo(pPixmap)->bo.radeon;
144    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pPixmap);
145    dst_obj.surface = radeon_get_pixmap_surface(pPixmap);
146
147    dst_obj.pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8);
148
149    src_obj.pitch = pPriv->src_pitch;
150    src_obj.width = pPriv->w;
151    src_obj.height = pPriv->h;
152    src_obj.bpp = 16;
153    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
154    src_obj.bo = pPriv->src_bo[pPriv->currentBuffer];
155    src_obj.tiling_flags = 0;
156    src_obj.surface = NULL;
157
158    dst_obj.width = pPixmap->drawable.width;
159    dst_obj.height = pPixmap->drawable.height;
160    dst_obj.bpp = pPixmap->drawable.bitsPerPixel;
161    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
162
163    if (!R600SetAccelState(pScrn,
164			   &src_obj,
165			   NULL,
166			   &dst_obj,
167			   accel_state->xv_vs_offset, accel_state->xv_ps_offset,
168			   3, 0xffffffff))
169	return;
170
171#ifdef COMPOSITE
172    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
173    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
174#else
175    dstxoff = 0;
176    dstyoff = 0;
177#endif
178
179    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
180    radeon_vbo_check(pScrn, &accel_state->cbuf, 512);
181    radeon_cp_start(pScrn);
182
183    evergreen_set_default_state(pScrn);
184
185    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
186    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
187    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
188
189    /* PS bool constant */
190    switch(pPriv->id) {
191    case FOURCC_YV12:
192    case FOURCC_I420:
193	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
194	break;
195    case FOURCC_UYVY:
196    case FOURCC_YUY2:
197    default:
198	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
199	break;
200    }
201
202    /* Shader */
203    vs_conf.shader_addr         = accel_state->vs_mc_addr;
204    vs_conf.shader_size         = accel_state->vs_size;
205    vs_conf.num_gprs            = 2;
206    vs_conf.stack_size          = 0;
207    vs_conf.bo                  = accel_state->shaders_bo;
208    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
209
210    ps_conf.shader_addr         = accel_state->ps_mc_addr;
211    ps_conf.shader_size         = accel_state->ps_size;
212    ps_conf.num_gprs            = 3;
213    ps_conf.stack_size          = 1;
214    ps_conf.clamp_consts        = 0;
215    ps_conf.export_mode         = 2;
216    ps_conf.bo                  = accel_state->shaders_bo;
217    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
218
219    /* Texture */
220    switch(pPriv->id) {
221    case FOURCC_YV12:
222    case FOURCC_I420:
223	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
224
225	/* Y texture */
226	tex_res.id                  = 0;
227	tex_res.w                   = accel_state->src_obj[0].width;
228	tex_res.h                   = accel_state->src_obj[0].height;
229	tex_res.pitch               = accel_state->src_obj[0].pitch;
230	tex_res.depth               = 0;
231	tex_res.dim                 = SQ_TEX_DIM_2D;
232	tex_res.base                = 0;
233	tex_res.mip_base            = 0;
234	tex_res.size                = accel_state->src_size[0];
235	tex_res.bo                  = accel_state->src_obj[0].bo;
236	tex_res.mip_bo              = accel_state->src_obj[0].bo;
237	tex_res.surface             = NULL;
238
239	tex_res.format              = FMT_8;
240	tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
241	tex_res.dst_sel_y           = SQ_SEL_1;
242	tex_res.dst_sel_z           = SQ_SEL_1;
243	tex_res.dst_sel_w           = SQ_SEL_1;
244
245	tex_res.base_level          = 0;
246	tex_res.last_level          = 0;
247	tex_res.perf_modulation     = 0;
248	tex_res.interlaced          = 0;
249	if ((accel_state->src_obj[0].tiling_flags & RADEON_TILING_MASK) ==
250	    RADEON_TILING_LINEAR)
251	    tex_res.array_mode          = 1;
252	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
253
254	/* Y sampler */
255	tex_samp.id                 = 0;
256	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
257	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
258	tex_samp.clamp_z            = SQ_TEX_WRAP;
259
260	/* xxx: switch to bicubic */
261	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
262	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
263
264	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
265	tex_samp.mip_filter         = 0;			/* no mipmap */
266	evergreen_set_tex_sampler(pScrn, &tex_samp);
267
268	/* U or V texture */
269	tex_res.id                  = 1;
270	tex_res.format              = FMT_8;
271	tex_res.w                   = accel_state->src_obj[0].width >> 1;
272	tex_res.h                   = accel_state->src_obj[0].height >> 1;
273	tex_res.pitch               = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, pPriv->hw_align);
274	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
275	tex_res.dst_sel_y           = SQ_SEL_1;
276	tex_res.dst_sel_z           = SQ_SEL_1;
277	tex_res.dst_sel_w           = SQ_SEL_1;
278	tex_res.interlaced          = 0;
279
280	tex_res.base                = pPriv->planev_offset;
281	tex_res.mip_base            = pPriv->planev_offset;
282	tex_res.size                = tex_res.pitch * (pPriv->h >> 1);
283	if ((accel_state->src_obj[0].tiling_flags & RADEON_TILING_MASK) ==
284	    RADEON_TILING_LINEAR)
285	    tex_res.array_mode          = 1;
286	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
287
288	/* U or V sampler */
289	tex_samp.id                 = 1;
290	evergreen_set_tex_sampler(pScrn, &tex_samp);
291
292	/* U or V texture */
293	tex_res.id                  = 2;
294	tex_res.format              = FMT_8;
295	tex_res.w                   = accel_state->src_obj[0].width >> 1;
296	tex_res.h                   = accel_state->src_obj[0].height >> 1;
297	tex_res.pitch               = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, pPriv->hw_align);
298	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
299	tex_res.dst_sel_y           = SQ_SEL_1;
300	tex_res.dst_sel_z           = SQ_SEL_1;
301	tex_res.dst_sel_w           = SQ_SEL_1;
302	tex_res.interlaced          = 0;
303
304	tex_res.base                = pPriv->planeu_offset;
305	tex_res.mip_base            = pPriv->planeu_offset;
306	tex_res.size                = tex_res.pitch * (pPriv->h >> 1);
307	if ((accel_state->src_obj[0].tiling_flags & RADEON_TILING_MASK) ==
308	    RADEON_TILING_LINEAR)
309	    tex_res.array_mode          = 1;
310	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
311
312	/* UV sampler */
313	tex_samp.id                 = 2;
314	evergreen_set_tex_sampler(pScrn, &tex_samp);
315	break;
316    case FOURCC_UYVY:
317    case FOURCC_YUY2:
318    default:
319	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
320
321	/* YUV texture */
322	tex_res.id                  = 0;
323	tex_res.w                   = accel_state->src_obj[0].width;
324	tex_res.h                   = accel_state->src_obj[0].height;
325	tex_res.pitch               = accel_state->src_obj[0].pitch >> 1;
326	tex_res.depth               = 0;
327	tex_res.dim                 = SQ_TEX_DIM_2D;
328	tex_res.base                = 0;
329	tex_res.mip_base            = 0;
330	tex_res.size                = accel_state->src_size[0];
331	tex_res.bo                  = accel_state->src_obj[0].bo;
332	tex_res.mip_bo              = accel_state->src_obj[0].bo;
333	tex_res.surface             = NULL;
334
335	if (pPriv->id == FOURCC_UYVY)
336	    tex_res.format              = FMT_GB_GR;
337	else
338	    tex_res.format              = FMT_BG_RG;
339	tex_res.dst_sel_x           = SQ_SEL_Y;
340	tex_res.dst_sel_y           = SQ_SEL_X;
341	tex_res.dst_sel_z           = SQ_SEL_Z;
342	tex_res.dst_sel_w           = SQ_SEL_1;
343
344	tex_res.base_level          = 0;
345	tex_res.last_level          = 0;
346	tex_res.perf_modulation     = 0;
347	tex_res.interlaced          = 0;
348	if ((accel_state->src_obj[0].tiling_flags & RADEON_TILING_MASK) ==
349	    RADEON_TILING_LINEAR)
350	    tex_res.array_mode          = 1;
351	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
352
353	/* YUV sampler */
354	tex_samp.id                 = 0;
355	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
356	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
357	tex_samp.clamp_z            = SQ_TEX_WRAP;
358
359	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
360	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
361
362	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
363	tex_samp.mip_filter         = 0;			/* no mipmap */
364	evergreen_set_tex_sampler(pScrn, &tex_samp);
365
366	break;
367    }
368
369    cb_conf.id = 0;
370    cb_conf.w = accel_state->dst_obj.pitch;
371    cb_conf.h = accel_state->dst_obj.height;
372    cb_conf.base = 0;
373    cb_conf.bo = accel_state->dst_obj.bo;
374    cb_conf.surface = accel_state->dst_obj.surface;
375
376    switch (accel_state->dst_obj.bpp) {
377    case 16:
378	if (pPixmap->drawable.depth == 15) {
379	    cb_conf.format = COLOR_1_5_5_5;
380	    cb_conf.comp_swap = 1; /* ARGB */
381	} else {
382	    cb_conf.format = COLOR_5_6_5;
383	    cb_conf.comp_swap = 2; /* RGB */
384	}
385#if X_BYTE_ORDER == X_BIG_ENDIAN
386	cb_conf.endian = ENDIAN_8IN16;
387#endif
388	break;
389    case 32:
390	cb_conf.format = COLOR_8_8_8_8;
391	cb_conf.comp_swap = 1; /* ARGB */
392#if X_BYTE_ORDER == X_BIG_ENDIAN
393	cb_conf.endian = ENDIAN_8IN32;
394#endif
395	break;
396    default:
397	return;
398    }
399
400    cb_conf.source_format = EXPORT_4C_16BPC;
401    cb_conf.blend_clamp = 1;
402    cb_conf.pmask = 0xf;
403    cb_conf.rop = 3;
404    if ((accel_state->dst_obj.tiling_flags & RADEON_TILING_MASK) ==
405	RADEON_TILING_LINEAR) {
406	cb_conf.array_mode = 1;
407	cb_conf.non_disp_tiling = 1;
408    }
409    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
410
411    evergreen_set_spi(pScrn, (1 - 1), 1);
412
413    /* PS alu constants */
414    ps_const_conf.size_bytes = 256;
415    ps_const_conf.type = SHADER_TYPE_PS;
416    ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
417    ps_const_conf.bo = accel_state->cbuf.vb_bo;
418    ps_const_conf.const_addr = accel_state->cbuf.vb_offset;
419    ps_const_conf.cpu_ptr = (uint32_t *)(char *)ps_alu_consts;
420
421    ps_alu_consts[0] = off[0];
422    ps_alu_consts[1] = off[1];
423    ps_alu_consts[2] = off[2];
424    ps_alu_consts[3] = yco;
425
426    ps_alu_consts[4] = uco[0];
427    ps_alu_consts[5] = uco[1];
428    ps_alu_consts[6] = uco[2];
429    ps_alu_consts[7] = gamma;
430
431    ps_alu_consts[8] = vco[0];
432    ps_alu_consts[9] = vco[1];
433    ps_alu_consts[10] = vco[2];
434    ps_alu_consts[11] = 0.0;
435
436    radeon_vbo_commit(pScrn, &accel_state->cbuf);
437    evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);
438
439    /* VS alu constants */
440    vs_const_conf.size_bytes = 256;
441    vs_const_conf.type = SHADER_TYPE_VS;
442    vs_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
443    vs_const_conf.bo = accel_state->cbuf.vb_bo;
444    vs_const_conf.const_addr = accel_state->cbuf.vb_offset;
445    vs_const_conf.cpu_ptr = (uint32_t *)(char *)vs_alu_consts;
446
447    vs_alu_consts[0] = 1.0 / pPriv->w;
448    vs_alu_consts[1] = 1.0 / pPriv->h;
449    vs_alu_consts[2] = 0.0;
450    vs_alu_consts[3] = 0.0;
451
452    radeon_vbo_commit(pScrn, &accel_state->cbuf);
453    evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);
454
455    if (pPriv->vsync) {
456	xf86CrtcPtr crtc;
457	if (pPriv->desired_crtc)
458	    crtc = pPriv->desired_crtc;
459	else
460	    crtc = radeon_pick_best_crtc(pScrn, FALSE,
461					 pPriv->drw_x,
462					 pPriv->drw_x + pPriv->dst_w,
463					 pPriv->drw_y,
464					 pPriv->drw_y + pPriv->dst_h);
465	if (crtc)
466	    evergreen_cp_wait_vline_sync(pScrn, pPixmap,
467					 crtc,
468					 pPriv->drw_y - crtc->y,
469					 (pPriv->drw_y - crtc->y) + pPriv->dst_h);
470    }
471
472    while (nBox--) {
473	float srcX, srcY, srcw, srch;
474	int dstX, dstY, dstw, dsth;
475	float *vb;
476
477
478	dstX = pBox->x1 + dstxoff;
479	dstY = pBox->y1 + dstyoff;
480	dstw = pBox->x2 - pBox->x1;
481	dsth = pBox->y2 - pBox->y1;
482
483	srcX = pPriv->src_x;
484	srcX += ((pBox->x1 - pPriv->drw_x) *
485		 pPriv->src_w) / (float)pPriv->dst_w;
486	srcY = pPriv->src_y;
487	srcY += ((pBox->y1 - pPriv->drw_y) *
488		 pPriv->src_h) / (float)pPriv->dst_h;
489
490	srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w;
491	srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h;
492
493	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
494
495	vb[0] = (float)dstX;
496	vb[1] = (float)dstY;
497	vb[2] = (float)srcX;
498	vb[3] = (float)srcY;
499
500	vb[4] = (float)dstX;
501	vb[5] = (float)(dstY + dsth);
502	vb[6] = (float)srcX;
503	vb[7] = (float)(srcY + srch);
504
505	vb[8] = (float)(dstX + dstw);
506	vb[9] = (float)(dstY + dsth);
507	vb[10] = (float)(srcX + srcw);
508	vb[11] = (float)(srcY + srch);
509
510	radeon_vbo_commit(pScrn, &accel_state->vbo);
511
512	pBox++;
513    }
514
515    evergreen_finish_op(pScrn, 16);
516
517    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
518}
519