r600_textured_videofuncs.c revision b7e1c893
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "exa.h"
34
35#include "radeon.h"
36#include "radeon_reg.h"
37#include "r600_shader.h"
38#include "r600_reg.h"
39#include "r600_state.h"
40
41#include "radeon_video.h"
42
43#include <X11/extensions/Xv.h>
44#include "fourcc.h"
45
46#include "damage.h"
47
48static void
49R600DoneTexturedVideo(ScrnInfoPtr pScrn)
50{
51    RADEONInfoPtr info = RADEONPTR(pScrn);
52    struct radeon_accel_state *accel_state = info->accel_state;
53    draw_config_t   draw_conf;
54    vtx_resource_t  vtx_res;
55
56    CLEAR (draw_conf);
57    CLEAR (vtx_res);
58
59    if (accel_state->vb_index == 0) {
60	R600IBDiscard(pScrn, accel_state->ib);
61	return;
62    }
63
64    accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart +
65	(accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2);
66    accel_state->vb_size = accel_state->vb_index * 16;
67
68    /* flush vertex cache */
69    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
70	(info->ChipFamily == CHIP_FAMILY_RV620) ||
71	(info->ChipFamily == CHIP_FAMILY_RS780) ||
72	(info->ChipFamily == CHIP_FAMILY_RV710))
73	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
74			    accel_state->vb_size, accel_state->vb_mc_addr);
75    else
76	cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit,
77			    accel_state->vb_size, accel_state->vb_mc_addr);
78
79    /* Vertex buffer setup */
80    vtx_res.id              = SQ_VTX_RESOURCE_vs;
81    vtx_res.vtx_size_dw     = 16 / 4;
82    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
83    vtx_res.mem_req_size    = 1;
84    vtx_res.vb_addr         = accel_state->vb_mc_addr;
85    set_vtx_resource        (pScrn, accel_state->ib, &vtx_res);
86
87    draw_conf.prim_type          = DI_PT_RECTLIST;
88    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
89    draw_conf.num_instances      = 1;
90    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
91    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
92
93    draw_auto(pScrn, accel_state->ib, &draw_conf);
94
95    wait_3d_idle_clean(pScrn, accel_state->ib);
96
97    /* sync destination surface */
98    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
99			accel_state->dst_size, accel_state->dst_mc_addr);
100
101    R600CPFlushIndirect(pScrn, accel_state->ib);
102}
103
104void
105R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
106{
107    RADEONInfoPtr info = RADEONPTR(pScrn);
108    struct radeon_accel_state *accel_state = info->accel_state;
109    PixmapPtr pPixmap = pPriv->pPixmap;
110    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
111    int nBox = REGION_NUM_RECTS(&pPriv->clip);
112    int dstxoff, dstyoff;
113    cb_config_t     cb_conf;
114    tex_resource_t  tex_res;
115    tex_sampler_t   tex_samp;
116    shader_config_t vs_conf, ps_conf;
117    int uv_offset;
118    static float ps_alu_consts[] = {
119        1.0,  0.0,      1.4020,   0,  /* r - c[0] */
120        1.0, -0.34414, -0.71414,  0,  /* g - c[1] */
121        1.0,  1.7720,   0.0,      0,  /* b - c[2] */
122	/* Constants for undoing Y'CbCr scaling
123	 *  - Y' is scaled from 16:235
124	 *  - Cb/Cr are scaled from 16:240
125	 * Unscaled value N' = N * N_mul + N_shift (N' in range [-0.5, 0.5])
126	 * Vector is [Y_mul, Y_shfit, C_mul, C_shift]
127	 */
128        256.0/219.0, -16.0/219.0, 256.0/224.0, -128.0/224.0,
129    };
130
131    CLEAR (cb_conf);
132    CLEAR (tex_res);
133    CLEAR (tex_samp);
134    CLEAR (vs_conf);
135    CLEAR (ps_conf);
136
137    accel_state->dst_pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8);
138    accel_state->src_pitch[0] = pPriv->src_pitch;
139
140    /* bad pitch */
141    if (accel_state->src_pitch[0] & 7)
142	return;
143    if (accel_state->dst_pitch & 7)
144	return;
145
146#ifdef COMPOSITE
147    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
148    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
149#else
150    dstxoff = 0;
151    dstyoff = 0;
152#endif
153
154    accel_state->ib = RADEONCPGetBuffer(pScrn);
155
156    /* Init */
157    start_3d(pScrn, accel_state->ib);
158
159    set_default_state(pScrn, accel_state->ib);
160
161    /* Scissor / viewport */
162    EREG(accel_state->ib, PA_CL_VTE_CNTL,                      VTX_XY_FMT_bit);
163    EREG(accel_state->ib, PA_CL_CLIP_CNTL,                     CLIP_DISABLE_bit);
164
165    accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
166	accel_state->xv_vs_offset;
167
168    accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
169	accel_state->xv_ps_offset;
170
171    /* PS bool constant */
172    switch(pPriv->id) {
173    case FOURCC_YV12:
174    case FOURCC_I420:
175	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
176	break;
177    case FOURCC_UYVY:
178    case FOURCC_YUY2:
179    default:
180	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
181	break;
182    }
183
184    accel_state->vs_size = 512;
185    accel_state->ps_size = 512;
186
187    /* Shader */
188
189    /* flush SQ cache */
190    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
191			accel_state->vs_size, accel_state->vs_mc_addr);
192
193    vs_conf.shader_addr         = accel_state->vs_mc_addr;
194    vs_conf.num_gprs            = 2;
195    vs_conf.stack_size          = 0;
196    vs_setup                    (pScrn, accel_state->ib, &vs_conf);
197
198    /* flush SQ cache */
199    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
200			accel_state->ps_size, accel_state->ps_mc_addr);
201
202    ps_conf.shader_addr         = accel_state->ps_mc_addr;
203    ps_conf.num_gprs            = 3;
204    ps_conf.stack_size          = 1;
205    ps_conf.uncached_first_inst = 1;
206    ps_conf.clamp_consts        = 0;
207    ps_conf.export_mode         = 2;
208    ps_setup                    (pScrn, accel_state->ib, &ps_conf);
209
210    /* PS alu constants */
211    set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
212		   sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
213
214    /* Texture */
215    switch(pPriv->id) {
216    case FOURCC_YV12:
217    case FOURCC_I420:
218	accel_state->src_mc_addr[0] = pPriv->src_offset;
219	accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h;
220
221	/* flush texture cache */
222	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0],
223			    accel_state->src_mc_addr[0]);
224
225	/* Y texture */
226	tex_res.id                  = 0;
227	tex_res.w                   = pPriv->w;
228	tex_res.h                   = pPriv->h;
229	tex_res.pitch               = accel_state->src_pitch[0];
230	tex_res.depth               = 0;
231	tex_res.dim                 = SQ_TEX_DIM_2D;
232	tex_res.base                = accel_state->src_mc_addr[0];
233	tex_res.mip_base            = accel_state->src_mc_addr[0];
234
235	tex_res.format              = FMT_8;
236	tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
237	tex_res.dst_sel_y           = SQ_SEL_1;
238	tex_res.dst_sel_z           = SQ_SEL_1;
239	tex_res.dst_sel_w           = SQ_SEL_1;
240
241	tex_res.request_size        = 1;
242	tex_res.base_level          = 0;
243	tex_res.last_level          = 0;
244	tex_res.perf_modulation     = 0;
245	tex_res.interlaced          = 0;
246	set_tex_resource            (pScrn, accel_state->ib, &tex_res);
247
248	/* Y sampler */
249	tex_samp.id                 = 0;
250	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
251	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
252	tex_samp.clamp_z            = SQ_TEX_WRAP;
253
254	/* xxx: switch to bicubic */
255	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
256	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
257
258	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
259	tex_samp.mip_filter         = 0;			/* no mipmap */
260	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
261
262	/* U or V texture */
263	uv_offset = accel_state->src_pitch[0] * pPriv->h;
264	uv_offset = (uv_offset + 255) & ~255;
265
266	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
267			    accel_state->src_size[0] / 4,
268			    accel_state->src_mc_addr[0] + uv_offset);
269
270	tex_res.id                  = 1;
271	tex_res.format              = FMT_8;
272	tex_res.w                   = pPriv->w >> 1;
273	tex_res.h                   = pPriv->h >> 1;
274	tex_res.pitch               = accel_state->src_pitch[0] >> 1;
275	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
276	tex_res.dst_sel_y           = SQ_SEL_1;
277	tex_res.dst_sel_z           = SQ_SEL_1;
278	tex_res.dst_sel_w           = SQ_SEL_1;
279	tex_res.interlaced          = 0;
280
281	tex_res.base                = accel_state->src_mc_addr[0] + uv_offset;
282	tex_res.mip_base            = accel_state->src_mc_addr[0] + uv_offset;
283	set_tex_resource            (pScrn, accel_state->ib, &tex_res);
284
285	/* U or V sampler */
286	tex_samp.id                 = 1;
287	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
288
289	/* U or V texture */
290	uv_offset += ((accel_state->src_pitch[0] >> 1) * (pPriv->h >> 1));
291	uv_offset = (uv_offset + 255) & ~255;
292
293	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
294			    accel_state->src_size[0] / 4,
295			    accel_state->src_mc_addr[0] + uv_offset);
296
297	tex_res.id                  = 2;
298	tex_res.format              = FMT_8;
299	tex_res.w                   = pPriv->w >> 1;
300	tex_res.h                   = pPriv->h >> 1;
301	tex_res.pitch               = accel_state->src_pitch[0] >> 1;
302	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
303	tex_res.dst_sel_y           = SQ_SEL_1;
304	tex_res.dst_sel_z           = SQ_SEL_1;
305	tex_res.dst_sel_w           = SQ_SEL_1;
306	tex_res.interlaced          = 0;
307
308	tex_res.base                = accel_state->src_mc_addr[0] + uv_offset;
309	tex_res.mip_base            = accel_state->src_mc_addr[0] + uv_offset;
310	set_tex_resource            (pScrn, accel_state->ib, &tex_res);
311
312	/* UV sampler */
313	tex_samp.id                 = 2;
314	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
315	break;
316    case FOURCC_UYVY:
317    case FOURCC_YUY2:
318    default:
319	accel_state->src_mc_addr[0] = pPriv->src_offset;
320	accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h;
321
322	/* flush texture cache */
323	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0],
324			    accel_state->src_mc_addr[0]);
325
326	/* Y texture */
327	tex_res.id                  = 0;
328	tex_res.w                   = pPriv->w;
329	tex_res.h                   = pPriv->h;
330	tex_res.pitch               = accel_state->src_pitch[0] >> 1;
331	tex_res.depth               = 0;
332	tex_res.dim                 = SQ_TEX_DIM_2D;
333	tex_res.base                = accel_state->src_mc_addr[0];
334	tex_res.mip_base            = accel_state->src_mc_addr[0];
335
336	tex_res.format              = FMT_8_8;
337	if (pPriv->id == FOURCC_UYVY)
338	    tex_res.dst_sel_x           = SQ_SEL_Y; /* Y */
339	else
340	    tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
341	tex_res.dst_sel_y           = SQ_SEL_1;
342	tex_res.dst_sel_z           = SQ_SEL_1;
343	tex_res.dst_sel_w           = SQ_SEL_1;
344
345	tex_res.request_size        = 1;
346	tex_res.base_level          = 0;
347	tex_res.last_level          = 0;
348	tex_res.perf_modulation     = 0;
349	tex_res.interlaced          = 0;
350	set_tex_resource            (pScrn, accel_state->ib, &tex_res);
351
352	/* Y sampler */
353	tex_samp.id                 = 0;
354	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
355	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
356	tex_samp.clamp_z            = SQ_TEX_WRAP;
357
358	/* xxx: switch to bicubic */
359	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
360	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
361
362	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
363	tex_samp.mip_filter         = 0;			/* no mipmap */
364	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
365
366	/* UV texture */
367	tex_res.id                  = 1;
368	tex_res.format              = FMT_8_8_8_8;
369	tex_res.w                   = pPriv->w >> 1;
370	tex_res.h                   = pPriv->h;
371	tex_res.pitch               = accel_state->src_pitch[0] >> 2;
372	if (pPriv->id == FOURCC_UYVY) {
373	    tex_res.dst_sel_x           = SQ_SEL_X; /* V */
374	    tex_res.dst_sel_y           = SQ_SEL_Z; /* U */
375	} else {
376	    tex_res.dst_sel_x           = SQ_SEL_Y; /* V */
377	    tex_res.dst_sel_y           = SQ_SEL_W; /* U */
378	}
379	tex_res.dst_sel_z           = SQ_SEL_1;
380	tex_res.dst_sel_w           = SQ_SEL_1;
381	tex_res.interlaced          = 0;
382
383	tex_res.base                = accel_state->src_mc_addr[0];
384	tex_res.mip_base            = accel_state->src_mc_addr[0];
385	set_tex_resource            (pScrn, accel_state->ib, &tex_res);
386
387	/* UV sampler */
388	tex_samp.id                 = 1;
389	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
390	break;
391    }
392
393    /* Render setup */
394    EREG(accel_state->ib, CB_SHADER_MASK,                      (0x0f << OUTPUT0_ENABLE_shift));
395    EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));
396    EREG(accel_state->ib, CB_COLOR_CONTROL,                    (0xcc << ROP3_shift)); /* copy */
397
398    cb_conf.id = 0;
399
400    accel_state->dst_mc_addr = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
401
402    cb_conf.w = accel_state->dst_pitch;
403    cb_conf.h = pPixmap->drawable.height;
404    cb_conf.base = accel_state->dst_mc_addr;
405
406    switch (pPixmap->drawable.bitsPerPixel) {
407    case 16:
408	if (pPixmap->drawable.depth == 15) {
409	    cb_conf.format = COLOR_1_5_5_5;
410	    cb_conf.comp_swap = 1; /* ARGB */
411	} else {
412	    cb_conf.format = COLOR_5_6_5;
413	    cb_conf.comp_swap = 2; /* RGB */
414	}
415	break;
416    case 32:
417	cb_conf.format = COLOR_8_8_8_8;
418	cb_conf.comp_swap = 1; /* ARGB */
419	break;
420    default:
421	return;
422    }
423
424    cb_conf.source_format = 1;
425    cb_conf.blend_clamp = 1;
426    set_render_target(pScrn, accel_state->ib, &cb_conf);
427
428    EREG(accel_state->ib, PA_SU_SC_MODE_CNTL,                  (FACE_bit			|
429								(POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift)	|
430								(POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
431    EREG(accel_state->ib, DB_SHADER_CONTROL,                   ((1 << Z_ORDER_shift)		| /* EARLY_Z_THEN_LATE_Z */
432								DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
433
434    /* Interpolator setup */
435    /* export tex coords from VS */
436    EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
437    EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
438
439    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
440     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
441    EREG(accel_state->ib, SPI_PS_IN_CONTROL_0,                 ((1 << NUM_INTERP_shift)));
442    EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
443    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)	|
444								(0x03 << DEFAULT_VAL_shift)	|
445								SEL_CENTROID_bit));
446    EREG(accel_state->ib, SPI_INTERP_CONTROL_0,                0);
447
448
449    if (pPriv->vsync) {
450	xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn,
451						    pPriv->drw_x,
452						    pPriv->drw_x + pPriv->dst_w,
453						    pPriv->drw_y,
454						    pPriv->drw_y + pPriv->dst_h);
455	if (crtc) {
456	    RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private;
457
458	    cp_wait_vline_sync(pScrn, accel_state->ib, pPixmap,
459			       radeon_crtc->crtc_id,
460			       pPriv->drw_y - crtc->y,
461			       (pPriv->drw_y - crtc->y) + pPriv->dst_h);
462	}
463    }
464
465    accel_state->vb_index = 0;
466
467    while (nBox--) {
468	int srcX, srcY, srcw, srch;
469	int dstX, dstY, dstw, dsth;
470	float *vb;
471
472	if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) {
473	    R600DoneTexturedVideo(pScrn);
474	    accel_state->vb_index = 0;
475	    accel_state->ib = RADEONCPGetBuffer(pScrn);
476	}
477
478	vb = (pointer)((char*)accel_state->ib->address +
479		       (accel_state->ib->total / 2) +
480		       accel_state->vb_index * 16);
481
482	dstX = pBox->x1 + dstxoff;
483	dstY = pBox->y1 + dstyoff;
484	dstw = pBox->x2 - pBox->x1;
485	dsth = pBox->y2 - pBox->y1;
486
487	srcX = ((pBox->x1 - pPriv->drw_x) *
488		pPriv->src_w) / pPriv->dst_w;
489	srcY = ((pBox->y1 - pPriv->drw_y) *
490		pPriv->src_h) / pPriv->dst_h;
491
492	srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
493	srch = (pPriv->src_h * dsth) / pPriv->dst_h;
494
495	vb[0] = (float)dstX;
496	vb[1] = (float)dstY;
497	vb[2] = (float)srcX / pPriv->w;
498	vb[3] = (float)srcY / pPriv->h;
499
500	vb[4] = (float)dstX;
501	vb[5] = (float)(dstY + dsth);
502	vb[6] = (float)srcX / pPriv->w;
503	vb[7] = (float)(srcY + srch) / pPriv->h;
504
505	vb[8] = (float)(dstX + dstw);
506	vb[9] = (float)(dstY + dsth);
507	vb[10] = (float)(srcX + srcw) / pPriv->w;
508	vb[11] = (float)(srcY + srch) / pPriv->h;
509
510	accel_state->vb_index += 3;
511
512	pBox++;
513    }
514
515    R600DoneTexturedVideo(pScrn);
516
517    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
518}
519