1/* 2 * Copyright 2010 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Author: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif 30 31#include "xf86.h" 32 33#include "exa.h" 34 35#include "radeon.h" 36#include "radeon_reg.h" 37#include "evergreen_shader.h" 38#include "evergreen_reg.h" 39#include "evergreen_state.h" 40 41#include "radeon_video.h" 42 43#include <X11/extensions/Xv.h> 44#include "fourcc.h" 45 46#include "damage.h" 47 48#include "radeon_exa_shared.h" 49#include "radeon_vbo.h" 50 51/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces 52 note the difference to the parameters used in overlay are due 53 to 10bit vs. float calcs */ 54static REF_TRANSFORM trans[2] = 55{ 56 {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */ 57 {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0} /* BT.709 */ 58}; 59 60void 61EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 62{ 63 RADEONInfoPtr info = RADEONPTR(pScrn); 64 struct radeon_accel_state *accel_state = info->accel_state; 65 PixmapPtr pPixmap = pPriv->pPixmap; 66 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 67 int nBox = REGION_NUM_RECTS(&pPriv->clip); 68 int dstxoff, dstyoff; 69 struct r600_accel_object src_obj, dst_obj; 70 cb_config_t cb_conf; 71 tex_resource_t tex_res; 72 tex_sampler_t tex_samp; 73 shader_config_t vs_conf, ps_conf; 74 /* 75 * y' = y - .0625 76 * u' = u - .5 77 * v' = v - .5; 78 * 79 * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 80 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 81 * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 82 * 83 * DP3 might look like the straightforward solution 84 * but we'd need to move the texture yuv values in 85 * the same reg for this to work. Therefore use MADs. 86 * Brightness just adds to the off constant. 87 * Contrast is multiplication of luminance. 88 * Saturation and hue change the u and v coeffs. 89 * Default values (before adjustments - depend on colorspace): 90 * yco = 1.1643 91 * uco = 0, -0.39173, 2.017 92 * vco = 1.5958, -0.8129, 0 93 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 94 * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 95 * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 96 * 97 * temp = MAD(yco, yuv.yyyy, off) 98 * temp = MAD(uco, yuv.uuuu, temp) 99 * result = MAD(vco, yuv.vvvv, temp) 100 */ 101 /* TODO: calc consts in the shader */ 102 const float Loff = -0.0627; 103 const float Coff = -0.502; 104 float uvcosf, uvsinf; 105 float yco; 106 float uco[3], vco[3], off[3]; 107 float bright, cont, gamma; 108 int ref = pPriv->transform_index; 109 float *ps_alu_consts; 110 const_config_t ps_const_conf; 111 float *vs_alu_consts; 112 const_config_t vs_const_conf; 113 114 cont = RTFContrast(pPriv->contrast); 115 bright = RTFBrightness(pPriv->brightness); 116 gamma = (float)pPriv->gamma / 1000.0; 117 uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); 118 uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); 119 /* overlay video also does pre-gamma contrast/sat adjust, should we? */ 120 121 yco = trans[ref].RefLuma * cont; 122 uco[0] = -trans[ref].RefRCr * uvsinf; 123 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 124 uco[2] = trans[ref].RefBCb * uvcosf; 125 vco[0] = trans[ref].RefRCr * uvcosf; 126 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 127 vco[2] = trans[ref].RefBCb * uvsinf; 128 off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; 129 off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; 130 off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; 131 132 // XXX 133 gamma = 1.0; 134 135 CLEAR (cb_conf); 136 CLEAR (tex_res); 137 CLEAR (tex_samp); 138 CLEAR (vs_conf); 139 CLEAR (ps_conf); 140 CLEAR (vs_const_conf); 141 CLEAR (ps_const_conf); 142 143 dst_obj.bo = radeon_get_pixmap_bo(pPixmap)->bo.radeon; 144 dst_obj.tiling_flags = radeon_get_pixmap_tiling(pPixmap); 145 dst_obj.surface = radeon_get_pixmap_surface(pPixmap); 146 147 dst_obj.pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8); 148 149 src_obj.pitch = pPriv->src_pitch; 150 src_obj.width = pPriv->w; 151 src_obj.height = pPriv->h; 152 src_obj.bpp = 16; 153 src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; 154 src_obj.bo = pPriv->src_bo[pPriv->currentBuffer]; 155 src_obj.tiling_flags = 0; 156 src_obj.surface = NULL; 157 158 dst_obj.width = pPixmap->drawable.width; 159 dst_obj.height = pPixmap->drawable.height; 160 dst_obj.bpp = pPixmap->drawable.bitsPerPixel; 161 dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 162 163 if (!R600SetAccelState(pScrn, 164 &src_obj, 165 NULL, 166 &dst_obj, 167 accel_state->xv_vs_offset, accel_state->xv_ps_offset, 168 3, 0xffffffff)) 169 return; 170 171#ifdef COMPOSITE 172 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 173 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 174#else 175 dstxoff = 0; 176 dstyoff = 0; 177#endif 178 179 radeon_vbo_check(pScrn, &accel_state->vbo, 16); 180 radeon_vbo_check(pScrn, &accel_state->cbuf, 512); 181 radeon_cp_start(pScrn); 182 183 evergreen_set_default_state(pScrn); 184 185 evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 186 evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 187 evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 188 189 /* PS bool constant */ 190 switch(pPriv->id) { 191 case FOURCC_YV12: 192 case FOURCC_I420: 193 evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0)); 194 break; 195 case FOURCC_UYVY: 196 case FOURCC_YUY2: 197 default: 198 evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0)); 199 break; 200 } 201 202 /* Shader */ 203 vs_conf.shader_addr = accel_state->vs_mc_addr; 204 vs_conf.shader_size = accel_state->vs_size; 205 vs_conf.num_gprs = 2; 206 vs_conf.stack_size = 0; 207 vs_conf.bo = accel_state->shaders_bo; 208 evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM); 209 210 ps_conf.shader_addr = accel_state->ps_mc_addr; 211 ps_conf.shader_size = accel_state->ps_size; 212 ps_conf.num_gprs = 3; 213 ps_conf.stack_size = 1; 214 ps_conf.clamp_consts = 0; 215 ps_conf.export_mode = 2; 216 ps_conf.bo = accel_state->shaders_bo; 217 evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM); 218 219 /* Texture */ 220 switch(pPriv->id) { 221 case FOURCC_YV12: 222 case FOURCC_I420: 223 accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h; 224 225 /* Y texture */ 226 tex_res.id = 0; 227 tex_res.w = accel_state->src_obj[0].width; 228 tex_res.h = accel_state->src_obj[0].height; 229 tex_res.pitch = accel_state->src_obj[0].pitch; 230 tex_res.depth = 0; 231 tex_res.dim = SQ_TEX_DIM_2D; 232 tex_res.base = 0; 233 tex_res.mip_base = 0; 234 tex_res.size = accel_state->src_size[0]; 235 tex_res.bo = accel_state->src_obj[0].bo; 236 tex_res.mip_bo = accel_state->src_obj[0].bo; 237 tex_res.surface = NULL; 238 239 tex_res.format = FMT_8; 240 tex_res.dst_sel_x = SQ_SEL_X; /* Y */ 241 tex_res.dst_sel_y = SQ_SEL_1; 242 tex_res.dst_sel_z = SQ_SEL_1; 243 tex_res.dst_sel_w = SQ_SEL_1; 244 245 tex_res.base_level = 0; 246 tex_res.last_level = 0; 247 tex_res.perf_modulation = 0; 248 tex_res.interlaced = 0; 249 if ((accel_state->src_obj[0].tiling_flags & RADEON_TILING_MASK) == 250 RADEON_TILING_LINEAR) 251 tex_res.array_mode = 1; 252 evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain); 253 254 /* Y sampler */ 255 tex_samp.id = 0; 256 tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; 257 tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; 258 tex_samp.clamp_z = SQ_TEX_WRAP; 259 260 /* xxx: switch to bicubic */ 261 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; 262 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; 263 264 tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 265 tex_samp.mip_filter = 0; /* no mipmap */ 266 evergreen_set_tex_sampler(pScrn, &tex_samp); 267 268 /* U or V texture */ 269 tex_res.id = 1; 270 tex_res.format = FMT_8; 271 tex_res.w = accel_state->src_obj[0].width >> 1; 272 tex_res.h = accel_state->src_obj[0].height >> 1; 273 tex_res.pitch = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, pPriv->hw_align); 274 tex_res.dst_sel_x = SQ_SEL_X; /* V or U */ 275 tex_res.dst_sel_y = SQ_SEL_1; 276 tex_res.dst_sel_z = SQ_SEL_1; 277 tex_res.dst_sel_w = SQ_SEL_1; 278 tex_res.interlaced = 0; 279 280 tex_res.base = pPriv->planev_offset; 281 tex_res.mip_base = pPriv->planev_offset; 282 tex_res.size = tex_res.pitch * (pPriv->h >> 1); 283 if ((accel_state->src_obj[0].tiling_flags & RADEON_TILING_MASK) == 284 RADEON_TILING_LINEAR) 285 tex_res.array_mode = 1; 286 evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain); 287 288 /* U or V sampler */ 289 tex_samp.id = 1; 290 evergreen_set_tex_sampler(pScrn, &tex_samp); 291 292 /* U or V texture */ 293 tex_res.id = 2; 294 tex_res.format = FMT_8; 295 tex_res.w = accel_state->src_obj[0].width >> 1; 296 tex_res.h = accel_state->src_obj[0].height >> 1; 297 tex_res.pitch = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, pPriv->hw_align); 298 tex_res.dst_sel_x = SQ_SEL_X; /* V or U */ 299 tex_res.dst_sel_y = SQ_SEL_1; 300 tex_res.dst_sel_z = SQ_SEL_1; 301 tex_res.dst_sel_w = SQ_SEL_1; 302 tex_res.interlaced = 0; 303 304 tex_res.base = pPriv->planeu_offset; 305 tex_res.mip_base = pPriv->planeu_offset; 306 tex_res.size = tex_res.pitch * (pPriv->h >> 1); 307 if ((accel_state->src_obj[0].tiling_flags & RADEON_TILING_MASK) == 308 RADEON_TILING_LINEAR) 309 tex_res.array_mode = 1; 310 evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain); 311 312 /* UV sampler */ 313 tex_samp.id = 2; 314 evergreen_set_tex_sampler(pScrn, &tex_samp); 315 break; 316 case FOURCC_UYVY: 317 case FOURCC_YUY2: 318 default: 319 accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h; 320 321 /* YUV texture */ 322 tex_res.id = 0; 323 tex_res.w = accel_state->src_obj[0].width; 324 tex_res.h = accel_state->src_obj[0].height; 325 tex_res.pitch = accel_state->src_obj[0].pitch >> 1; 326 tex_res.depth = 0; 327 tex_res.dim = SQ_TEX_DIM_2D; 328 tex_res.base = 0; 329 tex_res.mip_base = 0; 330 tex_res.size = accel_state->src_size[0]; 331 tex_res.bo = accel_state->src_obj[0].bo; 332 tex_res.mip_bo = accel_state->src_obj[0].bo; 333 tex_res.surface = NULL; 334 335 if (pPriv->id == FOURCC_UYVY) 336 tex_res.format = FMT_GB_GR; 337 else 338 tex_res.format = FMT_BG_RG; 339 tex_res.dst_sel_x = SQ_SEL_Y; 340 tex_res.dst_sel_y = SQ_SEL_X; 341 tex_res.dst_sel_z = SQ_SEL_Z; 342 tex_res.dst_sel_w = SQ_SEL_1; 343 344 tex_res.base_level = 0; 345 tex_res.last_level = 0; 346 tex_res.perf_modulation = 0; 347 tex_res.interlaced = 0; 348 if ((accel_state->src_obj[0].tiling_flags & RADEON_TILING_MASK) == 349 RADEON_TILING_LINEAR) 350 tex_res.array_mode = 1; 351 evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain); 352 353 /* YUV sampler */ 354 tex_samp.id = 0; 355 tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; 356 tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; 357 tex_samp.clamp_z = SQ_TEX_WRAP; 358 359 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; 360 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; 361 362 tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 363 tex_samp.mip_filter = 0; /* no mipmap */ 364 evergreen_set_tex_sampler(pScrn, &tex_samp); 365 366 break; 367 } 368 369 cb_conf.id = 0; 370 cb_conf.w = accel_state->dst_obj.pitch; 371 cb_conf.h = accel_state->dst_obj.height; 372 cb_conf.base = 0; 373 cb_conf.bo = accel_state->dst_obj.bo; 374 cb_conf.surface = accel_state->dst_obj.surface; 375 376 switch (accel_state->dst_obj.bpp) { 377 case 16: 378 if (pPixmap->drawable.depth == 15) { 379 cb_conf.format = COLOR_1_5_5_5; 380 cb_conf.comp_swap = 1; /* ARGB */ 381 } else { 382 cb_conf.format = COLOR_5_6_5; 383 cb_conf.comp_swap = 2; /* RGB */ 384 } 385#if X_BYTE_ORDER == X_BIG_ENDIAN 386 cb_conf.endian = ENDIAN_8IN16; 387#endif 388 break; 389 case 32: 390 cb_conf.format = COLOR_8_8_8_8; 391 cb_conf.comp_swap = 1; /* ARGB */ 392#if X_BYTE_ORDER == X_BIG_ENDIAN 393 cb_conf.endian = ENDIAN_8IN32; 394#endif 395 break; 396 default: 397 return; 398 } 399 400 cb_conf.source_format = EXPORT_4C_16BPC; 401 cb_conf.blend_clamp = 1; 402 cb_conf.pmask = 0xf; 403 cb_conf.rop = 3; 404 if ((accel_state->dst_obj.tiling_flags & RADEON_TILING_MASK) == 405 RADEON_TILING_LINEAR) { 406 cb_conf.array_mode = 1; 407 cb_conf.non_disp_tiling = 1; 408 } 409 evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain); 410 411 evergreen_set_spi(pScrn, (1 - 1), 1); 412 413 /* PS alu constants */ 414 ps_const_conf.size_bytes = 256; 415 ps_const_conf.type = SHADER_TYPE_PS; 416 ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256); 417 ps_const_conf.bo = accel_state->cbuf.vb_bo; 418 ps_const_conf.const_addr = accel_state->cbuf.vb_offset; 419 ps_const_conf.cpu_ptr = (uint32_t *)(char *)ps_alu_consts; 420 421 ps_alu_consts[0] = off[0]; 422 ps_alu_consts[1] = off[1]; 423 ps_alu_consts[2] = off[2]; 424 ps_alu_consts[3] = yco; 425 426 ps_alu_consts[4] = uco[0]; 427 ps_alu_consts[5] = uco[1]; 428 ps_alu_consts[6] = uco[2]; 429 ps_alu_consts[7] = gamma; 430 431 ps_alu_consts[8] = vco[0]; 432 ps_alu_consts[9] = vco[1]; 433 ps_alu_consts[10] = vco[2]; 434 ps_alu_consts[11] = 0.0; 435 436 radeon_vbo_commit(pScrn, &accel_state->cbuf); 437 evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT); 438 439 /* VS alu constants */ 440 vs_const_conf.size_bytes = 256; 441 vs_const_conf.type = SHADER_TYPE_VS; 442 vs_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256); 443 vs_const_conf.bo = accel_state->cbuf.vb_bo; 444 vs_const_conf.const_addr = accel_state->cbuf.vb_offset; 445 vs_const_conf.cpu_ptr = (uint32_t *)(char *)vs_alu_consts; 446 447 vs_alu_consts[0] = 1.0 / pPriv->w; 448 vs_alu_consts[1] = 1.0 / pPriv->h; 449 vs_alu_consts[2] = 0.0; 450 vs_alu_consts[3] = 0.0; 451 452 radeon_vbo_commit(pScrn, &accel_state->cbuf); 453 evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT); 454 455 if (pPriv->vsync) { 456 xf86CrtcPtr crtc; 457 if (pPriv->desired_crtc) 458 crtc = pPriv->desired_crtc; 459 else 460 crtc = radeon_pick_best_crtc(pScrn, FALSE, 461 pPriv->drw_x, 462 pPriv->drw_x + pPriv->dst_w, 463 pPriv->drw_y, 464 pPriv->drw_y + pPriv->dst_h); 465 if (crtc) 466 evergreen_cp_wait_vline_sync(pScrn, pPixmap, 467 crtc, 468 pPriv->drw_y - crtc->y, 469 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 470 } 471 472 while (nBox--) { 473 float srcX, srcY, srcw, srch; 474 int dstX, dstY, dstw, dsth; 475 float *vb; 476 477 478 dstX = pBox->x1 + dstxoff; 479 dstY = pBox->y1 + dstyoff; 480 dstw = pBox->x2 - pBox->x1; 481 dsth = pBox->y2 - pBox->y1; 482 483 srcX = pPriv->src_x; 484 srcX += ((pBox->x1 - pPriv->drw_x) * 485 pPriv->src_w) / (float)pPriv->dst_w; 486 srcY = pPriv->src_y; 487 srcY += ((pBox->y1 - pPriv->drw_y) * 488 pPriv->src_h) / (float)pPriv->dst_h; 489 490 srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 491 srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 492 493 vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16); 494 495 vb[0] = (float)dstX; 496 vb[1] = (float)dstY; 497 vb[2] = (float)srcX; 498 vb[3] = (float)srcY; 499 500 vb[4] = (float)dstX; 501 vb[5] = (float)(dstY + dsth); 502 vb[6] = (float)srcX; 503 vb[7] = (float)(srcY + srch); 504 505 vb[8] = (float)(dstX + dstw); 506 vb[9] = (float)(dstY + dsth); 507 vb[10] = (float)(srcX + srcw); 508 vb[11] = (float)(srcY + srch); 509 510 radeon_vbo_commit(pScrn, &accel_state->vbo); 511 512 pBox++; 513 } 514 515 evergreen_finish_op(pScrn, 16); 516 517 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 518} 519