r600_textured_videofuncs.c revision c73da4db
1/* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Author: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif 30 31#include "xf86.h" 32 33#include "exa.h" 34 35#include "radeon.h" 36#include "radeon_reg.h" 37#include "r600_shader.h" 38#include "r600_reg.h" 39#include "r600_state.h" 40 41#include "radeon_video.h" 42 43#include <X11/extensions/Xv.h> 44#include "fourcc.h" 45 46#include "damage.h" 47 48#include "radeon_exa_shared.h" 49#include "radeon_vbo.h" 50 51/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces 52 note the difference to the parameters used in overlay are due 53 to 10bit vs. float calcs */ 54static REF_TRANSFORM trans[2] = 55{ 56 {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */ 57 {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0} /* BT.709 */ 58}; 59 60void 61R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 62{ 63 RADEONInfoPtr info = RADEONPTR(pScrn); 64 struct radeon_accel_state *accel_state = info->accel_state; 65 PixmapPtr pPixmap = pPriv->pPixmap; 66 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 67 int nBox = REGION_NUM_RECTS(&pPriv->clip); 68 int dstxoff, dstyoff; 69 struct r600_accel_object src_obj, dst_obj; 70 cb_config_t cb_conf; 71 tex_resource_t tex_res; 72 tex_sampler_t tex_samp; 73 shader_config_t vs_conf, ps_conf; 74 /* 75 * y' = y - .0625 76 * u' = u - .5 77 * v' = v - .5; 78 * 79 * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' 80 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' 81 * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' 82 * 83 * DP3 might look like the straightforward solution 84 * but we'd need to move the texture yuv values in 85 * the same reg for this to work. Therefore use MADs. 86 * Brightness just adds to the off constant. 87 * Contrast is multiplication of luminance. 88 * Saturation and hue change the u and v coeffs. 89 * Default values (before adjustments - depend on colorspace): 90 * yco = 1.1643 91 * uco = 0, -0.39173, 2.017 92 * vco = 1.5958, -0.8129, 0 93 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], 94 * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], 95 * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], 96 * 97 * temp = MAD(yco, yuv.yyyy, off) 98 * temp = MAD(uco, yuv.uuuu, temp) 99 * result = MAD(vco, yuv.vvvv, temp) 100 */ 101 /* TODO: calc consts in the shader */ 102 const float Loff = -0.0627; 103 const float Coff = -0.502; 104 float uvcosf, uvsinf; 105 float yco; 106 float uco[3], vco[3], off[3]; 107 float bright, cont, gamma; 108 int ref = pPriv->transform_index; 109 Bool needgamma = FALSE; 110 float ps_alu_consts[12]; 111 float vs_alu_consts[4]; 112 113 cont = RTFContrast(pPriv->contrast); 114 bright = RTFBrightness(pPriv->brightness); 115 gamma = (float)pPriv->gamma / 1000.0; 116 uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); 117 uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); 118 /* overlay video also does pre-gamma contrast/sat adjust, should we? */ 119 120 yco = trans[ref].RefLuma * cont; 121 uco[0] = -trans[ref].RefRCr * uvsinf; 122 uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; 123 uco[2] = trans[ref].RefBCb * uvcosf; 124 vco[0] = trans[ref].RefRCr * uvcosf; 125 vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; 126 vco[2] = trans[ref].RefBCb * uvsinf; 127 off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; 128 off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; 129 off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; 130 131 // XXX 132 gamma = 1.0; 133 134 if (gamma != 1.0) { 135 needgamma = TRUE; 136 /* note: gamma correction is out = in ^ gamma; 137 gpu can only do LG2/EX2 therefore we transform into 138 in ^ gamma = 2 ^ (log2(in) * gamma). 139 Lots of scalar ops, unfortunately (better solution?) - 140 without gamma that's 3 inst, with gamma it's 10... 141 could use different gamma factors per channel, 142 if that's of any use. */ 143 } 144 145 /* setup the ps consts */ 146 ps_alu_consts[0] = off[0]; 147 ps_alu_consts[1] = off[1]; 148 ps_alu_consts[2] = off[2]; 149 ps_alu_consts[3] = yco; 150 151 ps_alu_consts[4] = uco[0]; 152 ps_alu_consts[5] = uco[1]; 153 ps_alu_consts[6] = uco[2]; 154 ps_alu_consts[7] = gamma; 155 156 ps_alu_consts[8] = vco[0]; 157 ps_alu_consts[9] = vco[1]; 158 ps_alu_consts[10] = vco[2]; 159 ps_alu_consts[11] = 0.0; 160 161 CLEAR (cb_conf); 162 CLEAR (tex_res); 163 CLEAR (tex_samp); 164 CLEAR (vs_conf); 165 CLEAR (ps_conf); 166 167#if defined(XF86DRM_MODE) 168 if (info->cs) { 169 dst_obj.offset = 0; 170 src_obj.offset = 0; 171 dst_obj.bo = radeon_get_pixmap_bo(pPixmap); 172 dst_obj.tiling_flags = radeon_get_pixmap_tiling(pPixmap); 173 dst_obj.surface = radeon_get_pixmap_surface(pPixmap); 174 } else 175#endif 176 { 177 dst_obj.offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; 178 src_obj.offset = pPriv->src_offset + info->fbLocation + pScrn->fbOffset; 179 dst_obj.bo = src_obj.bo = NULL; 180 } 181 dst_obj.pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8); 182 183 src_obj.pitch = pPriv->src_pitch; 184 src_obj.width = pPriv->w; 185 src_obj.height = pPriv->h; 186 src_obj.bpp = 16; 187 src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; 188 src_obj.bo = pPriv->src_bo[pPriv->currentBuffer]; 189 src_obj.tiling_flags = 0; 190#ifdef XF86DRM_MODE 191 src_obj.surface = NULL; 192#endif 193 194 dst_obj.width = pPixmap->drawable.width; 195 dst_obj.height = pPixmap->drawable.height; 196 dst_obj.bpp = pPixmap->drawable.bitsPerPixel; 197 dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 198 199 if (!R600SetAccelState(pScrn, 200 &src_obj, 201 NULL, 202 &dst_obj, 203 accel_state->xv_vs_offset, accel_state->xv_ps_offset, 204 3, 0xffffffff)) 205 return; 206 207#ifdef COMPOSITE 208 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 209 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 210#else 211 dstxoff = 0; 212 dstyoff = 0; 213#endif 214 215 radeon_vbo_check(pScrn, &accel_state->vbo, 16); 216 radeon_cp_start(pScrn); 217 218 r600_set_default_state(pScrn, accel_state->ib); 219 220 r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 221 r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 222 r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 223 224 /* PS bool constant */ 225 switch(pPriv->id) { 226 case FOURCC_YV12: 227 case FOURCC_I420: 228 r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0)); 229 break; 230 case FOURCC_UYVY: 231 case FOURCC_YUY2: 232 default: 233 r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0)); 234 break; 235 } 236 237 /* Shader */ 238 vs_conf.shader_addr = accel_state->vs_mc_addr; 239 vs_conf.shader_size = accel_state->vs_size; 240 vs_conf.num_gprs = 2; 241 vs_conf.stack_size = 0; 242 vs_conf.bo = accel_state->shaders_bo; 243 r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM); 244 245 ps_conf.shader_addr = accel_state->ps_mc_addr; 246 ps_conf.shader_size = accel_state->ps_size; 247 ps_conf.num_gprs = 3; 248 ps_conf.stack_size = 1; 249 ps_conf.uncached_first_inst = 1; 250 ps_conf.clamp_consts = 0; 251 ps_conf.export_mode = 2; 252 ps_conf.bo = accel_state->shaders_bo; 253 r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM); 254 255 /* PS alu constants */ 256 r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps, 257 sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); 258 259 /* Texture */ 260 switch(pPriv->id) { 261 case FOURCC_YV12: 262 case FOURCC_I420: 263 accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h; 264 265 /* Y texture */ 266 tex_res.id = 0; 267 tex_res.w = accel_state->src_obj[0].width; 268 tex_res.h = accel_state->src_obj[0].height; 269 tex_res.pitch = accel_state->src_obj[0].pitch; 270 tex_res.depth = 0; 271 tex_res.dim = SQ_TEX_DIM_2D; 272 tex_res.base = accel_state->src_obj[0].offset; 273 tex_res.mip_base = accel_state->src_obj[0].offset; 274 tex_res.size = accel_state->src_size[0]; 275 tex_res.bo = accel_state->src_obj[0].bo; 276 tex_res.mip_bo = accel_state->src_obj[0].bo; 277#ifdef XF86DRM_MODE 278 if (info->cs) 279 tex_res.surface = NULL; 280#endif 281 282 tex_res.format = FMT_8; 283 tex_res.dst_sel_x = SQ_SEL_X; /* Y */ 284 tex_res.dst_sel_y = SQ_SEL_1; 285 tex_res.dst_sel_z = SQ_SEL_1; 286 tex_res.dst_sel_w = SQ_SEL_1; 287 288 tex_res.request_size = 1; 289 tex_res.base_level = 0; 290 tex_res.last_level = 0; 291 tex_res.perf_modulation = 0; 292 tex_res.interlaced = 0; 293 if (accel_state->src_obj[0].tiling_flags == 0) 294 tex_res.tile_mode = 1; 295 r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); 296 297 /* Y sampler */ 298 tex_samp.id = 0; 299 tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; 300 tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; 301 tex_samp.clamp_z = SQ_TEX_WRAP; 302 303 /* xxx: switch to bicubic */ 304 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; 305 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; 306 307 tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 308 tex_samp.mip_filter = 0; /* no mipmap */ 309 r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp); 310 311 /* U or V texture */ 312 tex_res.id = 1; 313 tex_res.format = FMT_8; 314 tex_res.w = accel_state->src_obj[0].width >> 1; 315 tex_res.h = accel_state->src_obj[0].height >> 1; 316 tex_res.pitch = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, pPriv->hw_align); 317 tex_res.dst_sel_x = SQ_SEL_X; /* V or U */ 318 tex_res.dst_sel_y = SQ_SEL_1; 319 tex_res.dst_sel_z = SQ_SEL_1; 320 tex_res.dst_sel_w = SQ_SEL_1; 321 tex_res.interlaced = 0; 322 323 tex_res.base = accel_state->src_obj[0].offset + pPriv->planev_offset; 324 tex_res.mip_base = accel_state->src_obj[0].offset + pPriv->planev_offset; 325 tex_res.size = tex_res.pitch * (pPriv->h >> 1); 326 if (accel_state->src_obj[0].tiling_flags == 0) 327 tex_res.tile_mode = 1; 328 r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); 329 330 /* U or V sampler */ 331 tex_samp.id = 1; 332 r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp); 333 334 /* U or V texture */ 335 tex_res.id = 2; 336 tex_res.format = FMT_8; 337 tex_res.w = accel_state->src_obj[0].width >> 1; 338 tex_res.h = accel_state->src_obj[0].height >> 1; 339 tex_res.pitch = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, pPriv->hw_align); 340 tex_res.dst_sel_x = SQ_SEL_X; /* V or U */ 341 tex_res.dst_sel_y = SQ_SEL_1; 342 tex_res.dst_sel_z = SQ_SEL_1; 343 tex_res.dst_sel_w = SQ_SEL_1; 344 tex_res.interlaced = 0; 345 346 tex_res.base = accel_state->src_obj[0].offset + pPriv->planeu_offset; 347 tex_res.mip_base = accel_state->src_obj[0].offset + pPriv->planeu_offset; 348 tex_res.size = tex_res.pitch * (pPriv->h >> 1); 349 if (accel_state->src_obj[0].tiling_flags == 0) 350 tex_res.tile_mode = 1; 351 r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); 352 353 /* UV sampler */ 354 tex_samp.id = 2; 355 r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp); 356 break; 357 case FOURCC_UYVY: 358 case FOURCC_YUY2: 359 default: 360 accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h; 361 362 /* YUV texture */ 363 tex_res.id = 0; 364 tex_res.w = accel_state->src_obj[0].width; 365 tex_res.h = accel_state->src_obj[0].height; 366 tex_res.pitch = accel_state->src_obj[0].pitch >> 1; 367 tex_res.depth = 0; 368 tex_res.dim = SQ_TEX_DIM_2D; 369 tex_res.base = accel_state->src_obj[0].offset; 370 tex_res.mip_base = accel_state->src_obj[0].offset; 371 tex_res.size = accel_state->src_size[0]; 372 tex_res.bo = accel_state->src_obj[0].bo; 373 tex_res.mip_bo = accel_state->src_obj[0].bo; 374 375 if (pPriv->id == FOURCC_UYVY) 376 tex_res.format = FMT_GB_GR; 377 else 378 tex_res.format = FMT_BG_RG; 379 tex_res.dst_sel_x = SQ_SEL_Y; 380 tex_res.dst_sel_y = SQ_SEL_X; 381 tex_res.dst_sel_z = SQ_SEL_Z; 382 tex_res.dst_sel_w = SQ_SEL_1; 383 384 tex_res.request_size = 1; 385 tex_res.base_level = 0; 386 tex_res.last_level = 0; 387 tex_res.perf_modulation = 0; 388 tex_res.interlaced = 0; 389 if (accel_state->src_obj[0].tiling_flags == 0) 390 tex_res.tile_mode = 1; 391 r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); 392 393 /* YUV sampler */ 394 tex_samp.id = 0; 395 tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; 396 tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; 397 tex_samp.clamp_z = SQ_TEX_WRAP; 398 399 /* xxx: switch to bicubic */ 400 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; 401 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; 402 403 tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 404 tex_samp.mip_filter = 0; /* no mipmap */ 405 r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp); 406 407 break; 408 } 409 410 cb_conf.id = 0; 411 cb_conf.w = accel_state->dst_obj.pitch; 412 cb_conf.h = accel_state->dst_obj.height; 413 cb_conf.base = accel_state->dst_obj.offset; 414 cb_conf.bo = accel_state->dst_obj.bo; 415#ifdef XF86DRM_MODE 416 if (info->cs) 417 cb_conf.surface = accel_state->dst_obj.surface; 418#endif 419 420 switch (accel_state->dst_obj.bpp) { 421 case 16: 422 if (pPixmap->drawable.depth == 15) { 423 cb_conf.format = COLOR_1_5_5_5; 424 cb_conf.comp_swap = 1; /* ARGB */ 425 } else { 426 cb_conf.format = COLOR_5_6_5; 427 cb_conf.comp_swap = 2; /* RGB */ 428 } 429#if X_BYTE_ORDER == X_BIG_ENDIAN 430 cb_conf.endian = ENDIAN_8IN16; 431#endif 432 break; 433 case 32: 434 cb_conf.format = COLOR_8_8_8_8; 435 cb_conf.comp_swap = 1; /* ARGB */ 436#if X_BYTE_ORDER == X_BIG_ENDIAN 437 cb_conf.endian = ENDIAN_8IN32; 438#endif 439 break; 440 default: 441 return; 442 } 443 444 cb_conf.source_format = 1; 445 cb_conf.blend_clamp = 1; 446 cb_conf.pmask = 0xf; 447 cb_conf.rop = 3; 448 if (accel_state->dst_obj.tiling_flags == 0) 449 cb_conf.array_mode = 1; 450 r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain); 451 452 r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1); 453 454 vs_alu_consts[0] = 1.0 / pPriv->w; 455 vs_alu_consts[1] = 1.0 / pPriv->h; 456 vs_alu_consts[2] = 0.0; 457 vs_alu_consts[3] = 0.0; 458 459 /* VS alu constants */ 460 r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs, 461 sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts); 462 463 if (pPriv->vsync) { 464 xf86CrtcPtr crtc; 465 if (pPriv->desired_crtc) 466 crtc = pPriv->desired_crtc; 467 else 468 crtc = radeon_pick_best_crtc(pScrn, 469 pPriv->drw_x, 470 pPriv->drw_x + pPriv->dst_w, 471 pPriv->drw_y, 472 pPriv->drw_y + pPriv->dst_h); 473 if (crtc) 474 r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPixmap, 475 crtc, 476 pPriv->drw_y - crtc->y, 477 (pPriv->drw_y - crtc->y) + pPriv->dst_h); 478 } 479 480 while (nBox--) { 481 float srcX, srcY, srcw, srch; 482 int dstX, dstY, dstw, dsth; 483 float *vb; 484 485 486 dstX = pBox->x1 + dstxoff; 487 dstY = pBox->y1 + dstyoff; 488 dstw = pBox->x2 - pBox->x1; 489 dsth = pBox->y2 - pBox->y1; 490 491 srcX = pPriv->src_x; 492 srcX += ((pBox->x1 - pPriv->drw_x) * 493 pPriv->src_w) / (float)pPriv->dst_w; 494 srcY = pPriv->src_y; 495 srcY += ((pBox->y1 - pPriv->drw_y) * 496 pPriv->src_h) / (float)pPriv->dst_h; 497 498 srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w; 499 srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h; 500 501 vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16); 502 503 vb[0] = (float)dstX; 504 vb[1] = (float)dstY; 505 vb[2] = (float)srcX; 506 vb[3] = (float)srcY; 507 508 vb[4] = (float)dstX; 509 vb[5] = (float)(dstY + dsth); 510 vb[6] = (float)srcX; 511 vb[7] = (float)(srcY + srch); 512 513 vb[8] = (float)(dstX + dstw); 514 vb[9] = (float)(dstY + dsth); 515 vb[10] = (float)(srcX + srcw); 516 vb[11] = (float)(srcY + srch); 517 518 radeon_vbo_commit(pScrn, &accel_state->vbo); 519 520 pBox++; 521 } 522 523 r600_finish_op(pScrn, 16); 524 525 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 526} 527