/* r600_textured_videofuncs.c revision b7e1c893 */
1/* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 * 23 * Author: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif 30 31#include "xf86.h" 32 33#include "exa.h" 34 35#include "radeon.h" 36#include "radeon_reg.h" 37#include "r600_shader.h" 38#include "r600_reg.h" 39#include "r600_state.h" 40 41#include "radeon_video.h" 42 43#include <X11/extensions/Xv.h> 44#include "fourcc.h" 45 46#include "damage.h" 47 48static void 49R600DoneTexturedVideo(ScrnInfoPtr pScrn) 50{ 51 RADEONInfoPtr info = RADEONPTR(pScrn); 52 struct radeon_accel_state *accel_state = info->accel_state; 53 draw_config_t draw_conf; 54 vtx_resource_t vtx_res; 55 56 CLEAR (draw_conf); 57 CLEAR (vtx_res); 58 59 if (accel_state->vb_index == 0) { 60 R600IBDiscard(pScrn, accel_state->ib); 61 return; 62 } 63 64 accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + 65 (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); 66 accel_state->vb_size = accel_state->vb_index * 16; 67 68 /* flush vertex cache */ 69 if ((info->ChipFamily == CHIP_FAMILY_RV610) || 70 (info->ChipFamily == CHIP_FAMILY_RV620) || 71 (info->ChipFamily == CHIP_FAMILY_RS780) || 72 (info->ChipFamily == CHIP_FAMILY_RV710)) 73 cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, 74 accel_state->vb_size, accel_state->vb_mc_addr); 75 else 76 cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, 77 accel_state->vb_size, accel_state->vb_mc_addr); 78 79 /* Vertex buffer setup */ 80 vtx_res.id = SQ_VTX_RESOURCE_vs; 81 vtx_res.vtx_size_dw = 16 / 4; 82 vtx_res.vtx_num_entries = accel_state->vb_size / 4; 83 vtx_res.mem_req_size = 1; 84 vtx_res.vb_addr = accel_state->vb_mc_addr; 85 set_vtx_resource (pScrn, accel_state->ib, &vtx_res); 86 87 draw_conf.prim_type = DI_PT_RECTLIST; 88 draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; 89 draw_conf.num_instances = 1; 90 draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; 91 draw_conf.index_type = DI_INDEX_SIZE_16_BIT; 92 93 
draw_auto(pScrn, accel_state->ib, &draw_conf); 94 95 wait_3d_idle_clean(pScrn, accel_state->ib); 96 97 /* sync destination surface */ 98 cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), 99 accel_state->dst_size, accel_state->dst_mc_addr); 100 101 R600CPFlushIndirect(pScrn, accel_state->ib); 102} 103 104void 105R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) 106{ 107 RADEONInfoPtr info = RADEONPTR(pScrn); 108 struct radeon_accel_state *accel_state = info->accel_state; 109 PixmapPtr pPixmap = pPriv->pPixmap; 110 BoxPtr pBox = REGION_RECTS(&pPriv->clip); 111 int nBox = REGION_NUM_RECTS(&pPriv->clip); 112 int dstxoff, dstyoff; 113 cb_config_t cb_conf; 114 tex_resource_t tex_res; 115 tex_sampler_t tex_samp; 116 shader_config_t vs_conf, ps_conf; 117 int uv_offset; 118 static float ps_alu_consts[] = { 119 1.0, 0.0, 1.4020, 0, /* r - c[0] */ 120 1.0, -0.34414, -0.71414, 0, /* g - c[1] */ 121 1.0, 1.7720, 0.0, 0, /* b - c[2] */ 122 /* Constants for undoing Y'CbCr scaling 123 * - Y' is scaled from 16:235 124 * - Cb/Cr are scaled from 16:240 125 * Unscaled value N' = N * N_mul + N_shift (N' in range [-0.5, 0.5]) 126 * Vector is [Y_mul, Y_shfit, C_mul, C_shift] 127 */ 128 256.0/219.0, -16.0/219.0, 256.0/224.0, -128.0/224.0, 129 }; 130 131 CLEAR (cb_conf); 132 CLEAR (tex_res); 133 CLEAR (tex_samp); 134 CLEAR (vs_conf); 135 CLEAR (ps_conf); 136 137 accel_state->dst_pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8); 138 accel_state->src_pitch[0] = pPriv->src_pitch; 139 140 /* bad pitch */ 141 if (accel_state->src_pitch[0] & 7) 142 return; 143 if (accel_state->dst_pitch & 7) 144 return; 145 146#ifdef COMPOSITE 147 dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; 148 dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; 149#else 150 dstxoff = 0; 151 dstyoff = 0; 152#endif 153 154 accel_state->ib = RADEONCPGetBuffer(pScrn); 155 156 /* Init */ 157 start_3d(pScrn, accel_state->ib); 158 159 
set_default_state(pScrn, accel_state->ib); 160 161 /* Scissor / viewport */ 162 EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); 163 EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); 164 165 accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + 166 accel_state->xv_vs_offset; 167 168 accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + 169 accel_state->xv_ps_offset; 170 171 /* PS bool constant */ 172 switch(pPriv->id) { 173 case FOURCC_YV12: 174 case FOURCC_I420: 175 set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0)); 176 break; 177 case FOURCC_UYVY: 178 case FOURCC_YUY2: 179 default: 180 set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0)); 181 break; 182 } 183 184 accel_state->vs_size = 512; 185 accel_state->ps_size = 512; 186 187 /* Shader */ 188 189 /* flush SQ cache */ 190 cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, 191 accel_state->vs_size, accel_state->vs_mc_addr); 192 193 vs_conf.shader_addr = accel_state->vs_mc_addr; 194 vs_conf.num_gprs = 2; 195 vs_conf.stack_size = 0; 196 vs_setup (pScrn, accel_state->ib, &vs_conf); 197 198 /* flush SQ cache */ 199 cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, 200 accel_state->ps_size, accel_state->ps_mc_addr); 201 202 ps_conf.shader_addr = accel_state->ps_mc_addr; 203 ps_conf.num_gprs = 3; 204 ps_conf.stack_size = 1; 205 ps_conf.uncached_first_inst = 1; 206 ps_conf.clamp_consts = 0; 207 ps_conf.export_mode = 2; 208 ps_setup (pScrn, accel_state->ib, &ps_conf); 209 210 /* PS alu constants */ 211 set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps, 212 sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); 213 214 /* Texture */ 215 switch(pPriv->id) { 216 case FOURCC_YV12: 217 case FOURCC_I420: 218 accel_state->src_mc_addr[0] = pPriv->src_offset; 219 accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; 220 221 /* flush texture cache */ 
222 cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0], 223 accel_state->src_mc_addr[0]); 224 225 /* Y texture */ 226 tex_res.id = 0; 227 tex_res.w = pPriv->w; 228 tex_res.h = pPriv->h; 229 tex_res.pitch = accel_state->src_pitch[0]; 230 tex_res.depth = 0; 231 tex_res.dim = SQ_TEX_DIM_2D; 232 tex_res.base = accel_state->src_mc_addr[0]; 233 tex_res.mip_base = accel_state->src_mc_addr[0]; 234 235 tex_res.format = FMT_8; 236 tex_res.dst_sel_x = SQ_SEL_X; /* Y */ 237 tex_res.dst_sel_y = SQ_SEL_1; 238 tex_res.dst_sel_z = SQ_SEL_1; 239 tex_res.dst_sel_w = SQ_SEL_1; 240 241 tex_res.request_size = 1; 242 tex_res.base_level = 0; 243 tex_res.last_level = 0; 244 tex_res.perf_modulation = 0; 245 tex_res.interlaced = 0; 246 set_tex_resource (pScrn, accel_state->ib, &tex_res); 247 248 /* Y sampler */ 249 tex_samp.id = 0; 250 tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; 251 tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; 252 tex_samp.clamp_z = SQ_TEX_WRAP; 253 254 /* xxx: switch to bicubic */ 255 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; 256 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; 257 258 tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 259 tex_samp.mip_filter = 0; /* no mipmap */ 260 set_tex_sampler (pScrn, accel_state->ib, &tex_samp); 261 262 /* U or V texture */ 263 uv_offset = accel_state->src_pitch[0] * pPriv->h; 264 uv_offset = (uv_offset + 255) & ~255; 265 266 cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, 267 accel_state->src_size[0] / 4, 268 accel_state->src_mc_addr[0] + uv_offset); 269 270 tex_res.id = 1; 271 tex_res.format = FMT_8; 272 tex_res.w = pPriv->w >> 1; 273 tex_res.h = pPriv->h >> 1; 274 tex_res.pitch = accel_state->src_pitch[0] >> 1; 275 tex_res.dst_sel_x = SQ_SEL_X; /* V or U */ 276 tex_res.dst_sel_y = SQ_SEL_1; 277 tex_res.dst_sel_z = SQ_SEL_1; 278 tex_res.dst_sel_w = SQ_SEL_1; 279 tex_res.interlaced = 0; 280 281 tex_res.base = accel_state->src_mc_addr[0] + uv_offset; 282 tex_res.mip_base = 
accel_state->src_mc_addr[0] + uv_offset; 283 set_tex_resource (pScrn, accel_state->ib, &tex_res); 284 285 /* U or V sampler */ 286 tex_samp.id = 1; 287 set_tex_sampler (pScrn, accel_state->ib, &tex_samp); 288 289 /* U or V texture */ 290 uv_offset += ((accel_state->src_pitch[0] >> 1) * (pPriv->h >> 1)); 291 uv_offset = (uv_offset + 255) & ~255; 292 293 cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, 294 accel_state->src_size[0] / 4, 295 accel_state->src_mc_addr[0] + uv_offset); 296 297 tex_res.id = 2; 298 tex_res.format = FMT_8; 299 tex_res.w = pPriv->w >> 1; 300 tex_res.h = pPriv->h >> 1; 301 tex_res.pitch = accel_state->src_pitch[0] >> 1; 302 tex_res.dst_sel_x = SQ_SEL_X; /* V or U */ 303 tex_res.dst_sel_y = SQ_SEL_1; 304 tex_res.dst_sel_z = SQ_SEL_1; 305 tex_res.dst_sel_w = SQ_SEL_1; 306 tex_res.interlaced = 0; 307 308 tex_res.base = accel_state->src_mc_addr[0] + uv_offset; 309 tex_res.mip_base = accel_state->src_mc_addr[0] + uv_offset; 310 set_tex_resource (pScrn, accel_state->ib, &tex_res); 311 312 /* UV sampler */ 313 tex_samp.id = 2; 314 set_tex_sampler (pScrn, accel_state->ib, &tex_samp); 315 break; 316 case FOURCC_UYVY: 317 case FOURCC_YUY2: 318 default: 319 accel_state->src_mc_addr[0] = pPriv->src_offset; 320 accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; 321 322 /* flush texture cache */ 323 cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0], 324 accel_state->src_mc_addr[0]); 325 326 /* Y texture */ 327 tex_res.id = 0; 328 tex_res.w = pPriv->w; 329 tex_res.h = pPriv->h; 330 tex_res.pitch = accel_state->src_pitch[0] >> 1; 331 tex_res.depth = 0; 332 tex_res.dim = SQ_TEX_DIM_2D; 333 tex_res.base = accel_state->src_mc_addr[0]; 334 tex_res.mip_base = accel_state->src_mc_addr[0]; 335 336 tex_res.format = FMT_8_8; 337 if (pPriv->id == FOURCC_UYVY) 338 tex_res.dst_sel_x = SQ_SEL_Y; /* Y */ 339 else 340 tex_res.dst_sel_x = SQ_SEL_X; /* Y */ 341 tex_res.dst_sel_y = SQ_SEL_1; 342 
tex_res.dst_sel_z = SQ_SEL_1; 343 tex_res.dst_sel_w = SQ_SEL_1; 344 345 tex_res.request_size = 1; 346 tex_res.base_level = 0; 347 tex_res.last_level = 0; 348 tex_res.perf_modulation = 0; 349 tex_res.interlaced = 0; 350 set_tex_resource (pScrn, accel_state->ib, &tex_res); 351 352 /* Y sampler */ 353 tex_samp.id = 0; 354 tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; 355 tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; 356 tex_samp.clamp_z = SQ_TEX_WRAP; 357 358 /* xxx: switch to bicubic */ 359 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; 360 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; 361 362 tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 363 tex_samp.mip_filter = 0; /* no mipmap */ 364 set_tex_sampler (pScrn, accel_state->ib, &tex_samp); 365 366 /* UV texture */ 367 tex_res.id = 1; 368 tex_res.format = FMT_8_8_8_8; 369 tex_res.w = pPriv->w >> 1; 370 tex_res.h = pPriv->h; 371 tex_res.pitch = accel_state->src_pitch[0] >> 2; 372 if (pPriv->id == FOURCC_UYVY) { 373 tex_res.dst_sel_x = SQ_SEL_X; /* V */ 374 tex_res.dst_sel_y = SQ_SEL_Z; /* U */ 375 } else { 376 tex_res.dst_sel_x = SQ_SEL_Y; /* V */ 377 tex_res.dst_sel_y = SQ_SEL_W; /* U */ 378 } 379 tex_res.dst_sel_z = SQ_SEL_1; 380 tex_res.dst_sel_w = SQ_SEL_1; 381 tex_res.interlaced = 0; 382 383 tex_res.base = accel_state->src_mc_addr[0]; 384 tex_res.mip_base = accel_state->src_mc_addr[0]; 385 set_tex_resource (pScrn, accel_state->ib, &tex_res); 386 387 /* UV sampler */ 388 tex_samp.id = 1; 389 set_tex_sampler (pScrn, accel_state->ib, &tex_samp); 390 break; 391 } 392 393 /* Render setup */ 394 EREG(accel_state->ib, CB_SHADER_MASK, (0x0f << OUTPUT0_ENABLE_shift)); 395 EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); 396 EREG(accel_state->ib, CB_COLOR_CONTROL, (0xcc << ROP3_shift)); /* copy */ 397 398 cb_conf.id = 0; 399 400 accel_state->dst_mc_addr = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; 401 402 cb_conf.w = accel_state->dst_pitch; 403 cb_conf.h = pPixmap->drawable.height; 
404 cb_conf.base = accel_state->dst_mc_addr; 405 406 switch (pPixmap->drawable.bitsPerPixel) { 407 case 16: 408 if (pPixmap->drawable.depth == 15) { 409 cb_conf.format = COLOR_1_5_5_5; 410 cb_conf.comp_swap = 1; /* ARGB */ 411 } else { 412 cb_conf.format = COLOR_5_6_5; 413 cb_conf.comp_swap = 2; /* RGB */ 414 } 415 break; 416 case 32: 417 cb_conf.format = COLOR_8_8_8_8; 418 cb_conf.comp_swap = 1; /* ARGB */ 419 break; 420 default: 421 return; 422 } 423 424 cb_conf.source_format = 1; 425 cb_conf.blend_clamp = 1; 426 set_render_target(pScrn, accel_state->ib, &cb_conf); 427 428 EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | 429 (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | 430 (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); 431 EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ 432 DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ 433 434 /* Interpolator setup */ 435 /* export tex coords from VS */ 436 EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); 437 EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); 438 439 /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x 440 * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ 441 EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, ((1 << NUM_INTERP_shift))); 442 EREG(accel_state->ib, SPI_PS_IN_CONTROL_1, 0); 443 EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | 444 (0x03 << DEFAULT_VAL_shift) | 445 SEL_CENTROID_bit)); 446 EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); 447 448 449 if (pPriv->vsync) { 450 xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn, 451 pPriv->drw_x, 452 pPriv->drw_x + pPriv->dst_w, 453 pPriv->drw_y, 454 pPriv->drw_y + pPriv->dst_h); 455 if (crtc) { 456 RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; 457 458 cp_wait_vline_sync(pScrn, accel_state->ib, pPixmap, 459 radeon_crtc->crtc_id, 460 pPriv->drw_y - crtc->y, 461 
(pPriv->drw_y - crtc->y) + pPriv->dst_h); 462 } 463 } 464 465 accel_state->vb_index = 0; 466 467 while (nBox--) { 468 int srcX, srcY, srcw, srch; 469 int dstX, dstY, dstw, dsth; 470 float *vb; 471 472 if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) { 473 R600DoneTexturedVideo(pScrn); 474 accel_state->vb_index = 0; 475 accel_state->ib = RADEONCPGetBuffer(pScrn); 476 } 477 478 vb = (pointer)((char*)accel_state->ib->address + 479 (accel_state->ib->total / 2) + 480 accel_state->vb_index * 16); 481 482 dstX = pBox->x1 + dstxoff; 483 dstY = pBox->y1 + dstyoff; 484 dstw = pBox->x2 - pBox->x1; 485 dsth = pBox->y2 - pBox->y1; 486 487 srcX = ((pBox->x1 - pPriv->drw_x) * 488 pPriv->src_w) / pPriv->dst_w; 489 srcY = ((pBox->y1 - pPriv->drw_y) * 490 pPriv->src_h) / pPriv->dst_h; 491 492 srcw = (pPriv->src_w * dstw) / pPriv->dst_w; 493 srch = (pPriv->src_h * dsth) / pPriv->dst_h; 494 495 vb[0] = (float)dstX; 496 vb[1] = (float)dstY; 497 vb[2] = (float)srcX / pPriv->w; 498 vb[3] = (float)srcY / pPriv->h; 499 500 vb[4] = (float)dstX; 501 vb[5] = (float)(dstY + dsth); 502 vb[6] = (float)srcX / pPriv->w; 503 vb[7] = (float)(srcY + srch) / pPriv->h; 504 505 vb[8] = (float)(dstX + dstw); 506 vb[9] = (float)(dstY + dsth); 507 vb[10] = (float)(srcX + srcw) / pPriv->w; 508 vb[11] = (float)(srcY + srch) / pPriv->h; 509 510 accel_state->vb_index += 3; 511 512 pBox++; 513 } 514 515 R600DoneTexturedVideo(pScrn); 516 517 DamageDamageRegion(pPriv->pDraw, &pPriv->clip); 518} 519