/* evergreen_textured_videofuncs.c revision 921a55d8 */
/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Author: Alex Deucher <alexander.deucher@amd.com>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#ifdef XF86DRM_MODE

#include "xf86.h"

#include "exa.h"

#include "radeon.h"
#include "radeon_reg.h"
#include "evergreen_shader.h"
#include "evergreen_reg.h"
#include "evergreen_state.h"

#include "radeon_video.h"

#include <X11/extensions/Xv.h>
#include "fourcc.h"

#include "damage.h"

#include "radeon_exa_shared.h"
#include "radeon_vbo.h"

/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces
   note the difference to the parameters used in overlay are due
   to 10bit vs. float calcs */
static REF_TRANSFORM trans[2] =
{
    {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */
    {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0}  /* BT.709 */
};

/*
 * Draw the current Xv frame of a textured-video port onto its destination
 * pixmap.
 *
 * pScrn - screen whose acceleration state (info->accel_state) is used.
 * pPriv - port private; supplies the destination pixmap and drawable, the
 *         clip region, the source buffer and its geometry, the FOURCC id,
 *         the colour controls and the vsync settings.
 *
 * The function programs shaders, source texture(s), the render target and
 * the shader constants, then queues three vertices per clip box and
 * finishes the operation.  The YUV->RGB coefficients are computed on the
 * CPU and handed to the pixel shader as ALU constants.
 *
 * Returns nothing.  It returns early, without drawing or reporting damage,
 * when EVERGREENSetAccelState() fails or when the destination is neither
 * 16 nor 32 bits per pixel.
 */
void
EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    PixmapPtr pPixmap = pPriv->pPixmap;
    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
    int nBox = REGION_NUM_RECTS(&pPriv->clip);
    int dstxoff, dstyoff;
    struct r600_accel_object src_obj, dst_obj;
    cb_config_t cb_conf;
    tex_resource_t tex_res;
    tex_sampler_t tex_samp;
    shader_config_t vs_conf, ps_conf;
    /*
     * y' = y - .0625
     * u' = u - .5
     * v' = v - .5;
     *
     * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
     * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
     * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
     *
     * DP3 might look like the straightforward solution
     * but we'd need to move the texture yuv values in
     * the same reg for this to work. Therefore use MADs.
     * Brightness just adds to the off constant.
     * Contrast is multiplication of luminance.
     * Saturation and hue change the u and v coeffs.
     * Default values (before adjustments - depend on colorspace):
     * yco = 1.1643
     * uco = 0, -0.39173, 2.017
     * vco = 1.5958, -0.8129, 0
     * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
     *       -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
     *       -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
     *
     * temp = MAD(yco, yuv.yyyy, off)
     * temp = MAD(uco, yuv.uuuu, temp)
     * result = MAD(vco, yuv.vvvv, temp)
     */
    /* TODO: calc consts in the shader */
    const float Loff = -0.0627;
    const float Coff = -0.502;
    float uvcosf, uvsinf;
    float yco;
    float uco[3], vco[3], off[3];
    float bright, cont, gamma;
    int ref = pPriv->transform_index;   /* index into trans[]: 0 = BT.601, 1 = BT.709 */
    float *ps_alu_consts;
    const_config_t ps_const_conf;
    float *vs_alu_consts;
    const_config_t vs_const_conf;

    cont = RTFContrast(pPriv->contrast);
    bright = RTFBrightness(pPriv->brightness);
    gamma = (float)pPriv->gamma / 1000.0;
    uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
    uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
    /* overlay video also does pre-gamma contrast/sat adjust, should we? */

    /* Fold contrast, saturation and hue into the per-channel coefficients. */
    yco = trans[ref].RefLuma * cont;
    uco[0] = -trans[ref].RefRCr * uvsinf;
    uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
    uco[2] = trans[ref].RefBCb * uvcosf;
    vco[0] = trans[ref].RefRCr * uvcosf;
    vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
    vco[2] = trans[ref].RefBCb * uvsinf;
    off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
    off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
    off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;

    /*
     * XXX: the port's gamma setting is overridden here; the value passed to
     * the pixel shader constants below is always 1.0.
     *
     * note: gamma correction is out = in ^ gamma;
     * gpu can only do LG2/EX2 therefore we transform into
     * in ^ gamma = 2 ^ (log2(in) * gamma).
     * Lots of scalar ops, unfortunately (better solution?) -
     * without gamma that's 3 inst, with gamma it's 10...
     * could use different gamma factors per channel,
     * if that's of any use.
     */
    gamma = 1.0;

    CLEAR (cb_conf);
    CLEAR (tex_res);
    CLEAR (tex_samp);
    CLEAR (vs_conf);
    CLEAR (ps_conf);
    CLEAR (vs_const_conf);
    CLEAR (ps_const_conf);

#if defined(XF86DRM_MODE)
    if (info->cs) {
        /* Command-submission path: buffers are referenced by bo, offsets are 0. */
        dst_obj.offset = 0;
        src_obj.offset = 0;
        dst_obj.bo = radeon_get_pixmap_bo(pPixmap);
    } else
#endif
    {
        dst_obj.offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
        src_obj.offset = pPriv->src_offset + info->fbLocation + pScrn->fbOffset;
        /* NOTE(review): src_obj.bo is unconditionally reassigned below. */
        dst_obj.bo = src_obj.bo = NULL;
    }
    /* exaGetPixmapPitch() is divided by bytes per pixel to get a pitch in pixels. */
    dst_obj.pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8);

    src_obj.pitch = pPriv->src_pitch;
    src_obj.width = pPriv->w;
    src_obj.height = pPriv->h;
    src_obj.bpp = 16;
    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
    src_obj.bo = pPriv->src_bo[pPriv->currentBuffer];

    dst_obj.width = pPixmap->drawable.width;
    dst_obj.height = pPixmap->drawable.height;
    dst_obj.bpp = pPixmap->drawable.bitsPerPixel;
    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;

    if (!EVERGREENSetAccelState(pScrn,
                                &src_obj,
                                NULL,
                                &dst_obj,
                                accel_state->xv_vs_offset, accel_state->xv_ps_offset,
                                3, 0xffffffff))
        return;

#ifdef COMPOSITE
    /* Translate clip-box coordinates into pixmap coordinates. */
    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
#else
    dstxoff = 0;
    dstyoff = 0;
#endif

    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
    /* 512 covers the two 256-byte constant blocks (PS and VS) taken below. */
    radeon_vbo_check(pScrn, &accel_state->cbuf, 512);
    radeon_cp_start(pScrn);

    evergreen_set_default_state(pScrn);

    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);

    /* PS bool constant */
    /* Bit 0 is set for the planar formats and cleared for the packed ones;
     * presumably the pixel shader branches on it - confirm against the shader. */
    switch(pPriv->id) {
    case FOURCC_YV12:
    case FOURCC_I420:
        evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
        break;
    case FOURCC_UYVY:
    case FOURCC_YUY2:
    default:
        evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
        break;
    }

    /* Shader */
    vs_conf.shader_addr = accel_state->vs_mc_addr;
    vs_conf.shader_size = accel_state->vs_size;
    vs_conf.num_gprs = 2;
    vs_conf.stack_size = 0;
    vs_conf.bo = accel_state->shaders_bo;
    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);

    ps_conf.shader_addr = accel_state->ps_mc_addr;
    ps_conf.shader_size = accel_state->ps_size;
    ps_conf.num_gprs = 3;
    ps_conf.stack_size = 1;
    ps_conf.clamp_consts = 0;
    ps_conf.export_mode = 2;
    ps_conf.bo = accel_state->shaders_bo;
    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);

    /* Texture */
    switch(pPriv->id) {
    case FOURCC_YV12:
    case FOURCC_I420:
        /* Planar: three FMT_8 textures (ids 0..2) for Y and the two chroma planes. */
        accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;

        /* Y texture */
        tex_res.id = 0;
        tex_res.w = accel_state->src_obj[0].width;
        tex_res.h = accel_state->src_obj[0].height;
        tex_res.pitch = accel_state->src_obj[0].pitch;
        tex_res.depth = 0;
        tex_res.dim = SQ_TEX_DIM_2D;
        tex_res.base = accel_state->src_obj[0].offset;
        tex_res.mip_base = accel_state->src_obj[0].offset;
        tex_res.size = accel_state->src_size[0];
        tex_res.bo = accel_state->src_obj[0].bo;
        tex_res.mip_bo = accel_state->src_obj[0].bo;

        tex_res.format = FMT_8;
        tex_res.dst_sel_x = SQ_SEL_X; /* Y */
        tex_res.dst_sel_y = SQ_SEL_1;
        tex_res.dst_sel_z = SQ_SEL_1;
        tex_res.dst_sel_w = SQ_SEL_1;

        tex_res.base_level = 0;
        tex_res.last_level = 0;
        tex_res.perf_modulation = 0;
        tex_res.interlaced = 0;
        evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);

        /* Y sampler */
        tex_samp.id = 0;
        tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
        tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
        tex_samp.clamp_z = SQ_TEX_WRAP;

        /* xxx: switch to bicubic */
        tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR;
        tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR;

        tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
        tex_samp.mip_filter = 0; /* no mipmap */
        evergreen_set_tex_sampler(pScrn, &tex_samp);

        /* U or V texture */
        /* Half-size plane located at planev_offset inside the same buffer. */
        tex_res.id = 1;
        tex_res.format = FMT_8;
        tex_res.w = accel_state->src_obj[0].width >> 1;
        tex_res.h = accel_state->src_obj[0].height >> 1;
        tex_res.pitch = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, 256);
        tex_res.dst_sel_x = SQ_SEL_X; /* V or U */
        tex_res.dst_sel_y = SQ_SEL_1;
        tex_res.dst_sel_z = SQ_SEL_1;
        tex_res.dst_sel_w = SQ_SEL_1;
        tex_res.interlaced = 0;

        tex_res.base = accel_state->src_obj[0].offset + pPriv->planev_offset;
        tex_res.mip_base = accel_state->src_obj[0].offset + pPriv->planev_offset;
        tex_res.size = tex_res.pitch * (pPriv->h >> 1);
        evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);

        /* U or V sampler */
        tex_samp.id = 1;
        evergreen_set_tex_sampler(pScrn, &tex_samp);

        /* U or V texture */
        /* Half-size plane located at planeu_offset inside the same buffer. */
        tex_res.id = 2;
        tex_res.format = FMT_8;
        tex_res.w = accel_state->src_obj[0].width >> 1;
        tex_res.h = accel_state->src_obj[0].height >> 1;
        tex_res.pitch = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, 256);
        tex_res.dst_sel_x = SQ_SEL_X; /* V or U */
        tex_res.dst_sel_y = SQ_SEL_1;
        tex_res.dst_sel_z = SQ_SEL_1;
        tex_res.dst_sel_w = SQ_SEL_1;
        tex_res.interlaced = 0;

        tex_res.base = accel_state->src_obj[0].offset + pPriv->planeu_offset;
        tex_res.mip_base = accel_state->src_obj[0].offset + pPriv->planeu_offset;
        tex_res.size = tex_res.pitch * (pPriv->h >> 1);
        evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);

        /* UV sampler */
        tex_samp.id = 2;
        evergreen_set_tex_sampler(pScrn, &tex_samp);
        break;
    case FOURCC_UYVY:
    case FOURCC_YUY2:
    default:
        /* Packed: the same buffer is bound twice, as FMT_8_8 for luma (id 0)
         * and as FMT_8_8_8_8 at half width for chroma (id 1). */
        accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;

        /* Y texture */
        tex_res.id = 0;
        tex_res.w = accel_state->src_obj[0].width;
        tex_res.h = accel_state->src_obj[0].height;
        tex_res.pitch = accel_state->src_obj[0].pitch >> 1;
        tex_res.depth = 0;
        tex_res.dim = SQ_TEX_DIM_2D;
        tex_res.base = accel_state->src_obj[0].offset;
        tex_res.mip_base = accel_state->src_obj[0].offset;
        tex_res.size = accel_state->src_size[0];
        tex_res.bo = accel_state->src_obj[0].bo;
        tex_res.mip_bo = accel_state->src_obj[0].bo;

        tex_res.format = FMT_8_8;
        if (pPriv->id == FOURCC_UYVY)
            tex_res.dst_sel_x = SQ_SEL_Y; /* Y */
        else
            tex_res.dst_sel_x = SQ_SEL_X; /* Y */
        tex_res.dst_sel_y = SQ_SEL_1;
        tex_res.dst_sel_z = SQ_SEL_1;
        tex_res.dst_sel_w = SQ_SEL_1;

        tex_res.base_level = 0;
        tex_res.last_level = 0;
        tex_res.perf_modulation = 0;
        tex_res.interlaced = 0;
        evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);

        /* Y sampler */
        tex_samp.id = 0;
        tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
        tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
        tex_samp.clamp_z = SQ_TEX_WRAP;

        tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR;
        tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR;

        tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
        tex_samp.mip_filter = 0; /* no mipmap */
        evergreen_set_tex_sampler(pScrn, &tex_samp);

        /* UV texture */
        tex_res.id = 1;
        tex_res.format = FMT_8_8_8_8;
        tex_res.w = accel_state->src_obj[0].width >> 1;
        tex_res.h = accel_state->src_obj[0].height;
        tex_res.pitch = accel_state->src_obj[0].pitch >> 2;
        if (pPriv->id == FOURCC_UYVY) {
            tex_res.dst_sel_x = SQ_SEL_X; /* V */
            tex_res.dst_sel_y = SQ_SEL_Z; /* U */
        } else {
            tex_res.dst_sel_x = SQ_SEL_Y; /* V */
            tex_res.dst_sel_y = SQ_SEL_W; /* U */
        }
        tex_res.dst_sel_z = SQ_SEL_1;
        tex_res.dst_sel_w = SQ_SEL_1;
        tex_res.interlaced = 0;

        tex_res.base = accel_state->src_obj[0].offset;
        tex_res.mip_base = accel_state->src_obj[0].offset;
        tex_res.size = accel_state->src_size[0];
        evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);

        /* UV sampler */
        tex_samp.id = 1;
        evergreen_set_tex_sampler(pScrn, &tex_samp);
        break;
    }

    /* Render target: the destination pixmap. */
    cb_conf.id = 0;
    cb_conf.w = accel_state->dst_obj.pitch;
    cb_conf.h = accel_state->dst_obj.height;
    cb_conf.base = accel_state->dst_obj.offset;
    cb_conf.bo = accel_state->dst_obj.bo;

    switch (accel_state->dst_obj.bpp) {
    case 16:
        if (pPixmap->drawable.depth == 15) {
            cb_conf.format = COLOR_1_5_5_5;
            cb_conf.comp_swap = 1; /* ARGB */
        } else {
            cb_conf.format = COLOR_5_6_5;
            cb_conf.comp_swap = 2; /* RGB */
        }
        break;
    case 32:
        cb_conf.format = COLOR_8_8_8_8;
        cb_conf.comp_swap = 1; /* ARGB */
        break;
    default:
        /* Unsupported destination depth: give up without drawing.
         * NOTE(review): this exits after radeon_cp_start() without calling
         * evergreen_finish_op() - confirm that is acceptable. */
        return;
    }

    cb_conf.source_format = EXPORT_4C_16BPC;
    cb_conf.blend_clamp = 1;
    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);

    /* Render setup */
    BEGIN_BATCH(23);
    EREG(CB_TARGET_MASK, (0x0f << TARGET0_ENABLE_shift));
    EREG(CB_COLOR_CONTROL, ((0xcc << ROP3_shift) |
                            (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
    EREG(CB_BLEND0_CONTROL, 0);

    /* Interpolator setup */
    /* export tex coords from VS */
    EREG(SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
    EREG(SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
    EREG(SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) |
                                         (0x03 << DEFAULT_VAL_shift)));

    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
    PACK0(SPI_PS_IN_CONTROL_0, 3);
    E32(((1 << NUM_INTERP_shift) |
         LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
    E32(0); // SPI_PS_IN_CONTROL_1
    E32(0); // SPI_INTERP_CONTROL_0
    END_BATCH();

    /* PS alu constants */
    /* Three 4-float groups: (off[0..2], yco), (uco[0..2], gamma), (vco[0..2], 0). */
    ps_const_conf.size_bytes = 256;
    ps_const_conf.type = SHADER_TYPE_PS;
    ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
    ps_const_conf.bo = accel_state->cbuf.vb_bo;
    ps_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_offset;

    ps_alu_consts[0] = off[0];
    ps_alu_consts[1] = off[1];
    ps_alu_consts[2] = off[2];
    ps_alu_consts[3] = yco;

    ps_alu_consts[4] = uco[0];
    ps_alu_consts[5] = uco[1];
    ps_alu_consts[6] = uco[2];
    ps_alu_consts[7] = gamma;

    ps_alu_consts[8] = vco[0];
    ps_alu_consts[9] = vco[1];
    ps_alu_consts[10] = vco[2];
    ps_alu_consts[11] = 0.0;

    radeon_vbo_commit(pScrn, &accel_state->cbuf);
    evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);

    /* VS alu constants */
    /* Reciprocal source size; presumably the vertex shader uses it to
     * normalise the texel coordinates written below - confirm against shader. */
    vs_const_conf.size_bytes = 256;
    vs_const_conf.type = SHADER_TYPE_VS;
    vs_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
    vs_const_conf.bo = accel_state->cbuf.vb_bo;
    vs_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_offset;

    vs_alu_consts[0] = 1.0 / pPriv->w;
    vs_alu_consts[1] = 1.0 / pPriv->h;
    vs_alu_consts[2] = 0.0;
    vs_alu_consts[3] = 0.0;

    radeon_vbo_commit(pScrn, &accel_state->cbuf);
    evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);

    if (pPriv->vsync) {
        xf86CrtcPtr crtc;
        /* Prefer the CRTC requested on the port, else pick one from the
         * drawable's rectangle. */
        if (pPriv->desired_crtc)
            crtc = pPriv->desired_crtc;
        else
            crtc = radeon_pick_best_crtc(pScrn,
                                         pPriv->drw_x,
                                         pPriv->drw_x + pPriv->dst_w,
                                         pPriv->drw_y,
                                         pPriv->drw_y + pPriv->dst_h);
        if (crtc)
            evergreen_cp_wait_vline_sync(pScrn, pPixmap,
                                         crtc,
                                         pPriv->drw_y - crtc->y,
                                         (pPriv->drw_y - crtc->y) + pPriv->dst_h);
    }

    /* One primitive per clip box. */
    while (nBox--) {
        int srcX, srcY, srcw, srch;
        int dstX, dstY, dstw, dsth;
        float *vb;


        dstX = pBox->x1 + dstxoff;
        dstY = pBox->y1 + dstyoff;
        dstw = pBox->x2 - pBox->x1;
        dsth = pBox->y2 - pBox->y1;

        /* Map the box back into source coordinates using the src/dst scale
         * (integer arithmetic). */
        srcX = pPriv->src_x;
        srcX += ((pBox->x1 - pPriv->drw_x) *
                 pPriv->src_w) / pPriv->dst_w;
        srcY = pPriv->src_y;
        srcY += ((pBox->y1 - pPriv->drw_y) *
                 pPriv->src_h) / pPriv->dst_h;

        srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
        srch = (pPriv->src_h * dsth) / pPriv->dst_h;

        vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);

        /* Three vertices of (dst x, dst y, src x, src y): top-left,
         * bottom-left, bottom-right.  Presumably the hardware primitive
         * derives the fourth corner - TODO confirm. */
        vb[0] = (float)dstX;
        vb[1] = (float)dstY;
        vb[2] = (float)srcX;
        vb[3] = (float)srcY;

        vb[4] = (float)dstX;
        vb[5] = (float)(dstY + dsth);
        vb[6] = (float)srcX;
        vb[7] = (float)(srcY + srch);

        vb[8] = (float)(dstX + dstw);
        vb[9] = (float)(dstY + dsth);
        vb[10] = (float)(srcX + srcw);
        vb[11] = (float)(srcY + srch);

        radeon_vbo_commit(pScrn, &accel_state->vbo);

        pBox++;
    }

    evergreen_finish_op(pScrn, 16);

    /* Report the whole clip region as damaged on the target drawable. */
    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
}

#endif