/* evergreen_textured_videofuncs.c revision 68105dcb */
/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Author: Alex Deucher <alexander.deucher@amd.com>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#ifdef XF86DRM_MODE

#include "xf86.h"

#include "exa.h"

#include "radeon.h"
#include "radeon_reg.h"
#include "evergreen_shader.h"
#include "evergreen_reg.h"
#include "evergreen_state.h"

#include "radeon_video.h"

#include <X11/extensions/Xv.h>
#include "fourcc.h"

#include "damage.h"

#include "radeon_exa_shared.h"
#include "radeon_vbo.h"

/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces
   note the difference to the parameters used in overlay are due
   to 10bit vs. float calcs */
static REF_TRANSFORM trans[2] =
{
    {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */
    {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0}  /* BT.709 */
};

/*
 * Draw the current video frame of an Xv port into its destination pixmap.
 *
 * pScrn: screen whose accel state (info->accel_state) is programmed.
 * pPriv: port private; supplies the destination pixmap (pPixmap), the
 *        clip region, the source buffer (src_bo[currentBuffer]), the
 *        source/destination geometry, the FOURCC id and the colour
 *        controls (brightness, contrast, saturation, hue, transform_index).
 *
 * The function computes the colour-conversion coefficients on the CPU,
 * sets up shaders, textures, samplers and the render target, uploads the
 * PS/VS ALU constants, then writes three vertices (dst x/y, src x/y) per
 * clip box into the vertex buffer before calling evergreen_finish_op()
 * and reporting damage on pPriv->pDraw.
 *
 * Returns nothing.  It returns early, without drawing, when
 * R600SetAccelState() fails or when the destination bpp is neither 16
 * nor 32.
 */
void
EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    PixmapPtr pPixmap = pPriv->pPixmap;
    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
    int nBox = REGION_NUM_RECTS(&pPriv->clip);
    int dstxoff, dstyoff;
    struct r600_accel_object src_obj, dst_obj;
    cb_config_t     cb_conf;
    tex_resource_t  tex_res;
    tex_sampler_t   tex_samp;
    shader_config_t vs_conf, ps_conf;
    /*
     * y' = y - .0625
     * u' = u - .5
     * v' = v - .5;
     *
     * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
     * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
     * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
     *
     * DP3 might look like the straightforward solution
     * but we'd need to move the texture yuv values in
     * the same reg for this to work. Therefore use MADs.
     * Brightness just adds to the off constant.
     * Contrast is multiplication of luminance.
     * Saturation and hue change the u and v coeffs.
     * Default values (before adjustments - depend on colorspace):
     * yco = 1.1643
     * uco = 0, -0.39173, 2.017
     * vco = 1.5958, -0.8129, 0
     * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
     *       -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
     *       -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
     *
     * temp = MAD(yco, yuv.yyyy, off)
     * temp = MAD(uco, yuv.uuuu, temp)
     * result = MAD(vco, yuv.vvvv, temp)
     */
    /* TODO: calc consts in the shader */
    const float Loff = -0.0627;
    const float Coff = -0.502;
    float uvcosf, uvsinf;
    float yco;
    float uco[3], vco[3], off[3];
    float bright, cont, gamma;
    int ref = pPriv->transform_index;
    Bool needgamma = FALSE;   /* NOTE(review): set below but never read in this function */
    float *ps_alu_consts;
    const_config_t ps_const_conf;
    float *vs_alu_consts;
    const_config_t vs_const_conf;

    /* Convert the port's colour controls into scale factors. */
    cont = RTFContrast(pPriv->contrast);
    bright = RTFBrightness(pPriv->brightness);
    gamma = (float)pPriv->gamma / 1000.0;
    uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
    uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
    /* overlay video also does pre-gamma contrast/sat adjust, should we? */

    /* Per-channel (r, g, b) coefficients for the MAD sequence described above. */
    yco = trans[ref].RefLuma * cont;
    uco[0] = -trans[ref].RefRCr * uvsinf;
    uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
    uco[2] = trans[ref].RefBCb * uvcosf;
    vco[0] = trans[ref].RefRCr * uvcosf;
    vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
    vco[2] = trans[ref].RefBCb * uvsinf;
    off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
    off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
    off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;

    /* XXX: the port's gamma value is overridden here, so the branch below is never taken */
    gamma = 1.0;

    if (gamma != 1.0) {
        needgamma = TRUE;
        /* note: gamma correction is out = in ^ gamma;
           gpu can only do LG2/EX2 therefore we transform into
           in ^ gamma = 2 ^ (log2(in) * gamma).
           Lots of scalar ops, unfortunately (better solution?) -
           without gamma that's 3 inst, with gamma it's 10...
           could use different gamma factors per channel,
           if that's of any use. */
    }

    CLEAR (cb_conf);
    CLEAR (tex_res);
    CLEAR (tex_samp);
    CLEAR (vs_conf);
    CLEAR (ps_conf);
    CLEAR (vs_const_conf);
    CLEAR (ps_const_conf);

    /* Describe the destination pixmap and the source video buffer. */
    dst_obj.offset = 0;
    src_obj.offset = 0;
    dst_obj.bo = radeon_get_pixmap_bo(pPixmap);
    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pPixmap);
    dst_obj.surface = radeon_get_pixmap_surface(pPixmap);

    /* pitch in pixels: byte pitch divided by bytes per pixel */
    dst_obj.pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8);

    src_obj.pitch = pPriv->src_pitch;
    src_obj.width = pPriv->w;
    src_obj.height = pPriv->h;
    src_obj.bpp = 16;
    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
    src_obj.bo = pPriv->src_bo[pPriv->currentBuffer];
    src_obj.tiling_flags = 0;
    src_obj.surface = NULL;

    dst_obj.width = pPixmap->drawable.width;
    dst_obj.height = pPixmap->drawable.height;
    dst_obj.bpp = pPixmap->drawable.bitsPerPixel;
    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;

    if (!R600SetAccelState(pScrn,
                           &src_obj,
                           NULL,
                           &dst_obj,
                           accel_state->xv_vs_offset, accel_state->xv_ps_offset,
                           3, 0xffffffff))
        return;

#ifdef COMPOSITE
    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
#else
    dstxoff = 0;
    dstyoff = 0;
#endif

    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
    radeon_vbo_check(pScrn, &accel_state->cbuf, 512);
    radeon_cp_start(pScrn);

    evergreen_set_default_state(pScrn);

    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);

    /* PS bool constant: bit 0 is set for the planar formats, clear otherwise */
    switch(pPriv->id) {
    case FOURCC_YV12:
    case FOURCC_I420:
        evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
        break;
    case FOURCC_UYVY:
    case FOURCC_YUY2:
    default:
        evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
        break;
    }

    /* Shader */
    vs_conf.shader_addr         = accel_state->vs_mc_addr;
    vs_conf.shader_size         = accel_state->vs_size;
    vs_conf.num_gprs            = 2;
    vs_conf.stack_size          = 0;
    vs_conf.bo                  = accel_state->shaders_bo;
    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);

    ps_conf.shader_addr         = accel_state->ps_mc_addr;
    ps_conf.shader_size         = accel_state->ps_size;
    ps_conf.num_gprs            = 3;
    ps_conf.stack_size          = 1;
    ps_conf.clamp_consts        = 0;
    ps_conf.export_mode         = 2;
    ps_conf.bo                  = accel_state->shaders_bo;
    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);

    /* Texture */
    switch(pPriv->id) {
    case FOURCC_YV12:
    case FOURCC_I420:
        /* Planar: three 8-bit textures (ids 0..2), each with its own sampler. */
        accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;

        /* Y texture */
        tex_res.id                  = 0;
        tex_res.w                   = accel_state->src_obj[0].width;
        tex_res.h                   = accel_state->src_obj[0].height;
        tex_res.pitch               = accel_state->src_obj[0].pitch;
        tex_res.depth               = 0;
        tex_res.dim                 = SQ_TEX_DIM_2D;
        tex_res.base                = accel_state->src_obj[0].offset;
        tex_res.mip_base            = accel_state->src_obj[0].offset;
        tex_res.size                = accel_state->src_size[0];
        tex_res.bo                  = accel_state->src_obj[0].bo;
        tex_res.mip_bo              = accel_state->src_obj[0].bo;
        tex_res.surface             = NULL;

        tex_res.format              = FMT_8;
        tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
        tex_res.dst_sel_y           = SQ_SEL_1;
        tex_res.dst_sel_z           = SQ_SEL_1;
        tex_res.dst_sel_w           = SQ_SEL_1;

        tex_res.base_level          = 0;
        tex_res.last_level          = 0;
        tex_res.perf_modulation     = 0;
        tex_res.interlaced          = 0;
        if (accel_state->src_obj[0].tiling_flags == 0)
            tex_res.array_mode      = 1;
        evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);

        /* Y sampler */
        tex_samp.id                 = 0;
        tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
        tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
        tex_samp.clamp_z            = SQ_TEX_WRAP;

        /* xxx: switch to bicubic */
        tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
        tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;

        tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
        tex_samp.mip_filter         = 0;        /* no mipmap */
        evergreen_set_tex_sampler(pScrn, &tex_samp);

        /* U or V texture (plane at pPriv->planev_offset), half width/height */
        tex_res.id                  = 1;
        tex_res.format              = FMT_8;
        tex_res.w                   = accel_state->src_obj[0].width >> 1;
        tex_res.h                   = accel_state->src_obj[0].height >> 1;
        tex_res.pitch               = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, pPriv->hw_align);
        tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
        tex_res.dst_sel_y           = SQ_SEL_1;
        tex_res.dst_sel_z           = SQ_SEL_1;
        tex_res.dst_sel_w           = SQ_SEL_1;
        tex_res.interlaced          = 0;

        tex_res.base                = accel_state->src_obj[0].offset + pPriv->planev_offset;
        tex_res.mip_base            = accel_state->src_obj[0].offset + pPriv->planev_offset;
        tex_res.size                = tex_res.pitch * (pPriv->h >> 1);
        if (accel_state->src_obj[0].tiling_flags == 0)
            tex_res.array_mode      = 1;
        evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);

        /* U or V sampler (same settings as the Y sampler, new id) */
        tex_samp.id                 = 1;
        evergreen_set_tex_sampler(pScrn, &tex_samp);

        /* U or V texture (plane at pPriv->planeu_offset), half width/height */
        tex_res.id                  = 2;
        tex_res.format              = FMT_8;
        tex_res.w                   = accel_state->src_obj[0].width >> 1;
        tex_res.h                   = accel_state->src_obj[0].height >> 1;
        tex_res.pitch               = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, pPriv->hw_align);
        tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
        tex_res.dst_sel_y           = SQ_SEL_1;
        tex_res.dst_sel_z           = SQ_SEL_1;
        tex_res.dst_sel_w           = SQ_SEL_1;
        tex_res.interlaced          = 0;

        tex_res.base                = accel_state->src_obj[0].offset + pPriv->planeu_offset;
        tex_res.mip_base            = accel_state->src_obj[0].offset + pPriv->planeu_offset;
        tex_res.size                = tex_res.pitch * (pPriv->h >> 1);
        if (accel_state->src_obj[0].tiling_flags == 0)
            tex_res.array_mode      = 1;
        evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);

        /* UV sampler */
        tex_samp.id                 = 2;
        evergreen_set_tex_sampler(pScrn, &tex_samp);
        break;
    case FOURCC_UYVY:
    case FOURCC_YUY2:
    default:
        /* Packed: a single texture and sampler (id 0). */
        accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;

        /* YUV texture */
        tex_res.id                  = 0;
        tex_res.w                   = accel_state->src_obj[0].width;
        tex_res.h                   = accel_state->src_obj[0].height;
        tex_res.pitch               = accel_state->src_obj[0].pitch >> 1;
        tex_res.depth               = 0;
        tex_res.dim                 = SQ_TEX_DIM_2D;
        tex_res.base                = accel_state->src_obj[0].offset;
        tex_res.mip_base            = accel_state->src_obj[0].offset;
        tex_res.size                = accel_state->src_size[0];
        tex_res.bo                  = accel_state->src_obj[0].bo;
        tex_res.mip_bo              = accel_state->src_obj[0].bo;
        tex_res.surface             = NULL;

        if (pPriv->id == FOURCC_UYVY)
            tex_res.format          = FMT_GB_GR;
        else
            tex_res.format          = FMT_BG_RG;
        tex_res.dst_sel_x           = SQ_SEL_Y;
        tex_res.dst_sel_y           = SQ_SEL_X;
        tex_res.dst_sel_z           = SQ_SEL_Z;
        tex_res.dst_sel_w           = SQ_SEL_1;

        tex_res.base_level          = 0;
        tex_res.last_level          = 0;
        tex_res.perf_modulation     = 0;
        tex_res.interlaced          = 0;
        if (accel_state->src_obj[0].tiling_flags == 0)
            tex_res.array_mode      = 1;
        evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);

        /* YUV sampler */
        tex_samp.id                 = 0;
        tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
        tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
        tex_samp.clamp_z            = SQ_TEX_WRAP;

        tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
        tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;

        tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
        tex_samp.mip_filter         = 0;        /* no mipmap */
        evergreen_set_tex_sampler(pScrn, &tex_samp);

        break;
    }

    /* Render target */
    cb_conf.id = 0;
    cb_conf.w = accel_state->dst_obj.pitch;
    cb_conf.h = accel_state->dst_obj.height;
    cb_conf.base = accel_state->dst_obj.offset;
    cb_conf.bo = accel_state->dst_obj.bo;
    cb_conf.surface = accel_state->dst_obj.surface;

    switch (accel_state->dst_obj.bpp) {
    case 16:
        if (pPixmap->drawable.depth == 15) {
            cb_conf.format = COLOR_1_5_5_5;
            cb_conf.comp_swap = 1; /* ARGB */
        } else {
            cb_conf.format = COLOR_5_6_5;
            cb_conf.comp_swap = 2; /* RGB */
        }
#if X_BYTE_ORDER == X_BIG_ENDIAN
        cb_conf.endian = ENDIAN_8IN16;
#endif
        break;
    case 32:
        cb_conf.format = COLOR_8_8_8_8;
        cb_conf.comp_swap = 1; /* ARGB */
#if X_BYTE_ORDER == X_BIG_ENDIAN
        cb_conf.endian = ENDIAN_8IN32;
#endif
        break;
    default:
        /* NOTE(review): returns after radeon_cp_start() without reaching
           evergreen_finish_op() - confirm this is intended */
        return;
    }

    cb_conf.source_format = EXPORT_4C_16BPC;
    cb_conf.blend_clamp = 1;
    cb_conf.pmask = 0xf;
    cb_conf.rop = 3;
    if (accel_state->dst_obj.tiling_flags == 0) {
        cb_conf.array_mode = 1;
        cb_conf.non_disp_tiling = 1;
    }
    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);

    evergreen_set_spi(pScrn, (1 - 1), 1);

    /* PS alu constants */
    ps_const_conf.size_bytes = 256;
    ps_const_conf.type = SHADER_TYPE_PS;
    ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
    ps_const_conf.bo = accel_state->cbuf.vb_bo;
    ps_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_offset;
    ps_const_conf.cpu_ptr = (uint32_t *)(char *)ps_alu_consts;

    /* constant 0: offsets (r, g, b) and the luma coefficient */
    ps_alu_consts[0] = off[0];
    ps_alu_consts[1] = off[1];
    ps_alu_consts[2] = off[2];
    ps_alu_consts[3] = yco;

    /* constant 1: u coefficients (r, g, b) and gamma */
    ps_alu_consts[4] = uco[0];
    ps_alu_consts[5] = uco[1];
    ps_alu_consts[6] = uco[2];
    ps_alu_consts[7] = gamma;

    /* constant 2: v coefficients (r, g, b), last component zero */
    ps_alu_consts[8] = vco[0];
    ps_alu_consts[9] = vco[1];
    ps_alu_consts[10] = vco[2];
    ps_alu_consts[11] = 0.0;

    radeon_vbo_commit(pScrn, &accel_state->cbuf);
    evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);

    /* VS alu constants */
    vs_const_conf.size_bytes = 256;
    vs_const_conf.type = SHADER_TYPE_VS;
    vs_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
    vs_const_conf.bo = accel_state->cbuf.vb_bo;
    vs_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_offset;
    vs_const_conf.cpu_ptr = (uint32_t *)(char *)vs_alu_consts;

    /* reciprocal of the source width/height */
    vs_alu_consts[0] = 1.0 / pPriv->w;
    vs_alu_consts[1] = 1.0 / pPriv->h;
    vs_alu_consts[2] = 0.0;
    vs_alu_consts[3] = 0.0;

    radeon_vbo_commit(pScrn, &accel_state->cbuf);
    evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);

    if (pPriv->vsync) {
        xf86CrtcPtr crtc;
        /* use the requested CRTC if there is one, otherwise pick one
           from the drawable's rectangle */
        if (pPriv->desired_crtc)
            crtc = pPriv->desired_crtc;
        else
            crtc = radeon_pick_best_crtc(pScrn,
                                         pPriv->drw_x,
                                         pPriv->drw_x + pPriv->dst_w,
                                         pPriv->drw_y,
                                         pPriv->drw_y + pPriv->dst_h);
        if (crtc)
            evergreen_cp_wait_vline_sync(pScrn, pPixmap,
                                         crtc,
                                         pPriv->drw_y - crtc->y,
                                         (pPriv->drw_y - crtc->y) + pPriv->dst_h);
    }

    /* Emit three vertices (dst x, dst y, src x, src y) for each clip box. */
    while (nBox--) {
        float srcX, srcY, srcw, srch;
        int dstX, dstY, dstw, dsth;
        float *vb;


        dstX = pBox->x1 + dstxoff;
        dstY = pBox->y1 + dstyoff;
        dstw = pBox->x2 - pBox->x1;
        dsth = pBox->y2 - pBox->y1;

        /* map the box origin and size back into source coordinates */
        srcX = pPriv->src_x;
        srcX += ((pBox->x1 - pPriv->drw_x) *
                 pPriv->src_w) / (float)pPriv->dst_w;
        srcY = pPriv->src_y;
        srcY += ((pBox->y1 - pPriv->drw_y) *
                 pPriv->src_h) / (float)pPriv->dst_h;

        srcw = (pPriv->src_w * dstw) / (float)pPriv->dst_w;
        srch = (pPriv->src_h * dsth) / (float)pPriv->dst_h;

        vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);

        /* top-left */
        vb[0] = (float)dstX;
        vb[1] = (float)dstY;
        vb[2] = (float)srcX;
        vb[3] = (float)srcY;

        /* bottom-left */
        vb[4] = (float)dstX;
        vb[5] = (float)(dstY + dsth);
        vb[6] = (float)srcX;
        vb[7] = (float)(srcY + srch);

        /* bottom-right */
        vb[8] = (float)(dstX + dstw);
        vb[9] = (float)(dstY + dsth);
        vb[10] = (float)(srcX + srcw);
        vb[11] = (float)(srcY + srch);

        radeon_vbo_commit(pScrn, &accel_state->vbo);

        pBox++;
    }

    evergreen_finish_op(pScrn, 16);

    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
}

#endif /* XF86DRM_MODE */