/* r600_exa.c revision c503f109 */
1b7e1c893Smrg/* 2b7e1c893Smrg * Copyright 2008 Advanced Micro Devices, Inc. 3b7e1c893Smrg * 4b7e1c893Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b7e1c893Smrg * copy of this software and associated documentation files (the "Software"), 6b7e1c893Smrg * to deal in the Software without restriction, including without limitation 7b7e1c893Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b7e1c893Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b7e1c893Smrg * Software is furnished to do so, subject to the following conditions: 10b7e1c893Smrg * 11b7e1c893Smrg * The above copyright notice and this permission notice (including the next 12b7e1c893Smrg * paragraph) shall be included in all copies or substantial portions of the 13b7e1c893Smrg * Software. 14b7e1c893Smrg * 15b7e1c893Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b7e1c893Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b7e1c893Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b7e1c893Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b7e1c893Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20b7e1c893Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21b7e1c893Smrg * SOFTWARE. 
22b7e1c893Smrg * 23b7e1c893Smrg * Author: Alex Deucher <alexander.deucher@amd.com> 24b7e1c893Smrg * 25b7e1c893Smrg */ 26b7e1c893Smrg 27b7e1c893Smrg#ifdef HAVE_CONFIG_H 28b7e1c893Smrg#include "config.h" 29b7e1c893Smrg#endif 30b7e1c893Smrg 31b7e1c893Smrg#include "xf86.h" 32b7e1c893Smrg 33b7e1c893Smrg#include "exa.h" 34b7e1c893Smrg 35b7e1c893Smrg#include "radeon.h" 36b7e1c893Smrg#include "radeon_macros.h" 37b7e1c893Smrg#include "radeon_reg.h" 38b7e1c893Smrg#include "r600_shader.h" 39b7e1c893Smrg#include "r600_reg.h" 40b7e1c893Smrg#include "r600_state.h" 41b7e1c893Smrg 42b7e1c893Smrgextern PixmapPtr 43b7e1c893SmrgRADEONGetDrawablePixmap(DrawablePtr pDrawable); 44b7e1c893Smrg 45b7e1c893Smrg/* #define SHOW_VERTEXES */ 46b7e1c893Smrg 47b7e1c893Smrg# define RADEON_ROP3_ZERO 0x00000000 48b7e1c893Smrg# define RADEON_ROP3_DSa 0x00880000 49b7e1c893Smrg# define RADEON_ROP3_SDna 0x00440000 50b7e1c893Smrg# define RADEON_ROP3_S 0x00cc0000 51b7e1c893Smrg# define RADEON_ROP3_DSna 0x00220000 52b7e1c893Smrg# define RADEON_ROP3_D 0x00aa0000 53b7e1c893Smrg# define RADEON_ROP3_DSx 0x00660000 54b7e1c893Smrg# define RADEON_ROP3_DSo 0x00ee0000 55b7e1c893Smrg# define RADEON_ROP3_DSon 0x00110000 56b7e1c893Smrg# define RADEON_ROP3_DSxn 0x00990000 57b7e1c893Smrg# define RADEON_ROP3_Dn 0x00550000 58b7e1c893Smrg# define RADEON_ROP3_SDno 0x00dd0000 59b7e1c893Smrg# define RADEON_ROP3_Sn 0x00330000 60b7e1c893Smrg# define RADEON_ROP3_DSno 0x00bb0000 61b7e1c893Smrg# define RADEON_ROP3_DSan 0x00770000 62b7e1c893Smrg# define RADEON_ROP3_ONE 0x00ff0000 63b7e1c893Smrg 64b7e1c893Smrguint32_t RADEON_ROP[16] = { 65b7e1c893Smrg RADEON_ROP3_ZERO, /* GXclear */ 66b7e1c893Smrg RADEON_ROP3_DSa, /* Gxand */ 67b7e1c893Smrg RADEON_ROP3_SDna, /* GXandReverse */ 68b7e1c893Smrg RADEON_ROP3_S, /* GXcopy */ 69b7e1c893Smrg RADEON_ROP3_DSna, /* GXandInverted */ 70b7e1c893Smrg RADEON_ROP3_D, /* GXnoop */ 71b7e1c893Smrg RADEON_ROP3_DSx, /* GXxor */ 72b7e1c893Smrg RADEON_ROP3_DSo, /* GXor */ 73b7e1c893Smrg RADEON_ROP3_DSon, 
/* GXnor */ 74b7e1c893Smrg RADEON_ROP3_DSxn, /* GXequiv */ 75b7e1c893Smrg RADEON_ROP3_Dn, /* GXinvert */ 76b7e1c893Smrg RADEON_ROP3_SDno, /* GXorReverse */ 77b7e1c893Smrg RADEON_ROP3_Sn, /* GXcopyInverted */ 78b7e1c893Smrg RADEON_ROP3_DSno, /* GXorInverted */ 79b7e1c893Smrg RADEON_ROP3_DSan, /* GXnand */ 80b7e1c893Smrg RADEON_ROP3_ONE, /* GXset */ 81b7e1c893Smrg}; 82b7e1c893Smrg 83b7e1c893Smrgstatic void 84b7e1c893SmrgR600DoneSolid(PixmapPtr pPix); 85b7e1c893Smrg 86b7e1c893Smrgstatic void 87b7e1c893SmrgR600DoneComposite(PixmapPtr pDst); 88b7e1c893Smrg 89b7e1c893Smrg 90b7e1c893Smrgstatic Bool 91b7e1c893SmrgR600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) 92b7e1c893Smrg{ 93b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; 94b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 95b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 96b7e1c893Smrg cb_config_t cb_conf; 97b7e1c893Smrg shader_config_t vs_conf, ps_conf; 98b7e1c893Smrg int pmask = 0; 99b7e1c893Smrg uint32_t a, r, g, b; 100b7e1c893Smrg float ps_alu_consts[4]; 101b7e1c893Smrg 102b7e1c893Smrg accel_state->dst_mc_addr = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; 103b7e1c893Smrg accel_state->dst_size = exaGetPixmapPitch(pPix) * pPix->drawable.height; 104b7e1c893Smrg accel_state->dst_pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); 105b7e1c893Smrg 106b7e1c893Smrg /* bad pitch */ 107b7e1c893Smrg if (accel_state->dst_pitch & 7) 108b7e1c893Smrg return FALSE; 109b7e1c893Smrg 110b7e1c893Smrg /* bad offset */ 111b7e1c893Smrg if (accel_state->dst_mc_addr & 0xff) 112b7e1c893Smrg return FALSE; 113b7e1c893Smrg 114b7e1c893Smrg if (pPix->drawable.bitsPerPixel == 24) 115b7e1c893Smrg return FALSE; 116b7e1c893Smrg 117b7e1c893Smrg CLEAR (cb_conf); 118b7e1c893Smrg CLEAR (vs_conf); 119b7e1c893Smrg CLEAR (ps_conf); 120b7e1c893Smrg 121b7e1c893Smrg /* return FALSE; */ 122b7e1c893Smrg 123b7e1c893Smrg#ifdef SHOW_VERTEXES 124b7e1c893Smrg 
ErrorF("%dx%d @ %dbpp, 0x%08x\n", pPix->drawable.width, pPix->drawable.height, 125b7e1c893Smrg pPix->drawable.bitsPerPixel, exaGetPixmapPitch(pPix)); 126b7e1c893Smrg#endif 127b7e1c893Smrg 128b7e1c893Smrg accel_state->ib = RADEONCPGetBuffer(pScrn); 129b7e1c893Smrg 130b7e1c893Smrg /* Init */ 131b7e1c893Smrg start_3d(pScrn, accel_state->ib); 132b7e1c893Smrg 133b7e1c893Smrg set_default_state(pScrn, accel_state->ib); 134b7e1c893Smrg 135b7e1c893Smrg /* Scissor / viewport */ 136b7e1c893Smrg EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); 137b7e1c893Smrg EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); 138b7e1c893Smrg 139b7e1c893Smrg accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + 140b7e1c893Smrg accel_state->solid_vs_offset; 141b7e1c893Smrg accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + 142b7e1c893Smrg accel_state->solid_ps_offset; 143b7e1c893Smrg accel_state->vs_size = 512; 144b7e1c893Smrg accel_state->ps_size = 512; 145b7e1c893Smrg 146b7e1c893Smrg /* Shader */ 147b7e1c893Smrg 148b7e1c893Smrg /* flush SQ cache */ 149b7e1c893Smrg cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, 150b7e1c893Smrg accel_state->vs_size, accel_state->vs_mc_addr); 151b7e1c893Smrg 152b7e1c893Smrg vs_conf.shader_addr = accel_state->vs_mc_addr; 153b7e1c893Smrg vs_conf.num_gprs = 2; 154b7e1c893Smrg vs_conf.stack_size = 0; 155b7e1c893Smrg vs_setup (pScrn, accel_state->ib, &vs_conf); 156b7e1c893Smrg 157b7e1c893Smrg /* flush SQ cache */ 158b7e1c893Smrg cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, 159b7e1c893Smrg accel_state->ps_size, accel_state->ps_mc_addr); 160b7e1c893Smrg 161b7e1c893Smrg ps_conf.shader_addr = accel_state->ps_mc_addr; 162b7e1c893Smrg ps_conf.num_gprs = 1; 163b7e1c893Smrg ps_conf.stack_size = 0; 164b7e1c893Smrg ps_conf.uncached_first_inst = 1; 165b7e1c893Smrg ps_conf.clamp_consts = 0; 166b7e1c893Smrg ps_conf.export_mode = 2; 167b7e1c893Smrg 
ps_setup (pScrn, accel_state->ib, &ps_conf); 168b7e1c893Smrg 169b7e1c893Smrg /* Render setup */ 170b7e1c893Smrg if (pm & 0x000000ff) 171b7e1c893Smrg pmask |= 4; /* B */ 172b7e1c893Smrg if (pm & 0x0000ff00) 173b7e1c893Smrg pmask |= 2; /* G */ 174b7e1c893Smrg if (pm & 0x00ff0000) 175b7e1c893Smrg pmask |= 1; /* R */ 176b7e1c893Smrg if (pm & 0xff000000) 177b7e1c893Smrg pmask |= 8; /* A */ 178b7e1c893Smrg EREG(accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift)); 179b7e1c893Smrg EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); 180b7e1c893Smrg EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[alu]); 181b7e1c893Smrg 182b7e1c893Smrg cb_conf.id = 0; 183b7e1c893Smrg cb_conf.w = accel_state->dst_pitch; 184b7e1c893Smrg cb_conf.h = pPix->drawable.height; 185b7e1c893Smrg cb_conf.base = accel_state->dst_mc_addr; 186b7e1c893Smrg 187b7e1c893Smrg if (pPix->drawable.bitsPerPixel == 8) { 188b7e1c893Smrg cb_conf.format = COLOR_8; 189b7e1c893Smrg cb_conf.comp_swap = 3; /* A */ 190b7e1c893Smrg } else if (pPix->drawable.bitsPerPixel == 16) { 191b7e1c893Smrg cb_conf.format = COLOR_5_6_5; 192b7e1c893Smrg cb_conf.comp_swap = 2; /* RGB */ 193b7e1c893Smrg } else { 194b7e1c893Smrg cb_conf.format = COLOR_8_8_8_8; 195b7e1c893Smrg cb_conf.comp_swap = 1; /* ARGB */ 196b7e1c893Smrg } 197b7e1c893Smrg cb_conf.source_format = 1; 198b7e1c893Smrg cb_conf.blend_clamp = 1; 199b7e1c893Smrg set_render_target(pScrn, accel_state->ib, &cb_conf); 200b7e1c893Smrg 201b7e1c893Smrg EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | 202b7e1c893Smrg (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | 203b7e1c893Smrg (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); 204b7e1c893Smrg EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ 205b7e1c893Smrg DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ 206b7e1c893Smrg 207b7e1c893Smrg /* Interpolator setup */ 208b7e1c893Smrg /* one unused export from VS (VS_EXPORT_COUNT is 
zero based, count minus one) */ 209b7e1c893Smrg EREG(accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift)); 210b7e1c893Smrg EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); 211b7e1c893Smrg 212b7e1c893Smrg /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x 213b7e1c893Smrg * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ 214b7e1c893Smrg /* no VS exports as PS input (NUM_INTERP is not zero based, no minus one) */ 215b7e1c893Smrg EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (0 << NUM_INTERP_shift)); 216b7e1c893Smrg EREG(accel_state->ib, SPI_PS_IN_CONTROL_1, 0); 217b7e1c893Smrg /* color semantic id 0 -> GPR[0] */ 218b7e1c893Smrg EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | 219b7e1c893Smrg (0x03 << DEFAULT_VAL_shift) | 220b7e1c893Smrg FLAT_SHADE_bit | 221b7e1c893Smrg SEL_CENTROID_bit)); 222b7e1c893Smrg EREG(accel_state->ib, SPI_INTERP_CONTROL_0, FLAT_SHADE_ENA_bit | 0); 223b7e1c893Smrg 224b7e1c893Smrg /* PS alu constants */ 225b7e1c893Smrg if (pPix->drawable.bitsPerPixel == 16) { 226b7e1c893Smrg r = (fg >> 11) & 0x1f; 227b7e1c893Smrg g = (fg >> 5) & 0x3f; 228b7e1c893Smrg b = (fg >> 0) & 0x1f; 229b7e1c893Smrg ps_alu_consts[0] = (float)r / 31; /* R */ 230b7e1c893Smrg ps_alu_consts[1] = (float)g / 63; /* G */ 231b7e1c893Smrg ps_alu_consts[2] = (float)b / 31; /* B */ 232b7e1c893Smrg ps_alu_consts[3] = 1.0; /* A */ 233b7e1c893Smrg } else if (pPix->drawable.bitsPerPixel == 8) { 234b7e1c893Smrg a = (fg >> 0) & 0xff; 235b7e1c893Smrg ps_alu_consts[0] = 0.0; /* R */ 236b7e1c893Smrg ps_alu_consts[1] = 0.0; /* G */ 237b7e1c893Smrg ps_alu_consts[2] = 0.0; /* B */ 238b7e1c893Smrg ps_alu_consts[3] = (float)a / 255; /* A */ 239b7e1c893Smrg } else { 240b7e1c893Smrg a = (fg >> 24) & 0xff; 241b7e1c893Smrg r = (fg >> 16) & 0xff; 242b7e1c893Smrg g = (fg >> 8) & 0xff; 243b7e1c893Smrg b = (fg >> 0) & 0xff; 244b7e1c893Smrg ps_alu_consts[0] = (float)r / 255; /* R */ 245b7e1c893Smrg ps_alu_consts[1] = (float)g 
/ 255; /* G */ 246b7e1c893Smrg ps_alu_consts[2] = (float)b / 255; /* B */ 247b7e1c893Smrg ps_alu_consts[3] = (float)a / 255; /* A */ 248b7e1c893Smrg } 249b7e1c893Smrg set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps, 250b7e1c893Smrg sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); 251b7e1c893Smrg 252b7e1c893Smrg accel_state->vb_index = 0; 253b7e1c893Smrg 254b7e1c893Smrg#ifdef SHOW_VERTEXES 255b7e1c893Smrg ErrorF("PM: 0x%08x\n", pm); 256b7e1c893Smrg#endif 257b7e1c893Smrg 258b7e1c893Smrg return TRUE; 259b7e1c893Smrg} 260b7e1c893Smrg 261b7e1c893Smrg 262b7e1c893Smrgstatic void 263b7e1c893SmrgR600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) 264b7e1c893Smrg{ 265b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; 266b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 267b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 268b7e1c893Smrg float *vb; 269b7e1c893Smrg 270b7e1c893Smrg if (((accel_state->vb_index + 3) * 8) > (accel_state->ib->total / 2)) { 271b7e1c893Smrg R600DoneSolid(pPix); 272b7e1c893Smrg accel_state->vb_index = 0; 273b7e1c893Smrg accel_state->ib = RADEONCPGetBuffer(pScrn); 274b7e1c893Smrg } 275b7e1c893Smrg 276b7e1c893Smrg vb = (pointer)((char*)accel_state->ib->address + 277b7e1c893Smrg (accel_state->ib->total / 2) + 278b7e1c893Smrg accel_state->vb_index * 8); 279b7e1c893Smrg 280b7e1c893Smrg vb[0] = (float)x1; 281b7e1c893Smrg vb[1] = (float)y1; 282b7e1c893Smrg 283b7e1c893Smrg vb[2] = (float)x1; 284b7e1c893Smrg vb[3] = (float)y2; 285b7e1c893Smrg 286b7e1c893Smrg vb[4] = (float)x2; 287b7e1c893Smrg vb[5] = (float)y2; 288b7e1c893Smrg 289b7e1c893Smrg accel_state->vb_index += 3; 290b7e1c893Smrg 291b7e1c893Smrg} 292b7e1c893Smrg 293b7e1c893Smrgstatic void 294b7e1c893SmrgR600DoneSolid(PixmapPtr pPix) 295b7e1c893Smrg{ 296b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; 297b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 298b7e1c893Smrg struct radeon_accel_state 
*accel_state = info->accel_state; 299b7e1c893Smrg draw_config_t draw_conf; 300b7e1c893Smrg vtx_resource_t vtx_res; 301b7e1c893Smrg 302b7e1c893Smrg CLEAR (draw_conf); 303b7e1c893Smrg CLEAR (vtx_res); 304b7e1c893Smrg 305b7e1c893Smrg if (accel_state->vb_index == 0) { 306b7e1c893Smrg R600IBDiscard(pScrn, accel_state->ib); 307b7e1c893Smrg return; 308b7e1c893Smrg } 309b7e1c893Smrg 310b7e1c893Smrg accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + 311b7e1c893Smrg (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); 312b7e1c893Smrg accel_state->vb_size = accel_state->vb_index * 8; 313b7e1c893Smrg 314b7e1c893Smrg /* flush vertex cache */ 315b7e1c893Smrg if ((info->ChipFamily == CHIP_FAMILY_RV610) || 316b7e1c893Smrg (info->ChipFamily == CHIP_FAMILY_RV620) || 317b7e1c893Smrg (info->ChipFamily == CHIP_FAMILY_RS780) || 318c503f109Smrg (info->ChipFamily == CHIP_FAMILY_RS880) || 319b7e1c893Smrg (info->ChipFamily == CHIP_FAMILY_RV710)) 320b7e1c893Smrg cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, 321b7e1c893Smrg accel_state->vb_size, accel_state->vb_mc_addr); 322b7e1c893Smrg else 323b7e1c893Smrg cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, 324b7e1c893Smrg accel_state->vb_size, accel_state->vb_mc_addr); 325b7e1c893Smrg 326b7e1c893Smrg /* Vertex buffer setup */ 327b7e1c893Smrg vtx_res.id = SQ_VTX_RESOURCE_vs; 328b7e1c893Smrg vtx_res.vtx_size_dw = 8 / 4; 329b7e1c893Smrg vtx_res.vtx_num_entries = accel_state->vb_size / 4; 330b7e1c893Smrg vtx_res.mem_req_size = 1; 331b7e1c893Smrg vtx_res.vb_addr = accel_state->vb_mc_addr; 332b7e1c893Smrg set_vtx_resource (pScrn, accel_state->ib, &vtx_res); 333b7e1c893Smrg 334b7e1c893Smrg /* Draw */ 335b7e1c893Smrg draw_conf.prim_type = DI_PT_RECTLIST; 336b7e1c893Smrg draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; 337b7e1c893Smrg draw_conf.num_instances = 1; 338b7e1c893Smrg draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; 339b7e1c893Smrg 
draw_conf.index_type = DI_INDEX_SIZE_16_BIT; 340b7e1c893Smrg 341b7e1c893Smrg draw_auto(pScrn, accel_state->ib, &draw_conf); 342b7e1c893Smrg 343b7e1c893Smrg wait_3d_idle_clean(pScrn, accel_state->ib); 344b7e1c893Smrg 345b7e1c893Smrg /* sync dst surface */ 346b7e1c893Smrg cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), 347b7e1c893Smrg accel_state->dst_size, accel_state->dst_mc_addr); 348b7e1c893Smrg 349b7e1c893Smrg R600CPFlushIndirect(pScrn, accel_state->ib); 350b7e1c893Smrg} 351b7e1c893Smrg 352b7e1c893Smrgstatic void 353b7e1c893SmrgR600DoPrepareCopy(ScrnInfoPtr pScrn, 354b7e1c893Smrg int src_pitch, int src_width, int src_height, uint32_t src_offset, int src_bpp, 355b7e1c893Smrg int dst_pitch, int dst_height, uint32_t dst_offset, int dst_bpp, 356b7e1c893Smrg int rop, Pixel planemask) 357b7e1c893Smrg{ 358b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 359b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 360b7e1c893Smrg int pmask = 0; 361b7e1c893Smrg cb_config_t cb_conf; 362b7e1c893Smrg tex_resource_t tex_res; 363b7e1c893Smrg tex_sampler_t tex_samp; 364b7e1c893Smrg shader_config_t vs_conf, ps_conf; 365b7e1c893Smrg 366b7e1c893Smrg CLEAR (cb_conf); 367b7e1c893Smrg CLEAR (tex_res); 368b7e1c893Smrg CLEAR (tex_samp); 369b7e1c893Smrg CLEAR (vs_conf); 370b7e1c893Smrg CLEAR (ps_conf); 371b7e1c893Smrg 372b7e1c893Smrg accel_state->ib = RADEONCPGetBuffer(pScrn); 373b7e1c893Smrg 374b7e1c893Smrg /* Init */ 375b7e1c893Smrg start_3d(pScrn, accel_state->ib); 376b7e1c893Smrg 377b7e1c893Smrg set_default_state(pScrn, accel_state->ib); 378b7e1c893Smrg 379b7e1c893Smrg /* Scissor / viewport */ 380b7e1c893Smrg EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); 381b7e1c893Smrg EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); 382b7e1c893Smrg 383b7e1c893Smrg accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + 384b7e1c893Smrg accel_state->copy_vs_offset; 385b7e1c893Smrg 
accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + 386b7e1c893Smrg accel_state->copy_ps_offset; 387b7e1c893Smrg accel_state->vs_size = 512; 388b7e1c893Smrg accel_state->ps_size = 512; 389b7e1c893Smrg 390b7e1c893Smrg /* Shader */ 391b7e1c893Smrg 392b7e1c893Smrg /* flush SQ cache */ 393b7e1c893Smrg cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, 394b7e1c893Smrg accel_state->vs_size, accel_state->vs_mc_addr); 395b7e1c893Smrg 396b7e1c893Smrg vs_conf.shader_addr = accel_state->vs_mc_addr; 397b7e1c893Smrg vs_conf.num_gprs = 2; 398b7e1c893Smrg vs_conf.stack_size = 0; 399b7e1c893Smrg vs_setup (pScrn, accel_state->ib, &vs_conf); 400b7e1c893Smrg 401b7e1c893Smrg /* flush SQ cache */ 402b7e1c893Smrg cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, 403b7e1c893Smrg accel_state->ps_size, accel_state->ps_mc_addr); 404b7e1c893Smrg 405b7e1c893Smrg ps_conf.shader_addr = accel_state->ps_mc_addr; 406b7e1c893Smrg ps_conf.num_gprs = 1; 407b7e1c893Smrg ps_conf.stack_size = 0; 408b7e1c893Smrg ps_conf.uncached_first_inst = 1; 409b7e1c893Smrg ps_conf.clamp_consts = 0; 410b7e1c893Smrg ps_conf.export_mode = 2; 411b7e1c893Smrg ps_setup (pScrn, accel_state->ib, &ps_conf); 412b7e1c893Smrg 413b7e1c893Smrg accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8); 414b7e1c893Smrg accel_state->src_mc_addr[0] = src_offset; 415b7e1c893Smrg accel_state->src_pitch[0] = src_pitch; 416b7e1c893Smrg accel_state->src_width[0] = src_width; 417b7e1c893Smrg accel_state->src_height[0] = src_height; 418b7e1c893Smrg accel_state->src_bpp[0] = src_bpp; 419b7e1c893Smrg 420b7e1c893Smrg /* flush texture cache */ 421b7e1c893Smrg cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, 422b7e1c893Smrg accel_state->src_size[0], accel_state->src_mc_addr[0]); 423b7e1c893Smrg 424b7e1c893Smrg /* Texture */ 425b7e1c893Smrg tex_res.id = 0; 426b7e1c893Smrg tex_res.w = src_width; 427b7e1c893Smrg tex_res.h = src_height; 428b7e1c893Smrg 
tex_res.pitch = accel_state->src_pitch[0]; 429b7e1c893Smrg tex_res.depth = 0; 430b7e1c893Smrg tex_res.dim = SQ_TEX_DIM_2D; 431b7e1c893Smrg tex_res.base = accel_state->src_mc_addr[0]; 432b7e1c893Smrg tex_res.mip_base = accel_state->src_mc_addr[0]; 433b7e1c893Smrg if (src_bpp == 8) { 434b7e1c893Smrg tex_res.format = FMT_8; 435b7e1c893Smrg tex_res.dst_sel_x = SQ_SEL_1; /* R */ 436b7e1c893Smrg tex_res.dst_sel_y = SQ_SEL_1; /* G */ 437b7e1c893Smrg tex_res.dst_sel_z = SQ_SEL_1; /* B */ 438b7e1c893Smrg tex_res.dst_sel_w = SQ_SEL_X; /* A */ 439b7e1c893Smrg } else if (src_bpp == 16) { 440b7e1c893Smrg tex_res.format = FMT_5_6_5; 441b7e1c893Smrg tex_res.dst_sel_x = SQ_SEL_Z; /* R */ 442b7e1c893Smrg tex_res.dst_sel_y = SQ_SEL_Y; /* G */ 443b7e1c893Smrg tex_res.dst_sel_z = SQ_SEL_X; /* B */ 444b7e1c893Smrg tex_res.dst_sel_w = SQ_SEL_1; /* A */ 445b7e1c893Smrg } else { 446b7e1c893Smrg tex_res.format = FMT_8_8_8_8; 447b7e1c893Smrg tex_res.dst_sel_x = SQ_SEL_Z; /* R */ 448b7e1c893Smrg tex_res.dst_sel_y = SQ_SEL_Y; /* G */ 449b7e1c893Smrg tex_res.dst_sel_z = SQ_SEL_X; /* B */ 450b7e1c893Smrg tex_res.dst_sel_w = SQ_SEL_W; /* A */ 451b7e1c893Smrg } 452b7e1c893Smrg 453b7e1c893Smrg tex_res.request_size = 1; 454b7e1c893Smrg tex_res.base_level = 0; 455b7e1c893Smrg tex_res.last_level = 0; 456b7e1c893Smrg tex_res.perf_modulation = 0; 457b7e1c893Smrg set_tex_resource (pScrn, accel_state->ib, &tex_res); 458b7e1c893Smrg 459b7e1c893Smrg tex_samp.id = 0; 460b7e1c893Smrg tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; 461b7e1c893Smrg tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; 462b7e1c893Smrg tex_samp.clamp_z = SQ_TEX_WRAP; 463b7e1c893Smrg tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; 464b7e1c893Smrg tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; 465b7e1c893Smrg tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 466b7e1c893Smrg tex_samp.mip_filter = 0; /* no mipmap */ 467b7e1c893Smrg set_tex_sampler (pScrn, accel_state->ib, &tex_samp); 468b7e1c893Smrg 469b7e1c893Smrg 470b7e1c893Smrg /* Render setup */ 
471b7e1c893Smrg if (planemask & 0x000000ff) 472b7e1c893Smrg pmask |= 4; /* B */ 473b7e1c893Smrg if (planemask & 0x0000ff00) 474b7e1c893Smrg pmask |= 2; /* G */ 475b7e1c893Smrg if (planemask & 0x00ff0000) 476b7e1c893Smrg pmask |= 1; /* R */ 477b7e1c893Smrg if (planemask & 0xff000000) 478b7e1c893Smrg pmask |= 8; /* A */ 479b7e1c893Smrg EREG(accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift)); 480b7e1c893Smrg EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); 481b7e1c893Smrg EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[rop]); 482b7e1c893Smrg 483b7e1c893Smrg accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8); 484b7e1c893Smrg accel_state->dst_mc_addr = dst_offset; 485b7e1c893Smrg accel_state->dst_pitch = dst_pitch; 486b7e1c893Smrg accel_state->dst_height = dst_height; 487b7e1c893Smrg accel_state->dst_bpp = dst_bpp; 488b7e1c893Smrg 489b7e1c893Smrg cb_conf.id = 0; 490b7e1c893Smrg cb_conf.w = accel_state->dst_pitch; 491b7e1c893Smrg cb_conf.h = dst_height; 492b7e1c893Smrg cb_conf.base = accel_state->dst_mc_addr; 493b7e1c893Smrg if (dst_bpp == 8) { 494b7e1c893Smrg cb_conf.format = COLOR_8; 495b7e1c893Smrg cb_conf.comp_swap = 3; /* A */ 496b7e1c893Smrg } else if (dst_bpp == 16) { 497b7e1c893Smrg cb_conf.format = COLOR_5_6_5; 498b7e1c893Smrg cb_conf.comp_swap = 2; /* RGB */ 499b7e1c893Smrg } else { 500b7e1c893Smrg cb_conf.format = COLOR_8_8_8_8; 501b7e1c893Smrg cb_conf.comp_swap = 1; /* ARGB */ 502b7e1c893Smrg } 503b7e1c893Smrg cb_conf.source_format = 1; 504b7e1c893Smrg cb_conf.blend_clamp = 1; 505b7e1c893Smrg set_render_target(pScrn, accel_state->ib, &cb_conf); 506b7e1c893Smrg 507b7e1c893Smrg EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | 508b7e1c893Smrg (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | 509b7e1c893Smrg (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); 510b7e1c893Smrg EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ 511b7e1c893Smrg 
DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ 512b7e1c893Smrg 513b7e1c893Smrg /* Interpolator setup */ 514b7e1c893Smrg /* export tex coord from VS */ 515b7e1c893Smrg EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); 516b7e1c893Smrg EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); 517b7e1c893Smrg 518b7e1c893Smrg /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x 519b7e1c893Smrg * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ 520b7e1c893Smrg /* input tex coord from VS */ 521b7e1c893Smrg EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, ((1 << NUM_INTERP_shift))); 522b7e1c893Smrg EREG(accel_state->ib, SPI_PS_IN_CONTROL_1, 0); 523b7e1c893Smrg /* color semantic id 0 -> GPR[0] */ 524b7e1c893Smrg EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | 525b7e1c893Smrg (0x01 << DEFAULT_VAL_shift) | 526b7e1c893Smrg SEL_CENTROID_bit)); 527b7e1c893Smrg EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); 528b7e1c893Smrg 529b7e1c893Smrg accel_state->vb_index = 0; 530b7e1c893Smrg 531b7e1c893Smrg} 532b7e1c893Smrg 533b7e1c893Smrgstatic void 534b7e1c893SmrgR600DoCopy(ScrnInfoPtr pScrn) 535b7e1c893Smrg{ 536b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 537b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 538b7e1c893Smrg draw_config_t draw_conf; 539b7e1c893Smrg vtx_resource_t vtx_res; 540b7e1c893Smrg 541b7e1c893Smrg CLEAR (draw_conf); 542b7e1c893Smrg CLEAR (vtx_res); 543b7e1c893Smrg 544b7e1c893Smrg if (accel_state->vb_index == 0) { 545b7e1c893Smrg R600IBDiscard(pScrn, accel_state->ib); 546b7e1c893Smrg return; 547b7e1c893Smrg } 548b7e1c893Smrg 549b7e1c893Smrg accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + 550b7e1c893Smrg (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); 551b7e1c893Smrg accel_state->vb_size = accel_state->vb_index * 16; 552b7e1c893Smrg 553b7e1c893Smrg /* flush vertex cache */ 554b7e1c893Smrg 
if ((info->ChipFamily == CHIP_FAMILY_RV610) || 555b7e1c893Smrg (info->ChipFamily == CHIP_FAMILY_RV620) || 556b7e1c893Smrg (info->ChipFamily == CHIP_FAMILY_RS780) || 557c503f109Smrg (info->ChipFamily == CHIP_FAMILY_RS880) || 558b7e1c893Smrg (info->ChipFamily == CHIP_FAMILY_RV710)) 559b7e1c893Smrg cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, 560b7e1c893Smrg accel_state->vb_size, accel_state->vb_mc_addr); 561b7e1c893Smrg else 562b7e1c893Smrg cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, 563b7e1c893Smrg accel_state->vb_size, accel_state->vb_mc_addr); 564b7e1c893Smrg 565b7e1c893Smrg /* Vertex buffer setup */ 566b7e1c893Smrg vtx_res.id = SQ_VTX_RESOURCE_vs; 567b7e1c893Smrg vtx_res.vtx_size_dw = 16 / 4; 568b7e1c893Smrg vtx_res.vtx_num_entries = accel_state->vb_size / 4; 569b7e1c893Smrg vtx_res.mem_req_size = 1; 570b7e1c893Smrg vtx_res.vb_addr = accel_state->vb_mc_addr; 571b7e1c893Smrg set_vtx_resource (pScrn, accel_state->ib, &vtx_res); 572b7e1c893Smrg 573b7e1c893Smrg draw_conf.prim_type = DI_PT_RECTLIST; 574b7e1c893Smrg draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; 575b7e1c893Smrg draw_conf.num_instances = 1; 576b7e1c893Smrg draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; 577b7e1c893Smrg draw_conf.index_type = DI_INDEX_SIZE_16_BIT; 578b7e1c893Smrg 579b7e1c893Smrg draw_auto(pScrn, accel_state->ib, &draw_conf); 580b7e1c893Smrg 581b7e1c893Smrg wait_3d_idle_clean(pScrn, accel_state->ib); 582b7e1c893Smrg 583b7e1c893Smrg /* sync dst surface */ 584b7e1c893Smrg cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), 585b7e1c893Smrg accel_state->dst_size, accel_state->dst_mc_addr); 586b7e1c893Smrg 587b7e1c893Smrg R600CPFlushIndirect(pScrn, accel_state->ib); 588b7e1c893Smrg} 589b7e1c893Smrg 590b7e1c893Smrgstatic void 591b7e1c893SmrgR600AppendCopyVertex(ScrnInfoPtr pScrn, 592b7e1c893Smrg int srcX, int srcY, 593b7e1c893Smrg int dstX, int dstY, 594b7e1c893Smrg int w, int h) 
595b7e1c893Smrg{ 596b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 597b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 598b7e1c893Smrg float *vb; 599b7e1c893Smrg 600b7e1c893Smrg if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) { 601b7e1c893Smrg R600DoCopy(pScrn); 602b7e1c893Smrg accel_state->vb_index = 0; 603b7e1c893Smrg accel_state->ib = RADEONCPGetBuffer(pScrn); 604b7e1c893Smrg } 605b7e1c893Smrg 606b7e1c893Smrg vb = (pointer)((char*)accel_state->ib->address + 607b7e1c893Smrg (accel_state->ib->total / 2) + 608b7e1c893Smrg accel_state->vb_index * 16); 609b7e1c893Smrg 610b7e1c893Smrg vb[0] = (float)dstX; 611b7e1c893Smrg vb[1] = (float)dstY; 612b7e1c893Smrg vb[2] = (float)srcX; 613b7e1c893Smrg vb[3] = (float)srcY; 614b7e1c893Smrg 615b7e1c893Smrg vb[4] = (float)dstX; 616b7e1c893Smrg vb[5] = (float)(dstY + h); 617b7e1c893Smrg vb[6] = (float)srcX; 618b7e1c893Smrg vb[7] = (float)(srcY + h); 619b7e1c893Smrg 620b7e1c893Smrg vb[8] = (float)(dstX + w); 621b7e1c893Smrg vb[9] = (float)(dstY + h); 622b7e1c893Smrg vb[10] = (float)(srcX + w); 623b7e1c893Smrg vb[11] = (float)(srcY + h); 624b7e1c893Smrg 625b7e1c893Smrg accel_state->vb_index += 3; 626b7e1c893Smrg} 627b7e1c893Smrg 628b7e1c893Smrgstatic Bool 629b7e1c893SmrgR600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, 630b7e1c893Smrg int xdir, int ydir, 631b7e1c893Smrg int rop, 632b7e1c893Smrg Pixel planemask) 633b7e1c893Smrg{ 634b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 635b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 636b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 637b7e1c893Smrg 638b7e1c893Smrg accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); 639b7e1c893Smrg accel_state->src_pitch[0] = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); 640b7e1c893Smrg 641b7e1c893Smrg accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; 
642b7e1c893Smrg accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; 643b7e1c893Smrg 644b7e1c893Smrg accel_state->src_width[0] = pSrc->drawable.width; 645b7e1c893Smrg accel_state->src_height[0] = pSrc->drawable.height; 646b7e1c893Smrg accel_state->src_bpp[0] = pSrc->drawable.bitsPerPixel; 647b7e1c893Smrg accel_state->dst_height = pDst->drawable.height; 648b7e1c893Smrg accel_state->dst_bpp = pDst->drawable.bitsPerPixel; 649b7e1c893Smrg 650b7e1c893Smrg /* bad pitch */ 651b7e1c893Smrg if (accel_state->src_pitch[0] & 7) 652b7e1c893Smrg return FALSE; 653b7e1c893Smrg if (accel_state->dst_pitch & 7) 654b7e1c893Smrg return FALSE; 655b7e1c893Smrg 656b7e1c893Smrg /* bad offset */ 657b7e1c893Smrg if (accel_state->src_mc_addr[0] & 0xff) 658b7e1c893Smrg return FALSE; 659b7e1c893Smrg if (accel_state->dst_mc_addr & 0xff) 660b7e1c893Smrg return FALSE; 661b7e1c893Smrg 662b7e1c893Smrg if (pSrc->drawable.bitsPerPixel == 24) 663b7e1c893Smrg return FALSE; 664b7e1c893Smrg if (pDst->drawable.bitsPerPixel == 24) 665b7e1c893Smrg return FALSE; 666b7e1c893Smrg 667b7e1c893Smrg /* return FALSE; */ 668b7e1c893Smrg 669b7e1c893Smrg#ifdef SHOW_VERTEXES 670b7e1c893Smrg ErrorF("src: %dx%d @ %dbpp, 0x%08x\n", pSrc->drawable.width, pSrc->drawable.height, 671b7e1c893Smrg pSrc->drawable.bitsPerPixel, exaGetPixmapPitch(pSrc)); 672b7e1c893Smrg ErrorF("dst: %dx%d @ %dbpp, 0x%08x\n", pDst->drawable.width, pDst->drawable.height, 673b7e1c893Smrg pDst->drawable.bitsPerPixel, exaGetPixmapPitch(pDst)); 674b7e1c893Smrg#endif 675b7e1c893Smrg 676b7e1c893Smrg accel_state->rop = rop; 677b7e1c893Smrg accel_state->planemask = planemask; 678b7e1c893Smrg 679b7e1c893Smrg if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst)) { 680b7e1c893Smrg unsigned long size = pDst->drawable.height * accel_state->dst_pitch * pDst->drawable.bitsPerPixel/8; 681b7e1c893Smrg accel_state->same_surface = TRUE; 682b7e1c893Smrg 683b7e1c893Smrg if (accel_state->copy_area) { 684b7e1c893Smrg 
exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); 685b7e1c893Smrg accel_state->copy_area = NULL; 686b7e1c893Smrg } 687b7e1c893Smrg accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL); 688b7e1c893Smrg } else { 689b7e1c893Smrg accel_state->same_surface = FALSE; 690b7e1c893Smrg 691b7e1c893Smrg R600DoPrepareCopy(pScrn, 692b7e1c893Smrg accel_state->src_pitch[0], pSrc->drawable.width, pSrc->drawable.height, 693b7e1c893Smrg accel_state->src_mc_addr[0], pSrc->drawable.bitsPerPixel, 694b7e1c893Smrg accel_state->dst_pitch, pDst->drawable.height, 695b7e1c893Smrg accel_state->dst_mc_addr, pDst->drawable.bitsPerPixel, 696b7e1c893Smrg rop, planemask); 697b7e1c893Smrg 698b7e1c893Smrg } 699b7e1c893Smrg 700b7e1c893Smrg return TRUE; 701b7e1c893Smrg} 702b7e1c893Smrg 703b7e1c893Smrgstatic Bool 704b7e1c893Smrgis_overlap(int sx1, int sx2, int sy1, int sy2, int dx1, int dx2, int dy1, int dy2) 705b7e1c893Smrg{ 706b7e1c893Smrg if (((sx1 >= dx1) && (sx1 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || /* TL x1, y1 */ 707b7e1c893Smrg ((sx2 >= dx1) && (sx2 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || /* TR x2, y1 */ 708b7e1c893Smrg ((sx1 >= dx1) && (sx1 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2)) || /* BL x1, y2 */ 709b7e1c893Smrg ((sx2 >= dx1) && (sx2 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2))) /* BR x2, y2 */ 710b7e1c893Smrg return TRUE; 711b7e1c893Smrg else 712b7e1c893Smrg return FALSE; 713b7e1c893Smrg} 714b7e1c893Smrg 715b7e1c893Smrgstatic void 716b7e1c893SmrgR600OverlapCopy(PixmapPtr pDst, 717b7e1c893Smrg int srcX, int srcY, 718b7e1c893Smrg int dstX, int dstY, 719b7e1c893Smrg int w, int h) 720b7e1c893Smrg{ 721b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 722b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 723b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 724b7e1c893Smrg uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); 725b7e1c893Smrg uint32_t 
dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; 726b7e1c893Smrg int i, hchunk, vchunk; 727b7e1c893Smrg 728b7e1c893Smrg if (is_overlap(srcX, srcX + w, srcY, srcY + h, 729b7e1c893Smrg dstX, dstX + w, dstY, dstY + h)) { 730b7e1c893Smrg /* Calculate height/width of non-overlapping area */ 731b7e1c893Smrg hchunk = (srcX < dstX) ? (dstX - srcX) : (srcX - dstX); 732b7e1c893Smrg vchunk = (srcY < dstY) ? (dstY - srcY) : (srcY - dstY); 733b7e1c893Smrg 734b7e1c893Smrg /* Diagonally offset overlap is reduced to either horizontal or vertical offset-only 735b7e1c893Smrg * by copying a part of the non-overlapping portion, then adjusting coordinates 736b7e1c893Smrg * Choose horizontal vs vertical to minimize the total number of copy operations 737b7e1c893Smrg */ 738b7e1c893Smrg if (vchunk != 0 && hchunk != 0) { /* diagonal */ 739b7e1c893Smrg if ((w / hchunk) <= (h / vchunk)) { /* reduce to horizontal */ 740b7e1c893Smrg if (srcY > dstY ) { /* diagonal up */ 741b7e1c893Smrg R600DoPrepareCopy(pScrn, 742b7e1c893Smrg dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 743b7e1c893Smrg dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 744b7e1c893Smrg accel_state->rop, accel_state->planemask); 745b7e1c893Smrg R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, vchunk); 746b7e1c893Smrg R600DoCopy(pScrn); 747b7e1c893Smrg 748b7e1c893Smrg srcY = srcY + vchunk; 749b7e1c893Smrg dstY = dstY + vchunk; 750b7e1c893Smrg } else { /* diagonal down */ 751b7e1c893Smrg R600DoPrepareCopy(pScrn, 752b7e1c893Smrg dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 753b7e1c893Smrg dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 754b7e1c893Smrg accel_state->rop, accel_state->planemask); 755b7e1c893Smrg R600AppendCopyVertex(pScrn, srcX, srcY + h - vchunk, dstX, dstY + h - vchunk, w, vchunk); 756b7e1c893Smrg R600DoCopy(pScrn); 757b7e1c893Smrg 
} 758b7e1c893Smrg h = h - vchunk; 759b7e1c893Smrg vchunk = 0; 760b7e1c893Smrg } else { /* reduce to vertical */ 761b7e1c893Smrg if (srcX > dstX ) { /* diagonal left */ 762b7e1c893Smrg R600DoPrepareCopy(pScrn, 763b7e1c893Smrg dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 764b7e1c893Smrg dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 765b7e1c893Smrg accel_state->rop, accel_state->planemask); 766b7e1c893Smrg R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, hchunk, h); 767b7e1c893Smrg R600DoCopy(pScrn); 768b7e1c893Smrg 769b7e1c893Smrg srcX = srcX + hchunk; 770b7e1c893Smrg dstX = dstX + hchunk; 771b7e1c893Smrg } else { /* diagonal right */ 772b7e1c893Smrg R600DoPrepareCopy(pScrn, 773b7e1c893Smrg dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 774b7e1c893Smrg dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 775b7e1c893Smrg accel_state->rop, accel_state->planemask); 776b7e1c893Smrg R600AppendCopyVertex(pScrn, srcX + w - hchunk, srcY, dstX + w - hchunk, dstY, hchunk, h); 777b7e1c893Smrg R600DoCopy(pScrn); 778b7e1c893Smrg } 779b7e1c893Smrg w = w - hchunk; 780b7e1c893Smrg hchunk = 0; 781b7e1c893Smrg } 782b7e1c893Smrg } 783b7e1c893Smrg 784b7e1c893Smrg if (vchunk == 0) { /* left/right */ 785b7e1c893Smrg if (srcX < dstX) { /* right */ 786b7e1c893Smrg /* copy right to left */ 787b7e1c893Smrg for (i = w; i > 0; i -= hchunk) { 788b7e1c893Smrg R600DoPrepareCopy(pScrn, 789b7e1c893Smrg dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 790b7e1c893Smrg dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 791b7e1c893Smrg accel_state->rop, accel_state->planemask); 792b7e1c893Smrg R600AppendCopyVertex(pScrn, srcX + i - hchunk, srcY, dstX + i - hchunk, dstY, hchunk, h); 793b7e1c893Smrg R600DoCopy(pScrn); 794b7e1c893Smrg } 795b7e1c893Smrg } else { /* left */ 
796b7e1c893Smrg /* copy left to right */ 797b7e1c893Smrg for (i = 0; i < w; i += hchunk) { 798b7e1c893Smrg R600DoPrepareCopy(pScrn, 799b7e1c893Smrg dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 800b7e1c893Smrg dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 801b7e1c893Smrg accel_state->rop, accel_state->planemask); 802b7e1c893Smrg 803b7e1c893Smrg R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY, hchunk, h); 804b7e1c893Smrg R600DoCopy(pScrn); 805b7e1c893Smrg } 806b7e1c893Smrg } 807b7e1c893Smrg } else { /* up/down */ 808b7e1c893Smrg if (srcY > dstY) { /* up */ 809b7e1c893Smrg /* copy top to bottom */ 810b7e1c893Smrg for (i = 0; i < h; i += vchunk) { 811b7e1c893Smrg R600DoPrepareCopy(pScrn, 812b7e1c893Smrg dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 813b7e1c893Smrg dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 814b7e1c893Smrg accel_state->rop, accel_state->planemask); 815b7e1c893Smrg 816b7e1c893Smrg if (vchunk > h - i) vchunk = h - i; 817b7e1c893Smrg R600AppendCopyVertex(pScrn, srcX, srcY + i, dstX, dstY + i, w, vchunk); 818b7e1c893Smrg R600DoCopy(pScrn); 819b7e1c893Smrg } 820b7e1c893Smrg } else { /* down */ 821b7e1c893Smrg /* copy bottom to top */ 822b7e1c893Smrg for (i = h; i > 0; i -= vchunk) { 823b7e1c893Smrg R600DoPrepareCopy(pScrn, 824b7e1c893Smrg dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 825b7e1c893Smrg dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 826b7e1c893Smrg accel_state->rop, accel_state->planemask); 827b7e1c893Smrg 828b7e1c893Smrg if (vchunk > i) vchunk = i; 829b7e1c893Smrg R600AppendCopyVertex(pScrn, srcX, srcY + i - vchunk, dstX, dstY + i - vchunk, w, vchunk); 830b7e1c893Smrg R600DoCopy(pScrn); 831b7e1c893Smrg } 832b7e1c893Smrg } 833b7e1c893Smrg } 834b7e1c893Smrg } else { 835b7e1c893Smrg 
R600DoPrepareCopy(pScrn, 836b7e1c893Smrg dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 837b7e1c893Smrg dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, 838b7e1c893Smrg accel_state->rop, accel_state->planemask); 839b7e1c893Smrg 840b7e1c893Smrg R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); 841b7e1c893Smrg R600DoCopy(pScrn); 842b7e1c893Smrg } 843b7e1c893Smrg} 844b7e1c893Smrg 845b7e1c893Smrgstatic void 846b7e1c893SmrgR600Copy(PixmapPtr pDst, 847b7e1c893Smrg int srcX, int srcY, 848b7e1c893Smrg int dstX, int dstY, 849b7e1c893Smrg int w, int h) 850b7e1c893Smrg{ 851b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 852b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 853b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 854b7e1c893Smrg 855b7e1c893Smrg if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY)) 856b7e1c893Smrg return; 857b7e1c893Smrg 858b7e1c893Smrg if (accel_state->same_surface && is_overlap(srcX, srcX + w, srcY, srcY + h, dstX, dstX + w, dstY, dstY + h)) { 859b7e1c893Smrg if (accel_state->copy_area) { 860b7e1c893Smrg uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); 861b7e1c893Smrg uint32_t orig_offset, tmp_offset; 862b7e1c893Smrg 863b7e1c893Smrg tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset; 864b7e1c893Smrg orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; 865b7e1c893Smrg 866b7e1c893Smrg R600DoPrepareCopy(pScrn, 867b7e1c893Smrg pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, 868b7e1c893Smrg pitch, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, 869b7e1c893Smrg accel_state->rop, accel_state->planemask); 870b7e1c893Smrg R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); 871b7e1c893Smrg R600DoCopy(pScrn); 872b7e1c893Smrg R600DoPrepareCopy(pScrn, 
873b7e1c893Smrg pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, 874b7e1c893Smrg pitch, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, 875b7e1c893Smrg accel_state->rop, accel_state->planemask); 876b7e1c893Smrg R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h); 877b7e1c893Smrg R600DoCopy(pScrn); 878b7e1c893Smrg } else 879b7e1c893Smrg R600OverlapCopy(pDst, srcX, srcY, dstX, dstY, w, h); 880b7e1c893Smrg } else if (accel_state->same_surface) { 881b7e1c893Smrg uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); 882b7e1c893Smrg uint32_t offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; 883b7e1c893Smrg 884b7e1c893Smrg R600DoPrepareCopy(pScrn, 885b7e1c893Smrg pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, 886b7e1c893Smrg pitch, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, 887b7e1c893Smrg accel_state->rop, accel_state->planemask); 888b7e1c893Smrg R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); 889b7e1c893Smrg R600DoCopy(pScrn); 890b7e1c893Smrg } else { 891b7e1c893Smrg R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); 892b7e1c893Smrg } 893b7e1c893Smrg 894b7e1c893Smrg} 895b7e1c893Smrg 896b7e1c893Smrgstatic void 897b7e1c893SmrgR600DoneCopy(PixmapPtr pDst) 898b7e1c893Smrg{ 899b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 900b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 901b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 902b7e1c893Smrg 903b7e1c893Smrg if (!accel_state->same_surface) 904b7e1c893Smrg R600DoCopy(pScrn); 905b7e1c893Smrg 906b7e1c893Smrg if (accel_state->copy_area) { 907b7e1c893Smrg exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); 908b7e1c893Smrg accel_state->copy_area = NULL; 909b7e1c893Smrg } 910b7e1c893Smrg 911b7e1c893Smrg} 912b7e1c893Smrg 913b7e1c893Smrg#define RADEON_TRACE_FALL 0 
914b7e1c893Smrg#define RADEON_TRACE_DRAW 0 915b7e1c893Smrg 916b7e1c893Smrg#if RADEON_TRACE_FALL 917b7e1c893Smrg#define RADEON_FALLBACK(x) \ 918b7e1c893Smrgdo { \ 919b7e1c893Smrg ErrorF("%s: ", __FUNCTION__); \ 920b7e1c893Smrg ErrorF x; \ 921b7e1c893Smrg return FALSE; \ 922b7e1c893Smrg} while (0) 923b7e1c893Smrg#else 924b7e1c893Smrg#define RADEON_FALLBACK(x) return FALSE 925b7e1c893Smrg#endif 926b7e1c893Smrg 927b7e1c893Smrg#define xFixedToFloat(f) (((float) (f)) / 65536) 928b7e1c893Smrg 929b7e1c893Smrgstatic inline void transformPoint(PictTransform *transform, xPointFixed *point) 930b7e1c893Smrg{ 931b7e1c893Smrg PictVector v; 932b7e1c893Smrg v.vector[0] = point->x; 933b7e1c893Smrg v.vector[1] = point->y; 934b7e1c893Smrg v.vector[2] = xFixed1; 935b7e1c893Smrg PictureTransformPoint(transform, &v); 936b7e1c893Smrg point->x = v.vector[0]; 937b7e1c893Smrg point->y = v.vector[1]; 938b7e1c893Smrg} 939b7e1c893Smrg 940b7e1c893Smrgstruct blendinfo { 941b7e1c893Smrg Bool dst_alpha; 942b7e1c893Smrg Bool src_alpha; 943b7e1c893Smrg uint32_t blend_cntl; 944b7e1c893Smrg}; 945b7e1c893Smrg 946b7e1c893Smrgstatic struct blendinfo R600BlendOp[] = { 947b7e1c893Smrg /* Clear */ 948b7e1c893Smrg {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 949b7e1c893Smrg /* Src */ 950b7e1c893Smrg {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 951b7e1c893Smrg /* Dst */ 952b7e1c893Smrg {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, 953b7e1c893Smrg /* Over */ 954b7e1c893Smrg {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 955b7e1c893Smrg /* OverReverse */ 956b7e1c893Smrg {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, 957b7e1c893Smrg /* In */ 958b7e1c893Smrg {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 959b7e1c893Smrg /* InReverse */ 
960b7e1c893Smrg {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 961b7e1c893Smrg /* Out */ 962b7e1c893Smrg {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 963b7e1c893Smrg /* OutReverse */ 964b7e1c893Smrg {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 965b7e1c893Smrg /* Atop */ 966b7e1c893Smrg {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 967b7e1c893Smrg /* AtopReverse */ 968b7e1c893Smrg {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 969b7e1c893Smrg /* Xor */ 970b7e1c893Smrg {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 971b7e1c893Smrg /* Add */ 972b7e1c893Smrg {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, 973b7e1c893Smrg}; 974b7e1c893Smrg 975b7e1c893Smrgstruct formatinfo { 976b7e1c893Smrg unsigned int fmt; 977b7e1c893Smrg uint32_t card_fmt; 978b7e1c893Smrg}; 979b7e1c893Smrg 980b7e1c893Smrgstatic struct formatinfo R600TexFormats[] = { 981b7e1c893Smrg {PICT_a8r8g8b8, FMT_8_8_8_8}, 982b7e1c893Smrg {PICT_x8r8g8b8, FMT_8_8_8_8}, 983b7e1c893Smrg {PICT_a8b8g8r8, FMT_8_8_8_8}, 984b7e1c893Smrg {PICT_x8b8g8r8, FMT_8_8_8_8}, 985b7e1c893Smrg {PICT_r5g6b5, FMT_5_6_5}, 986b7e1c893Smrg {PICT_a1r5g5b5, FMT_1_5_5_5}, 987b7e1c893Smrg {PICT_x1r5g5b5, FMT_1_5_5_5}, 988b7e1c893Smrg {PICT_a8, FMT_8}, 989b7e1c893Smrg}; 990b7e1c893Smrg 991b7e1c893Smrgstatic uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format) 992b7e1c893Smrg{ 993b7e1c893Smrg uint32_t sblend, dblend; 994b7e1c893Smrg 995b7e1c893Smrg sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask; 996b7e1c893Smrg dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask; 997b7e1c893Smrg 998b7e1c893Smrg /* If there's no dst alpha channel, 
adjust the blend op so that we'll treat 999b7e1c893Smrg * it as always 1. 1000b7e1c893Smrg */ 1001b7e1c893Smrg if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) { 1002b7e1c893Smrg if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift)) 1003b7e1c893Smrg sblend = (BLEND_ONE << COLOR_SRCBLEND_shift); 1004b7e1c893Smrg else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift)) 1005b7e1c893Smrg sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift); 1006b7e1c893Smrg } 1007b7e1c893Smrg 1008b7e1c893Smrg /* If the source alpha is being used, then we should only be in a case where 1009b7e1c893Smrg * the source blend factor is 0, and the source blend value is the mask 1010b7e1c893Smrg * channels multiplied by the source picture's alpha. 1011b7e1c893Smrg */ 1012b7e1c893Smrg if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) { 1013b7e1c893Smrg if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) { 1014b7e1c893Smrg dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift); 1015b7e1c893Smrg } else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) { 1016b7e1c893Smrg dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift); 1017b7e1c893Smrg } 1018b7e1c893Smrg } 1019b7e1c893Smrg 1020b7e1c893Smrg return sblend | dblend; 1021b7e1c893Smrg} 1022b7e1c893Smrg 1023b7e1c893Smrgstatic Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format) 1024b7e1c893Smrg{ 1025b7e1c893Smrg switch (pDstPicture->format) { 1026b7e1c893Smrg case PICT_a8r8g8b8: 1027b7e1c893Smrg case PICT_x8r8g8b8: 1028b7e1c893Smrg *dst_format = COLOR_8_8_8_8; 1029b7e1c893Smrg break; 1030b7e1c893Smrg case PICT_r5g6b5: 1031b7e1c893Smrg *dst_format = COLOR_5_6_5; 1032b7e1c893Smrg break; 1033b7e1c893Smrg case PICT_a1r5g5b5: 1034b7e1c893Smrg case PICT_x1r5g5b5: 1035b7e1c893Smrg *dst_format = COLOR_1_5_5_5; 1036b7e1c893Smrg break; 1037b7e1c893Smrg case PICT_a8: 1038b7e1c893Smrg *dst_format = COLOR_8; 1039b7e1c893Smrg break; 1040b7e1c893Smrg default: 1041b7e1c893Smrg 
RADEON_FALLBACK(("Unsupported dest format 0x%x\n", 1042b7e1c893Smrg (int)pDstPicture->format)); 1043b7e1c893Smrg } 1044b7e1c893Smrg return TRUE; 1045b7e1c893Smrg} 1046b7e1c893Smrg 1047b7e1c893Smrgstatic Bool R600CheckCompositeTexture(PicturePtr pPict, 1048b7e1c893Smrg PicturePtr pDstPict, 1049b7e1c893Smrg int op, 1050b7e1c893Smrg int unit) 1051b7e1c893Smrg{ 1052b7e1c893Smrg int w = pPict->pDrawable->width; 1053b7e1c893Smrg int h = pPict->pDrawable->height; 1054b7e1c893Smrg unsigned int i; 1055b7e1c893Smrg int max_tex_w, max_tex_h; 1056b7e1c893Smrg 1057b7e1c893Smrg max_tex_w = 8192; 1058b7e1c893Smrg max_tex_h = 8192; 1059b7e1c893Smrg 1060b7e1c893Smrg if ((w > max_tex_w) || (h > max_tex_h)) 1061b7e1c893Smrg RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h)); 1062b7e1c893Smrg 1063b7e1c893Smrg for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) { 1064b7e1c893Smrg if (R600TexFormats[i].fmt == pPict->format) 1065b7e1c893Smrg break; 1066b7e1c893Smrg } 1067b7e1c893Smrg if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0])) 1068b7e1c893Smrg RADEON_FALLBACK(("Unsupported picture format 0x%x\n", 1069b7e1c893Smrg (int)pPict->format)); 1070b7e1c893Smrg 1071b7e1c893Smrg if (pPict->filter != PictFilterNearest && 1072b7e1c893Smrg pPict->filter != PictFilterBilinear) 1073b7e1c893Smrg RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter)); 1074b7e1c893Smrg 1075b7e1c893Smrg /* for REPEAT_NONE, Render semantics are that sampling outside the source 1076b7e1c893Smrg * picture results in alpha=0 pixels. We can implement this with a border color 1077b7e1c893Smrg * *if* our source texture has an alpha channel, otherwise we need to fall 1078b7e1c893Smrg * back. If we're not transformed then we hope that upper layers have clipped 1079b7e1c893Smrg * rendering to the bounds of the source drawable, in which case it doesn't 1080b7e1c893Smrg * matter. I have not, however, verified that the X server always does such 1081b7e1c893Smrg * clipping. 
1082b7e1c893Smrg */ 1083b7e1c893Smrg /* FIXME R6xx */ 1084b7e1c893Smrg if (pPict->transform != 0 && !pPict->repeat && PICT_FORMAT_A(pPict->format) == 0) { 1085b7e1c893Smrg if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0))) 1086b7e1c893Smrg RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n")); 1087b7e1c893Smrg } 1088b7e1c893Smrg 1089b7e1c893Smrg return TRUE; 1090b7e1c893Smrg} 1091b7e1c893Smrg 1092b7e1c893Smrgstatic Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, 1093b7e1c893Smrg int unit) 1094b7e1c893Smrg{ 1095b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; 1096b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1097b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 1098b7e1c893Smrg int w = pPict->pDrawable->width; 1099b7e1c893Smrg int h = pPict->pDrawable->height; 1100b7e1c893Smrg unsigned int i; 1101b7e1c893Smrg tex_resource_t tex_res; 1102b7e1c893Smrg tex_sampler_t tex_samp; 1103b7e1c893Smrg int pix_r, pix_g, pix_b, pix_a; 1104b7e1c893Smrg 1105b7e1c893Smrg CLEAR (tex_res); 1106b7e1c893Smrg CLEAR (tex_samp); 1107b7e1c893Smrg 1108b7e1c893Smrg accel_state->src_mc_addr[unit] = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; 1109b7e1c893Smrg accel_state->src_pitch[unit] = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); 1110b7e1c893Smrg accel_state->src_size[unit] = exaGetPixmapPitch(pPix) * pPix->drawable.height; 1111b7e1c893Smrg 1112c503f109Smrg if (accel_state->src_pitch[unit] & 7) 1113b7e1c893Smrg RADEON_FALLBACK(("Bad pitch %d 0x%x\n", (int)accel_state->src_pitch[unit], unit)); 1114b7e1c893Smrg 1115c503f109Smrg if (accel_state->src_mc_addr[unit] & 0xff) 1116b7e1c893Smrg RADEON_FALLBACK(("Bad offset %d 0x%x\n", (int)accel_state->src_mc_addr[unit], unit)); 1117b7e1c893Smrg 1118b7e1c893Smrg for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) { 1119b7e1c893Smrg if (R600TexFormats[i].fmt == 
pPict->format) 1120b7e1c893Smrg break; 1121b7e1c893Smrg } 1122b7e1c893Smrg 1123b7e1c893Smrg accel_state->texW[unit] = w; 1124b7e1c893Smrg accel_state->texH[unit] = h; 1125b7e1c893Smrg 1126b7e1c893Smrg /* ErrorF("Tex %d setup %dx%d\n", unit, w, h); */ 1127b7e1c893Smrg 1128b7e1c893Smrg /* flush texture cache */ 1129b7e1c893Smrg cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, 1130b7e1c893Smrg accel_state->src_size[unit], accel_state->src_mc_addr[unit]); 1131b7e1c893Smrg 1132b7e1c893Smrg /* Texture */ 1133b7e1c893Smrg tex_res.id = unit; 1134b7e1c893Smrg tex_res.w = w; 1135b7e1c893Smrg tex_res.h = h; 1136b7e1c893Smrg tex_res.pitch = accel_state->src_pitch[unit]; 1137b7e1c893Smrg tex_res.depth = 0; 1138b7e1c893Smrg tex_res.dim = SQ_TEX_DIM_2D; 1139b7e1c893Smrg tex_res.base = accel_state->src_mc_addr[unit]; 1140b7e1c893Smrg tex_res.mip_base = accel_state->src_mc_addr[unit]; 1141b7e1c893Smrg tex_res.format = R600TexFormats[i].card_fmt; 1142b7e1c893Smrg tex_res.request_size = 1; 1143b7e1c893Smrg 1144b7e1c893Smrg /* component swizzles */ 1145b7e1c893Smrg switch (pPict->format) { 1146b7e1c893Smrg case PICT_a1r5g5b5: 1147b7e1c893Smrg case PICT_a8r8g8b8: 1148b7e1c893Smrg pix_r = SQ_SEL_Z; /* R */ 1149b7e1c893Smrg pix_g = SQ_SEL_Y; /* G */ 1150b7e1c893Smrg pix_b = SQ_SEL_X; /* B */ 1151b7e1c893Smrg pix_a = SQ_SEL_W; /* A */ 1152b7e1c893Smrg break; 1153b7e1c893Smrg case PICT_a8b8g8r8: 1154b7e1c893Smrg pix_r = SQ_SEL_X; /* R */ 1155b7e1c893Smrg pix_g = SQ_SEL_Y; /* G */ 1156b7e1c893Smrg pix_b = SQ_SEL_Z; /* B */ 1157b7e1c893Smrg pix_a = SQ_SEL_W; /* A */ 1158b7e1c893Smrg break; 1159b7e1c893Smrg case PICT_x8b8g8r8: 1160b7e1c893Smrg pix_r = SQ_SEL_X; /* R */ 1161b7e1c893Smrg pix_g = SQ_SEL_Y; /* G */ 1162b7e1c893Smrg pix_b = SQ_SEL_Z; /* B */ 1163b7e1c893Smrg pix_a = SQ_SEL_1; /* A */ 1164b7e1c893Smrg break; 1165b7e1c893Smrg case PICT_x1r5g5b5: 1166b7e1c893Smrg case PICT_x8r8g8b8: 1167b7e1c893Smrg case PICT_r5g6b5: 1168b7e1c893Smrg pix_r = SQ_SEL_Z; /* R */ 
1169b7e1c893Smrg pix_g = SQ_SEL_Y; /* G */ 1170b7e1c893Smrg pix_b = SQ_SEL_X; /* B */ 1171b7e1c893Smrg pix_a = SQ_SEL_1; /* A */ 1172b7e1c893Smrg break; 1173b7e1c893Smrg case PICT_a8: 1174b7e1c893Smrg pix_r = SQ_SEL_0; /* R */ 1175b7e1c893Smrg pix_g = SQ_SEL_0; /* G */ 1176b7e1c893Smrg pix_b = SQ_SEL_0; /* B */ 1177b7e1c893Smrg pix_a = SQ_SEL_X; /* A */ 1178b7e1c893Smrg break; 1179b7e1c893Smrg default: 1180b7e1c893Smrg RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format)); 1181b7e1c893Smrg } 1182b7e1c893Smrg 1183b7e1c893Smrg if (unit == 0) { 1184b7e1c893Smrg if (!accel_state->has_mask) { 1185b7e1c893Smrg if (PICT_FORMAT_RGB(pPict->format) == 0) { 1186b7e1c893Smrg pix_r = SQ_SEL_0; 1187b7e1c893Smrg pix_g = SQ_SEL_0; 1188b7e1c893Smrg pix_b = SQ_SEL_0; 1189b7e1c893Smrg } 1190b7e1c893Smrg 1191b7e1c893Smrg if (PICT_FORMAT_A(pPict->format) == 0) 1192b7e1c893Smrg pix_a = SQ_SEL_1; 1193b7e1c893Smrg } else { 1194b7e1c893Smrg if (accel_state->component_alpha) { 1195b7e1c893Smrg if (accel_state->src_alpha) { 1196b7e1c893Smrg if (PICT_FORMAT_A(pPict->format) == 0) { 1197b7e1c893Smrg pix_r = SQ_SEL_1; 1198b7e1c893Smrg pix_g = SQ_SEL_1; 1199b7e1c893Smrg pix_b = SQ_SEL_1; 1200b7e1c893Smrg pix_a = SQ_SEL_1; 1201b7e1c893Smrg } else { 1202b7e1c893Smrg pix_r = pix_a; 1203b7e1c893Smrg pix_g = pix_a; 1204b7e1c893Smrg pix_b = pix_a; 1205b7e1c893Smrg } 1206b7e1c893Smrg } else { 1207b7e1c893Smrg if (PICT_FORMAT_A(pPict->format) == 0) 1208b7e1c893Smrg pix_a = SQ_SEL_1; 1209b7e1c893Smrg } 1210b7e1c893Smrg } else { 1211b7e1c893Smrg if (PICT_FORMAT_RGB(pPict->format) == 0) { 1212b7e1c893Smrg pix_r = SQ_SEL_0; 1213b7e1c893Smrg pix_g = SQ_SEL_0; 1214b7e1c893Smrg pix_b = SQ_SEL_0; 1215b7e1c893Smrg } 1216b7e1c893Smrg 1217b7e1c893Smrg if (PICT_FORMAT_A(pPict->format) == 0) 1218b7e1c893Smrg pix_a = SQ_SEL_1; 1219b7e1c893Smrg } 1220b7e1c893Smrg } 1221b7e1c893Smrg } else { 1222b7e1c893Smrg if (accel_state->component_alpha) { 1223b7e1c893Smrg if (PICT_FORMAT_A(pPict->format) == 0) 1224b7e1c893Smrg 
pix_a = SQ_SEL_1; 1225b7e1c893Smrg } else { 1226b7e1c893Smrg if (PICT_FORMAT_A(pPict->format) == 0) { 1227b7e1c893Smrg pix_r = SQ_SEL_1; 1228b7e1c893Smrg pix_g = SQ_SEL_1; 1229b7e1c893Smrg pix_b = SQ_SEL_1; 1230b7e1c893Smrg pix_a = SQ_SEL_1; 1231b7e1c893Smrg } else { 1232b7e1c893Smrg pix_r = pix_a; 1233b7e1c893Smrg pix_g = pix_a; 1234b7e1c893Smrg pix_b = pix_a; 1235b7e1c893Smrg } 1236b7e1c893Smrg } 1237b7e1c893Smrg } 1238b7e1c893Smrg 1239b7e1c893Smrg tex_res.dst_sel_x = pix_r; /* R */ 1240b7e1c893Smrg tex_res.dst_sel_y = pix_g; /* G */ 1241b7e1c893Smrg tex_res.dst_sel_z = pix_b; /* B */ 1242b7e1c893Smrg tex_res.dst_sel_w = pix_a; /* A */ 1243b7e1c893Smrg 1244b7e1c893Smrg tex_res.base_level = 0; 1245b7e1c893Smrg tex_res.last_level = 0; 1246b7e1c893Smrg tex_res.perf_modulation = 0; 1247b7e1c893Smrg set_tex_resource (pScrn, accel_state->ib, &tex_res); 1248b7e1c893Smrg 1249b7e1c893Smrg tex_samp.id = unit; 1250b7e1c893Smrg tex_samp.border_color = SQ_TEX_BORDER_COLOR_TRANS_BLACK; 1251b7e1c893Smrg 1252b7e1c893Smrg if (pPict->repeat) { 1253b7e1c893Smrg switch (pPict->repeatType) { 1254b7e1c893Smrg case RepeatNormal: 1255b7e1c893Smrg tex_samp.clamp_x = SQ_TEX_WRAP; 1256b7e1c893Smrg tex_samp.clamp_y = SQ_TEX_WRAP; 1257b7e1c893Smrg break; 1258b7e1c893Smrg case RepeatPad: 1259b7e1c893Smrg tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; 1260b7e1c893Smrg tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; 1261b7e1c893Smrg break; 1262b7e1c893Smrg case RepeatReflect: 1263b7e1c893Smrg tex_samp.clamp_x = SQ_TEX_MIRROR; 1264b7e1c893Smrg tex_samp.clamp_y = SQ_TEX_MIRROR; 1265b7e1c893Smrg break; 1266b7e1c893Smrg case RepeatNone: 1267b7e1c893Smrg tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER; 1268b7e1c893Smrg tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER; 1269b7e1c893Smrg break; 1270b7e1c893Smrg default: 1271b7e1c893Smrg RADEON_FALLBACK(("Bad repeat 0x%x\n", pPict->repeatType)); 1272b7e1c893Smrg } 1273b7e1c893Smrg } else { 1274b7e1c893Smrg tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER; 1275b7e1c893Smrg tex_samp.clamp_y 
= SQ_TEX_CLAMP_BORDER; 1276b7e1c893Smrg } 1277b7e1c893Smrg 1278b7e1c893Smrg switch (pPict->filter) { 1279b7e1c893Smrg case PictFilterNearest: 1280b7e1c893Smrg tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; 1281b7e1c893Smrg tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; 1282b7e1c893Smrg break; 1283b7e1c893Smrg case PictFilterBilinear: 1284b7e1c893Smrg tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; 1285b7e1c893Smrg tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; 1286b7e1c893Smrg break; 1287b7e1c893Smrg default: 1288b7e1c893Smrg RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); 1289b7e1c893Smrg } 1290b7e1c893Smrg 1291b7e1c893Smrg tex_samp.clamp_z = SQ_TEX_WRAP; 1292b7e1c893Smrg tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 1293b7e1c893Smrg tex_samp.mip_filter = 0; /* no mipmap */ 1294b7e1c893Smrg set_tex_sampler (pScrn, accel_state->ib, &tex_samp); 1295b7e1c893Smrg 1296b7e1c893Smrg if (pPict->transform != 0) { 1297b7e1c893Smrg accel_state->is_transform[unit] = TRUE; 1298b7e1c893Smrg accel_state->transform[unit] = pPict->transform; 1299b7e1c893Smrg } else 1300b7e1c893Smrg accel_state->is_transform[unit] = FALSE; 1301b7e1c893Smrg 1302b7e1c893Smrg return TRUE; 1303b7e1c893Smrg} 1304b7e1c893Smrg 1305b7e1c893Smrgstatic Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, 1306b7e1c893Smrg PicturePtr pDstPicture) 1307b7e1c893Smrg{ 1308b7e1c893Smrg uint32_t tmp1; 1309b7e1c893Smrg PixmapPtr pSrcPixmap, pDstPixmap; 1310b7e1c893Smrg int max_tex_w, max_tex_h, max_dst_w, max_dst_h; 1311b7e1c893Smrg 1312b7e1c893Smrg /* Check for unsupported compositing operations. 
*/ 1313b7e1c893Smrg if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0]))) 1314b7e1c893Smrg RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op)); 1315b7e1c893Smrg 1316b7e1c893Smrg pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable); 1317b7e1c893Smrg 1318b7e1c893Smrg max_tex_w = 8192; 1319b7e1c893Smrg max_tex_h = 8192; 1320b7e1c893Smrg max_dst_w = 8192; 1321b7e1c893Smrg max_dst_h = 8192; 1322b7e1c893Smrg 1323b7e1c893Smrg if (pSrcPixmap->drawable.width >= max_tex_w || 1324b7e1c893Smrg pSrcPixmap->drawable.height >= max_tex_h) { 1325b7e1c893Smrg RADEON_FALLBACK(("Source w/h too large (%d,%d).\n", 1326b7e1c893Smrg pSrcPixmap->drawable.width, 1327b7e1c893Smrg pSrcPixmap->drawable.height)); 1328b7e1c893Smrg } 1329b7e1c893Smrg 1330b7e1c893Smrg pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable); 1331b7e1c893Smrg 1332b7e1c893Smrg if (pDstPixmap->drawable.width >= max_dst_w || 1333b7e1c893Smrg pDstPixmap->drawable.height >= max_dst_h) { 1334b7e1c893Smrg RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n", 1335b7e1c893Smrg pDstPixmap->drawable.width, 1336b7e1c893Smrg pDstPixmap->drawable.height)); 1337b7e1c893Smrg } 1338b7e1c893Smrg 1339b7e1c893Smrg if (pMaskPicture) { 1340b7e1c893Smrg PixmapPtr pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable); 1341b7e1c893Smrg 1342b7e1c893Smrg if (pMaskPixmap->drawable.width >= max_tex_w || 1343b7e1c893Smrg pMaskPixmap->drawable.height >= max_tex_h) { 1344b7e1c893Smrg RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n", 1345b7e1c893Smrg pMaskPixmap->drawable.width, 1346b7e1c893Smrg pMaskPixmap->drawable.height)); 1347b7e1c893Smrg } 1348b7e1c893Smrg 1349b7e1c893Smrg if (pMaskPicture->componentAlpha) { 1350b7e1c893Smrg /* Check if it's component alpha that relies on a source alpha and 1351b7e1c893Smrg * on the source value. We can only get one of those into the 1352b7e1c893Smrg * single source value that we get to blend with. 
1353b7e1c893Smrg */ 1354b7e1c893Smrg if (R600BlendOp[op].src_alpha && 1355b7e1c893Smrg (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) != 1356b7e1c893Smrg (BLEND_ZERO << COLOR_SRCBLEND_shift)) { 1357b7e1c893Smrg RADEON_FALLBACK(("Component alpha not supported with source " 1358b7e1c893Smrg "alpha and source value blending.\n")); 1359b7e1c893Smrg } 1360b7e1c893Smrg } 1361b7e1c893Smrg 1362b7e1c893Smrg if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1)) 1363b7e1c893Smrg return FALSE; 1364b7e1c893Smrg } 1365b7e1c893Smrg 1366b7e1c893Smrg if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0)) 1367b7e1c893Smrg return FALSE; 1368b7e1c893Smrg 1369b7e1c893Smrg if (!R600GetDestFormat(pDstPicture, &tmp1)) 1370b7e1c893Smrg return FALSE; 1371b7e1c893Smrg 1372b7e1c893Smrg return TRUE; 1373b7e1c893Smrg 1374b7e1c893Smrg} 1375b7e1c893Smrg 1376b7e1c893Smrgstatic Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, 1377b7e1c893Smrg PicturePtr pMaskPicture, PicturePtr pDstPicture, 1378b7e1c893Smrg PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) 1379b7e1c893Smrg{ 1380b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 1381b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1382b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 1383b7e1c893Smrg uint32_t blendcntl, dst_format; 1384b7e1c893Smrg cb_config_t cb_conf; 1385b7e1c893Smrg shader_config_t vs_conf, ps_conf; 1386b7e1c893Smrg 1387b7e1c893Smrg /* return FALSE; */ 1388b7e1c893Smrg 1389b7e1c893Smrg if (pMask) { 1390b7e1c893Smrg accel_state->has_mask = TRUE; 1391b7e1c893Smrg if (pMaskPicture->componentAlpha) { 1392b7e1c893Smrg accel_state->component_alpha = TRUE; 1393b7e1c893Smrg if (R600BlendOp[op].src_alpha) 1394b7e1c893Smrg accel_state->src_alpha = TRUE; 1395b7e1c893Smrg else 1396b7e1c893Smrg accel_state->src_alpha = FALSE; 1397b7e1c893Smrg } else { 1398b7e1c893Smrg accel_state->component_alpha = FALSE; 1399b7e1c893Smrg accel_state->src_alpha = FALSE; 
1400b7e1c893Smrg } 1401b7e1c893Smrg } else { 1402b7e1c893Smrg accel_state->has_mask = FALSE; 1403b7e1c893Smrg accel_state->component_alpha = FALSE; 1404b7e1c893Smrg accel_state->src_alpha = FALSE; 1405b7e1c893Smrg } 1406b7e1c893Smrg 1407b7e1c893Smrg accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; 1408b7e1c893Smrg accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); 1409b7e1c893Smrg accel_state->dst_size = exaGetPixmapPitch(pDst) * pDst->drawable.height; 1410b7e1c893Smrg 1411b7e1c893Smrg if (accel_state->dst_pitch & 7) 1412b7e1c893Smrg RADEON_FALLBACK(("Bad dst pitch 0x%x\n", (int)accel_state->dst_pitch)); 1413b7e1c893Smrg 1414b7e1c893Smrg if (accel_state->dst_mc_addr & 0xff) 1415b7e1c893Smrg RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)accel_state->dst_mc_addr)); 1416b7e1c893Smrg 1417b7e1c893Smrg if (!R600GetDestFormat(pDstPicture, &dst_format)) 1418b7e1c893Smrg return FALSE; 1419b7e1c893Smrg 1420b7e1c893Smrg CLEAR (cb_conf); 1421b7e1c893Smrg CLEAR (vs_conf); 1422b7e1c893Smrg CLEAR (ps_conf); 1423b7e1c893Smrg 1424b7e1c893Smrg accel_state->ib = RADEONCPGetBuffer(pScrn); 1425b7e1c893Smrg 1426b7e1c893Smrg /* Init */ 1427b7e1c893Smrg start_3d(pScrn, accel_state->ib); 1428b7e1c893Smrg 1429b7e1c893Smrg set_default_state(pScrn, accel_state->ib); 1430b7e1c893Smrg 1431b7e1c893Smrg /* Scissor / viewport */ 1432b7e1c893Smrg EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); 1433b7e1c893Smrg EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); 1434b7e1c893Smrg 1435b7e1c893Smrg if (!R600TextureSetup(pSrcPicture, pSrc, 0)) { 1436b7e1c893Smrg R600IBDiscard(pScrn, accel_state->ib); 1437b7e1c893Smrg return FALSE; 1438b7e1c893Smrg } 1439b7e1c893Smrg 1440b7e1c893Smrg if (pMask) { 1441b7e1c893Smrg if (!R600TextureSetup(pMaskPicture, pMask, 1)) { 1442b7e1c893Smrg R600IBDiscard(pScrn, accel_state->ib); 1443b7e1c893Smrg return FALSE; 1444b7e1c893Smrg } 1445b7e1c893Smrg } else 1446b7e1c893Smrg 
accel_state->is_transform[1] = FALSE; 1447b7e1c893Smrg 1448b7e1c893Smrg if (pMask) { 1449b7e1c893Smrg set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0)); 1450b7e1c893Smrg accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + 1451b7e1c893Smrg accel_state->comp_mask_ps_offset; 1452b7e1c893Smrg } else { 1453b7e1c893Smrg set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0)); 1454b7e1c893Smrg accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + 1455b7e1c893Smrg accel_state->comp_ps_offset; 1456b7e1c893Smrg } 1457b7e1c893Smrg 1458b7e1c893Smrg accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + 1459b7e1c893Smrg accel_state->comp_vs_offset; 1460b7e1c893Smrg 1461b7e1c893Smrg accel_state->vs_size = 512; 1462b7e1c893Smrg accel_state->ps_size = 512; 1463b7e1c893Smrg 1464b7e1c893Smrg /* Shader */ 1465b7e1c893Smrg 1466b7e1c893Smrg /* flush SQ cache */ 1467b7e1c893Smrg cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, 1468b7e1c893Smrg accel_state->vs_size, accel_state->vs_mc_addr); 1469b7e1c893Smrg 1470b7e1c893Smrg vs_conf.shader_addr = accel_state->vs_mc_addr; 1471b7e1c893Smrg vs_conf.num_gprs = 3; 1472b7e1c893Smrg vs_conf.stack_size = 1; 1473b7e1c893Smrg vs_setup (pScrn, accel_state->ib, &vs_conf); 1474b7e1c893Smrg 1475b7e1c893Smrg /* flush SQ cache */ 1476b7e1c893Smrg cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, 1477b7e1c893Smrg accel_state->ps_size, accel_state->ps_mc_addr); 1478b7e1c893Smrg 1479b7e1c893Smrg ps_conf.shader_addr = accel_state->ps_mc_addr; 1480b7e1c893Smrg ps_conf.num_gprs = 3; 1481b7e1c893Smrg ps_conf.stack_size = 0; 1482b7e1c893Smrg ps_conf.uncached_first_inst = 1; 1483b7e1c893Smrg ps_conf.clamp_consts = 0; 1484b7e1c893Smrg ps_conf.export_mode = 2; 1485b7e1c893Smrg ps_setup (pScrn, accel_state->ib, &ps_conf); 1486b7e1c893Smrg 1487b7e1c893Smrg EREG(accel_state->ib, 
CB_SHADER_MASK, (0xf << OUTPUT0_ENABLE_shift)); 1488b7e1c893Smrg EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); 1489b7e1c893Smrg 1490b7e1c893Smrg blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format); 1491b7e1c893Smrg 1492b7e1c893Smrg if (info->ChipFamily == CHIP_FAMILY_R600) { 1493b7e1c893Smrg /* no per-MRT blend on R600 */ 1494b7e1c893Smrg EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift)); 1495b7e1c893Smrg EREG(accel_state->ib, CB_BLEND_CONTROL, blendcntl); 1496b7e1c893Smrg } else { 1497b7e1c893Smrg EREG(accel_state->ib, CB_COLOR_CONTROL, (RADEON_ROP[3] | 1498b7e1c893Smrg (1 << TARGET_BLEND_ENABLE_shift) | 1499b7e1c893Smrg PER_MRT_BLEND_bit)); 1500b7e1c893Smrg EREG(accel_state->ib, CB_BLEND0_CONTROL, blendcntl); 1501b7e1c893Smrg } 1502b7e1c893Smrg 1503b7e1c893Smrg cb_conf.id = 0; 1504b7e1c893Smrg cb_conf.w = accel_state->dst_pitch; 1505b7e1c893Smrg cb_conf.h = pDst->drawable.height; 1506b7e1c893Smrg cb_conf.base = accel_state->dst_mc_addr; 1507b7e1c893Smrg cb_conf.format = dst_format; 1508b7e1c893Smrg 1509b7e1c893Smrg switch (pDstPicture->format) { 1510b7e1c893Smrg case PICT_a8r8g8b8: 1511b7e1c893Smrg case PICT_x8r8g8b8: 1512b7e1c893Smrg case PICT_a1r5g5b5: 1513b7e1c893Smrg case PICT_x1r5g5b5: 1514b7e1c893Smrg default: 1515b7e1c893Smrg cb_conf.comp_swap = 1; /* ARGB */ 1516b7e1c893Smrg break; 1517b7e1c893Smrg case PICT_r5g6b5: 1518b7e1c893Smrg cb_conf.comp_swap = 2; /* RGB */ 1519b7e1c893Smrg break; 1520b7e1c893Smrg case PICT_a8: 1521b7e1c893Smrg cb_conf.comp_swap = 3; /* A */ 1522b7e1c893Smrg break; 1523b7e1c893Smrg } 1524b7e1c893Smrg cb_conf.source_format = 1; 1525b7e1c893Smrg cb_conf.blend_clamp = 1; 1526b7e1c893Smrg set_render_target(pScrn, accel_state->ib, &cb_conf); 1527b7e1c893Smrg 1528b7e1c893Smrg EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | 1529b7e1c893Smrg (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | 1530b7e1c893Smrg (POLYMODE_PTYPE__TRIANGLES << 
POLYMODE_BACK_PTYPE_shift))); 1531b7e1c893Smrg EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ 1532b7e1c893Smrg DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ 1533b7e1c893Smrg 1534b7e1c893Smrg /* Interpolator setup */ 1535b7e1c893Smrg if (pMask) { 1536b7e1c893Smrg /* export 2 tex coords from VS */ 1537b7e1c893Smrg EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift)); 1538b7e1c893Smrg /* src = semantic id 0; mask = semantic id 1 */ 1539b7e1c893Smrg EREG(accel_state->ib, SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) | 1540b7e1c893Smrg (1 << SEMANTIC_1_shift))); 1541b7e1c893Smrg /* input 2 tex coords from VS */ 1542b7e1c893Smrg EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (2 << NUM_INTERP_shift)); 1543b7e1c893Smrg } else { 1544b7e1c893Smrg /* export 1 tex coords from VS */ 1545b7e1c893Smrg EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); 1546b7e1c893Smrg /* src = semantic id 0 */ 1547b7e1c893Smrg EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); 1548b7e1c893Smrg /* input 1 tex coords from VS */ 1549b7e1c893Smrg EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift)); 1550b7e1c893Smrg } 1551b7e1c893Smrg EREG(accel_state->ib, SPI_PS_IN_CONTROL_1, 0); 1552b7e1c893Smrg /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */ 1553b7e1c893Smrg EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | 1554b7e1c893Smrg (0x01 << DEFAULT_VAL_shift) | 1555b7e1c893Smrg SEL_CENTROID_bit)); 1556b7e1c893Smrg /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */ 1557b7e1c893Smrg EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (1 <<2), ((1 << SEMANTIC_shift) | 1558b7e1c893Smrg (0x01 << DEFAULT_VAL_shift) | 1559b7e1c893Smrg SEL_CENTROID_bit)); 1560b7e1c893Smrg EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); 1561b7e1c893Smrg 1562b7e1c893Smrg accel_state->vb_index = 0; 1563b7e1c893Smrg 1564b7e1c893Smrg return TRUE; 
1565b7e1c893Smrg} 1566b7e1c893Smrg 1567b7e1c893Smrgstatic void R600Composite(PixmapPtr pDst, 1568b7e1c893Smrg int srcX, int srcY, 1569b7e1c893Smrg int maskX, int maskY, 1570b7e1c893Smrg int dstX, int dstY, 1571b7e1c893Smrg int w, int h) 1572b7e1c893Smrg{ 1573b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1574b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1575b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 1576b7e1c893Smrg float *vb; 1577b7e1c893Smrg xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight; 1578b7e1c893Smrg 1579b7e1c893Smrg /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n", 1580b7e1c893Smrg srcX, srcY, maskX, maskY,dstX, dstY, w, h); */ 1581b7e1c893Smrg 1582b7e1c893Smrg srcTopLeft.x = IntToxFixed(srcX); 1583b7e1c893Smrg srcTopLeft.y = IntToxFixed(srcY); 1584b7e1c893Smrg srcTopRight.x = IntToxFixed(srcX + w); 1585b7e1c893Smrg srcTopRight.y = IntToxFixed(srcY); 1586b7e1c893Smrg srcBottomLeft.x = IntToxFixed(srcX); 1587b7e1c893Smrg srcBottomLeft.y = IntToxFixed(srcY + h); 1588b7e1c893Smrg srcBottomRight.x = IntToxFixed(srcX + w); 1589b7e1c893Smrg srcBottomRight.y = IntToxFixed(srcY + h); 1590b7e1c893Smrg 1591b7e1c893Smrg /* XXX do transform in vertex shader */ 1592b7e1c893Smrg if (accel_state->is_transform[0]) { 1593b7e1c893Smrg transformPoint(accel_state->transform[0], &srcTopLeft); 1594b7e1c893Smrg transformPoint(accel_state->transform[0], &srcTopRight); 1595b7e1c893Smrg transformPoint(accel_state->transform[0], &srcBottomLeft); 1596b7e1c893Smrg transformPoint(accel_state->transform[0], &srcBottomRight); 1597b7e1c893Smrg } 1598b7e1c893Smrg 1599b7e1c893Smrg if (accel_state->has_mask) { 1600b7e1c893Smrg xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight; 1601b7e1c893Smrg 1602b7e1c893Smrg if (((accel_state->vb_index + 3) * 24) > (accel_state->ib->total / 2)) { 1603b7e1c893Smrg R600DoneComposite(pDst); 1604b7e1c893Smrg accel_state->vb_index = 0; 
1605b7e1c893Smrg accel_state->ib = RADEONCPGetBuffer(pScrn); 1606b7e1c893Smrg } 1607b7e1c893Smrg 1608b7e1c893Smrg vb = (pointer)((char*)accel_state->ib->address + 1609b7e1c893Smrg (accel_state->ib->total / 2) + 1610b7e1c893Smrg accel_state->vb_index * 24); 1611b7e1c893Smrg 1612b7e1c893Smrg maskTopLeft.x = IntToxFixed(maskX); 1613b7e1c893Smrg maskTopLeft.y = IntToxFixed(maskY); 1614b7e1c893Smrg maskTopRight.x = IntToxFixed(maskX + w); 1615b7e1c893Smrg maskTopRight.y = IntToxFixed(maskY); 1616b7e1c893Smrg maskBottomLeft.x = IntToxFixed(maskX); 1617b7e1c893Smrg maskBottomLeft.y = IntToxFixed(maskY + h); 1618b7e1c893Smrg maskBottomRight.x = IntToxFixed(maskX + w); 1619b7e1c893Smrg maskBottomRight.y = IntToxFixed(maskY + h); 1620b7e1c893Smrg 1621b7e1c893Smrg if (accel_state->is_transform[1]) { 1622b7e1c893Smrg transformPoint(accel_state->transform[1], &maskTopLeft); 1623b7e1c893Smrg transformPoint(accel_state->transform[1], &maskTopRight); 1624b7e1c893Smrg transformPoint(accel_state->transform[1], &maskBottomLeft); 1625b7e1c893Smrg transformPoint(accel_state->transform[1], &maskBottomRight); 1626b7e1c893Smrg } 1627b7e1c893Smrg 1628b7e1c893Smrg vb[0] = (float)dstX; 1629b7e1c893Smrg vb[1] = (float)dstY; 1630b7e1c893Smrg vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0]; 1631b7e1c893Smrg vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0]; 1632b7e1c893Smrg vb[4] = xFixedToFloat(maskTopLeft.x) / accel_state->texW[1]; 1633b7e1c893Smrg vb[5] = xFixedToFloat(maskTopLeft.y) / accel_state->texH[1]; 1634b7e1c893Smrg 1635b7e1c893Smrg vb[6] = (float)dstX; 1636b7e1c893Smrg vb[7] = (float)(dstY + h); 1637b7e1c893Smrg vb[8] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0]; 1638b7e1c893Smrg vb[9] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0]; 1639b7e1c893Smrg vb[10] = xFixedToFloat(maskBottomLeft.x) / accel_state->texW[1]; 1640b7e1c893Smrg vb[11] = xFixedToFloat(maskBottomLeft.y) / accel_state->texH[1]; 1641b7e1c893Smrg 1642b7e1c893Smrg vb[12] = 
(float)(dstX + w); 1643b7e1c893Smrg vb[13] = (float)(dstY + h); 1644b7e1c893Smrg vb[14] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0]; 1645b7e1c893Smrg vb[15] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0]; 1646b7e1c893Smrg vb[16] = xFixedToFloat(maskBottomRight.x) / accel_state->texW[1]; 1647b7e1c893Smrg vb[17] = xFixedToFloat(maskBottomRight.y) / accel_state->texH[1]; 1648b7e1c893Smrg 1649b7e1c893Smrg } else { 1650b7e1c893Smrg if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) { 1651b7e1c893Smrg R600DoneComposite(pDst); 1652b7e1c893Smrg accel_state->vb_index = 0; 1653b7e1c893Smrg accel_state->ib = RADEONCPGetBuffer(pScrn); 1654b7e1c893Smrg } 1655b7e1c893Smrg 1656b7e1c893Smrg vb = (pointer)((char*)accel_state->ib->address + 1657b7e1c893Smrg (accel_state->ib->total / 2) + 1658b7e1c893Smrg accel_state->vb_index * 16); 1659b7e1c893Smrg 1660b7e1c893Smrg vb[0] = (float)dstX; 1661b7e1c893Smrg vb[1] = (float)dstY; 1662b7e1c893Smrg vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0]; 1663b7e1c893Smrg vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0]; 1664b7e1c893Smrg 1665b7e1c893Smrg vb[4] = (float)dstX; 1666b7e1c893Smrg vb[5] = (float)(dstY + h); 1667b7e1c893Smrg vb[6] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0]; 1668b7e1c893Smrg vb[7] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0]; 1669b7e1c893Smrg 1670b7e1c893Smrg vb[8] = (float)(dstX + w); 1671b7e1c893Smrg vb[9] = (float)(dstY + h); 1672b7e1c893Smrg vb[10] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0]; 1673b7e1c893Smrg vb[11] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0]; 1674b7e1c893Smrg } 1675b7e1c893Smrg 1676b7e1c893Smrg accel_state->vb_index += 3; 1677b7e1c893Smrg 1678b7e1c893Smrg} 1679b7e1c893Smrg 1680b7e1c893Smrgstatic void R600DoneComposite(PixmapPtr pDst) 1681b7e1c893Smrg{ 1682b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1683b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 
1684b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 1685b7e1c893Smrg draw_config_t draw_conf; 1686b7e1c893Smrg vtx_resource_t vtx_res; 1687b7e1c893Smrg 1688b7e1c893Smrg CLEAR (draw_conf); 1689b7e1c893Smrg CLEAR (vtx_res); 1690b7e1c893Smrg 1691b7e1c893Smrg if (accel_state->vb_index == 0) { 1692b7e1c893Smrg R600IBDiscard(pScrn, accel_state->ib); 1693b7e1c893Smrg return; 1694b7e1c893Smrg } 1695b7e1c893Smrg 1696b7e1c893Smrg accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + 1697b7e1c893Smrg (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); 1698b7e1c893Smrg 1699b7e1c893Smrg 1700b7e1c893Smrg /* Vertex buffer setup */ 1701b7e1c893Smrg if (accel_state->has_mask) { 1702b7e1c893Smrg accel_state->vb_size = accel_state->vb_index * 24; 1703b7e1c893Smrg vtx_res.id = SQ_VTX_RESOURCE_vs; 1704b7e1c893Smrg vtx_res.vtx_size_dw = 24 / 4; 1705b7e1c893Smrg vtx_res.vtx_num_entries = accel_state->vb_size / 4; 1706b7e1c893Smrg vtx_res.mem_req_size = 1; 1707b7e1c893Smrg vtx_res.vb_addr = accel_state->vb_mc_addr; 1708b7e1c893Smrg } else { 1709b7e1c893Smrg accel_state->vb_size = accel_state->vb_index * 16; 1710b7e1c893Smrg vtx_res.id = SQ_VTX_RESOURCE_vs; 1711b7e1c893Smrg vtx_res.vtx_size_dw = 16 / 4; 1712b7e1c893Smrg vtx_res.vtx_num_entries = accel_state->vb_size / 4; 1713b7e1c893Smrg vtx_res.mem_req_size = 1; 1714b7e1c893Smrg vtx_res.vb_addr = accel_state->vb_mc_addr; 1715b7e1c893Smrg } 1716b7e1c893Smrg /* flush vertex cache */ 1717b7e1c893Smrg if ((info->ChipFamily == CHIP_FAMILY_RV610) || 1718b7e1c893Smrg (info->ChipFamily == CHIP_FAMILY_RV620) || 1719b7e1c893Smrg (info->ChipFamily == CHIP_FAMILY_RS780) || 1720c503f109Smrg (info->ChipFamily == CHIP_FAMILY_RS880) || 1721b7e1c893Smrg (info->ChipFamily == CHIP_FAMILY_RV710)) 1722b7e1c893Smrg cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, 1723b7e1c893Smrg accel_state->vb_size, accel_state->vb_mc_addr); 1724b7e1c893Smrg else 1725b7e1c893Smrg 
cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, 1726b7e1c893Smrg accel_state->vb_size, accel_state->vb_mc_addr); 1727b7e1c893Smrg 1728b7e1c893Smrg set_vtx_resource (pScrn, accel_state->ib, &vtx_res); 1729b7e1c893Smrg 1730b7e1c893Smrg draw_conf.prim_type = DI_PT_RECTLIST; 1731b7e1c893Smrg draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; 1732b7e1c893Smrg draw_conf.num_instances = 1; 1733b7e1c893Smrg draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; 1734b7e1c893Smrg draw_conf.index_type = DI_INDEX_SIZE_16_BIT; 1735b7e1c893Smrg 1736b7e1c893Smrg draw_auto(pScrn, accel_state->ib, &draw_conf); 1737b7e1c893Smrg 1738b7e1c893Smrg wait_3d_idle_clean(pScrn, accel_state->ib); 1739b7e1c893Smrg 1740b7e1c893Smrg cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), 1741b7e1c893Smrg accel_state->dst_size, accel_state->dst_mc_addr); 1742b7e1c893Smrg 1743b7e1c893Smrg R600CPFlushIndirect(pScrn, accel_state->ib); 1744b7e1c893Smrg} 1745b7e1c893Smrg 1746b7e1c893SmrgBool 1747b7e1c893SmrgR600CopyToVRAM(ScrnInfoPtr pScrn, 1748b7e1c893Smrg char *src, int src_pitch, 1749b7e1c893Smrg uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_height, int bpp, 1750b7e1c893Smrg int x, int y, int w, int h) 1751b7e1c893Smrg{ 1752b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1753b7e1c893Smrg uint32_t scratch_mc_addr; 1754b7e1c893Smrg int wpass = w * (bpp/8); 1755b7e1c893Smrg int scratch_pitch_bytes = (wpass + 255) & ~255; 1756b7e1c893Smrg uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8); 1757b7e1c893Smrg int scratch_offset = 0, hpass, temph; 1758b7e1c893Smrg char *dst; 1759b7e1c893Smrg drmBufPtr scratch; 1760b7e1c893Smrg 1761b7e1c893Smrg if (dst_pitch & 7) 1762b7e1c893Smrg return FALSE; 1763b7e1c893Smrg 1764b7e1c893Smrg if (dst_mc_addr & 0xff) 1765b7e1c893Smrg return FALSE; 1766b7e1c893Smrg 1767b7e1c893Smrg scratch = RADEONCPGetBuffer(pScrn); 1768b7e1c893Smrg if (scratch == NULL) 1769b7e1c893Smrg return FALSE; 
1770b7e1c893Smrg 1771b7e1c893Smrg scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total); 1772b7e1c893Smrg temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes); 1773b7e1c893Smrg dst = (char *)scratch->address; 1774b7e1c893Smrg 1775b7e1c893Smrg /* memcopy from sys to scratch */ 1776b7e1c893Smrg while (temph--) { 1777b7e1c893Smrg memcpy (dst, src, wpass); 1778b7e1c893Smrg src += src_pitch; 1779b7e1c893Smrg dst += scratch_pitch_bytes; 1780b7e1c893Smrg } 1781b7e1c893Smrg 1782b7e1c893Smrg while (h) { 1783b7e1c893Smrg uint32_t offset = scratch_mc_addr + scratch_offset; 1784b7e1c893Smrg int oldhpass = hpass; 1785b7e1c893Smrg h -= oldhpass; 1786b7e1c893Smrg temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes); 1787b7e1c893Smrg 1788b7e1c893Smrg if (hpass) { 1789b7e1c893Smrg scratch_offset = scratch->total/2 - scratch_offset; 1790b7e1c893Smrg dst = (char *)scratch->address + scratch_offset; 1791b7e1c893Smrg /* wait for the engine to be idle */ 1792b7e1c893Smrg RADEONWaitForIdleCP(pScrn); 1793b7e1c893Smrg //memcopy from sys to scratch 1794b7e1c893Smrg while (temph--) { 1795b7e1c893Smrg memcpy (dst, src, wpass); 1796b7e1c893Smrg src += src_pitch; 1797b7e1c893Smrg dst += scratch_pitch_bytes; 1798b7e1c893Smrg } 1799b7e1c893Smrg } 1800b7e1c893Smrg /* blit from scratch to vram */ 1801b7e1c893Smrg R600DoPrepareCopy(pScrn, 1802b7e1c893Smrg scratch_pitch, w, oldhpass, offset, bpp, 1803b7e1c893Smrg dst_pitch, dst_height, dst_mc_addr, bpp, 1804b7e1c893Smrg 3, 0xffffffff); 1805b7e1c893Smrg R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass); 1806b7e1c893Smrg R600DoCopy(pScrn); 1807b7e1c893Smrg y += oldhpass; 1808b7e1c893Smrg } 1809b7e1c893Smrg 1810b7e1c893Smrg R600IBDiscard(pScrn, scratch); 1811b7e1c893Smrg 1812b7e1c893Smrg return TRUE; 1813b7e1c893Smrg} 1814b7e1c893Smrg 1815b7e1c893Smrgstatic Bool 1816b7e1c893SmrgR600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 1817b7e1c893Smrg char *src, int src_pitch) 
1818b7e1c893Smrg{ 1819b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1820b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1821b7e1c893Smrg uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); 1822b7e1c893Smrg uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; 1823b7e1c893Smrg uint32_t dst_height = pDst->drawable.height; 1824b7e1c893Smrg int bpp = pDst->drawable.bitsPerPixel; 1825b7e1c893Smrg 1826b7e1c893Smrg return R600CopyToVRAM(pScrn, 1827b7e1c893Smrg src, src_pitch, 1828b7e1c893Smrg dst_pitch, dst_mc_addr, dst_height, bpp, 1829b7e1c893Smrg x, y, w, h); 1830b7e1c893Smrg} 1831b7e1c893Smrg 1832b7e1c893Smrgstatic Bool 1833b7e1c893SmrgR600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 1834b7e1c893Smrg char *dst, int dst_pitch) 1835b7e1c893Smrg{ 1836b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 1837b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1838b7e1c893Smrg uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); 1839b7e1c893Smrg uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; 1840b7e1c893Smrg uint32_t src_width = pSrc->drawable.width; 1841b7e1c893Smrg uint32_t src_height = pSrc->drawable.height; 1842b7e1c893Smrg int bpp = pSrc->drawable.bitsPerPixel; 1843b7e1c893Smrg uint32_t scratch_mc_addr; 1844b7e1c893Smrg int scratch_pitch_bytes = (dst_pitch + 255) & ~255; 1845b7e1c893Smrg int scratch_offset = 0, hpass; 1846b7e1c893Smrg uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8); 1847b7e1c893Smrg int wpass = w * (bpp/8); 1848b7e1c893Smrg drmBufPtr scratch; 1849b7e1c893Smrg 1850c503f109Smrg /* RV740 seems to be particularly problematic with small xfers */ 1851c503f109Smrg if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32)) 1852c503f109Smrg return FALSE; 1853c503f109Smrg 1854b7e1c893Smrg if (src_pitch & 7) 1855b7e1c893Smrg return FALSE; 
1856b7e1c893Smrg 1857b7e1c893Smrg scratch = RADEONCPGetBuffer(pScrn); 1858b7e1c893Smrg if (scratch == NULL) 1859b7e1c893Smrg return FALSE; 1860b7e1c893Smrg 1861b7e1c893Smrg scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total); 1862b7e1c893Smrg hpass = min(h, scratch->total/2 / scratch_pitch_bytes); 1863b7e1c893Smrg 1864b7e1c893Smrg /* blit from vram to scratch */ 1865b7e1c893Smrg R600DoPrepareCopy(pScrn, 1866b7e1c893Smrg src_pitch, src_width, src_height, src_mc_addr, bpp, 1867b7e1c893Smrg scratch_pitch, hpass, scratch_mc_addr, bpp, 1868b7e1c893Smrg 3, 0xffffffff); 1869b7e1c893Smrg R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); 1870b7e1c893Smrg R600DoCopy(pScrn); 1871b7e1c893Smrg 1872b7e1c893Smrg while (h) { 1873b7e1c893Smrg char *src = (char *)scratch->address + scratch_offset; 1874b7e1c893Smrg int oldhpass = hpass; 1875b7e1c893Smrg h -= oldhpass; 1876b7e1c893Smrg y += oldhpass; 1877b7e1c893Smrg hpass = min(h, scratch->total/2 / scratch_pitch_bytes); 1878b7e1c893Smrg 1879b7e1c893Smrg if (hpass) { 1880b7e1c893Smrg scratch_offset = scratch->total/2 - scratch_offset; 1881b7e1c893Smrg /* blit from vram to scratch */ 1882b7e1c893Smrg R600DoPrepareCopy(pScrn, 1883b7e1c893Smrg src_pitch, src_width, src_height, src_mc_addr, bpp, 1884b7e1c893Smrg scratch_pitch, hpass, scratch_mc_addr + scratch_offset, bpp, 1885b7e1c893Smrg 3, 0xffffffff); 1886b7e1c893Smrg R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); 1887b7e1c893Smrg R600DoCopy(pScrn); 1888b7e1c893Smrg } 1889b7e1c893Smrg 1890b7e1c893Smrg /* wait for the engine to be idle */ 1891b7e1c893Smrg RADEONWaitForIdleCP(pScrn); 1892b7e1c893Smrg /* memcopy from scratch to sys */ 1893b7e1c893Smrg while (oldhpass--) { 1894b7e1c893Smrg memcpy (dst, src, wpass); 1895b7e1c893Smrg dst += dst_pitch; 1896b7e1c893Smrg src += scratch_pitch_bytes; 1897b7e1c893Smrg } 1898b7e1c893Smrg } 1899b7e1c893Smrg 1900b7e1c893Smrg R600IBDiscard(pScrn, scratch); 1901b7e1c893Smrg 1902b7e1c893Smrg return TRUE; 
1903b7e1c893Smrg 1904b7e1c893Smrg} 1905b7e1c893Smrg 1906b7e1c893Smrgstatic int 1907b7e1c893SmrgR600MarkSync(ScreenPtr pScreen) 1908b7e1c893Smrg{ 1909b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 1910b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1911b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 1912b7e1c893Smrg 1913b7e1c893Smrg return ++accel_state->exaSyncMarker; 1914b7e1c893Smrg 1915b7e1c893Smrg} 1916b7e1c893Smrg 1917b7e1c893Smrgstatic void 1918b7e1c893SmrgR600Sync(ScreenPtr pScreen, int marker) 1919b7e1c893Smrg{ 1920b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 1921b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1922b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 1923b7e1c893Smrg 1924b7e1c893Smrg if (accel_state->exaMarkerSynced != marker) { 1925b7e1c893Smrg RADEONWaitForIdleCP(pScrn); 1926b7e1c893Smrg accel_state->exaMarkerSynced = marker; 1927b7e1c893Smrg } 1928b7e1c893Smrg 1929b7e1c893Smrg} 1930b7e1c893Smrg 1931b7e1c893Smrgstatic Bool 1932b7e1c893SmrgR600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) 1933b7e1c893Smrg{ 1934b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1935b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 1936b7e1c893Smrg 1937b7e1c893Smrg /* 512 bytes per shader for now */ 1938b7e1c893Smrg int size = 512 * 9; 1939b7e1c893Smrg 1940b7e1c893Smrg accel_state->shaders = NULL; 1941b7e1c893Smrg 1942b7e1c893Smrg accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256, 1943b7e1c893Smrg TRUE, NULL, NULL); 1944b7e1c893Smrg 1945b7e1c893Smrg if (accel_state->shaders == NULL) 1946b7e1c893Smrg return FALSE; 1947b7e1c893Smrg return TRUE; 1948b7e1c893Smrg} 1949b7e1c893Smrg 1950b7e1c893SmrgBool 1951b7e1c893SmrgR600LoadShaders(ScrnInfoPtr pScrn) 1952b7e1c893Smrg{ 1953b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 1954b7e1c893Smrg struct radeon_accel_state *accel_state = info->accel_state; 1955b7e1c893Smrg RADEONChipFamily ChipSet 
= info->ChipFamily; 1956b7e1c893Smrg uint32_t *shader; 1957b7e1c893Smrg 1958b7e1c893Smrg shader = (pointer)((char *)info->FB + accel_state->shaders->offset); 1959b7e1c893Smrg 1960b7e1c893Smrg /* solid vs --------------------------------------- */ 1961b7e1c893Smrg accel_state->solid_vs_offset = 0; 1962b7e1c893Smrg R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4); 1963b7e1c893Smrg 1964b7e1c893Smrg /* solid ps --------------------------------------- */ 1965b7e1c893Smrg accel_state->solid_ps_offset = 512; 1966b7e1c893Smrg R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4); 1967b7e1c893Smrg 1968b7e1c893Smrg /* copy vs --------------------------------------- */ 1969b7e1c893Smrg accel_state->copy_vs_offset = 1024; 1970b7e1c893Smrg R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4); 1971b7e1c893Smrg 1972b7e1c893Smrg /* copy ps --------------------------------------- */ 1973b7e1c893Smrg accel_state->copy_ps_offset = 1536; 1974b7e1c893Smrg R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4); 1975b7e1c893Smrg 1976b7e1c893Smrg /* comp vs --------------------------------------- */ 1977b7e1c893Smrg accel_state->comp_vs_offset = 2048; 1978b7e1c893Smrg R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4); 1979b7e1c893Smrg 1980b7e1c893Smrg /* comp ps --------------------------------------- */ 1981b7e1c893Smrg accel_state->comp_ps_offset = 2560; 1982b7e1c893Smrg R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4); 1983b7e1c893Smrg 1984b7e1c893Smrg /* comp mask ps --------------------------------------- */ 1985b7e1c893Smrg accel_state->comp_mask_ps_offset = 3072; 1986b7e1c893Smrg R600_comp_mask_ps(ChipSet, shader + accel_state->comp_mask_ps_offset / 4); 1987b7e1c893Smrg 1988b7e1c893Smrg /* xv vs --------------------------------------- */ 1989b7e1c893Smrg accel_state->xv_vs_offset = 3584; 1990b7e1c893Smrg R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4); 1991b7e1c893Smrg 1992b7e1c893Smrg /* xv 
ps --------------------------------------- */ 1993b7e1c893Smrg accel_state->xv_ps_offset = 4096; 1994b7e1c893Smrg R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4); 1995b7e1c893Smrg 1996b7e1c893Smrg return TRUE; 1997b7e1c893Smrg} 1998b7e1c893Smrg 1999b7e1c893Smrgstatic Bool 2000b7e1c893SmrgR600PrepareAccess(PixmapPtr pPix, int index) 2001b7e1c893Smrg{ 2002b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; 2003b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 2004b7e1c893Smrg unsigned char *RADEONMMIO = info->MMIO; 2005b7e1c893Smrg 2006b7e1c893Smrg /* flush HDP read/write caches */ 2007b7e1c893Smrg OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); 2008b7e1c893Smrg 2009b7e1c893Smrg return TRUE; 2010b7e1c893Smrg} 2011b7e1c893Smrg 2012b7e1c893Smrgstatic void 2013b7e1c893SmrgR600FinishAccess(PixmapPtr pPix, int index) 2014b7e1c893Smrg{ 2015b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; 2016b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 2017b7e1c893Smrg unsigned char *RADEONMMIO = info->MMIO; 2018b7e1c893Smrg 2019b7e1c893Smrg /* flush HDP read/write caches */ 2020b7e1c893Smrg OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); 2021b7e1c893Smrg 2022b7e1c893Smrg} 2023b7e1c893Smrg 2024b7e1c893Smrg 2025b7e1c893SmrgBool 2026b7e1c893SmrgR600DrawInit(ScreenPtr pScreen) 2027b7e1c893Smrg{ 2028b7e1c893Smrg ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 2029b7e1c893Smrg RADEONInfoPtr info = RADEONPTR(pScrn); 2030b7e1c893Smrg 2031b7e1c893Smrg if (info->accel_state->exa == NULL) { 2032b7e1c893Smrg xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n"); 2033b7e1c893Smrg return FALSE; 2034b7e1c893Smrg } 2035b7e1c893Smrg 2036b7e1c893Smrg info->accel_state->exa->exa_major = EXA_VERSION_MAJOR; 2037b7e1c893Smrg info->accel_state->exa->exa_minor = EXA_VERSION_MINOR; 2038b7e1c893Smrg 2039b7e1c893Smrg info->accel_state->exa->PrepareSolid = R600PrepareSolid; 2040b7e1c893Smrg info->accel_state->exa->Solid = R600Solid; 
2041b7e1c893Smrg info->accel_state->exa->DoneSolid = R600DoneSolid; 2042b7e1c893Smrg 2043b7e1c893Smrg info->accel_state->exa->PrepareCopy = R600PrepareCopy; 2044b7e1c893Smrg info->accel_state->exa->Copy = R600Copy; 2045b7e1c893Smrg info->accel_state->exa->DoneCopy = R600DoneCopy; 2046b7e1c893Smrg 2047b7e1c893Smrg info->accel_state->exa->MarkSync = R600MarkSync; 2048b7e1c893Smrg info->accel_state->exa->WaitMarker = R600Sync; 2049b7e1c893Smrg 2050b7e1c893Smrg info->accel_state->exa->PrepareAccess = R600PrepareAccess; 2051b7e1c893Smrg info->accel_state->exa->FinishAccess = R600FinishAccess; 2052b7e1c893Smrg 2053b7e1c893Smrg /* AGP seems to have problems with gart transfers */ 2054b7e1c893Smrg if (info->accelDFS) { 2055b7e1c893Smrg info->accel_state->exa->UploadToScreen = R600UploadToScreen; 2056b7e1c893Smrg info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen; 2057b7e1c893Smrg } 2058b7e1c893Smrg 2059b7e1c893Smrg info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS; 2060b7e1c893Smrg#ifdef EXA_SUPPORTS_PREPARE_AUX 2061b7e1c893Smrg info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX; 2062b7e1c893Smrg#endif 2063b7e1c893Smrg info->accel_state->exa->pixmapOffsetAlign = 256; 2064b7e1c893Smrg info->accel_state->exa->pixmapPitchAlign = 256; 2065b7e1c893Smrg 2066b7e1c893Smrg info->accel_state->exa->CheckComposite = R600CheckComposite; 2067b7e1c893Smrg info->accel_state->exa->PrepareComposite = R600PrepareComposite; 2068b7e1c893Smrg info->accel_state->exa->Composite = R600Composite; 2069b7e1c893Smrg info->accel_state->exa->DoneComposite = R600DoneComposite; 2070b7e1c893Smrg 2071b7e1c893Smrg#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3) 2072b7e1c893Smrg xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n"); 2073b7e1c893Smrg 2074b7e1c893Smrg info->accel_state->exa->maxPitchBytes = 32768; 2075b7e1c893Smrg info->accel_state->exa->maxX = 8192; 2076b7e1c893Smrg#else 2077b7e1c893Smrg info->accel_state->exa->maxX = 
8192; 2078b7e1c893Smrg#endif 2079b7e1c893Smrg info->accel_state->exa->maxY = 8192; 2080b7e1c893Smrg 2081b7e1c893Smrg /* not supported yet */ 2082b7e1c893Smrg info->accel_state->vsync = FALSE; 2083b7e1c893Smrg 2084b7e1c893Smrg if (!exaDriverInit(pScreen, info->accel_state->exa)) { 2085b7e1c893Smrg xfree(info->accel_state->exa); 2086b7e1c893Smrg return FALSE; 2087b7e1c893Smrg } 2088b7e1c893Smrg 2089b7e1c893Smrg if (!info->gartLocation) 2090b7e1c893Smrg return FALSE; 2091b7e1c893Smrg 2092b7e1c893Smrg info->accel_state->XInited3D = FALSE; 2093b7e1c893Smrg info->accel_state->copy_area = NULL; 2094b7e1c893Smrg 2095b7e1c893Smrg if (!R600AllocShaders(pScrn, pScreen)) 2096b7e1c893Smrg return FALSE; 2097b7e1c893Smrg 2098b7e1c893Smrg if (!R600LoadShaders(pScrn)) 2099b7e1c893Smrg return FALSE; 2100b7e1c893Smrg 2101b7e1c893Smrg exaMarkSync(pScreen); 2102b7e1c893Smrg 2103b7e1c893Smrg return TRUE; 2104b7e1c893Smrg 2105b7e1c893Smrg} 2106b7e1c893Smrg 2107