r600_exa.c revision 2f39173d
1/* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Author: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif 30 31#include "xf86.h" 32 33#include "exa.h" 34 35#include "radeon.h" 36#include "radeon_macros.h" 37#include "radeon_reg.h" 38#include "r600_shader.h" 39#include "r600_reg.h" 40#include "r600_state.h" 41#include "radeon_vbo.h" 42 43#define RADEON_TRACE_FALL 0 44#define RADEON_TRACE_DRAW 0 45 46#if RADEON_TRACE_FALL 47#define RADEON_FALLBACK(x) \ 48do { \ 49 ErrorF("%s: ", __FUNCTION__); \ 50 ErrorF x; \ 51 return FALSE; \ 52} while (0) 53#else 54#define RADEON_FALLBACK(x) return FALSE 55#endif 56 57extern PixmapPtr 58RADEONGetDrawablePixmap(DrawablePtr pDrawable); 59 60/* #define SHOW_VERTEXES */ 61 62# define RADEON_ROP3_ZERO 0x00000000 63# define RADEON_ROP3_DSa 0x00880000 64# define RADEON_ROP3_SDna 0x00440000 65# define RADEON_ROP3_S 0x00cc0000 66# define RADEON_ROP3_DSna 0x00220000 67# define RADEON_ROP3_D 0x00aa0000 68# define RADEON_ROP3_DSx 0x00660000 69# define RADEON_ROP3_DSo 0x00ee0000 70# define RADEON_ROP3_DSon 0x00110000 71# define RADEON_ROP3_DSxn 0x00990000 72# define RADEON_ROP3_Dn 0x00550000 73# define RADEON_ROP3_SDno 0x00dd0000 74# define RADEON_ROP3_Sn 0x00330000 75# define RADEON_ROP3_DSno 0x00bb0000 76# define RADEON_ROP3_DSan 0x00770000 77# define RADEON_ROP3_ONE 0x00ff0000 78 79uint32_t RADEON_ROP[16] = { 80 RADEON_ROP3_ZERO, /* GXclear */ 81 RADEON_ROP3_DSa, /* Gxand */ 82 RADEON_ROP3_SDna, /* GXandReverse */ 83 RADEON_ROP3_S, /* GXcopy */ 84 RADEON_ROP3_DSna, /* GXandInverted */ 85 RADEON_ROP3_D, /* GXnoop */ 86 RADEON_ROP3_DSx, /* GXxor */ 87 RADEON_ROP3_DSo, /* GXor */ 88 RADEON_ROP3_DSon, /* GXnor */ 89 RADEON_ROP3_DSxn, /* GXequiv */ 90 RADEON_ROP3_Dn, /* GXinvert */ 91 RADEON_ROP3_SDno, /* GXorReverse */ 92 RADEON_ROP3_Sn, /* GXcopyInverted */ 93 RADEON_ROP3_DSno, /* GXorInverted */ 94 RADEON_ROP3_DSan, /* GXnand */ 95 RADEON_ROP3_ONE, /* GXset */ 96}; 97 98static void R600VlineHelperClear(ScrnInfoPtr pScrn) 99{ 100 RADEONInfoPtr info = RADEONPTR(pScrn); 101 struct radeon_accel_state *accel_state = info->accel_state; 102 103 accel_state->vline_crtc = NULL; 104 accel_state->vline_y1 = -1; 105 accel_state->vline_y2 = 0; 106} 107 108static void R600VlineHelperSet(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2) 109{ 110 RADEONInfoPtr info = RADEONPTR(pScrn); 111 struct radeon_accel_state *accel_state = info->accel_state; 112 113 accel_state->vline_crtc = radeon_pick_best_crtc(pScrn, x1, x2, y1, y2); 114 if (accel_state->vline_y1 == -1) 115 accel_state->vline_y1 = y1; 116 if (y1 < accel_state->vline_y1) 117 accel_state->vline_y1 = y1; 118 if (y2 > accel_state->vline_y2) 119 accel_state->vline_y2 = y2; 120} 121 122static Bool R600ValidPM(uint32_t pm, int bpp) 123{ 124 uint8_t r, g, b, a; 125 Bool ret = FALSE; 126 127 switch (bpp) { 128 case 8: 129 a = pm & 0xff; 130 if ((a == 0) || (a == 0xff)) 131 ret = TRUE; 132 break; 133 case 16: 134 r = (pm >> 11) & 0x1f; 135 g = (pm >> 5) & 0x3f; 136 b = (pm >> 0) & 0x1f; 137 if (((r == 0) || (r == 0x1f)) && 138 ((g == 0) || (g == 0x3f)) && 139 ((b == 0) || (b == 0x1f))) 140 ret = TRUE; 141 break; 142 case 32: 143 a = (pm >> 24) & 0xff; 144 r = (pm >> 16) & 0xff; 145 g = (pm >> 8) & 0xff; 146 b = (pm >> 0) & 0xff; 147 if (((a == 0) || (a == 0xff)) && 148 ((r == 0) || (r == 0xff)) && 149 ((g == 0) || (g == 0xff)) && 150 ((b == 0) || (b == 0xff))) 151 ret = TRUE; 152 break; 153 default: 154 break; 155 } 156 return ret; 157} 158 159static Bool R600CheckBPP(int bpp) 160{ 161 switch (bpp) { 162 case 8: 163 case 16: 164 case 32: 165 return TRUE; 166 default: 167 break; 168 } 169 return FALSE; 170} 171 172Bool 173R600SetAccelState(ScrnInfoPtr pScrn, 174 struct r600_accel_object *src0, 175 struct r600_accel_object *src1, 176 struct r600_accel_object *dst, 177 uint32_t vs_offset, uint32_t ps_offset, 178 int rop, Pixel planemask) 179{ 180 RADEONInfoPtr info = RADEONPTR(pScrn); 181 struct radeon_accel_state *accel_state = info->accel_state; 182 int ret; 183 184 if (src0) { 185 memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object)); 186 accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8); 187 } else { 188 memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object)); 189 accel_state->src_size[0] = 0; 190 } 191 192 if (src1) { 193 memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object)); 194 accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8); 195 } else { 196 memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object)); 197 accel_state->src_size[1] = 0; 198 } 199 200 if (dst) { 201 memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object)); 202 accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8); 203 } else { 204 memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object)); 205 accel_state->dst_size = 0; 206 } 207 208 accel_state->rop = rop; 209 accel_state->planemask = planemask; 210 211 /* bad pitch */ 212 if (accel_state->src_obj[0].pitch & 7) 213 RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch)); 214 215 /* bad offset */ 216 if (accel_state->src_obj[0].offset & 0xff) 217 RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[0].offset)); 218 219 /* bad pitch */ 220 if (accel_state->src_obj[1].pitch & 7) 221 RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch)); 222 223 /* bad offset */ 224 if (accel_state->src_obj[1].offset & 0xff) 225 RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[1].offset)); 226 227 if (accel_state->dst_obj.pitch & 7) 228 RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch)); 229 230 if (accel_state->dst_obj.offset & 0xff) 231 RADEON_FALLBACK(("Bad dst offset 0x%08x\n", accel_state->dst_obj.offset)); 232 233 accel_state->vs_size = 512; 234 accel_state->ps_size = 512; 235#if defined(XF86DRM_MODE) 236 if (info->cs) { 237 accel_state->vs_mc_addr = vs_offset; 238 accel_state->ps_mc_addr = ps_offset; 239 240 radeon_cs_space_reset_bos(info->cs); 241 radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo, 242 RADEON_GEM_DOMAIN_VRAM, 0); 243 if (accel_state->src_obj[0].bo) 244 radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo, 245 accel_state->src_obj[0].domain, 0); 246 if (accel_state->src_obj[1].bo) 247 radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo, 248 accel_state->src_obj[1].domain, 0); 249 if (accel_state->dst_obj.bo) 250 radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo, 251 0, accel_state->dst_obj.domain); 252 ret = radeon_cs_space_check(info->cs); 253 if (ret) 254 RADEON_FALLBACK(("Not enough RAM to hw accel operation\n")); 255 256 } else 257#endif 258 { 259 accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + 260 vs_offset; 261 accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + 262 ps_offset; 263 } 264 265 return TRUE; 266} 267 268#if defined(XF86DRM_MODE) 269static inline void radeon_add_pixmap(struct radeon_cs *cs, PixmapPtr pPix, int read_domains, int write_domain) 270{ 271 struct radeon_exa_pixmap_priv *driver_priv = exaGetPixmapDriverPrivate(pPix); 272 273 radeon_cs_space_add_persistent_bo(cs, driver_priv->bo, read_domains, write_domain); 274} 275#endif 276 277static void 278R600DoneSolid(PixmapPtr pPix); 279 280static void 281R600DoneComposite(PixmapPtr pDst); 282 283 284static Bool 285R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) 286{ 287 ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; 288 RADEONInfoPtr info = RADEONPTR(pScrn); 289 struct radeon_accel_state *accel_state = info->accel_state; 290 cb_config_t cb_conf; 291 shader_config_t vs_conf, ps_conf; 292 int pmask = 0; 293 uint32_t a, r, g, b; 294 float ps_alu_consts[4]; 295 struct r600_accel_object dst; 296 297 if (!R600CheckBPP(pPix->drawable.bitsPerPixel)) 298 RADEON_FALLBACK(("R600CheckDatatype failed\n")); 299 if (!R600ValidPM(pm, pPix->drawable.bitsPerPixel)) 300 RADEON_FALLBACK(("invalid planemask\n")); 301 302#if defined(XF86DRM_MODE) 303 if (info->cs) { 304 dst.offset = 0; 305 dst.bo = radeon_get_pixmap_bo(pPix); 306 } else 307#endif 308 { 309 dst.offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; 310 dst.bo = NULL; 311 } 312 313 dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); 314 dst.width = pPix->drawable.width; 315 dst.height = pPix->drawable.height; 316 dst.bpp = pPix->drawable.bitsPerPixel; 317 dst.domain = RADEON_GEM_DOMAIN_VRAM; 318 319 if (!R600SetAccelState(pScrn, 320 NULL, 321 NULL, 322 &dst, 323 accel_state->solid_vs_offset, accel_state->solid_ps_offset, 324 alu, pm)) 325 return FALSE; 326 327 CLEAR (cb_conf); 328 CLEAR (vs_conf); 329 CLEAR (ps_conf); 330 331 radeon_vbo_check(pScrn, 16); 332 r600_cp_start(pScrn); 333 334 set_default_state(pScrn, accel_state->ib); 335 336 set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 337 set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 338 set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 339 340 /* Shader */ 341 342 /* flush SQ cache */ 343 cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, 344 accel_state->vs_size, accel_state->vs_mc_addr, 345 accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 346 347 vs_conf.shader_addr = accel_state->vs_mc_addr; 348 vs_conf.num_gprs = 2; 349 vs_conf.stack_size = 0; 350 vs_conf.bo = accel_state->shaders_bo; 351 vs_setup (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM); 352 353 /* flush SQ cache */ 354 cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, 355 accel_state->ps_size, accel_state->ps_mc_addr, 356 accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 357 358 ps_conf.shader_addr = accel_state->ps_mc_addr; 359 ps_conf.num_gprs = 1; 360 ps_conf.stack_size = 0; 361 ps_conf.uncached_first_inst = 1; 362 ps_conf.clamp_consts = 0; 363 ps_conf.export_mode = 2; 364 ps_conf.bo = accel_state->shaders_bo; 365 ps_setup (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM); 366 367 /* Render setup */ 368 if (accel_state->planemask & 0x000000ff) 369 pmask |= 4; /* B */ 370 if (accel_state->planemask & 0x0000ff00) 371 pmask |= 2; /* G */ 372 if (accel_state->planemask & 0x00ff0000) 373 pmask |= 1; /* R */ 374 if (accel_state->planemask & 0xff000000) 375 pmask |= 8; /* A */ 376 BEGIN_BATCH(6); 377 EREG(accel_state->ib, CB_TARGET_MASK, (pmask << TARGET0_ENABLE_shift)); 378 EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[accel_state->rop]); 379 END_BATCH(); 380 381 cb_conf.id = 0; 382 cb_conf.w = accel_state->dst_obj.pitch; 383 cb_conf.h = accel_state->dst_obj.height; 384 cb_conf.base = accel_state->dst_obj.offset; 385 cb_conf.bo = accel_state->dst_obj.bo; 386 387 if (accel_state->dst_obj.bpp == 8) { 388 cb_conf.format = COLOR_8; 389 cb_conf.comp_swap = 3; /* A */ 390 } else if (accel_state->dst_obj.bpp == 16) { 391 cb_conf.format = COLOR_5_6_5; 392 cb_conf.comp_swap = 2; /* RGB */ 393 } else { 394 cb_conf.format = COLOR_8_8_8_8; 395 cb_conf.comp_swap = 1; /* ARGB */ 396 } 397 cb_conf.source_format = 1; 398 cb_conf.blend_clamp = 1; 399 set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain); 400 401 /* Interpolator setup */ 402 /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */ 403 BEGIN_BATCH(18); 404 EREG(accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift)); 405 EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); 406 407 /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x 408 * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ 409 /* no VS exports as PS input (NUM_INTERP is not zero based, no minus one) */ 410 EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (0 << NUM_INTERP_shift)); 411 EREG(accel_state->ib, SPI_PS_IN_CONTROL_1, 0); 412 /* color semantic id 0 -> GPR[0] */ 413 EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | 414 (0x03 << DEFAULT_VAL_shift) | 415 FLAT_SHADE_bit | 416 SEL_CENTROID_bit)); 417 EREG(accel_state->ib, SPI_INTERP_CONTROL_0, FLAT_SHADE_ENA_bit); 418 END_BATCH(); 419 420 /* PS alu constants */ 421 if (accel_state->dst_obj.bpp == 16) { 422 r = (fg >> 11) & 0x1f; 423 g = (fg >> 5) & 0x3f; 424 b = (fg >> 0) & 0x1f; 425 ps_alu_consts[0] = (float)r / 31; /* R */ 426 ps_alu_consts[1] = (float)g / 63; /* G */ 427 ps_alu_consts[2] = (float)b / 31; /* B */ 428 ps_alu_consts[3] = 1.0; /* A */ 429 } else if (accel_state->dst_obj.bpp == 8) { 430 a = (fg >> 0) & 0xff; 431 ps_alu_consts[0] = 0.0; /* R */ 432 ps_alu_consts[1] = 0.0; /* G */ 433 ps_alu_consts[2] = 0.0; /* B */ 434 ps_alu_consts[3] = (float)a / 255; /* A */ 435 } else { 436 a = (fg >> 24) & 0xff; 437 r = (fg >> 16) & 0xff; 438 g = (fg >> 8) & 0xff; 439 b = (fg >> 0) & 0xff; 440 ps_alu_consts[0] = (float)r / 255; /* R */ 441 ps_alu_consts[1] = (float)g / 255; /* G */ 442 ps_alu_consts[2] = (float)b / 255; /* B */ 443 ps_alu_consts[3] = (float)a / 255; /* A */ 444 } 445 set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps, 446 sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); 447 448 if (accel_state->vsync) 449 R600VlineHelperClear(pScrn); 450 451 return TRUE; 452} 453 454 455static void 456R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) 457{ 458 ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; 459 RADEONInfoPtr info = RADEONPTR(pScrn); 460 struct radeon_accel_state *accel_state = info->accel_state; 461 float *vb; 462 463 if (accel_state->vsync) 464 R600VlineHelperSet(pScrn, x1, y1, x2, y2); 465 466 vb = radeon_vbo_space(pScrn, 8); 467 468 vb[0] = (float)x1; 469 vb[1] = (float)y1; 470 471 vb[2] = (float)x1; 472 vb[3] = (float)y2; 473 474 vb[4] = (float)x2; 475 vb[5] = (float)y2; 476 477 radeon_vbo_commit(pScrn); 478} 479 480static void 481R600DoneSolid(PixmapPtr pPix) 482{ 483 ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; 484 RADEONInfoPtr info = RADEONPTR(pScrn); 485 struct radeon_accel_state *accel_state = info->accel_state; 486 487 if (accel_state->vsync) 488 cp_wait_vline_sync(pScrn, accel_state->ib, pPix, 489 accel_state->vline_crtc, 490 accel_state->vline_y1, 491 accel_state->vline_y2); 492 493 r600_finish_op(pScrn, 8); 494} 495 496static void 497R600DoPrepareCopy(ScrnInfoPtr pScrn) 498{ 499 RADEONInfoPtr info = RADEONPTR(pScrn); 500 struct radeon_accel_state *accel_state = info->accel_state; 501 int pmask = 0; 502 cb_config_t cb_conf; 503 tex_resource_t tex_res; 504 tex_sampler_t tex_samp; 505 shader_config_t vs_conf, ps_conf; 506 507 CLEAR (cb_conf); 508 CLEAR (tex_res); 509 CLEAR (tex_samp); 510 CLEAR (vs_conf); 511 CLEAR (ps_conf); 512 513 radeon_vbo_check(pScrn, 16); 514 r600_cp_start(pScrn); 515 516 set_default_state(pScrn, accel_state->ib); 517 518 set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 519 set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 520 set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 521 522 /* Shader */ 523 524 /* flush SQ cache */ 525 cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, 526 accel_state->vs_size, accel_state->vs_mc_addr, 527 accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 528 529 vs_conf.shader_addr = accel_state->vs_mc_addr; 530 vs_conf.num_gprs = 2; 531 vs_conf.stack_size = 0; 532 vs_conf.bo = accel_state->shaders_bo; 533 vs_setup (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM); 534 535 /* flush SQ cache */ 536 cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, 537 accel_state->ps_size, accel_state->ps_mc_addr, 538 accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 539 540 ps_conf.shader_addr = accel_state->ps_mc_addr; 541 ps_conf.num_gprs = 1; 542 ps_conf.stack_size = 0; 543 ps_conf.uncached_first_inst = 1; 544 ps_conf.clamp_consts = 0; 545 ps_conf.export_mode = 2; 546 ps_conf.bo = accel_state->shaders_bo; 547 ps_setup (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM); 548 549 /* flush texture cache */ 550 cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, 551 accel_state->src_size[0], accel_state->src_obj[0].offset, 552 accel_state->src_obj[0].bo, accel_state->src_obj[0].domain, 0); 553 554 /* Texture */ 555 tex_res.id = 0; 556 tex_res.w = accel_state->src_obj[0].width; 557 tex_res.h = accel_state->src_obj[0].height; 558 tex_res.pitch = accel_state->src_obj[0].pitch; 559 tex_res.depth = 0; 560 tex_res.dim = SQ_TEX_DIM_2D; 561 tex_res.base = accel_state->src_obj[0].offset; 562 tex_res.mip_base = accel_state->src_obj[0].offset; 563 tex_res.bo = accel_state->src_obj[0].bo; 564 tex_res.mip_bo = accel_state->src_obj[0].bo; 565 if (accel_state->src_obj[0].bpp == 8) { 566 tex_res.format = FMT_8; 567 tex_res.dst_sel_x = SQ_SEL_1; /* R */ 568 tex_res.dst_sel_y = SQ_SEL_1; /* G */ 569 tex_res.dst_sel_z = SQ_SEL_1; /* B */ 570 tex_res.dst_sel_w = SQ_SEL_X; /* A */ 571 } else if (accel_state->src_obj[0].bpp == 16) { 572 tex_res.format = FMT_5_6_5; 573 tex_res.dst_sel_x = SQ_SEL_Z; /* R */ 574 tex_res.dst_sel_y = SQ_SEL_Y; /* G */ 575 tex_res.dst_sel_z = SQ_SEL_X; /* B */ 576 tex_res.dst_sel_w = SQ_SEL_1; /* A */ 577 } else { 578 tex_res.format = FMT_8_8_8_8; 579 tex_res.dst_sel_x = SQ_SEL_Z; /* R */ 580 tex_res.dst_sel_y = SQ_SEL_Y; /* G */ 581 tex_res.dst_sel_z = SQ_SEL_X; /* B */ 582 tex_res.dst_sel_w = SQ_SEL_W; /* A */ 583 } 584 585 tex_res.request_size = 1; 586 tex_res.base_level = 0; 587 tex_res.last_level = 0; 588 tex_res.perf_modulation = 0; 589 set_tex_resource (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); 590 591 tex_samp.id = 0; 592 tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; 593 tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; 594 tex_samp.clamp_z = SQ_TEX_WRAP; 595 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; 596 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; 597 tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 598 tex_samp.mip_filter = 0; /* no mipmap */ 599 set_tex_sampler (pScrn, accel_state->ib, &tex_samp); 600 601 602 /* Render setup */ 603 if (accel_state->planemask & 0x000000ff) 604 pmask |= 4; /* B */ 605 if (accel_state->planemask & 0x0000ff00) 606 pmask |= 2; /* G */ 607 if (accel_state->planemask & 0x00ff0000) 608 pmask |= 1; /* R */ 609 if (accel_state->planemask & 0xff000000) 610 pmask |= 8; /* A */ 611 BEGIN_BATCH(6); 612 EREG(accel_state->ib, CB_TARGET_MASK, (pmask << TARGET0_ENABLE_shift)); 613 EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[accel_state->rop]); 614 END_BATCH(); 615 616 cb_conf.id = 0; 617 cb_conf.w = accel_state->dst_obj.pitch; 618 cb_conf.h = accel_state->dst_obj.height; 619 cb_conf.base = accel_state->dst_obj.offset; 620 cb_conf.bo = accel_state->dst_obj.bo; 621 if (accel_state->dst_obj.bpp == 8) { 622 cb_conf.format = COLOR_8; 623 cb_conf.comp_swap = 3; /* A */ 624 } else if (accel_state->dst_obj.bpp == 16) { 625 cb_conf.format = COLOR_5_6_5; 626 cb_conf.comp_swap = 2; /* RGB */ 627 } else { 628 cb_conf.format = COLOR_8_8_8_8; 629 cb_conf.comp_swap = 1; /* ARGB */ 630 } 631 cb_conf.source_format = 1; 632 cb_conf.blend_clamp = 1; 633 set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain); 634 635 /* Interpolator setup */ 636 /* export tex coord from VS */ 637 BEGIN_BATCH(18); 638 EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); 639 EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); 640 641 /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x 642 * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ 643 /* input tex coord from VS */ 644 EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, ((1 << NUM_INTERP_shift))); 645 EREG(accel_state->ib, SPI_PS_IN_CONTROL_1, 0); 646 /* color semantic id 0 -> GPR[0] */ 647 EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | 648 (0x01 << DEFAULT_VAL_shift) | 649 SEL_CENTROID_bit)); 650 EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); 651 END_BATCH(); 652 653} 654 655static void 656R600DoCopy(ScrnInfoPtr pScrn) 657{ 658 r600_finish_op(pScrn, 16); 659} 660 661static void 662R600DoCopyVline(PixmapPtr pPix) 663{ 664 ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; 665 RADEONInfoPtr info = RADEONPTR(pScrn); 666 struct radeon_accel_state *accel_state = info->accel_state; 667 668 if (accel_state->vsync) 669 cp_wait_vline_sync(pScrn, accel_state->ib, pPix, 670 accel_state->vline_crtc, 671 accel_state->vline_y1, 672 accel_state->vline_y2); 673 674 r600_finish_op(pScrn, 16); 675} 676 677static void 678R600AppendCopyVertex(ScrnInfoPtr pScrn, 679 int srcX, int srcY, 680 int dstX, int dstY, 681 int w, int h) 682{ 683 float *vb; 684 685 vb = radeon_vbo_space(pScrn, 16); 686 687 vb[0] = (float)dstX; 688 vb[1] = (float)dstY; 689 vb[2] = (float)srcX; 690 vb[3] = (float)srcY; 691 692 vb[4] = (float)dstX; 693 vb[5] = (float)(dstY + h); 694 vb[6] = (float)srcX; 695 vb[7] = (float)(srcY + h); 696 697 vb[8] = (float)(dstX + w); 698 vb[9] = (float)(dstY + h); 699 vb[10] = (float)(srcX + w); 700 vb[11] = (float)(srcY + h); 701 702 radeon_vbo_commit(pScrn); 703} 704 705static Bool 706R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, 707 int xdir, int ydir, 708 int rop, 709 Pixel planemask) 710{ 711 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 712 RADEONInfoPtr info = RADEONPTR(pScrn); 713 struct radeon_accel_state *accel_state = info->accel_state; 714 struct r600_accel_object src_obj, dst_obj; 715 716 if (!R600CheckBPP(pSrc->drawable.bitsPerPixel)) 717 RADEON_FALLBACK(("R600CheckDatatype src failed\n")); 718 if (!R600CheckBPP(pDst->drawable.bitsPerPixel)) 719 RADEON_FALLBACK(("R600CheckDatatype dst failed\n")); 720 if (!R600ValidPM(planemask, pDst->drawable.bitsPerPixel)) 721 RADEON_FALLBACK(("Invalid planemask\n")); 722 723 dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); 724 src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); 725 726 accel_state->same_surface = FALSE; 727 728#if defined(XF86DRM_MODE) 729 if (info->cs) { 730 src_obj.offset = 0; 731 dst_obj.offset = 0; 732 src_obj.bo = radeon_get_pixmap_bo(pSrc); 733 dst_obj.bo = radeon_get_pixmap_bo(pDst); 734 if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst)) 735 accel_state->same_surface = TRUE; 736 } else 737#endif 738 { 739 src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; 740 dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; 741 if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst)) 742 accel_state->same_surface = TRUE; 743 src_obj.bo = NULL; 744 dst_obj.bo = NULL; 745 } 746 747 src_obj.width = pSrc->drawable.width; 748 src_obj.height = pSrc->drawable.height; 749 src_obj.bpp = pSrc->drawable.bitsPerPixel; 750 src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; 751 752 dst_obj.width = pDst->drawable.width; 753 dst_obj.height = pDst->drawable.height; 754 dst_obj.bpp = pDst->drawable.bitsPerPixel; 755 dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 756 757 if (!R600SetAccelState(pScrn, 758 &src_obj, 759 NULL, 760 &dst_obj, 761 accel_state->copy_vs_offset, accel_state->copy_ps_offset, 762 rop, planemask)) 763 return FALSE; 764 765 if (accel_state->same_surface == TRUE) { 766 unsigned long size = pDst->drawable.height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8; 767 768#if defined(XF86DRM_MODE) 769 if (info->cs) { 770 if (accel_state->copy_area_bo) { 771 radeon_bo_unref(accel_state->copy_area_bo); 772 accel_state->copy_area_bo = NULL; 773 } 774 accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0, 775 RADEON_GEM_DOMAIN_VRAM, 776 0); 777 if (accel_state->copy_area_bo == NULL) 778 RADEON_FALLBACK(("temp copy surface alloc failed\n")); 779 780 radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo, 781 RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_DOMAIN_VRAM); 782 if (radeon_cs_space_check(info->cs)) { 783 radeon_bo_unref(accel_state->copy_area_bo); 784 accel_state->copy_area_bo = NULL; 785 return FALSE; 786 } 787 accel_state->copy_area = (void*)accel_state->copy_area_bo; 788 } else 789#endif 790 { 791 if (accel_state->copy_area) { 792 exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); 793 accel_state->copy_area = NULL; 794 } 795 accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL); 796 if (!accel_state->copy_area) 797 RADEON_FALLBACK(("temp copy surface alloc failed\n")); 798 } 799 } else 800 R600DoPrepareCopy(pScrn); 801 802 if (accel_state->vsync) 803 R600VlineHelperClear(pScrn); 804 805 return TRUE; 806} 807 808static void 809R600Copy(PixmapPtr pDst, 810 int srcX, int srcY, 811 int dstX, int dstY, 812 int w, int h) 813{ 814 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 815 RADEONInfoPtr info = RADEONPTR(pScrn); 816 struct radeon_accel_state *accel_state = info->accel_state; 817 818 if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY)) 819 return; 820 821 if (accel_state->vsync) 822 R600VlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h); 823 824 if (accel_state->same_surface && accel_state->copy_area) { 825 uint32_t orig_offset, tmp_offset; 826 uint32_t orig_dst_domain = accel_state->dst_obj.domain; 827 uint32_t orig_src_domain = accel_state->src_obj[0].domain; 828 struct radeon_bo *orig_bo = accel_state->dst_obj.bo; 829 830#if defined(XF86DRM_MODE) 831 if (info->cs) { 832 tmp_offset = 0; 833 orig_offset = 0; 834 } else 835#endif 836 { 837 tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset; 838 orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; 839 } 840 841 /* src to tmp */ 842 accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 843 accel_state->dst_obj.bo = accel_state->copy_area_bo; 844 accel_state->dst_obj.offset = tmp_offset; 845 R600DoPrepareCopy(pScrn); 846 R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); 847 R600DoCopy(pScrn); 848 849 /* tmp to dst */ 850 accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM; 851 accel_state->src_obj[0].bo = accel_state->copy_area_bo; 852 accel_state->src_obj[0].offset = tmp_offset; 853 accel_state->dst_obj.domain = orig_dst_domain; 854 accel_state->dst_obj.bo = orig_bo; 855 accel_state->dst_obj.offset = orig_offset; 856 R600DoPrepareCopy(pScrn); 857 R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h); 858 R600DoCopyVline(pDst); 859 860 /* restore state */ 861 accel_state->src_obj[0].domain = orig_src_domain; 862 accel_state->src_obj[0].bo = orig_bo; 863 accel_state->src_obj[0].offset = orig_offset; 864 } else 865 R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); 866 867} 868 869static void 870R600DoneCopy(PixmapPtr pDst) 871{ 872 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 873 RADEONInfoPtr info = RADEONPTR(pScrn); 874 struct radeon_accel_state *accel_state = info->accel_state; 875 876 if (!accel_state->same_surface) 877 R600DoCopyVline(pDst); 878 879 if (accel_state->copy_area) { 880 if (!info->cs) 881 exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); 882 accel_state->copy_area = NULL; 883 } 884 885} 886 887 888#define xFixedToFloat(f) (((float) (f)) / 65536) 889 890struct blendinfo { 891 Bool dst_alpha; 892 Bool src_alpha; 893 uint32_t blend_cntl; 894}; 895 896static struct blendinfo R600BlendOp[] = { 897 /* Clear */ 898 {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 899 /* Src */ 900 {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 901 /* Dst */ 902 {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, 903 /* Over */ 904 {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 905 /* OverReverse */ 906 {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, 907 /* In */ 908 {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 909 /* InReverse */ 910 {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 911 /* Out */ 912 {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 913 /* OutReverse */ 914 {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 915 /* Atop */ 916 {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 917 /* AtopReverse */ 918 {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 919 /* Xor */ 920 {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 921 /* Add */ 922 {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, 923}; 924 925struct formatinfo { 926 unsigned int fmt; 927 uint32_t card_fmt; 928}; 929 930static struct formatinfo R600TexFormats[] = { 931 {PICT_a8r8g8b8, FMT_8_8_8_8}, 932 {PICT_x8r8g8b8, FMT_8_8_8_8}, 933 {PICT_a8b8g8r8, FMT_8_8_8_8}, 934 {PICT_x8b8g8r8, FMT_8_8_8_8}, 935#ifdef PICT_TYPE_BGRA 936 {PICT_b8g8r8a8, FMT_8_8_8_8}, 937 {PICT_b8g8r8x8, FMT_8_8_8_8}, 938#endif 939 {PICT_r5g6b5, FMT_5_6_5}, 940 {PICT_a1r5g5b5, FMT_1_5_5_5}, 941 {PICT_x1r5g5b5, FMT_1_5_5_5}, 942 {PICT_a8, FMT_8}, 943}; 944 945static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format) 946{ 947 uint32_t sblend, dblend; 948 949 sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask; 950 dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask; 951 952 /* If there's no dst alpha channel, adjust the blend op so that we'll treat 953 * it as always 1. 954 */ 955 if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) { 956 if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift)) 957 sblend = (BLEND_ONE << COLOR_SRCBLEND_shift); 958 else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift)) 959 sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift); 960 } 961 962 /* If the source alpha is being used, then we should only be in a case where 963 * the source blend factor is 0, and the source blend value is the mask 964 * channels multiplied by the source picture's alpha. 965 */ 966 if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) { 967 if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) { 968 dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift); 969 } else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) { 970 dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift); 971 } 972 } 973 974 return sblend | dblend; 975} 976 977static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format) 978{ 979 switch (pDstPicture->format) { 980 case PICT_a8r8g8b8: 981 case PICT_x8r8g8b8: 982 case PICT_a8b8g8r8: 983 case PICT_x8b8g8r8: 984#ifdef PICT_TYPE_BGRA 985 case PICT_b8g8r8a8: 986 case PICT_b8g8r8x8: 987#endif 988 *dst_format = COLOR_8_8_8_8; 989 break; 990 case PICT_r5g6b5: 991 *dst_format = COLOR_5_6_5; 992 break; 993 case PICT_a1r5g5b5: 994 case PICT_x1r5g5b5: 995 *dst_format = COLOR_1_5_5_5; 996 break; 997 case PICT_a8: 998 *dst_format = COLOR_8; 999 break; 1000 default: 1001 RADEON_FALLBACK(("Unsupported dest format 0x%x\n", 1002 (int)pDstPicture->format)); 1003 } 1004 return TRUE; 1005} 1006 1007static Bool R600CheckCompositeTexture(PicturePtr pPict, 1008 PicturePtr pDstPict, 1009 int op, 1010 int unit) 1011{ 1012 int w = pPict->pDrawable->width; 1013 int h = pPict->pDrawable->height; 1014 unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone; 1015 unsigned int i; 1016 int max_tex_w, max_tex_h; 1017 1018 max_tex_w = 8192; 1019 max_tex_h = 8192; 1020 1021 if ((w > max_tex_w) || (h > max_tex_h)) 1022 RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h)); 1023 1024 for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) { 1025 if (R600TexFormats[i].fmt == pPict->format) 1026 break; 1027 } 1028 if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0])) 1029 RADEON_FALLBACK(("Unsupported picture format 0x%x\n", 1030 (int)pPict->format)); 1031 1032 if (pPict->filter != PictFilterNearest && 1033 pPict->filter != PictFilterBilinear) 1034 RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter)); 1035 1036 /* for REPEAT_NONE, Render semantics are that sampling outside the source 1037 * picture results in alpha=0 pixels. We can implement this with a border color 1038 * *if* our source texture has an alpha channel, otherwise we need to fall 1039 * back. If we're not transformed then we hope that upper layers have clipped 1040 * rendering to the bounds of the source drawable, in which case it doesn't 1041 * matter. I have not, however, verified that the X server always does such 1042 * clipping. 1043 */ 1044 /* FIXME R6xx */ 1045 if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) { 1046 if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0))) 1047 RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n")); 1048 } 1049 1050 return TRUE; 1051} 1052 1053static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, 1054 int unit) 1055{ 1056 ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; 1057 RADEONInfoPtr info = RADEONPTR(pScrn); 1058 struct radeon_accel_state *accel_state = info->accel_state; 1059 int w = pPict->pDrawable->width; 1060 int h = pPict->pDrawable->height; 1061 unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone; 1062 unsigned int i; 1063 tex_resource_t tex_res; 1064 tex_sampler_t tex_samp; 1065 int pix_r, pix_g, pix_b, pix_a; 1066 float vs_alu_consts[8]; 1067 1068 CLEAR (tex_res); 1069 CLEAR (tex_samp); 1070 1071 for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) { 1072 if (R600TexFormats[i].fmt == pPict->format) 1073 break; 1074 } 1075 1076 /* flush texture cache */ 1077 cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, 1078 accel_state->src_size[unit], accel_state->src_obj[unit].offset, 1079 accel_state->src_obj[unit].bo, accel_state->src_obj[unit].domain, 0); 1080 1081 /* Texture */ 1082 tex_res.id = unit; 1083 tex_res.w = w; 1084 tex_res.h = h; 1085 tex_res.pitch = accel_state->src_obj[unit].pitch; 1086 tex_res.depth = 0; 1087 tex_res.dim = SQ_TEX_DIM_2D; 1088 tex_res.base = accel_state->src_obj[unit].offset; 1089 tex_res.mip_base = accel_state->src_obj[unit].offset; 1090 tex_res.format = R600TexFormats[i].card_fmt; 1091 tex_res.bo = accel_state->src_obj[unit].bo; 1092 tex_res.mip_bo = accel_state->src_obj[unit].bo; 1093 tex_res.request_size = 1; 1094 1095 /* component swizzles */ 1096 switch (pPict->format) { 1097 case PICT_a1r5g5b5: 1098 case PICT_a8r8g8b8: 1099 pix_r = SQ_SEL_Z; /* R */ 1100 pix_g = SQ_SEL_Y; /* G */ 1101 pix_b = SQ_SEL_X; /* B */ 1102 pix_a = SQ_SEL_W; /* A */ 1103 break; 1104 case PICT_a8b8g8r8: 1105 pix_r = SQ_SEL_X; /* R */ 1106 pix_g = SQ_SEL_Y; /* G */ 1107 pix_b = SQ_SEL_Z; /* B */ 1108 pix_a = SQ_SEL_W; /* A */ 1109 break; 1110 case PICT_x8b8g8r8: 1111 pix_r = SQ_SEL_X; /* R */ 1112 pix_g = SQ_SEL_Y; /* G */ 1113 pix_b = SQ_SEL_Z; /* B */ 1114 pix_a = SQ_SEL_1; /* A */ 1115 break; 1116#ifdef PICT_TYPE_BGRA 1117 case PICT_b8g8r8a8: 1118 pix_r = SQ_SEL_Y; /* R */ 1119 pix_g = SQ_SEL_Z; /* G */ 1120 pix_b = SQ_SEL_W; /* B */ 1121 pix_a = SQ_SEL_X; /* A */ 1122 break; 1123 case PICT_b8g8r8x8: 1124 pix_r = SQ_SEL_Y; /* R */ 1125 pix_g = SQ_SEL_Z; /* G */ 1126 pix_b = SQ_SEL_W; /* B */ 1127 pix_a = SQ_SEL_1; /* A */ 1128 break; 1129#endif 1130 case PICT_x1r5g5b5: 1131 case PICT_x8r8g8b8: 1132 case PICT_r5g6b5: 1133 pix_r = SQ_SEL_Z; /* R */ 1134 pix_g = SQ_SEL_Y; /* G */ 1135 pix_b = SQ_SEL_X; /* B */ 1136 pix_a = SQ_SEL_1; /* A */ 1137 break; 1138 case PICT_a8: 1139 pix_r = SQ_SEL_0; /* R */ 1140 pix_g = SQ_SEL_0; /* G */ 1141 pix_b = SQ_SEL_0; /* B */ 1142 pix_a = SQ_SEL_X; /* A */ 1143 break; 1144 default: 1145 RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format)); 1146 } 1147 1148 if (unit == 0) { 1149 if (!accel_state->msk_pic) { 1150 if (PICT_FORMAT_RGB(pPict->format) == 0) { 1151 pix_r = SQ_SEL_0; 1152 pix_g = SQ_SEL_0; 1153 pix_b = SQ_SEL_0; 1154 } 1155 1156 if (PICT_FORMAT_A(pPict->format) == 0) 1157 pix_a = SQ_SEL_1; 1158 } else { 1159 if (accel_state->component_alpha) { 1160 if (accel_state->src_alpha) { 1161 if (PICT_FORMAT_A(pPict->format) == 0) { 1162 pix_r = SQ_SEL_1; 1163 pix_g = SQ_SEL_1; 1164 pix_b = SQ_SEL_1; 1165 pix_a = SQ_SEL_1; 1166 } else { 1167 pix_r = pix_a; 1168 pix_g = pix_a; 1169 pix_b = pix_a; 1170 } 1171 } else { 1172 if (PICT_FORMAT_A(pPict->format) == 0) 1173 pix_a = SQ_SEL_1; 1174 } 1175 } else { 1176 if (PICT_FORMAT_RGB(pPict->format) == 0) { 1177 pix_r = SQ_SEL_0; 1178 pix_g = SQ_SEL_0; 1179 pix_b = SQ_SEL_0; 1180 } 1181 1182 if (PICT_FORMAT_A(pPict->format) == 0) 1183 pix_a = SQ_SEL_1; 1184 } 1185 } 1186 } else { 1187 if (accel_state->component_alpha) { 1188 if (PICT_FORMAT_A(pPict->format) == 0) 1189 pix_a = SQ_SEL_1; 1190 } else { 1191 if (PICT_FORMAT_A(pPict->format) == 0) { 1192 pix_r = SQ_SEL_1; 1193 pix_g = SQ_SEL_1; 1194 pix_b = SQ_SEL_1; 1195 pix_a = SQ_SEL_1; 1196 } else { 1197 pix_r = pix_a; 1198 pix_g = pix_a; 1199 pix_b = pix_a; 1200 } 1201 } 1202 } 1203 1204 tex_res.dst_sel_x = pix_r; /* R */ 1205 tex_res.dst_sel_y = pix_g; /* G */ 1206 tex_res.dst_sel_z = pix_b; /* B */ 1207 tex_res.dst_sel_w = pix_a; /* A */ 1208 1209 tex_res.base_level = 0; 1210 tex_res.last_level = 0; 1211 tex_res.perf_modulation = 0; 1212 set_tex_resource (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain); 1213 1214 tex_samp.id = unit; 1215 tex_samp.border_color = SQ_TEX_BORDER_COLOR_TRANS_BLACK; 1216 1217 switch (repeatType) { 1218 case RepeatNormal: 1219 tex_samp.clamp_x = SQ_TEX_WRAP; 1220 tex_samp.clamp_y = SQ_TEX_WRAP; 1221 break; 1222 case RepeatPad: 1223 tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; 1224 tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; 1225 break; 1226 case RepeatReflect: 1227 tex_samp.clamp_x = SQ_TEX_MIRROR; 1228 tex_samp.clamp_y = SQ_TEX_MIRROR; 1229 break; 1230 case RepeatNone: 1231 tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER; 1232 tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER; 1233 break; 1234 default: 1235 RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType)); 1236 } 1237 1238 switch (pPict->filter) { 1239 case PictFilterNearest: 1240 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; 1241 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; 1242 break; 1243 case PictFilterBilinear: 1244 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; 1245 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; 1246 break; 1247 default: 1248 RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); 1249 } 1250 1251 tex_samp.clamp_z = SQ_TEX_WRAP; 1252 tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 1253 tex_samp.mip_filter = 0; /* no mipmap */ 1254 set_tex_sampler (pScrn, accel_state->ib, &tex_samp); 1255 1256 if (pPict->transform != 0) { 1257 accel_state->is_transform[unit] = TRUE; 1258 accel_state->transform[unit] = pPict->transform; 1259 1260 vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]); 1261 vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]); 1262 vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]); 1263 vs_alu_consts[3] = 1.0 / w; 1264 1265 vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]); 1266 vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]); 1267 vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]); 1268 vs_alu_consts[7] = 1.0 / h; 1269 } else { 1270 accel_state->is_transform[unit] = FALSE; 1271 1272 vs_alu_consts[0] = 1.0; 1273 vs_alu_consts[1] = 0.0; 1274 vs_alu_consts[2] = 0.0; 1275 vs_alu_consts[3] = 1.0 / w; 1276 1277 vs_alu_consts[4] = 0.0; 1278 vs_alu_consts[5] = 1.0; 1279 vs_alu_consts[6] = 0.0; 1280 vs_alu_consts[7] = 1.0 / h; 1281 } 1282 1283 /* VS alu constants */ 1284 set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2), 1285 sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts); 1286 1287 return TRUE; 1288} 1289 1290static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, 1291 PicturePtr pDstPicture) 1292{ 1293 uint32_t tmp1; 1294 PixmapPtr pSrcPixmap, pDstPixmap; 1295 int max_tex_w, max_tex_h, max_dst_w, max_dst_h; 1296 1297 /* Check for unsupported compositing operations. */ 1298 if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0]))) 1299 RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op)); 1300 1301 if (!pSrcPicture->pDrawable) 1302 RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n")); 1303 1304 pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable); 1305 1306 max_tex_w = 8192; 1307 max_tex_h = 8192; 1308 max_dst_w = 8192; 1309 max_dst_h = 8192; 1310 1311 if (pSrcPixmap->drawable.width >= max_tex_w || 1312 pSrcPixmap->drawable.height >= max_tex_h) { 1313 RADEON_FALLBACK(("Source w/h too large (%d,%d).\n", 1314 pSrcPixmap->drawable.width, 1315 pSrcPixmap->drawable.height)); 1316 } 1317 1318 pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable); 1319 1320 if (pDstPixmap->drawable.width >= max_dst_w || 1321 pDstPixmap->drawable.height >= max_dst_h) { 1322 RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n", 1323 pDstPixmap->drawable.width, 1324 pDstPixmap->drawable.height)); 1325 } 1326 1327 if (pMaskPicture) { 1328 PixmapPtr pMaskPixmap; 1329 1330 if (!pMaskPicture->pDrawable) 1331 RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n")); 1332 1333 pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable); 1334 1335 if (pMaskPixmap->drawable.width >= max_tex_w || 1336 pMaskPixmap->drawable.height >= max_tex_h) { 1337 RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n", 1338 pMaskPixmap->drawable.width, 1339 pMaskPixmap->drawable.height)); 1340 } 1341 1342 if (pMaskPicture->componentAlpha) { 1343 /* Check if it's component alpha that relies on a source alpha and 1344 * on the source value. We can only get one of those into the 1345 * single source value that we get to blend with. 1346 */ 1347 if (R600BlendOp[op].src_alpha && 1348 (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) != 1349 (BLEND_ZERO << COLOR_SRCBLEND_shift)) { 1350 RADEON_FALLBACK(("Component alpha not supported with source " 1351 "alpha and source value blending.\n")); 1352 } 1353 } 1354 1355 if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1)) 1356 return FALSE; 1357 } 1358 1359 if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0)) 1360 return FALSE; 1361 1362 if (!R600GetDestFormat(pDstPicture, &tmp1)) 1363 return FALSE; 1364 1365 return TRUE; 1366 1367} 1368 1369static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, 1370 PicturePtr pMaskPicture, PicturePtr pDstPicture, 1371 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) 1372{ 1373 ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 1374 RADEONInfoPtr info = RADEONPTR(pScrn); 1375 struct radeon_accel_state *accel_state = info->accel_state; 1376 uint32_t blendcntl, dst_format; 1377 cb_config_t cb_conf; 1378 shader_config_t vs_conf, ps_conf; 1379 struct r600_accel_object src_obj, mask_obj, dst_obj; 1380 1381 if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8) 1382 return FALSE; 1383 1384#if defined(XF86DRM_MODE) 1385 if (info->cs) { 1386 src_obj.offset = 0; 1387 dst_obj.offset = 0; 1388 src_obj.bo = radeon_get_pixmap_bo(pSrc); 1389 dst_obj.bo = radeon_get_pixmap_bo(pDst); 1390 } else 1391#endif 1392 { 1393 src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; 1394 dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; 1395 src_obj.bo = NULL; 1396 dst_obj.bo = NULL; 1397 } 1398 src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); 1399 dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); 1400 1401 src_obj.width = pSrc->drawable.width; 1402 src_obj.height = pSrc->drawable.height; 1403 src_obj.bpp = pSrc->drawable.bitsPerPixel; 1404 src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; 1405 1406 dst_obj.width = pDst->drawable.width; 1407 dst_obj.height = pDst->drawable.height; 1408 dst_obj.bpp = pDst->drawable.bitsPerPixel; 1409 dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 1410 1411 if (pMask) { 1412#if defined(XF86DRM_MODE) 1413 if (info->cs) { 1414 mask_obj.offset = 0; 1415 mask_obj.bo = radeon_get_pixmap_bo(pMask); 1416 } else 1417#endif 1418 { 1419 mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset; 1420 mask_obj.bo = NULL; 1421 } 1422 mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8); 1423 1424 mask_obj.width = pMask->drawable.width; 1425 mask_obj.height = pMask->drawable.height; 1426 mask_obj.bpp = pMask->drawable.bitsPerPixel; 1427 mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; 1428 1429 if (!R600SetAccelState(pScrn, 1430 &src_obj, 1431 &mask_obj, 1432 &dst_obj, 1433 accel_state->comp_vs_offset, accel_state->comp_mask_ps_offset, 1434 3, 0xffffffff)) 1435 return FALSE; 1436 1437 accel_state->msk_pic = pMaskPicture; 1438 if (pMaskPicture->componentAlpha) { 1439 accel_state->component_alpha = TRUE; 1440 if (R600BlendOp[op].src_alpha) 1441 accel_state->src_alpha = TRUE; 1442 else 1443 accel_state->src_alpha = FALSE; 1444 } else { 1445 accel_state->component_alpha = FALSE; 1446 accel_state->src_alpha = FALSE; 1447 } 1448 } else { 1449 if (!R600SetAccelState(pScrn, 1450 &src_obj, 1451 NULL, 1452 &dst_obj, 1453 accel_state->comp_vs_offset, accel_state->comp_ps_offset, 1454 3, 0xffffffff)) 1455 return FALSE; 1456 1457 accel_state->msk_pic = NULL; 1458 accel_state->component_alpha = FALSE; 1459 accel_state->src_alpha = FALSE; 1460 } 1461 1462 if (!R600GetDestFormat(pDstPicture, &dst_format)) 1463 return FALSE; 1464 1465 CLEAR (cb_conf); 1466 CLEAR (vs_conf); 1467 CLEAR (ps_conf); 1468 1469 if (pMask) 1470 radeon_vbo_check(pScrn, 24); 1471 else 1472 radeon_vbo_check(pScrn, 16); 1473 1474 r600_cp_start(pScrn); 1475 1476 set_default_state(pScrn, accel_state->ib); 1477 1478 set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 1479 set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 1480 set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 1481 1482 if (!R600TextureSetup(pSrcPicture, pSrc, 0)) { 1483 R600IBDiscard(pScrn, accel_state->ib); 1484 r600_vb_discard(pScrn); 1485 return FALSE; 1486 } 1487 1488 if (pMask) { 1489 if (!R600TextureSetup(pMaskPicture, pMask, 1)) { 1490 R600IBDiscard(pScrn, accel_state->ib); 1491 r600_vb_discard(pScrn); 1492 return FALSE; 1493 } 1494 } else 1495 accel_state->is_transform[1] = FALSE; 1496 1497 if (pMask) 1498 set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0)); 1499 else 1500 set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0)); 1501 1502 /* Shader */ 1503 1504 /* flush SQ cache */ 1505 cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, 1506 accel_state->vs_size, accel_state->vs_mc_addr, 1507 accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 1508 1509 vs_conf.shader_addr = accel_state->vs_mc_addr; 1510 vs_conf.num_gprs = 3; 1511 vs_conf.stack_size = 1; 1512 vs_conf.bo = accel_state->shaders_bo; 1513 vs_setup (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM); 1514 1515 /* flush SQ cache */ 1516 cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, 1517 accel_state->ps_size, accel_state->ps_mc_addr, 1518 accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); 1519 1520 ps_conf.shader_addr = accel_state->ps_mc_addr; 1521 ps_conf.num_gprs = 3; 1522 ps_conf.stack_size = 0; 1523 ps_conf.uncached_first_inst = 1; 1524 ps_conf.clamp_consts = 0; 1525 ps_conf.export_mode = 2; 1526 ps_conf.bo = accel_state->shaders_bo; 1527 ps_setup (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM); 1528 1529 BEGIN_BATCH(9); 1530 EREG(accel_state->ib, CB_TARGET_MASK, (0xf << TARGET0_ENABLE_shift)); 1531 1532 blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format); 1533 1534 if (info->ChipFamily == CHIP_FAMILY_R600) { 1535 /* no per-MRT blend on R600 */ 1536 EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift)); 1537 EREG(accel_state->ib, CB_BLEND_CONTROL, blendcntl); 1538 } else { 1539 EREG(accel_state->ib, CB_COLOR_CONTROL, (RADEON_ROP[3] | 1540 (1 << TARGET_BLEND_ENABLE_shift) | 1541 PER_MRT_BLEND_bit)); 1542 EREG(accel_state->ib, CB_BLEND0_CONTROL, blendcntl); 1543 } 1544 END_BATCH(); 1545 1546 cb_conf.id = 0; 1547 cb_conf.w = accel_state->dst_obj.pitch; 1548 cb_conf.h = accel_state->dst_obj.height; 1549 cb_conf.base = accel_state->dst_obj.offset; 1550 cb_conf.format = dst_format; 1551 cb_conf.bo = accel_state->dst_obj.bo; 1552 1553 switch (pDstPicture->format) { 1554 case PICT_a8r8g8b8: 1555 case PICT_x8r8g8b8: 1556 case PICT_a1r5g5b5: 1557 case PICT_x1r5g5b5: 1558 default: 1559 cb_conf.comp_swap = 1; /* ARGB */ 1560 break; 1561 case PICT_a8b8g8r8: 1562 case PICT_x8b8g8r8: 1563 cb_conf.comp_swap = 0; /* ABGR */ 1564 break; 1565#ifdef PICT_TYPE_BGRA 1566 case PICT_b8g8r8a8: 1567 case PICT_b8g8r8x8: 1568 cb_conf.comp_swap = 3; /* BGRA */ 1569 break; 1570#endif 1571 case PICT_r5g6b5: 1572 cb_conf.comp_swap = 2; /* RGB */ 1573 break; 1574 case PICT_a8: 1575 cb_conf.comp_swap = 3; /* A */ 1576 break; 1577 } 1578 cb_conf.source_format = 1; 1579 cb_conf.blend_clamp = 1; 1580 set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain); 1581 1582 /* Interpolator setup */ 1583 BEGIN_BATCH(21); 1584 if (pMask) { 1585 /* export 2 tex coords from VS */ 1586 EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift)); 1587 /* src = semantic id 0; mask = semantic id 1 */ 1588 EREG(accel_state->ib, SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) | 1589 (1 << SEMANTIC_1_shift))); 1590 /* input 2 tex coords from VS */ 1591 EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (2 << NUM_INTERP_shift)); 1592 } else { 1593 /* export 1 tex coords from VS */ 1594 EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); 1595 /* src = semantic id 0 */ 1596 EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); 1597 /* input 1 tex coords from VS */ 1598 EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift)); 1599 } 1600 EREG(accel_state->ib, SPI_PS_IN_CONTROL_1, 0); 1601 /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */ 1602 EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | 1603 (0x01 << DEFAULT_VAL_shift) | 1604 SEL_CENTROID_bit)); 1605 /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */ 1606 EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (1 <<2), ((1 << SEMANTIC_shift) | 1607 (0x01 << DEFAULT_VAL_shift) | 1608 SEL_CENTROID_bit)); 1609 EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); 1610 END_BATCH(); 1611 1612 if (accel_state->vsync) 1613 R600VlineHelperClear(pScrn); 1614 1615 return TRUE; 1616} 1617 1618static void R600Composite(PixmapPtr pDst, 1619 int srcX, int srcY, 1620 int maskX, int maskY, 1621 int dstX, int dstY, 1622 int w, int h) 1623{ 1624 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1625 RADEONInfoPtr info = RADEONPTR(pScrn); 1626 struct radeon_accel_state *accel_state = info->accel_state; 1627 float *vb; 1628 1629 /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n", 1630 srcX, srcY, maskX, maskY,dstX, dstY, w, h); */ 1631 1632 if (accel_state->vsync) 1633 R600VlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h); 1634 1635 if (accel_state->msk_pic) { 1636 1637 vb = radeon_vbo_space(pScrn, 24); 1638 1639 vb[0] = (float)dstX; 1640 vb[1] = (float)dstY; 1641 vb[2] = (float)srcX; 1642 vb[3] = (float)srcY; 1643 vb[4] = (float)maskX; 1644 vb[5] = (float)maskY; 1645 1646 vb[6] = (float)dstX; 1647 vb[7] = (float)(dstY + h); 1648 vb[8] = (float)srcX; 1649 vb[9] = (float)(srcY + h); 1650 vb[10] = (float)maskX; 1651 vb[11] = (float)(maskY + h); 1652 1653 vb[12] = (float)(dstX + w); 1654 vb[13] = (float)(dstY + h); 1655 vb[14] = (float)(srcX + w); 1656 vb[15] = (float)(srcY + h); 1657 vb[16] = (float)(maskX + w); 1658 vb[17] = (float)(maskY + h); 1659 1660 radeon_vbo_commit(pScrn); 1661 1662 } else { 1663 1664 vb = radeon_vbo_space(pScrn, 16); 1665 1666 vb[0] = (float)dstX; 1667 vb[1] = (float)dstY; 1668 vb[2] = (float)srcX; 1669 vb[3] = (float)srcY; 1670 1671 vb[4] = (float)dstX; 1672 vb[5] = (float)(dstY + h); 1673 vb[6] = (float)srcX; 1674 vb[7] = (float)(srcY + h); 1675 1676 vb[8] = (float)(dstX + w); 1677 vb[9] = (float)(dstY + h); 1678 vb[10] = (float)(srcX + w); 1679 vb[11] = (float)(srcY + h); 1680 1681 radeon_vbo_commit(pScrn); 1682 } 1683 1684 1685} 1686 1687static void R600DoneComposite(PixmapPtr pDst) 1688{ 1689 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1690 RADEONInfoPtr info = RADEONPTR(pScrn); 1691 struct radeon_accel_state *accel_state = info->accel_state; 1692 int vtx_size; 1693 1694 if (accel_state->vsync) 1695 cp_wait_vline_sync(pScrn, accel_state->ib, pDst, 1696 accel_state->vline_crtc, 1697 accel_state->vline_y1, 1698 accel_state->vline_y2); 1699 1700 vtx_size = accel_state->msk_pic ? 24 : 16; 1701 1702 r600_finish_op(pScrn, vtx_size); 1703} 1704 1705Bool 1706R600CopyToVRAM(ScrnInfoPtr pScrn, 1707 char *src, int src_pitch, 1708 uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_width, uint32_t dst_height, int bpp, 1709 int x, int y, int w, int h) 1710{ 1711 RADEONInfoPtr info = RADEONPTR(pScrn); 1712 struct radeon_accel_state *accel_state = info->accel_state; 1713 uint32_t scratch_mc_addr; 1714 int wpass = w * (bpp/8); 1715 int scratch_pitch_bytes = RADEON_ALIGN(wpass, 256); 1716 uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8); 1717 int scratch_offset = 0, hpass, temph; 1718 char *dst; 1719 drmBufPtr scratch; 1720 struct r600_accel_object scratch_obj, dst_obj; 1721 1722 if (dst_pitch & 7) 1723 return FALSE; 1724 1725 if (dst_mc_addr & 0xff) 1726 return FALSE; 1727 1728 scratch = RADEONCPGetBuffer(pScrn); 1729 if (scratch == NULL) 1730 return FALSE; 1731 1732 scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total); 1733 temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes); 1734 dst = (char *)scratch->address; 1735 1736 scratch_obj.pitch = scratch_pitch; 1737 scratch_obj.width = w; 1738 scratch_obj.height = hpass; 1739 scratch_obj.offset = scratch_mc_addr; 1740 scratch_obj.bpp = bpp; 1741 scratch_obj.domain = RADEON_GEM_DOMAIN_GTT; 1742 scratch_obj.bo = NULL; 1743 1744 dst_obj.pitch = dst_pitch; 1745 dst_obj.width = dst_width; 1746 dst_obj.height = dst_height; 1747 dst_obj.offset = dst_mc_addr; 1748 dst_obj.bo = NULL; 1749 dst_obj.bpp = bpp; 1750 dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 1751 1752 if (!R600SetAccelState(pScrn, 1753 &scratch_obj, 1754 NULL, 1755 &dst_obj, 1756 accel_state->copy_vs_offset, accel_state->copy_ps_offset, 1757 3, 0xffffffff)) 1758 return FALSE; 1759 1760 /* memcopy from sys to scratch */ 1761 while (temph--) { 1762 memcpy (dst, src, wpass); 1763 src += src_pitch; 1764 dst += scratch_pitch_bytes; 1765 } 1766 1767 while (h) { 1768 uint32_t offset = scratch_mc_addr + scratch_offset; 1769 int oldhpass = hpass; 1770 h -= oldhpass; 1771 temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes); 1772 1773 if (hpass) { 1774 scratch_offset = scratch->total/2 - scratch_offset; 1775 dst = (char *)scratch->address + scratch_offset; 1776 /* wait for the engine to be idle */ 1777 RADEONWaitForIdleCP(pScrn); 1778 //memcopy from sys to scratch 1779 while (temph--) { 1780 memcpy (dst, src, wpass); 1781 src += src_pitch; 1782 dst += scratch_pitch_bytes; 1783 } 1784 } 1785 /* blit from scratch to vram */ 1786 info->accel_state->src_obj[0].height = oldhpass; 1787 info->accel_state->src_obj[0].offset = offset; 1788 R600DoPrepareCopy(pScrn); 1789 R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass); 1790 R600DoCopy(pScrn); 1791 y += oldhpass; 1792 } 1793 1794 R600IBDiscard(pScrn, scratch); 1795 r600_vb_discard(pScrn); 1796 1797 return TRUE; 1798} 1799 1800static Bool 1801R600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, 1802 char *src, int src_pitch) 1803{ 1804 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1805 RADEONInfoPtr info = RADEONPTR(pScrn); 1806 uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); 1807 uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; 1808 int bpp = pDst->drawable.bitsPerPixel; 1809 1810 return R600CopyToVRAM(pScrn, 1811 src, src_pitch, 1812 dst_pitch, dst_mc_addr, pDst->drawable.width, pDst->drawable.height, bpp, 1813 x, y, w, h); 1814} 1815 1816static Bool 1817R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, 1818 char *dst, int dst_pitch) 1819{ 1820 ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 1821 RADEONInfoPtr info = RADEONPTR(pScrn); 1822 struct radeon_accel_state *accel_state = info->accel_state; 1823 uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); 1824 uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; 1825 uint32_t src_width = pSrc->drawable.width; 1826 uint32_t src_height = pSrc->drawable.height; 1827 int bpp = pSrc->drawable.bitsPerPixel; 1828 uint32_t scratch_mc_addr; 1829 int scratch_pitch_bytes = RADEON_ALIGN(dst_pitch, 256); 1830 int scratch_offset = 0, hpass; 1831 uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8); 1832 int wpass = w * (bpp/8); 1833 drmBufPtr scratch; 1834 struct r600_accel_object scratch_obj, src_obj; 1835 1836 /* bad pipe setup in drm prior to 1.32 */ 1837 if (info->dri->pKernelDRMVersion->version_minor < 32) { 1838 if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32)) 1839 return FALSE; 1840 } 1841 1842 if (src_pitch & 7) 1843 return FALSE; 1844 1845 scratch = RADEONCPGetBuffer(pScrn); 1846 if (scratch == NULL) 1847 return FALSE; 1848 1849 scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total); 1850 hpass = min(h, scratch->total/2 / scratch_pitch_bytes); 1851 1852 src_obj.pitch = src_pitch; 1853 src_obj.width = src_width; 1854 src_obj.height = src_height; 1855 src_obj.offset = src_mc_addr; 1856 src_obj.bo = NULL; 1857 src_obj.bpp = bpp; 1858 src_obj.domain = RADEON_GEM_DOMAIN_VRAM; 1859 1860 scratch_obj.pitch = scratch_pitch; 1861 scratch_obj.width = src_width; 1862 scratch_obj.height = hpass; 1863 scratch_obj.offset = scratch_mc_addr; 1864 scratch_obj.bpp = bpp; 1865 scratch_obj.domain = RADEON_GEM_DOMAIN_GTT; 1866 scratch_obj.bo = NULL; 1867 1868 if (!R600SetAccelState(pScrn, 1869 &src_obj, 1870 NULL, 1871 &scratch_obj, 1872 accel_state->copy_vs_offset, accel_state->copy_ps_offset, 1873 3, 0xffffffff)) 1874 return FALSE; 1875 1876 /* blit from vram to scratch */ 1877 R600DoPrepareCopy(pScrn); 1878 R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); 1879 R600DoCopy(pScrn); 1880 1881 while (h) { 1882 char *src = (char *)scratch->address + scratch_offset; 1883 int oldhpass = hpass; 1884 h -= oldhpass; 1885 y += oldhpass; 1886 hpass = min(h, scratch->total/2 / scratch_pitch_bytes); 1887 1888 if (hpass) { 1889 scratch_offset = scratch->total/2 - scratch_offset; 1890 /* blit from vram to scratch */ 1891 info->accel_state->dst_obj.height = hpass; 1892 info->accel_state->dst_obj.offset = scratch_mc_addr + scratch_offset; 1893 R600DoPrepareCopy(pScrn); 1894 R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); 1895 R600DoCopy(pScrn); 1896 } 1897 1898 /* wait for the engine to be idle */ 1899 RADEONWaitForIdleCP(pScrn); 1900 /* memcopy from scratch to sys */ 1901 while (oldhpass--) { 1902 memcpy (dst, src, wpass); 1903 dst += dst_pitch; 1904 src += scratch_pitch_bytes; 1905 } 1906 } 1907 1908 R600IBDiscard(pScrn, scratch); 1909 r600_vb_discard(pScrn); 1910 1911 return TRUE; 1912 1913} 1914 1915#if defined(XF86DRM_MODE) 1916 1917static Bool 1918R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h, 1919 char *src, int src_pitch) 1920{ 1921 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1922 RADEONInfoPtr info = RADEONPTR(pScrn); 1923 struct radeon_accel_state *accel_state = info->accel_state; 1924 struct radeon_exa_pixmap_priv *driver_priv; 1925 struct radeon_bo *scratch; 1926 unsigned size; 1927 uint32_t dst_domain; 1928 int bpp = pDst->drawable.bitsPerPixel; 1929 uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256); 1930 uint32_t src_pitch_hw = scratch_pitch / (bpp / 8); 1931 uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8); 1932 Bool r; 1933 int i; 1934 struct r600_accel_object src_obj, dst_obj; 1935 1936 if (bpp < 8) 1937 return FALSE; 1938 1939 driver_priv = exaGetPixmapDriverPrivate(pDst); 1940 1941 /* If we know the BO won't be busy, don't bother */ 1942 if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs) && 1943 !radeon_bo_is_busy(driver_priv->bo, &dst_domain)) 1944 return FALSE; 1945 1946 size = scratch_pitch * h; 1947 scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0); 1948 if (scratch == NULL) { 1949 return FALSE; 1950 } 1951 1952 src_obj.pitch = src_pitch_hw; 1953 src_obj.width = w; 1954 src_obj.height = h; 1955 src_obj.offset = 0; 1956 src_obj.bpp = bpp; 1957 src_obj.domain = RADEON_GEM_DOMAIN_GTT; 1958 src_obj.bo = scratch; 1959 1960 dst_obj.pitch = dst_pitch_hw; 1961 dst_obj.width = pDst->drawable.width; 1962 dst_obj.height = pDst->drawable.height; 1963 dst_obj.offset = 0; 1964 dst_obj.bpp = bpp; 1965 dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 1966 dst_obj.bo = radeon_get_pixmap_bo(pDst); 1967 1968 if (!R600SetAccelState(pScrn, 1969 &src_obj, 1970 NULL, 1971 &dst_obj, 1972 accel_state->copy_vs_offset, accel_state->copy_ps_offset, 1973 3, 0xffffffff)) { 1974 r = FALSE; 1975 goto out; 1976 } 1977 1978 r = radeon_bo_map(scratch, 0); 1979 if (r) { 1980 r = FALSE; 1981 goto out; 1982 } 1983 r = TRUE; 1984 size = w * bpp / 8; 1985 for (i = 0; i < h; i++) { 1986 memcpy(scratch->ptr + i * scratch_pitch, src, size); 1987 src += src_pitch; 1988 } 1989 radeon_bo_unmap(scratch); 1990 1991 if (info->accel_state->vsync) 1992 R600VlineHelperSet(pScrn, x, y, x + w, y + h); 1993 1994 /* blit from gart to vram */ 1995 R600DoPrepareCopy(pScrn); 1996 R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h); 1997 R600DoCopyVline(pDst); 1998 1999out: 2000 radeon_bo_unref(scratch); 2001 return r; 2002} 2003 2004static Bool 2005R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w, 2006 int h, char *dst, int dst_pitch) 2007{ 2008 ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; 2009 RADEONInfoPtr info = RADEONPTR(pScrn); 2010 struct radeon_accel_state *accel_state = info->accel_state; 2011 struct radeon_exa_pixmap_priv *driver_priv; 2012 struct radeon_bo *scratch; 2013 unsigned size; 2014 uint32_t src_domain = 0; 2015 int bpp = pSrc->drawable.bitsPerPixel; 2016 uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256); 2017 uint32_t dst_pitch_hw = scratch_pitch / (bpp / 8); 2018 uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8); 2019 Bool r; 2020 struct r600_accel_object src_obj, dst_obj; 2021 2022 if (bpp < 8) 2023 return FALSE; 2024 2025 driver_priv = exaGetPixmapDriverPrivate(pSrc); 2026 2027 /* If we know the BO won't end up in VRAM anyway, don't bother */ 2028 if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) { 2029 src_domain = radeon_bo_get_src_domain(driver_priv->bo); 2030 if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 2031 (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) 2032 src_domain = 0; 2033 } 2034 2035 if (!src_domain) 2036 radeon_bo_is_busy(driver_priv->bo, &src_domain); 2037 2038 if (src_domain != RADEON_GEM_DOMAIN_VRAM) 2039 return FALSE; 2040 2041 size = scratch_pitch * h; 2042 scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0); 2043 if (scratch == NULL) { 2044 return FALSE; 2045 } 2046 radeon_cs_space_reset_bos(info->cs); 2047 radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo, 2048 RADEON_GEM_DOMAIN_VRAM, 0); 2049 accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM; 2050 radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0); 2051 accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT; 2052 radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain); 2053 r = radeon_cs_space_check(info->cs); 2054 if (r) { 2055 r = FALSE; 2056 goto out; 2057 } 2058 2059 src_obj.pitch = src_pitch_hw; 2060 src_obj.width = pSrc->drawable.width; 2061 src_obj.height = pSrc->drawable.height; 2062 src_obj.offset = 0; 2063 src_obj.bpp = bpp; 2064 src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; 2065 src_obj.bo = radeon_get_pixmap_bo(pSrc); 2066 2067 dst_obj.pitch = dst_pitch_hw; 2068 dst_obj.width = w; 2069 dst_obj.height = h; 2070 dst_obj.offset = 0; 2071 dst_obj.bo = scratch; 2072 dst_obj.bpp = bpp; 2073 dst_obj.domain = RADEON_GEM_DOMAIN_GTT; 2074 2075 if (!R600SetAccelState(pScrn, 2076 &src_obj, 2077 NULL, 2078 &dst_obj, 2079 accel_state->copy_vs_offset, accel_state->copy_ps_offset, 2080 3, 0xffffffff)) { 2081 r = FALSE; 2082 goto out; 2083 } 2084 2085 /* blit from vram to gart */ 2086 R600DoPrepareCopy(pScrn); 2087 R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h); 2088 R600DoCopy(pScrn); 2089 2090 if (info->cs) 2091 radeon_cs_flush_indirect(pScrn); 2092 2093 r = radeon_bo_map(scratch, 0); 2094 if (r) { 2095 r = FALSE; 2096 goto out; 2097 } 2098 r = TRUE; 2099 w *= bpp / 8; 2100 size = 0; 2101 while (h--) { 2102 memcpy(dst, scratch->ptr + size, w); 2103 size += scratch_pitch; 2104 dst += dst_pitch; 2105 } 2106 radeon_bo_unmap(scratch); 2107out: 2108 radeon_bo_unref(scratch); 2109 return r; 2110} 2111#endif 2112 2113static int 2114R600MarkSync(ScreenPtr pScreen) 2115{ 2116 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 2117 RADEONInfoPtr info = RADEONPTR(pScrn); 2118 struct radeon_accel_state *accel_state = info->accel_state; 2119 2120 return ++accel_state->exaSyncMarker; 2121 2122} 2123 2124static void 2125R600Sync(ScreenPtr pScreen, int marker) 2126{ 2127 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 2128 RADEONInfoPtr info = RADEONPTR(pScrn); 2129 struct radeon_accel_state *accel_state = info->accel_state; 2130 2131 if (accel_state->exaMarkerSynced != marker) { 2132#ifdef XF86DRM_MODE 2133#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) 2134 if (!info->cs) 2135#endif 2136#endif 2137 RADEONWaitForIdleCP(pScrn); 2138 accel_state->exaMarkerSynced = marker; 2139 } 2140 2141} 2142 2143static Bool 2144R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) 2145{ 2146 RADEONInfoPtr info = RADEONPTR(pScrn); 2147 struct radeon_accel_state *accel_state = info->accel_state; 2148 2149 /* 512 bytes per shader for now */ 2150 int size = 512 * 9; 2151 2152 accel_state->shaders = NULL; 2153 2154#ifdef XF86DRM_MODE 2155#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) 2156 if (info->cs) { 2157 accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0, 2158 RADEON_GEM_DOMAIN_VRAM, 0); 2159 if (accel_state->shaders_bo == NULL) { 2160 ErrorF("Allocating shader failed\n"); 2161 return FALSE; 2162 } 2163 return TRUE; 2164 } else 2165#endif 2166#endif 2167 { 2168 accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256, 2169 TRUE, NULL, NULL); 2170 2171 if (accel_state->shaders == NULL) 2172 return FALSE; 2173 } 2174 2175 return TRUE; 2176} 2177 2178Bool 2179R600LoadShaders(ScrnInfoPtr pScrn) 2180{ 2181 RADEONInfoPtr info = RADEONPTR(pScrn); 2182 struct radeon_accel_state *accel_state = info->accel_state; 2183 RADEONChipFamily ChipSet = info->ChipFamily; 2184 uint32_t *shader; 2185#ifdef XF86DRM_MODE 2186#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) 2187 int ret; 2188 2189 if (info->cs) { 2190 ret = radeon_bo_map(accel_state->shaders_bo, 1); 2191 if (ret) { 2192 FatalError("failed to map shader %d\n", ret); 2193 return FALSE; 2194 } 2195 shader = accel_state->shaders_bo->ptr; 2196 } else 2197#endif 2198#endif 2199 shader = (pointer)((char *)info->FB + accel_state->shaders->offset); 2200 2201 /* solid vs --------------------------------------- */ 2202 accel_state->solid_vs_offset = 0; 2203 R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4); 2204 2205 /* solid ps --------------------------------------- */ 2206 accel_state->solid_ps_offset = 512; 2207 R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4); 2208 2209 /* copy vs --------------------------------------- */ 2210 accel_state->copy_vs_offset = 1024; 2211 R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4); 2212 2213 /* copy ps --------------------------------------- */ 2214 accel_state->copy_ps_offset = 1536; 2215 R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4); 2216 2217 /* comp vs --------------------------------------- */ 2218 accel_state->comp_vs_offset = 2048; 2219 R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4); 2220 2221 /* comp ps --------------------------------------- */ 2222 accel_state->comp_ps_offset = 2560; 2223 R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4); 2224 2225 /* comp mask ps --------------------------------------- */ 2226 accel_state->comp_mask_ps_offset = 3072; 2227 R600_comp_mask_ps(ChipSet, shader + accel_state->comp_mask_ps_offset / 4); 2228 2229 /* xv vs --------------------------------------- */ 2230 accel_state->xv_vs_offset = 3584; 2231 R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4); 2232 2233 /* xv ps --------------------------------------- */ 2234 accel_state->xv_ps_offset = 4096; 2235 R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4); 2236 2237#ifdef XF86DRM_MODE 2238#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) 2239 if (info->cs) { 2240 radeon_bo_unmap(accel_state->shaders_bo); 2241 } 2242#endif 2243#endif 2244 2245 return TRUE; 2246} 2247 2248static Bool 2249R600PrepareAccess(PixmapPtr pPix, int index) 2250{ 2251 ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; 2252 RADEONInfoPtr info = RADEONPTR(pScrn); 2253 unsigned char *RADEONMMIO = info->MMIO; 2254 2255 /* flush HDP read/write caches */ 2256 OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); 2257 2258 return TRUE; 2259} 2260 2261static void 2262R600FinishAccess(PixmapPtr pPix, int index) 2263{ 2264 ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; 2265 RADEONInfoPtr info = RADEONPTR(pScrn); 2266 unsigned char *RADEONMMIO = info->MMIO; 2267 2268 /* flush HDP read/write caches */ 2269 OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); 2270 2271} 2272 2273Bool 2274R600DrawInit(ScreenPtr pScreen) 2275{ 2276 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 2277 RADEONInfoPtr info = RADEONPTR(pScrn); 2278 2279 if (info->accel_state->exa == NULL) { 2280 xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n"); 2281 return FALSE; 2282 } 2283 2284 info->accel_state->exa->exa_major = EXA_VERSION_MAJOR; 2285 info->accel_state->exa->exa_minor = EXA_VERSION_MINOR; 2286 2287 info->accel_state->exa->PrepareSolid = R600PrepareSolid; 2288 info->accel_state->exa->Solid = R600Solid; 2289 info->accel_state->exa->DoneSolid = R600DoneSolid; 2290 2291 info->accel_state->exa->PrepareCopy = R600PrepareCopy; 2292 info->accel_state->exa->Copy = R600Copy; 2293 info->accel_state->exa->DoneCopy = R600DoneCopy; 2294 2295 info->accel_state->exa->MarkSync = R600MarkSync; 2296 info->accel_state->exa->WaitMarker = R600Sync; 2297 2298#ifdef XF86DRM_MODE 2299#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) 2300 if (info->cs) { 2301 info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap; 2302 info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap; 2303 info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen; 2304 info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS; 2305 info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS; 2306 info->accel_state->exa->UploadToScreen = R600UploadToScreenCS; 2307 info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS; 2308 } else 2309#endif 2310#endif 2311 { 2312 info->accel_state->exa->PrepareAccess = R600PrepareAccess; 2313 info->accel_state->exa->FinishAccess = R600FinishAccess; 2314 2315 /* AGP seems to have problems with gart transfers */ 2316 if (info->accelDFS) { 2317 info->accel_state->exa->UploadToScreen = R600UploadToScreen; 2318 info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen; 2319 } 2320 } 2321 2322 info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS; 2323#ifdef EXA_SUPPORTS_PREPARE_AUX 2324 info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX; 2325#endif 2326 2327#ifdef XF86DRM_MODE 2328#ifdef EXA_HANDLES_PIXMAPS 2329 if (info->cs) { 2330 info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS; 2331#ifdef EXA_MIXED_PIXMAPS 2332 info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS; 2333#endif 2334 } 2335#endif 2336#endif 2337 info->accel_state->exa->pixmapOffsetAlign = 256; 2338 info->accel_state->exa->pixmapPitchAlign = 256; 2339 2340 info->accel_state->exa->CheckComposite = R600CheckComposite; 2341 info->accel_state->exa->PrepareComposite = R600PrepareComposite; 2342 info->accel_state->exa->Composite = R600Composite; 2343 info->accel_state->exa->DoneComposite = R600DoneComposite; 2344 2345#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3) 2346 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n"); 2347 2348 info->accel_state->exa->maxPitchBytes = 32768; 2349 info->accel_state->exa->maxX = 8192; 2350#else 2351 info->accel_state->exa->maxX = 8192; 2352#endif 2353 info->accel_state->exa->maxY = 8192; 2354 2355 /* not supported yet */ 2356 if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) { 2357 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n"); 2358 info->accel_state->vsync = TRUE; 2359 } else 2360 info->accel_state->vsync = FALSE; 2361 2362 if (!exaDriverInit(pScreen, info->accel_state->exa)) { 2363 free(info->accel_state->exa); 2364 return FALSE; 2365 } 2366 2367#ifdef XF86DRM_MODE 2368#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) 2369 if (!info->cs) 2370#endif 2371#endif 2372 if (!info->gartLocation) 2373 return FALSE; 2374 2375 info->accel_state->XInited3D = FALSE; 2376 info->accel_state->copy_area = NULL; 2377 info->accel_state->src_obj[0].bo = NULL; 2378 info->accel_state->src_obj[1].bo = NULL; 2379 info->accel_state->dst_obj.bo = NULL; 2380 info->accel_state->copy_area_bo = NULL; 2381 info->accel_state->vb_start_op = -1; 2382 R600VlineHelperClear(pScrn); 2383 2384#ifdef XF86DRM_MODE 2385 radeon_vbo_init_lists(pScrn); 2386#endif 2387 2388 if (!R600AllocShaders(pScrn, pScreen)) 2389 return FALSE; 2390 2391 if (!R600LoadShaders(pScrn)) 2392 return FALSE; 2393 2394 exaMarkSync(pScreen); 2395 2396 return TRUE; 2397 2398} 2399 2400