/* r600_exa.c revision de2362d3 */
1/* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 * 23 * Author: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif 30 31#include "xf86.h" 32 33#include "exa.h" 34 35#include "radeon.h" 36#include "radeon_reg.h" 37#include "r600_shader.h" 38#include "r600_reg.h" 39#include "r600_state.h" 40#include "radeon_exa_shared.h" 41#include "radeon_vbo.h" 42 43/* #define SHOW_VERTEXES */ 44 45Bool 46R600SetAccelState(ScrnInfoPtr pScrn, 47 struct r600_accel_object *src0, 48 struct r600_accel_object *src1, 49 struct r600_accel_object *dst, 50 uint32_t vs_offset, uint32_t ps_offset, 51 int rop, Pixel planemask) 52{ 53 RADEONInfoPtr info = RADEONPTR(pScrn); 54 struct radeon_accel_state *accel_state = info->accel_state; 55 uint32_t pitch_align = 0x7; 56 int ret; 57 58 if (src0) { 59 memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object)); 60 accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8); 61 if (src0->surface) 62 accel_state->src_size[0] = src0->surface->bo_size; 63 64 /* bad pitch */ 65 if (accel_state->src_obj[0].pitch & pitch_align) 66 RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch)); 67 68 } else { 69 memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object)); 70 accel_state->src_size[0] = 0; 71 } 72 73 if (src1) { 74 memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object)); 75 accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8); 76 if (src1->surface) { 77 accel_state->src_size[1] = src1->surface->bo_size; 78 } 79 80 /* bad pitch */ 81 if (accel_state->src_obj[1].pitch & pitch_align) 82 RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch)); 83 84 } else { 85 memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object)); 86 accel_state->src_size[1] = 0; 87 } 88 89 if (dst) { 90 memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object)); 91 accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8); 92 if 
(dst->surface) { 93 accel_state->dst_size = dst->surface->bo_size; 94 } else 95 { 96 accel_state->dst_obj.tiling_flags = 0; 97 } 98 if (accel_state->dst_obj.pitch & pitch_align) 99 RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch)); 100 101 } else { 102 memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object)); 103 accel_state->dst_size = 0; 104 } 105 106 if (CS_FULL(info->cs)) 107 radeon_cs_flush_indirect(pScrn); 108 109 accel_state->rop = rop; 110 accel_state->planemask = planemask; 111 112 accel_state->vs_size = 512; 113 accel_state->ps_size = 512; 114 accel_state->vs_mc_addr = vs_offset; 115 accel_state->ps_mc_addr = ps_offset; 116 117 radeon_cs_space_reset_bos(info->cs); 118 radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo, 119 RADEON_GEM_DOMAIN_VRAM, 0); 120 if (accel_state->src_obj[0].bo) 121 radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo, 122 accel_state->src_obj[0].domain, 0); 123 if (accel_state->src_obj[1].bo) 124 radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo, 125 accel_state->src_obj[1].domain, 0); 126 if (accel_state->dst_obj.bo) 127 radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo, 128 0, accel_state->dst_obj.domain); 129 ret = radeon_cs_space_check(info->cs); 130 if (ret) 131 RADEON_FALLBACK(("Not enough RAM to hw accel operation\n")); 132 133 return TRUE; 134} 135 136static Bool 137R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) 138{ 139 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); 140 RADEONInfoPtr info = RADEONPTR(pScrn); 141 struct radeon_accel_state *accel_state = info->accel_state; 142 cb_config_t cb_conf; 143 shader_config_t vs_conf, ps_conf; 144 uint32_t a, r, g, b; 145 float ps_alu_consts[4]; 146 struct r600_accel_object dst; 147 148 if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel)) 149 RADEON_FALLBACK(("R600CheckDatatype failed\n")); 150 if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel)) 151 
RADEON_FALLBACK(("invalid planemask\n")); 152 153 dst.bo = radeon_get_pixmap_bo(pPix); 154 dst.tiling_flags = radeon_get_pixmap_tiling(pPix); 155 dst.surface = radeon_get_pixmap_surface(pPix); 156 157 dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); 158 dst.width = pPix->drawable.width; 159 dst.height = pPix->drawable.height; 160 dst.bpp = pPix->drawable.bitsPerPixel; 161 dst.domain = RADEON_GEM_DOMAIN_VRAM; 162 163 if (!R600SetAccelState(pScrn, 164 NULL, 165 NULL, 166 &dst, 167 accel_state->solid_vs_offset, accel_state->solid_ps_offset, 168 alu, pm)) 169 return FALSE; 170 171 CLEAR (cb_conf); 172 CLEAR (vs_conf); 173 CLEAR (ps_conf); 174 175 radeon_vbo_check(pScrn, &accel_state->vbo, 16); 176 radeon_cp_start(pScrn); 177 178 r600_set_default_state(pScrn); 179 180 r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 181 r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 182 r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 183 184 /* Shader */ 185 vs_conf.shader_addr = accel_state->vs_mc_addr; 186 vs_conf.shader_size = accel_state->vs_size; 187 vs_conf.num_gprs = 2; 188 vs_conf.stack_size = 0; 189 vs_conf.bo = accel_state->shaders_bo; 190 r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM); 191 192 ps_conf.shader_addr = accel_state->ps_mc_addr; 193 ps_conf.shader_size = accel_state->ps_size; 194 ps_conf.num_gprs = 1; 195 ps_conf.stack_size = 0; 196 ps_conf.uncached_first_inst = 1; 197 ps_conf.clamp_consts = 0; 198 ps_conf.export_mode = 2; 199 ps_conf.bo = accel_state->shaders_bo; 200 r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM); 201 202 cb_conf.id = 0; 203 cb_conf.w = accel_state->dst_obj.pitch; 204 cb_conf.h = accel_state->dst_obj.height; 205 cb_conf.base = 0; 206 cb_conf.bo = accel_state->dst_obj.bo; 207 cb_conf.surface = accel_state->dst_obj.surface; 208 209 if (accel_state->dst_obj.bpp == 8) { 
210 cb_conf.format = COLOR_8; 211 cb_conf.comp_swap = 3; /* A */ 212 } else if (accel_state->dst_obj.bpp == 16) { 213 cb_conf.format = COLOR_5_6_5; 214 cb_conf.comp_swap = 2; /* RGB */ 215#if X_BYTE_ORDER == X_BIG_ENDIAN 216 cb_conf.endian = ENDIAN_8IN16; 217#endif 218 } else { 219 cb_conf.format = COLOR_8_8_8_8; 220 cb_conf.comp_swap = 1; /* ARGB */ 221#if X_BYTE_ORDER == X_BIG_ENDIAN 222 cb_conf.endian = ENDIAN_8IN32; 223#endif 224 } 225 cb_conf.source_format = 1; 226 cb_conf.blend_clamp = 1; 227 /* Render setup */ 228 if (accel_state->planemask & 0x000000ff) 229 cb_conf.pmask |= 4; /* B */ 230 if (accel_state->planemask & 0x0000ff00) 231 cb_conf.pmask |= 2; /* G */ 232 if (accel_state->planemask & 0x00ff0000) 233 cb_conf.pmask |= 1; /* R */ 234 if (accel_state->planemask & 0xff000000) 235 cb_conf.pmask |= 8; /* A */ 236 cb_conf.rop = accel_state->rop; 237 if (accel_state->dst_obj.tiling_flags == 0) 238 cb_conf.array_mode = 0; 239 r600_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain); 240 241 r600_set_spi(pScrn, 0, 0); 242 243 /* PS alu constants */ 244 if (accel_state->dst_obj.bpp == 16) { 245 r = (fg >> 11) & 0x1f; 246 g = (fg >> 5) & 0x3f; 247 b = (fg >> 0) & 0x1f; 248 ps_alu_consts[0] = (float)r / 31; /* R */ 249 ps_alu_consts[1] = (float)g / 63; /* G */ 250 ps_alu_consts[2] = (float)b / 31; /* B */ 251 ps_alu_consts[3] = 1.0; /* A */ 252 } else if (accel_state->dst_obj.bpp == 8) { 253 a = (fg >> 0) & 0xff; 254 ps_alu_consts[0] = 0.0; /* R */ 255 ps_alu_consts[1] = 0.0; /* G */ 256 ps_alu_consts[2] = 0.0; /* B */ 257 ps_alu_consts[3] = (float)a / 255; /* A */ 258 } else { 259 a = (fg >> 24) & 0xff; 260 r = (fg >> 16) & 0xff; 261 g = (fg >> 8) & 0xff; 262 b = (fg >> 0) & 0xff; 263 ps_alu_consts[0] = (float)r / 255; /* R */ 264 ps_alu_consts[1] = (float)g / 255; /* G */ 265 ps_alu_consts[2] = (float)b / 255; /* B */ 266 ps_alu_consts[3] = (float)a / 255; /* A */ 267 } 268 r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_ps, 269 sizeof(ps_alu_consts) / 
SQ_ALU_CONSTANT_offset, ps_alu_consts); 270 271 if (accel_state->vsync) 272 RADEONVlineHelperClear(pScrn); 273 274 accel_state->dst_pix = pPix; 275 accel_state->fg = fg; 276 277 return TRUE; 278} 279 280static void 281R600DoneSolid(PixmapPtr pPix) 282{ 283 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); 284 RADEONInfoPtr info = RADEONPTR(pScrn); 285 struct radeon_accel_state *accel_state = info->accel_state; 286 287 if (accel_state->vsync) 288 r600_cp_wait_vline_sync(pScrn, pPix, 289 accel_state->vline_crtc, 290 accel_state->vline_y1, 291 accel_state->vline_y2); 292 293 r600_finish_op(pScrn, 8); 294} 295 296static void 297R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) 298{ 299 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); 300 RADEONInfoPtr info = RADEONPTR(pScrn); 301 struct radeon_accel_state *accel_state = info->accel_state; 302 float *vb; 303 304 if (CS_FULL(info->cs)) { 305 R600DoneSolid(info->accel_state->dst_pix); 306 radeon_cs_flush_indirect(pScrn); 307 R600PrepareSolid(accel_state->dst_pix, 308 accel_state->rop, 309 accel_state->planemask, 310 accel_state->fg); 311 } 312 313 if (accel_state->vsync) 314 RADEONVlineHelperSet(pScrn, x1, y1, x2, y2); 315 316 vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8); 317 318 vb[0] = (float)x1; 319 vb[1] = (float)y1; 320 321 vb[2] = (float)x1; 322 vb[3] = (float)y2; 323 324 vb[4] = (float)x2; 325 vb[5] = (float)y2; 326 327 radeon_vbo_commit(pScrn, &accel_state->vbo); 328} 329 330static void 331R600DoPrepareCopy(ScrnInfoPtr pScrn) 332{ 333 RADEONInfoPtr info = RADEONPTR(pScrn); 334 struct radeon_accel_state *accel_state = info->accel_state; 335 cb_config_t cb_conf; 336 tex_resource_t tex_res; 337 tex_sampler_t tex_samp; 338 shader_config_t vs_conf, ps_conf; 339 340 CLEAR (cb_conf); 341 CLEAR (tex_res); 342 CLEAR (tex_samp); 343 CLEAR (vs_conf); 344 CLEAR (ps_conf); 345 346 radeon_vbo_check(pScrn, &accel_state->vbo, 16); 347 radeon_cp_start(pScrn); 348 349 
r600_set_default_state(pScrn); 350 351 r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 352 r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 353 r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 354 355 /* Shader */ 356 vs_conf.shader_addr = accel_state->vs_mc_addr; 357 vs_conf.shader_size = accel_state->vs_size; 358 vs_conf.num_gprs = 2; 359 vs_conf.stack_size = 0; 360 vs_conf.bo = accel_state->shaders_bo; 361 r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM); 362 363 ps_conf.shader_addr = accel_state->ps_mc_addr; 364 ps_conf.shader_size = accel_state->ps_size; 365 ps_conf.num_gprs = 1; 366 ps_conf.stack_size = 0; 367 ps_conf.uncached_first_inst = 1; 368 ps_conf.clamp_consts = 0; 369 ps_conf.export_mode = 2; 370 ps_conf.bo = accel_state->shaders_bo; 371 r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM); 372 373 /* Texture */ 374 tex_res.id = 0; 375 tex_res.w = accel_state->src_obj[0].width; 376 tex_res.h = accel_state->src_obj[0].height; 377 tex_res.pitch = accel_state->src_obj[0].pitch; 378 tex_res.depth = 0; 379 tex_res.dim = SQ_TEX_DIM_2D; 380 tex_res.base = 0; 381 tex_res.mip_base = 0; 382 tex_res.size = accel_state->src_size[0]; 383 tex_res.bo = accel_state->src_obj[0].bo; 384 tex_res.mip_bo = accel_state->src_obj[0].bo; 385 tex_res.surface = accel_state->src_obj[0].surface; 386 if (accel_state->src_obj[0].bpp == 8) { 387 tex_res.format = FMT_8; 388 tex_res.dst_sel_x = SQ_SEL_1; /* R */ 389 tex_res.dst_sel_y = SQ_SEL_1; /* G */ 390 tex_res.dst_sel_z = SQ_SEL_1; /* B */ 391 tex_res.dst_sel_w = SQ_SEL_X; /* A */ 392 } else if (accel_state->src_obj[0].bpp == 16) { 393 tex_res.format = FMT_5_6_5; 394 tex_res.dst_sel_x = SQ_SEL_Z; /* R */ 395 tex_res.dst_sel_y = SQ_SEL_Y; /* G */ 396 tex_res.dst_sel_z = SQ_SEL_X; /* B */ 397 tex_res.dst_sel_w = SQ_SEL_1; /* A */ 398 } else { 399 tex_res.format = FMT_8_8_8_8; 400 
tex_res.dst_sel_x = SQ_SEL_Z; /* R */ 401 tex_res.dst_sel_y = SQ_SEL_Y; /* G */ 402 tex_res.dst_sel_z = SQ_SEL_X; /* B */ 403 tex_res.dst_sel_w = SQ_SEL_W; /* A */ 404 } 405 406 tex_res.request_size = 1; 407 tex_res.base_level = 0; 408 tex_res.last_level = 0; 409 tex_res.perf_modulation = 0; 410 if (accel_state->src_obj[0].tiling_flags == 0) 411 tex_res.tile_mode = 1; 412 r600_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain); 413 414 tex_samp.id = 0; 415 tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; 416 tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; 417 tex_samp.clamp_z = SQ_TEX_WRAP; 418 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; 419 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; 420 tex_samp.mc_coord_truncate = 1; 421 tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 422 tex_samp.mip_filter = 0; /* no mipmap */ 423 r600_set_tex_sampler(pScrn, &tex_samp); 424 425 cb_conf.id = 0; 426 cb_conf.w = accel_state->dst_obj.pitch; 427 cb_conf.h = accel_state->dst_obj.height; 428 cb_conf.base = 0; 429 cb_conf.bo = accel_state->dst_obj.bo; 430 cb_conf.surface = accel_state->dst_obj.surface; 431 if (accel_state->dst_obj.bpp == 8) { 432 cb_conf.format = COLOR_8; 433 cb_conf.comp_swap = 3; /* A */ 434 } else if (accel_state->dst_obj.bpp == 16) { 435 cb_conf.format = COLOR_5_6_5; 436 cb_conf.comp_swap = 2; /* RGB */ 437 } else { 438 cb_conf.format = COLOR_8_8_8_8; 439 cb_conf.comp_swap = 1; /* ARGB */ 440 } 441 cb_conf.source_format = 1; 442 cb_conf.blend_clamp = 1; 443 444 /* Render setup */ 445 if (accel_state->planemask & 0x000000ff) 446 cb_conf.pmask |= 4; /* B */ 447 if (accel_state->planemask & 0x0000ff00) 448 cb_conf.pmask |= 2; /* G */ 449 if (accel_state->planemask & 0x00ff0000) 450 cb_conf.pmask |= 1; /* R */ 451 if (accel_state->planemask & 0xff000000) 452 cb_conf.pmask |= 8; /* A */ 453 cb_conf.rop = accel_state->rop; 454 if (accel_state->dst_obj.tiling_flags == 0) 455 cb_conf.array_mode = 0; 456 r600_set_render_target(pScrn, &cb_conf, 
accel_state->dst_obj.domain); 457 458 r600_set_spi(pScrn, (1 - 1), 1); 459 460} 461 462static void 463R600DoCopy(ScrnInfoPtr pScrn) 464{ 465 r600_finish_op(pScrn, 16); 466} 467 468static void 469R600DoCopyVline(PixmapPtr pPix) 470{ 471 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); 472 RADEONInfoPtr info = RADEONPTR(pScrn); 473 struct radeon_accel_state *accel_state = info->accel_state; 474 475 if (accel_state->vsync) 476 r600_cp_wait_vline_sync(pScrn, pPix, 477 accel_state->vline_crtc, 478 accel_state->vline_y1, 479 accel_state->vline_y2); 480 481 r600_finish_op(pScrn, 16); 482} 483 484static void 485R600AppendCopyVertex(ScrnInfoPtr pScrn, 486 int srcX, int srcY, 487 int dstX, int dstY, 488 int w, int h) 489{ 490 RADEONInfoPtr info = RADEONPTR(pScrn); 491 struct radeon_accel_state *accel_state = info->accel_state; 492 float *vb; 493 494 vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16); 495 496 vb[0] = (float)dstX; 497 vb[1] = (float)dstY; 498 vb[2] = (float)srcX; 499 vb[3] = (float)srcY; 500 501 vb[4] = (float)dstX; 502 vb[5] = (float)(dstY + h); 503 vb[6] = (float)srcX; 504 vb[7] = (float)(srcY + h); 505 506 vb[8] = (float)(dstX + w); 507 vb[9] = (float)(dstY + h); 508 vb[10] = (float)(srcX + w); 509 vb[11] = (float)(srcY + h); 510 511 radeon_vbo_commit(pScrn, &accel_state->vbo); 512} 513 514static Bool 515R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, 516 int xdir, int ydir, 517 int rop, 518 Pixel planemask) 519{ 520 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen); 521 RADEONInfoPtr info = RADEONPTR(pScrn); 522 struct radeon_accel_state *accel_state = info->accel_state; 523 struct r600_accel_object src_obj, dst_obj; 524 525 if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel)) 526 RADEON_FALLBACK(("R600CheckDatatype src failed\n")); 527 if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel)) 528 RADEON_FALLBACK(("R600CheckDatatype dst failed\n")); 529 if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel)) 530 RADEON_FALLBACK(("Invalid 
planemask\n")); 531 532 dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); 533 src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); 534 535 accel_state->same_surface = FALSE; 536 537 src_obj.bo = radeon_get_pixmap_bo(pSrc); 538 dst_obj.bo = radeon_get_pixmap_bo(pDst); 539 dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst); 540 src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc); 541 src_obj.surface = radeon_get_pixmap_surface(pSrc); 542 dst_obj.surface = radeon_get_pixmap_surface(pDst); 543 if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst)) 544 accel_state->same_surface = TRUE; 545 546 src_obj.width = pSrc->drawable.width; 547 src_obj.height = pSrc->drawable.height; 548 src_obj.bpp = pSrc->drawable.bitsPerPixel; 549 src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; 550 551 dst_obj.width = pDst->drawable.width; 552 dst_obj.height = pDst->drawable.height; 553 dst_obj.bpp = pDst->drawable.bitsPerPixel; 554 if (radeon_get_pixmap_shared(pDst) == TRUE) { 555 dst_obj.domain = RADEON_GEM_DOMAIN_GTT; 556 } else 557 dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 558 559 if (!R600SetAccelState(pScrn, 560 &src_obj, 561 NULL, 562 &dst_obj, 563 accel_state->copy_vs_offset, accel_state->copy_ps_offset, 564 rop, planemask)) 565 return FALSE; 566 567 if (accel_state->same_surface == TRUE) { 568 unsigned long size = accel_state->dst_obj.surface->bo_size; 569 unsigned long align = accel_state->dst_obj.surface->bo_alignment; 570 571 if (accel_state->copy_area_bo) { 572 radeon_bo_unref(accel_state->copy_area_bo); 573 accel_state->copy_area_bo = NULL; 574 } 575 accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, align, 576 RADEON_GEM_DOMAIN_VRAM, 577 0); 578 if (accel_state->copy_area_bo == NULL) 579 RADEON_FALLBACK(("temp copy surface alloc failed\n")); 580 581 radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo, 582 0, RADEON_GEM_DOMAIN_VRAM); 583 if 
(radeon_cs_space_check(info->cs)) { 584 radeon_bo_unref(accel_state->copy_area_bo); 585 accel_state->copy_area_bo = NULL; 586 return FALSE; 587 } 588 accel_state->copy_area = (void*)accel_state->copy_area_bo; 589 } else 590 R600DoPrepareCopy(pScrn); 591 592 if (accel_state->vsync) 593 RADEONVlineHelperClear(pScrn); 594 595 accel_state->dst_pix = pDst; 596 accel_state->src_pix = pSrc; 597 accel_state->xdir = xdir; 598 accel_state->ydir = ydir; 599 600 return TRUE; 601} 602 603static void 604R600DoneCopy(PixmapPtr pDst) 605{ 606 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen); 607 RADEONInfoPtr info = RADEONPTR(pScrn); 608 struct radeon_accel_state *accel_state = info->accel_state; 609 610 if (!accel_state->same_surface) 611 R600DoCopyVline(pDst); 612 613 if (accel_state->copy_area) { 614 accel_state->copy_area = NULL; 615 } 616 617} 618 619static void 620R600Copy(PixmapPtr pDst, 621 int srcX, int srcY, 622 int dstX, int dstY, 623 int w, int h) 624{ 625 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen); 626 RADEONInfoPtr info = RADEONPTR(pScrn); 627 struct radeon_accel_state *accel_state = info->accel_state; 628 629 if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY)) 630 return; 631 632 if (CS_FULL(info->cs)) { 633 R600DoneCopy(info->accel_state->dst_pix); 634 radeon_cs_flush_indirect(pScrn); 635 R600PrepareCopy(accel_state->src_pix, 636 accel_state->dst_pix, 637 accel_state->xdir, 638 accel_state->ydir, 639 accel_state->rop, 640 accel_state->planemask); 641 } 642 643 if (accel_state->vsync) 644 RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h); 645 646 if (accel_state->same_surface && 647 (srcX + w <= dstX || dstX + w <= srcX || srcY + h <= dstY || dstY + h <= srcY)) { 648 R600DoPrepareCopy(pScrn); 649 R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); 650 R600DoCopyVline(pDst); 651 } else if (accel_state->same_surface && accel_state->copy_area) { 652 uint32_t orig_dst_domain = accel_state->dst_obj.domain; 653 
uint32_t orig_src_domain = accel_state->src_obj[0].domain; 654 uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags; 655 uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags; 656 struct radeon_bo *orig_bo = accel_state->dst_obj.bo; 657 int orig_rop = accel_state->rop; 658 659 /* src to tmp */ 660 accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 661 accel_state->dst_obj.bo = accel_state->copy_area_bo; 662 accel_state->dst_obj.tiling_flags = 0; 663 accel_state->rop = 3; 664 R600DoPrepareCopy(pScrn); 665 R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); 666 R600DoCopy(pScrn); 667 668 /* tmp to dst */ 669 accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM; 670 accel_state->src_obj[0].bo = accel_state->copy_area_bo; 671 accel_state->src_obj[0].tiling_flags = 0; 672 accel_state->dst_obj.domain = orig_dst_domain; 673 accel_state->dst_obj.bo = orig_bo; 674 accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags; 675 accel_state->rop = orig_rop; 676 R600DoPrepareCopy(pScrn); 677 R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h); 678 R600DoCopyVline(pDst); 679 680 /* restore state */ 681 accel_state->src_obj[0].domain = orig_src_domain; 682 accel_state->src_obj[0].bo = orig_bo; 683 accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags; 684 } else 685 R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); 686 687} 688 689struct blendinfo { 690 Bool dst_alpha; 691 Bool src_alpha; 692 uint32_t blend_cntl; 693}; 694 695static struct blendinfo R600BlendOp[] = { 696 /* Clear */ 697 {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 698 /* Src */ 699 {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 700 /* Dst */ 701 {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, 702 /* Over */ 703 {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 704 /* OverReverse */ 705 
{1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, 706 /* In */ 707 {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 708 /* InReverse */ 709 {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 710 /* Out */ 711 {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 712 /* OutReverse */ 713 {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 714 /* Atop */ 715 {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 716 /* AtopReverse */ 717 {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 718 /* Xor */ 719 {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 720 /* Add */ 721 {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, 722}; 723 724struct formatinfo { 725 unsigned int fmt; 726 uint32_t card_fmt; 727}; 728 729static struct formatinfo R600TexFormats[] = { 730 {PICT_a8r8g8b8, FMT_8_8_8_8}, 731 {PICT_x8r8g8b8, FMT_8_8_8_8}, 732 {PICT_a8b8g8r8, FMT_8_8_8_8}, 733 {PICT_x8b8g8r8, FMT_8_8_8_8}, 734 {PICT_b8g8r8a8, FMT_8_8_8_8}, 735 {PICT_b8g8r8x8, FMT_8_8_8_8}, 736 {PICT_r5g6b5, FMT_5_6_5}, 737 {PICT_a1r5g5b5, FMT_1_5_5_5}, 738 {PICT_x1r5g5b5, FMT_1_5_5_5}, 739 {PICT_a8, FMT_8}, 740}; 741 742static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format) 743{ 744 uint32_t sblend, dblend; 745 746 sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask; 747 dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask; 748 749 /* If there's no dst alpha channel, adjust the blend op so that we'll treat 750 * it as always 1. 
751 */ 752 if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) { 753 if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift)) 754 sblend = (BLEND_ONE << COLOR_SRCBLEND_shift); 755 else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift)) 756 sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift); 757 } 758 759 /* If the source alpha is being used, then we should only be in a case where 760 * the source blend factor is 0, and the source blend value is the mask 761 * channels multiplied by the source picture's alpha. 762 */ 763 if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) { 764 if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) { 765 dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift); 766 } else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) { 767 dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift); 768 } 769 } 770 771 return sblend | dblend; 772} 773 774static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format) 775{ 776 switch (pDstPicture->format) { 777 case PICT_a8r8g8b8: 778 case PICT_x8r8g8b8: 779 case PICT_a8b8g8r8: 780 case PICT_x8b8g8r8: 781 case PICT_b8g8r8a8: 782 case PICT_b8g8r8x8: 783 *dst_format = COLOR_8_8_8_8; 784 break; 785 case PICT_r5g6b5: 786 *dst_format = COLOR_5_6_5; 787 break; 788 case PICT_a1r5g5b5: 789 case PICT_x1r5g5b5: 790 *dst_format = COLOR_1_5_5_5; 791 break; 792 case PICT_a8: 793 *dst_format = COLOR_8; 794 break; 795 default: 796 RADEON_FALLBACK(("Unsupported dest format 0x%x\n", 797 (int)pDstPicture->format)); 798 } 799 return TRUE; 800} 801 802static Bool R600CheckCompositeTexture(PicturePtr pPict, 803 PicturePtr pDstPict, 804 int op, 805 int unit) 806{ 807 unsigned int repeatType = pPict->repeat ? 
pPict->repeatType : RepeatNone; 808 unsigned int i; 809 810 for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) { 811 if (R600TexFormats[i].fmt == pPict->format) 812 break; 813 } 814 if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0])) 815 RADEON_FALLBACK(("Unsupported picture format 0x%x\n", 816 (int)pPict->format)); 817 818 if (pPict->filter != PictFilterNearest && 819 pPict->filter != PictFilterBilinear) 820 RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter)); 821 822 /* for REPEAT_NONE, Render semantics are that sampling outside the source 823 * picture results in alpha=0 pixels. We can implement this with a border color 824 * *if* our source texture has an alpha channel, otherwise we need to fall 825 * back. If we're not transformed then we hope that upper layers have clipped 826 * rendering to the bounds of the source drawable, in which case it doesn't 827 * matter. I have not, however, verified that the X server always does such 828 * clipping. 
829 */ 830 /* FIXME R6xx */ 831 if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) { 832 if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0))) 833 RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n")); 834 } 835 836 if (!radeon_transform_is_affine_or_scaled(pPict->transform)) 837 RADEON_FALLBACK(("non-affine transforms not supported\n")); 838 839 return TRUE; 840} 841 842static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, 843 int unit) 844{ 845 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); 846 RADEONInfoPtr info = RADEONPTR(pScrn); 847 struct radeon_accel_state *accel_state = info->accel_state; 848 unsigned int repeatType; 849 unsigned int i; 850 tex_resource_t tex_res; 851 tex_sampler_t tex_samp; 852 int pix_r, pix_g, pix_b, pix_a; 853 float vs_alu_consts[8]; 854 855 CLEAR (tex_res); 856 CLEAR (tex_samp); 857 858 for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) { 859 if (R600TexFormats[i].fmt == pPict->format) 860 break; 861 } 862 863 /* Texture */ 864 if (pPict->pDrawable) { 865 tex_res.w = pPict->pDrawable->width; 866 tex_res.h = pPict->pDrawable->height; 867 repeatType = pPict->repeat ? 
pPict->repeatType : RepeatNone; 868 } else { 869 tex_res.w = 1; 870 tex_res.h = 1; 871 repeatType = RepeatNormal; 872 } 873 tex_res.id = unit; 874 tex_res.pitch = accel_state->src_obj[unit].pitch; 875 tex_res.depth = 0; 876 tex_res.dim = SQ_TEX_DIM_2D; 877 tex_res.base = 0; 878 tex_res.mip_base = 0; 879 tex_res.size = accel_state->src_size[unit]; 880 tex_res.format = R600TexFormats[i].card_fmt; 881 tex_res.bo = accel_state->src_obj[unit].bo; 882 tex_res.mip_bo = accel_state->src_obj[unit].bo; 883 tex_res.surface = accel_state->src_obj[unit].surface; 884 tex_res.request_size = 1; 885 886#if X_BYTE_ORDER == X_BIG_ENDIAN 887 switch (accel_state->src_obj[unit].bpp) { 888 case 16: 889 tex_res.endian = SQ_ENDIAN_8IN16; 890 break; 891 case 32: 892 tex_res.endian = SQ_ENDIAN_8IN32; 893 break; 894 default : 895 break; 896 } 897#endif 898 899 /* component swizzles */ 900 switch (pPict->format) { 901 case PICT_a1r5g5b5: 902 case PICT_a8r8g8b8: 903 pix_r = SQ_SEL_Z; /* R */ 904 pix_g = SQ_SEL_Y; /* G */ 905 pix_b = SQ_SEL_X; /* B */ 906 pix_a = SQ_SEL_W; /* A */ 907 break; 908 case PICT_a8b8g8r8: 909 pix_r = SQ_SEL_X; /* R */ 910 pix_g = SQ_SEL_Y; /* G */ 911 pix_b = SQ_SEL_Z; /* B */ 912 pix_a = SQ_SEL_W; /* A */ 913 break; 914 case PICT_x8b8g8r8: 915 pix_r = SQ_SEL_X; /* R */ 916 pix_g = SQ_SEL_Y; /* G */ 917 pix_b = SQ_SEL_Z; /* B */ 918 pix_a = SQ_SEL_1; /* A */ 919 break; 920 case PICT_b8g8r8a8: 921 pix_r = SQ_SEL_Y; /* R */ 922 pix_g = SQ_SEL_Z; /* G */ 923 pix_b = SQ_SEL_W; /* B */ 924 pix_a = SQ_SEL_X; /* A */ 925 break; 926 case PICT_b8g8r8x8: 927 pix_r = SQ_SEL_Y; /* R */ 928 pix_g = SQ_SEL_Z; /* G */ 929 pix_b = SQ_SEL_W; /* B */ 930 pix_a = SQ_SEL_1; /* A */ 931 break; 932 case PICT_x1r5g5b5: 933 case PICT_x8r8g8b8: 934 case PICT_r5g6b5: 935 pix_r = SQ_SEL_Z; /* R */ 936 pix_g = SQ_SEL_Y; /* G */ 937 pix_b = SQ_SEL_X; /* B */ 938 pix_a = SQ_SEL_1; /* A */ 939 break; 940 case PICT_a8: 941 pix_r = SQ_SEL_0; /* R */ 942 pix_g = SQ_SEL_0; /* G */ 943 pix_b = SQ_SEL_0; 
/* B */ 944 pix_a = SQ_SEL_X; /* A */ 945 break; 946 default: 947 RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format)); 948 } 949 950 if (unit == 0) { 951 if (!accel_state->msk_pic) { 952 if (PICT_FORMAT_RGB(pPict->format) == 0) { 953 pix_r = SQ_SEL_0; 954 pix_g = SQ_SEL_0; 955 pix_b = SQ_SEL_0; 956 } 957 958 if (PICT_FORMAT_A(pPict->format) == 0) 959 pix_a = SQ_SEL_1; 960 } else { 961 if (accel_state->component_alpha) { 962 if (accel_state->src_alpha) { 963 if (PICT_FORMAT_A(pPict->format) == 0) { 964 pix_r = SQ_SEL_1; 965 pix_g = SQ_SEL_1; 966 pix_b = SQ_SEL_1; 967 pix_a = SQ_SEL_1; 968 } else { 969 pix_r = pix_a; 970 pix_g = pix_a; 971 pix_b = pix_a; 972 } 973 } else { 974 if (PICT_FORMAT_A(pPict->format) == 0) 975 pix_a = SQ_SEL_1; 976 } 977 } else { 978 if (PICT_FORMAT_RGB(pPict->format) == 0) { 979 pix_r = SQ_SEL_0; 980 pix_g = SQ_SEL_0; 981 pix_b = SQ_SEL_0; 982 } 983 984 if (PICT_FORMAT_A(pPict->format) == 0) 985 pix_a = SQ_SEL_1; 986 } 987 } 988 } else { 989 if (accel_state->component_alpha) { 990 if (PICT_FORMAT_A(pPict->format) == 0) 991 pix_a = SQ_SEL_1; 992 } else { 993 if (PICT_FORMAT_A(pPict->format) == 0) { 994 pix_r = SQ_SEL_1; 995 pix_g = SQ_SEL_1; 996 pix_b = SQ_SEL_1; 997 pix_a = SQ_SEL_1; 998 } else { 999 pix_r = pix_a; 1000 pix_g = pix_a; 1001 pix_b = pix_a; 1002 } 1003 } 1004 } 1005 1006 tex_res.dst_sel_x = pix_r; /* R */ 1007 tex_res.dst_sel_y = pix_g; /* G */ 1008 tex_res.dst_sel_z = pix_b; /* B */ 1009 tex_res.dst_sel_w = pix_a; /* A */ 1010 1011 tex_res.base_level = 0; 1012 tex_res.last_level = 0; 1013 tex_res.perf_modulation = 0; 1014 if (accel_state->src_obj[unit].tiling_flags == 0) 1015 tex_res.tile_mode = 1; 1016 r600_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[unit].domain); 1017 1018 tex_samp.id = unit; 1019 tex_samp.border_color = SQ_TEX_BORDER_COLOR_TRANS_BLACK; 1020 1021 switch (repeatType) { 1022 case RepeatNormal: 1023 tex_samp.clamp_x = SQ_TEX_WRAP; 1024 tex_samp.clamp_y = SQ_TEX_WRAP; 1025 break; 1026 case 
RepeatPad: 1027 tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; 1028 tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; 1029 break; 1030 case RepeatReflect: 1031 tex_samp.clamp_x = SQ_TEX_MIRROR; 1032 tex_samp.clamp_y = SQ_TEX_MIRROR; 1033 break; 1034 case RepeatNone: 1035 tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER; 1036 tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER; 1037 break; 1038 default: 1039 RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType)); 1040 } 1041 1042 switch (pPict->filter) { 1043 case PictFilterNearest: 1044 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; 1045 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; 1046 tex_samp.mc_coord_truncate = 1; 1047 break; 1048 case PictFilterBilinear: 1049 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; 1050 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; 1051 break; 1052 default: 1053 RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); 1054 } 1055 1056 tex_samp.clamp_z = SQ_TEX_WRAP; 1057 tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 1058 tex_samp.mip_filter = 0; /* no mipmap */ 1059 r600_set_tex_sampler(pScrn, &tex_samp); 1060 1061 if (pPict->transform != 0) { 1062 accel_state->is_transform[unit] = TRUE; 1063 accel_state->transform[unit] = pPict->transform; 1064 1065 vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]); 1066 vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]); 1067 vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]); 1068 vs_alu_consts[3] = 1.0 / tex_res.w; 1069 1070 vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]); 1071 vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]); 1072 vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]); 1073 vs_alu_consts[7] = 1.0 / tex_res.h; 1074 } else { 1075 accel_state->is_transform[unit] = FALSE; 1076 1077 vs_alu_consts[0] = 1.0; 1078 vs_alu_consts[1] = 0.0; 1079 vs_alu_consts[2] = 0.0; 1080 vs_alu_consts[3] = 1.0 / tex_res.w; 1081 1082 vs_alu_consts[4] = 0.0; 1083 vs_alu_consts[5] = 1.0; 1084 
            vs_alu_consts[6] = 0.0;
            vs_alu_consts[7] = 1.0 / tex_res.h;
    }

    /* VS alu constants */
    r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_vs + (unit * 2),
                        sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);

    return TRUE;
}

/*
 * EXA CheckComposite hook: decide whether a Render composite request can be
 * accelerated.  RADEON_FALLBACK() returns FALSE with a reason when:
 *  - the op is outside the R600BlendOp table,
 *  - any picture is 8192 or larger in either dimension,
 *  - a drawable-less source/mask is not a solid fill (gradients unsupported),
 *  - component-alpha masking would need both source alpha and source value,
 *  - the per-picture texture check or destination format check fails.
 *
 * NOTE(review): the checks reject width/height == 8192 while R600DrawInit
 * advertises maxX/maxY = 8192 — confirm whether 8192 itself is meant to be
 * legal.
 */
static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
                               PicturePtr pDstPicture)
{
    uint32_t tmp1;
    PixmapPtr pSrcPixmap, pDstPixmap;

    /* Check for unsupported compositing operations. */
    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
        RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));

    if (pSrcPicture->pDrawable) {
        pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);

        if (pSrcPixmap->drawable.width >= 8192 ||
            pSrcPixmap->drawable.height >= 8192) {
            RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
                             pSrcPixmap->drawable.width,
                             pSrcPixmap->drawable.height));
        }

        if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
            return FALSE;
    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
        RADEON_FALLBACK(("Gradient pictures not supported yet\n"));

    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);

    if (pDstPixmap->drawable.width >= 8192 ||
        pDstPixmap->drawable.height >= 8192) {
        RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
                         pDstPixmap->drawable.width,
                         pDstPixmap->drawable.height));
    }

    if (pMaskPicture) {
        PixmapPtr pMaskPixmap;

        if (pMaskPicture->pDrawable) {
            pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);

            if (pMaskPixmap->drawable.width >= 8192 ||
                pMaskPixmap->drawable.height >= 8192) {
                RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
                                 pMaskPixmap->drawable.width,
                                 pMaskPixmap->drawable.height));
            }

            if (pMaskPicture->componentAlpha) {
                /* Check if it's component alpha that relies on a source alpha and
                 * on the source value. We can only get one of those into the
                 * single source value that we get to blend with.
                 */
                if (R600BlendOp[op].src_alpha &&
                    (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
                    (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
                    RADEON_FALLBACK(("Component alpha not supported with source "
                                     "alpha and source value blending.\n"));
                }
            }

            if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
                return FALSE;
        } else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
            RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
    }

    /* Only checking that the destination format is supported; the value
     * itself is recomputed in R600PrepareComposite. */
    if (!R600GetDestFormat(pDstPicture, &tmp1))
        return FALSE;

    return TRUE;

}

/*
 * EXA PrepareComposite hook: bind source/mask/destination and emit all state
 * R600Composite relies on (scissors, textures, shaders, bool constants,
 * blend/render target).  Drawable-less solid-fill pictures are materialized
 * as scratch pixmaps via RADEONSolidPixmap(); R600DoneComposite destroys
 * those scratch pixmaps afterwards.
 *
 * NOTE(review): if R600GetDestFormat or R600TextureSetup fails below, the
 * solid scratch pixmaps created here are not destroyed on that path and
 * DoneComposite is never reached — looks like a pixmap leak; confirm.
 */
static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
                                 PicturePtr pMaskPicture, PicturePtr pDstPicture,
                                 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
{
    ScreenPtr pScreen = pDst->drawable.pScreen;
    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    uint32_t dst_format;
    cb_config_t cb_conf;
    shader_config_t vs_conf, ps_conf;
    struct r600_accel_object src_obj, mask_obj, dst_obj;

    if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8))
        return FALSE;

    /* No source pixmap: solid-fill picture (guaranteed by CheckComposite). */
    if (!pSrc) {
        pSrc = RADEONSolidPixmap(pScreen, pSrcPicture->pSourcePict->solidFill.color);
        if (!pSrc)
            RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
    }

    dst_obj.bo = radeon_get_pixmap_bo(pDst);
    src_obj.bo = radeon_get_pixmap_bo(pSrc);
    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
    dst_obj.surface = radeon_get_pixmap_surface(pDst);
    src_obj.surface = radeon_get_pixmap_surface(pSrc);

    /* Pitches are tracked in pixels, not bytes. */
    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);

    src_obj.width = pSrc->drawable.width;
    src_obj.height = pSrc->drawable.height;
    src_obj.bpp = pSrc->drawable.bitsPerPixel;
    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;

    dst_obj.width = pDst->drawable.width;
    dst_obj.height = pDst->drawable.height;
    dst_obj.bpp = pDst->drawable.bitsPerPixel;
    /* Shared (prime) pixmaps live in GTT; normal render targets in VRAM. */
    if (radeon_get_pixmap_shared(pDst) == TRUE)
        dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
    else
        dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;

    if (pMaskPicture) {
        if (!pMask) {
            /* Solid-fill mask: build a scratch pixmap for it too. */
            pMask = RADEONSolidPixmap(pScreen, pMaskPicture->pSourcePict->solidFill.color);
            if (!pMask) {
                /* Don't leak the scratch source created above. */
                if (!pSrcPicture->pDrawable)
                    pScreen->DestroyPixmap(pSrc);
                RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
            }
        }

        mask_obj.bo = radeon_get_pixmap_bo(pMask);
        mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
        mask_obj.surface = radeon_get_pixmap_surface(pMask);

        mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);

        mask_obj.width = pMask->drawable.width;
        mask_obj.height = pMask->drawable.height;
        mask_obj.bpp = pMask->drawable.bitsPerPixel;
        mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;

        if (!R600SetAccelState(pScrn,
                               &src_obj,
                               &mask_obj,
                               &dst_obj,
                               accel_state->comp_vs_offset, accel_state->comp_ps_offset,
                               3, 0xffffffff))
            return FALSE;

        accel_state->msk_pic = pMaskPicture;
        /* src_alpha selects the shader variant that feeds source alpha into
         * the component-alpha blend (see R600BlendOp table). */
        if (pMaskPicture->componentAlpha) {
            accel_state->component_alpha = TRUE;
            if (R600BlendOp[op].src_alpha)
                accel_state->src_alpha = TRUE;
            else
                accel_state->src_alpha = FALSE;
        } else {
            accel_state->component_alpha = FALSE;
            accel_state->src_alpha = FALSE;
        }
    } else {
        if (!R600SetAccelState(pScrn,
                               &src_obj,
                               NULL,
                               &dst_obj,
                               accel_state->comp_vs_offset, accel_state->comp_ps_offset,
                               3, 0xffffffff))
            return FALSE;

        accel_state->msk_pic = NULL;
        accel_state->component_alpha = FALSE;
        accel_state->src_alpha = FALSE;
    }

    if (!R600GetDestFormat(pDstPicture, &dst_format))
        return FALSE;

    CLEAR (cb_conf);
    CLEAR (vs_conf);
    CLEAR (ps_conf);

    /* Reserve vertex space: 24 bytes/vertex with a mask (pos + 2 texcoords),
     * 16 without (pos + 1 texcoord). */
    if (pMask)
        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
    else
        radeon_vbo_check(pScrn, &accel_state->vbo, 16);

    radeon_cp_start(pScrn);

    r600_set_default_state(pScrn);

    /* Open all scissors to the full destination surface. */
    r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
    r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
    r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);

    /* Texture unit 0 = source, unit 1 = mask. */
    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
        R600IBDiscard(pScrn);
        return FALSE;
    }

    if (pMask) {
        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
            R600IBDiscard(pScrn);
            return FALSE;
        }
    } else
        accel_state->is_transform[1] = FALSE;

    /* Bool constant 0 tells the comp shaders whether a mask is present. */
    if (pMask) {
        r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0));
        r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
    } else {
        r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0));
        r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
    }

    /* Shader */
    vs_conf.shader_addr = accel_state->vs_mc_addr;
    vs_conf.shader_size = accel_state->vs_size;
    vs_conf.num_gprs = 5;
    vs_conf.stack_size = 1;
    vs_conf.bo = accel_state->shaders_bo;
    r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);

    ps_conf.shader_addr = accel_state->ps_mc_addr;
    ps_conf.shader_size = accel_state->ps_size;
    ps_conf.num_gprs = 3;
    ps_conf.stack_size = 1;
    ps_conf.uncached_first_inst = 1;
    ps_conf.clamp_consts = 0;
    ps_conf.export_mode = 2;
    ps_conf.bo = accel_state->shaders_bo;
    r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);

    /* Color buffer (render target) configuration. */
    cb_conf.id = 0;
    cb_conf.w = accel_state->dst_obj.pitch;
    cb_conf.h = accel_state->dst_obj.height;
    cb_conf.base = 0;
    cb_conf.format = dst_format;
    cb_conf.bo = accel_state->dst_obj.bo;
    cb_conf.surface = accel_state->dst_obj.surface;

    /* Map the Render picture format onto the CB component swap. */
    switch (pDstPicture->format) {
    case PICT_a8r8g8b8:
    case PICT_x8r8g8b8:
    case PICT_a1r5g5b5:
    case PICT_x1r5g5b5:
    default:
        cb_conf.comp_swap = 1; /* ARGB */
        break;
    case PICT_a8b8g8r8:
    case PICT_x8b8g8r8:
        cb_conf.comp_swap = 0; /* ABGR */
        break;
    case PICT_b8g8r8a8:
    case PICT_b8g8r8x8:
        cb_conf.comp_swap = 3; /* BGRA */
        break;
    case PICT_r5g6b5:
        cb_conf.comp_swap = 2; /* RGB */
        break;
    case PICT_a8:
        cb_conf.comp_swap = 3; /* A */
        break;
    }
    cb_conf.source_format = 1;
    cb_conf.blend_clamp = 1;
    /* Blend state comes from the op / mask / dest-format combination. */
    cb_conf.blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
    cb_conf.blend_enable = 1;
    cb_conf.pmask = 0xf;
    cb_conf.rop = 3;
    if (accel_state->dst_obj.tiling_flags == 0)
        cb_conf.array_mode = 0;
#if X_BYTE_ORDER == X_BIG_ENDIAN
    switch (dst_obj.bpp) {
    case 16:
        cb_conf.endian = ENDIAN_8IN16;
        break;
    case 32:
        cb_conf.endian = ENDIAN_8IN32;
        break;
    default:
        break;
    }
#endif
    r600_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);

    /* SPI: number of interpolated parameter sets (1 texcoord, or 2 with mask). */
    if (pMask)
        r600_set_spi(pScrn, (2 - 1), 2);
    else
        r600_set_spi(pScrn, (1 - 1), 1);

    if (accel_state->vsync)
        RADEONVlineHelperClear(pScrn);

    /* Stash everything R600Composite needs to re-prepare after a CS flush. */
    accel_state->composite_op = op;
    accel_state->dst_pic = pDstPicture;
    accel_state->src_pic = pSrcPicture;
    accel_state->dst_pix = pDst;
    accel_state->msk_pix = pMask;
    accel_state->src_pix = pSrc;

    return TRUE;
}

/*
 * Flush the queued composite vertices: wait for the vline window when vsync
 * is on, then finish the draw op with the vertex size in use (24 bytes with
 * a mask, 16 without).
 */
static void R600FinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst,
                                struct radeon_accel_state *accel_state)
{
    int vtx_size;

    if (accel_state->vsync)
        r600_cp_wait_vline_sync(pScrn, pDst,
                                accel_state->vline_crtc,
                                accel_state->vline_y1,
                                accel_state->vline_y2);

    vtx_size = accel_state->msk_pic ? 24 : 16;

    r600_finish_op(pScrn, vtx_size);
}

/*
 * EXA DoneComposite hook: finish the op and destroy any scratch pixmaps that
 * were created in PrepareComposite for drawable-less (solid) pictures.
 */
static void R600DoneComposite(PixmapPtr pDst)
{
    ScreenPtr pScreen = pDst->drawable.pScreen;
    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;

    R600FinishComposite(pScrn, pDst, accel_state);

    if (!accel_state->src_pic->pDrawable)
        pScreen->DestroyPixmap(accel_state->src_pix);

    if (accel_state->msk_pic && !accel_state->msk_pic->pDrawable)
        pScreen->DestroyPixmap(accel_state->msk_pix);
}

/*
 * EXA Composite hook: queue one composited rectangle.  Emits three vertices
 * (top-left, bottom-left, bottom-right — vbo.verts_per_op is 3); each vertex
 * is dest position plus source texcoord, plus mask texcoord when a mask is
 * bound.  If the command stream is full, the current op is finished, the CS
 * flushed, and the composite state re-emitted from the values stashed in
 * accel_state.
 */
static void R600Composite(PixmapPtr pDst,
                          int srcX, int srcY,
                          int maskX, int maskY,
                          int dstX, int dstY,
                          int w, int h)
{
    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    float *vb;

    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */

    if (CS_FULL(info->cs)) {
        R600FinishComposite(pScrn, pDst, info->accel_state);
        radeon_cs_flush_indirect(pScrn);
        R600PrepareComposite(info->accel_state->composite_op,
                             info->accel_state->src_pic,
                             info->accel_state->msk_pic,
                             info->accel_state->dst_pic,
                             info->accel_state->src_pix,
                             info->accel_state->msk_pix,
                             info->accel_state->dst_pix);
    }

    if (accel_state->vsync)
        RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);

    if (accel_state->msk_pic) {

        /* 3 vertices * 6 floats (dst x/y, src x/y, mask x/y). */
        vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);

        vb[0] = (float)dstX;
        vb[1] = (float)dstY;
        vb[2] = (float)srcX;
        vb[3] = (float)srcY;
        vb[4] = (float)maskX;
        vb[5] = (float)maskY;

        vb[6] = (float)dstX;
        vb[7] = (float)(dstY + h);
        vb[8] = (float)srcX;
        vb[9] = (float)(srcY + h);
        vb[10] = (float)maskX;
        vb[11] = (float)(maskY + h);

        vb[12] = (float)(dstX + w);
        vb[13] = (float)(dstY + h);
        vb[14] = (float)(srcX + w);
        vb[15] = (float)(srcY + h);
        vb[16] = (float)(maskX + w);
        vb[17] = (float)(maskY + h);

        radeon_vbo_commit(pScrn, &accel_state->vbo);

    } else {

        /* 3 vertices * 4 floats (dst x/y, src x/y). */
        vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);

        vb[0] = (float)dstX;
        vb[1] = (float)dstY;
        vb[2] = (float)srcX;
        vb[3] = (float)srcY;

        vb[4] = (float)dstX;
        vb[5] = (float)(dstY + h);
        vb[6] = (float)srcX;
        vb[7] = (float)(srcY + h);

        vb[8] = (float)(dstX + w);
        vb[9] = (float)(dstY + h);
        vb[10] = (float)(srcX + w);
        vb[11] = (float)(srcY + h);

        radeon_vbo_commit(pScrn, &accel_state->vbo);
    }


}

/*
 * EXA UploadToScreen hook: copy a w*h region from system memory into pDst.
 * Fast path: if the destination is untiled and known idle outside VRAM (or
 * fast FB access is available), memcpy straight into the mapped pixmap BO.
 * Otherwise stage the data in a GTT scratch BO and blit it to the pixmap
 * with the 3D engine (R600DoPrepareCopy/R600DoCopyVline).
 */
static Bool
R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
                     char *src, int src_pitch)
{
    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    struct radeon_exa_pixmap_priv *driver_priv;
    struct radeon_bo *scratch = NULL;
    struct radeon_bo *copy_dst;
    unsigned char *dst;
    unsigned size;
    uint32_t dst_domain;
    int bpp = pDst->drawable.bitsPerPixel;
    uint32_t scratch_pitch;
    uint32_t copy_pitch;
    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
    int ret;
    Bool flush = TRUE;
    Bool r;
    int i;
    struct r600_accel_object src_obj, dst_obj;
    uint32_t height, base_align;

    if (bpp < 8)
        return FALSE;

    driver_priv = exaGetPixmapDriverPrivate(pDst);
    if (!driver_priv || !driver_priv->bo)
        return FALSE;

    /* If we know the BO won't be busy / in VRAM, don't bother with a scratch */
    copy_dst = driver_priv->bo;
    copy_pitch = pDst->devKind;
    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
        if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
            flush = FALSE;
            if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain) &&
                !(dst_domain & RADEON_GEM_DOMAIN_VRAM))
                goto copy;
        }
        /* use cpu copy for fast fb access */
        if (info->is_fast_fb)
            goto copy;
    }

    /* Scratch path: GTT BO sized/aligned per display-mode requirements. */
    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
    size = scratch_pitch * height * (bpp / 8);
    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
    if (scratch == NULL) {
        goto copy;
    }

    src_obj.pitch = scratch_pitch;
    src_obj.width = w;
    src_obj.height = h;
    src_obj.bpp = bpp;
    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
    src_obj.bo = scratch;
    src_obj.tiling_flags = 0;
    src_obj.surface = NULL;

    dst_obj.pitch = dst_pitch_hw;
    dst_obj.width = pDst->drawable.width;
    dst_obj.height = pDst->drawable.height;
    dst_obj.bpp = bpp;
    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
    dst_obj.bo = radeon_get_pixmap_bo(pDst);
    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
    dst_obj.surface = radeon_get_pixmap_surface(pDst);

    if (!R600SetAccelState(pScrn,
                           &src_obj,
                           NULL,
                           &dst_obj,
                           accel_state->copy_vs_offset, accel_state->copy_ps_offset,
                           3, 0xffffffff)) {
        goto copy;
    }
    copy_dst = scratch;
    copy_pitch = scratch_pitch * (bpp / 8);
    flush = FALSE;

copy:
    if (flush)
        radeon_cs_flush_indirect(pScrn);

    /* CPU copy into either the pixmap BO (direct path, offset by x/y) or
     * the scratch BO (staging path, offset 0). */
    ret = radeon_bo_map(copy_dst, 0);
    if (ret) {
        r = FALSE;
        goto out;
    }
    r = TRUE;
    size = w * bpp / 8;
    dst = copy_dst->ptr;
    if (copy_dst == driver_priv->bo)
        dst += y * copy_pitch + x * bpp / 8;
    for (i = 0; i < h; i++) {
        memcpy(dst + i * copy_pitch, src, size);
        src += src_pitch;
    }
    radeon_bo_unmap(copy_dst);

    if (copy_dst == scratch) {
        if (info->accel_state->vsync)
            RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);

        /* blit from gart to vram */
        R600DoPrepareCopy(pScrn);
        R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
        R600DoCopyVline(pDst);
    }

out:
    if (scratch)
        radeon_bo_unref(scratch);
    return r;
}

/*
 * EXA DownloadFromScreen hook: copy a w*h region of pSrc into system memory.
 * If the pixmap is untiled and not (going to be) in VRAM, read it directly;
 * otherwise blit it into a GTT scratch BO with the 3D engine first and read
 * that back.  A flush is scheduled when the CS may still hold a write to the
 * source BO.
 */
static Bool
R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
                         int h, char *dst, int dst_pitch)
{
    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    struct radeon_exa_pixmap_priv *driver_priv;
    struct radeon_bo *scratch = NULL;
    struct radeon_bo *copy_src;
    unsigned size;
    uint32_t src_domain = 0;
    int bpp = pSrc->drawable.bitsPerPixel;
    uint32_t scratch_pitch;
    uint32_t copy_pitch;
    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
    int ret;
    Bool flush = FALSE;
    Bool r;
    struct r600_accel_object src_obj, dst_obj;
    uint32_t height, base_align;

    if (bpp < 8)
        return FALSE;

    driver_priv = exaGetPixmapDriverPrivate(pSrc);
    if (!driver_priv || !driver_priv->bo)
        return FALSE;

    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
    copy_src = driver_priv->bo;
    copy_pitch = pSrc->devKind;
    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
        if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
            src_domain = radeon_bo_get_src_domain(driver_priv->bo);
            if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
                (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
                src_domain = 0;
            else /* A write may be scheduled */
                flush = TRUE;
        }

        if (!src_domain)
            radeon_bo_is_busy(driver_priv->bo, &src_domain);

        /* Only take the direct-read path when the BO is purely outside VRAM. */
        if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
            goto copy;
    }

    /* Scratch path: GTT BO sized/aligned per display-mode requirements. */
    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
    size = scratch_pitch * height * (bpp / 8);
    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
    if (scratch == NULL) {
        goto copy;
    }
    /* Make sure the blit's BOs fit in the command stream's space budget
     * before committing to the scratch path. */
    radeon_cs_space_reset_bos(info->cs);
    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
                                      RADEON_GEM_DOMAIN_VRAM, 0);
    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
    ret = radeon_cs_space_check(info->cs);
    if (ret) {
        goto copy;
    }

    src_obj.pitch = src_pitch_hw;
    src_obj.width = pSrc->drawable.width;
    src_obj.height = pSrc->drawable.height;
    src_obj.bpp = bpp;
    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
    src_obj.bo = radeon_get_pixmap_bo(pSrc);
    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
    src_obj.surface = radeon_get_pixmap_surface(pSrc);

    dst_obj.pitch = scratch_pitch;
    dst_obj.width = w;
    dst_obj.height = h;
    dst_obj.bo = scratch;
    dst_obj.bpp = bpp;
    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
    dst_obj.tiling_flags = 0;
    dst_obj.surface = NULL;

    if (!R600SetAccelState(pScrn,
                           &src_obj,
                           NULL,
                           &dst_obj,
                           accel_state->copy_vs_offset, accel_state->copy_ps_offset,
                           3, 0xffffffff)) {
        goto copy;
    }

    /* blit from vram to gart */
    R600DoPrepareCopy(pScrn);
    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
    R600DoCopy(pScrn);
    copy_src = scratch;
    copy_pitch = scratch_pitch * (bpp / 8);
    flush = TRUE;

copy:
    if (flush)
        radeon_cs_flush_indirect(pScrn);

    ret = radeon_bo_map(copy_src, 0);
    if (ret) {
        ErrorF("failed to map pixmap: %d\n", ret);
        r = FALSE;
        goto out;
    }
    r = TRUE;
    /* Direct read starts at (x,y) inside the pixmap; scratch starts at 0. */
    w *= bpp / 8;
    if (copy_src == driver_priv->bo)
        size = y * copy_pitch + x * bpp / 8;
    else
        size = 0;
    while (h--) {
        memcpy(dst, copy_src->ptr + size, w);
        size += copy_pitch;
        dst += dst_pitch;
    }
    radeon_bo_unmap(copy_src);
out:
    if (scratch)
        radeon_bo_unref(scratch);
    return r;
}

/*
 * EXA MarkSync hook: hand out a new monotonically increasing sync marker.
 */
static int
R600MarkSync(ScreenPtr pScreen)
{
    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;

    return ++accel_state->exaSyncMarker;

}

/*
 * EXA WaitMarker hook: record the marker as synced.  No actual wait is
 * performed here — kernel CS submission handles serialization.
 */
static void
R600Sync(ScreenPtr pScreen, int marker)
{
    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;

    if (accel_state->exaMarkerSynced != marker) {
        accel_state->exaMarkerSynced = marker;
    }

}

/*
 * Allocate the VRAM buffer object that holds all the pre-compiled shader
 * programs written by R600LoadShaders.
 */
static Bool
R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;

    /* 512 bytes per shader for now */
    /* 9 slots; R600LoadShaders currently fills 8 (offsets 0..3584). */
    int size = 512 * 9;

    accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
                                             RADEON_GEM_DOMAIN_VRAM, 0);
    if (accel_state->shaders_bo == NULL) {
        ErrorF("Allocating shader failed\n");
        return FALSE;
    }
    return TRUE;
}

/*
 * Write the solid/copy/composite/Xv vertex and pixel shader programs into
 * the shader BO at fixed 512-byte offsets, recording each offset in
 * accel_state for later use by the Prepare* hooks.
 */
Bool
R600LoadShaders(ScrnInfoPtr pScrn)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    RADEONChipFamily ChipSet = info->ChipFamily;
    uint32_t *shader;
    int ret;

    ret = radeon_bo_map(accel_state->shaders_bo, 1);
    if (ret) {
        FatalError("failed to map shader %d\n", ret);
        return FALSE;
    }
    shader = accel_state->shaders_bo->ptr;

    /* solid vs --------------------------------------- */
    accel_state->solid_vs_offset = 0;
    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);

    /* solid ps --------------------------------------- */
    accel_state->solid_ps_offset = 512;
    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);

    /* copy vs --------------------------------------- */
    accel_state->copy_vs_offset = 1024;
    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);

    /* copy ps --------------------------------------- */
    accel_state->copy_ps_offset = 1536;
    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);

    /* comp vs --------------------------------------- */
    accel_state->comp_vs_offset = 2048;
    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);

    /* comp ps --------------------------------------- */
    accel_state->comp_ps_offset = 2560;
    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);

    /* xv vs --------------------------------------- */
    accel_state->xv_vs_offset = 3072;
    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);

    /* xv ps --------------------------------------- */
    accel_state->xv_ps_offset = 3584;
    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);

    radeon_bo_unmap(accel_state->shaders_bo);
    return TRUE;
}

/*
 * Register all R600 EXA hooks, set driver limits (8192x8192, 256-byte
 * pitch/offset alignment), initialize accel/vbo state, and allocate + load
 * the shader programs.  Returns FALSE if EXA or shader setup fails.
 */
Bool
R600DrawInit(ScreenPtr pScreen)
{
    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
    RADEONInfoPtr info = RADEONPTR(pScrn);

    if (info->accel_state->exa == NULL) {
        xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
        return FALSE;
    }

    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;

    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
    info->accel_state->exa->Solid = R600Solid;
    info->accel_state->exa->DoneSolid = R600DoneSolid;

    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
    info->accel_state->exa->Copy = R600Copy;
    info->accel_state->exa->DoneCopy = R600DoneCopy;

    info->accel_state->exa->MarkSync = R600MarkSync;
    info->accel_state->exa->WaitMarker = R600Sync;

    info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
    info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
    info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
    info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
    info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
    info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
    info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 6)
    info->accel_state->exa->SharePixmapBacking = RADEONEXASharePixmapBacking;
    info->accel_state->exa->SetSharedPixmapBacking = RADEONEXASetSharedPixmapBacking;
#endif
    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS | EXA_SUPPORTS_PREPARE_AUX |
        EXA_HANDLES_PIXMAPS | EXA_MIXED_PIXMAPS;
    info->accel_state->exa->pixmapOffsetAlign = 256;
    info->accel_state->exa->pixmapPitchAlign = 256;

    info->accel_state->exa->CheckComposite = R600CheckComposite;
    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
    info->accel_state->exa->Composite = R600Composite;
    info->accel_state->exa->DoneComposite = R600DoneComposite;

    info->accel_state->exa->maxPitchBytes = 32768;
    info->accel_state->exa->maxX = 8192;
    info->accel_state->exa->maxY = 8192;

    /* not supported yet */
    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
        xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
        info->accel_state->vsync = TRUE;
    } else
        info->accel_state->vsync = FALSE;

    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
        free(info->accel_state->exa);
        return FALSE;
    }

    info->accel_state->XInited3D = FALSE;
    info->accel_state->src_obj[0].bo = NULL;
    info->accel_state->src_obj[1].bo = NULL;
    info->accel_state->dst_obj.bo = NULL;
    info->accel_state->copy_area_bo = NULL;
    info->accel_state->vbo.vb_start_op = -1;
    info->accel_state->finish_op = r600_finish_op;
    info->accel_state->vbo.verts_per_op = 3;
    RADEONVlineHelperClear(pScrn);

    radeon_vbo_init_lists(pScrn);

    if (!R600AllocShaders(pScrn, pScreen))
        return FALSE;

    if (!R600LoadShaders(pScrn))
        return FALSE;

    exaMarkSync(pScreen);

    return TRUE;

}