/* r600_exa.c revision 18781e08 */
1/* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 * 23 * Author: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif 30 31#include "xf86.h" 32 33#include "exa.h" 34 35#include "radeon.h" 36#include "radeon_reg.h" 37#include "r600_shader.h" 38#include "r600_reg.h" 39#include "r600_state.h" 40#include "radeon_exa_shared.h" 41#include "radeon_vbo.h" 42 43/* #define SHOW_VERTEXES */ 44 45Bool 46R600SetAccelState(ScrnInfoPtr pScrn, 47 struct r600_accel_object *src0, 48 struct r600_accel_object *src1, 49 struct r600_accel_object *dst, 50 uint32_t vs_offset, uint32_t ps_offset, 51 int rop, Pixel planemask) 52{ 53 RADEONInfoPtr info = RADEONPTR(pScrn); 54 struct radeon_accel_state *accel_state = info->accel_state; 55 uint32_t pitch_align = 0x7; 56 int ret; 57 58 if (src0) { 59 memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object)); 60 accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8); 61 if (src0->surface) 62 accel_state->src_size[0] = src0->surface->bo_size; 63 64 /* bad pitch */ 65 if (accel_state->src_obj[0].pitch & pitch_align) 66 RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch)); 67 68 } else { 69 memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object)); 70 accel_state->src_size[0] = 0; 71 } 72 73 if (src1) { 74 memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object)); 75 accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8); 76 if (src1->surface) { 77 accel_state->src_size[1] = src1->surface->bo_size; 78 } 79 80 /* bad pitch */ 81 if (accel_state->src_obj[1].pitch & pitch_align) 82 RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch)); 83 84 } else { 85 memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object)); 86 accel_state->src_size[1] = 0; 87 } 88 89 if (dst) { 90 memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object)); 91 accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8); 92 if 
(dst->surface) { 93 accel_state->dst_size = dst->surface->bo_size; 94 } else 95 { 96 accel_state->dst_obj.tiling_flags = 0; 97 } 98 if (accel_state->dst_obj.pitch & pitch_align) 99 RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch)); 100 101 } else { 102 memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object)); 103 accel_state->dst_size = 0; 104 } 105 106 if (CS_FULL(info->cs)) 107 radeon_cs_flush_indirect(pScrn); 108 109 accel_state->rop = rop; 110 accel_state->planemask = planemask; 111 112 accel_state->vs_size = 512; 113 accel_state->ps_size = 512; 114 accel_state->vs_mc_addr = vs_offset; 115 accel_state->ps_mc_addr = ps_offset; 116 117 radeon_cs_space_reset_bos(info->cs); 118 radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo, 119 RADEON_GEM_DOMAIN_VRAM, 0); 120 if (accel_state->src_obj[0].bo) 121 radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo, 122 accel_state->src_obj[0].domain, 0); 123 if (accel_state->src_obj[1].bo) 124 radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo, 125 accel_state->src_obj[1].domain, 0); 126 if (accel_state->dst_obj.bo) 127 radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo, 128 0, accel_state->dst_obj.domain); 129 ret = radeon_cs_space_check(info->cs); 130 if (ret) 131 RADEON_FALLBACK(("Not enough RAM to hw accel operation\n")); 132 133 return TRUE; 134} 135 136static Bool 137R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) 138{ 139 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); 140 RADEONInfoPtr info = RADEONPTR(pScrn); 141 struct radeon_accel_state *accel_state = info->accel_state; 142 cb_config_t cb_conf; 143 shader_config_t vs_conf, ps_conf; 144 uint32_t a, r, g, b; 145 float ps_alu_consts[4]; 146 struct r600_accel_object dst; 147 148 if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel)) 149 RADEON_FALLBACK(("R600CheckDatatype failed\n")); 150 if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel)) 151 
RADEON_FALLBACK(("invalid planemask\n")); 152 153 dst.bo = radeon_get_pixmap_bo(pPix); 154 dst.tiling_flags = radeon_get_pixmap_tiling(pPix); 155 dst.surface = radeon_get_pixmap_surface(pPix); 156 157 dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); 158 dst.width = pPix->drawable.width; 159 dst.height = pPix->drawable.height; 160 dst.bpp = pPix->drawable.bitsPerPixel; 161 dst.domain = RADEON_GEM_DOMAIN_VRAM; 162 163 if (!R600SetAccelState(pScrn, 164 NULL, 165 NULL, 166 &dst, 167 accel_state->solid_vs_offset, accel_state->solid_ps_offset, 168 alu, pm)) 169 return FALSE; 170 171 CLEAR (cb_conf); 172 CLEAR (vs_conf); 173 CLEAR (ps_conf); 174 175 radeon_vbo_check(pScrn, &accel_state->vbo, 16); 176 radeon_cp_start(pScrn); 177 178 r600_set_default_state(pScrn); 179 180 r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 181 r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 182 r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 183 184 /* Shader */ 185 vs_conf.shader_addr = accel_state->vs_mc_addr; 186 vs_conf.shader_size = accel_state->vs_size; 187 vs_conf.num_gprs = 2; 188 vs_conf.stack_size = 0; 189 vs_conf.bo = accel_state->shaders_bo; 190 r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM); 191 192 ps_conf.shader_addr = accel_state->ps_mc_addr; 193 ps_conf.shader_size = accel_state->ps_size; 194 ps_conf.num_gprs = 1; 195 ps_conf.stack_size = 0; 196 ps_conf.uncached_first_inst = 1; 197 ps_conf.clamp_consts = 0; 198 ps_conf.export_mode = 2; 199 ps_conf.bo = accel_state->shaders_bo; 200 r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM); 201 202 cb_conf.id = 0; 203 cb_conf.w = accel_state->dst_obj.pitch; 204 cb_conf.h = accel_state->dst_obj.height; 205 cb_conf.base = 0; 206 cb_conf.bo = accel_state->dst_obj.bo; 207 cb_conf.surface = accel_state->dst_obj.surface; 208 209 if (accel_state->dst_obj.bpp == 8) { 
210 cb_conf.format = COLOR_8; 211 cb_conf.comp_swap = 3; /* A */ 212 } else if (accel_state->dst_obj.bpp == 16) { 213 cb_conf.format = COLOR_5_6_5; 214 cb_conf.comp_swap = 2; /* RGB */ 215#if X_BYTE_ORDER == X_BIG_ENDIAN 216 cb_conf.endian = ENDIAN_8IN16; 217#endif 218 } else { 219 cb_conf.format = COLOR_8_8_8_8; 220 cb_conf.comp_swap = 1; /* ARGB */ 221#if X_BYTE_ORDER == X_BIG_ENDIAN 222 cb_conf.endian = ENDIAN_8IN32; 223#endif 224 } 225 cb_conf.source_format = 1; 226 cb_conf.blend_clamp = 1; 227 /* Render setup */ 228 if (accel_state->planemask & 0x000000ff) 229 cb_conf.pmask |= 4; /* B */ 230 if (accel_state->planemask & 0x0000ff00) 231 cb_conf.pmask |= 2; /* G */ 232 if (accel_state->planemask & 0x00ff0000) 233 cb_conf.pmask |= 1; /* R */ 234 if (accel_state->planemask & 0xff000000) 235 cb_conf.pmask |= 8; /* A */ 236 cb_conf.rop = accel_state->rop; 237 if (accel_state->dst_obj.tiling_flags == 0) 238 cb_conf.array_mode = 0; 239 r600_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain); 240 241 r600_set_spi(pScrn, 0, 0); 242 243 /* PS alu constants */ 244 if (accel_state->dst_obj.bpp == 16) { 245 r = (fg >> 11) & 0x1f; 246 g = (fg >> 5) & 0x3f; 247 b = (fg >> 0) & 0x1f; 248 ps_alu_consts[0] = (float)r / 31; /* R */ 249 ps_alu_consts[1] = (float)g / 63; /* G */ 250 ps_alu_consts[2] = (float)b / 31; /* B */ 251 ps_alu_consts[3] = 1.0; /* A */ 252 } else if (accel_state->dst_obj.bpp == 8) { 253 a = (fg >> 0) & 0xff; 254 ps_alu_consts[0] = 0.0; /* R */ 255 ps_alu_consts[1] = 0.0; /* G */ 256 ps_alu_consts[2] = 0.0; /* B */ 257 ps_alu_consts[3] = (float)a / 255; /* A */ 258 } else { 259 a = (fg >> 24) & 0xff; 260 r = (fg >> 16) & 0xff; 261 g = (fg >> 8) & 0xff; 262 b = (fg >> 0) & 0xff; 263 ps_alu_consts[0] = (float)r / 255; /* R */ 264 ps_alu_consts[1] = (float)g / 255; /* G */ 265 ps_alu_consts[2] = (float)b / 255; /* B */ 266 ps_alu_consts[3] = (float)a / 255; /* A */ 267 } 268 r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_ps, 269 sizeof(ps_alu_consts) / 
SQ_ALU_CONSTANT_offset, ps_alu_consts); 270 271 if (accel_state->vsync) 272 RADEONVlineHelperClear(pScrn); 273 274 accel_state->dst_pix = pPix; 275 accel_state->fg = fg; 276 277 return TRUE; 278} 279 280static void 281R600DoneSolid(PixmapPtr pPix) 282{ 283 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); 284 RADEONInfoPtr info = RADEONPTR(pScrn); 285 struct radeon_accel_state *accel_state = info->accel_state; 286 287 if (accel_state->vsync) 288 r600_cp_wait_vline_sync(pScrn, pPix, 289 accel_state->vline_crtc, 290 accel_state->vline_y1, 291 accel_state->vline_y2); 292 293 r600_finish_op(pScrn, 8); 294} 295 296static void 297R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) 298{ 299 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); 300 RADEONInfoPtr info = RADEONPTR(pScrn); 301 struct radeon_accel_state *accel_state = info->accel_state; 302 float *vb; 303 304 if (CS_FULL(info->cs)) { 305 R600DoneSolid(info->accel_state->dst_pix); 306 radeon_cs_flush_indirect(pScrn); 307 R600PrepareSolid(accel_state->dst_pix, 308 accel_state->rop, 309 accel_state->planemask, 310 accel_state->fg); 311 } 312 313 if (accel_state->vsync) 314 RADEONVlineHelperSet(pScrn, x1, y1, x2, y2); 315 316 vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8); 317 318 vb[0] = (float)x1; 319 vb[1] = (float)y1; 320 321 vb[2] = (float)x1; 322 vb[3] = (float)y2; 323 324 vb[4] = (float)x2; 325 vb[5] = (float)y2; 326 327 radeon_vbo_commit(pScrn, &accel_state->vbo); 328} 329 330static void 331R600DoPrepareCopy(ScrnInfoPtr pScrn) 332{ 333 RADEONInfoPtr info = RADEONPTR(pScrn); 334 struct radeon_accel_state *accel_state = info->accel_state; 335 cb_config_t cb_conf; 336 tex_resource_t tex_res; 337 tex_sampler_t tex_samp; 338 shader_config_t vs_conf, ps_conf; 339 340 CLEAR (cb_conf); 341 CLEAR (tex_res); 342 CLEAR (tex_samp); 343 CLEAR (vs_conf); 344 CLEAR (ps_conf); 345 346 radeon_vbo_check(pScrn, &accel_state->vbo, 16); 347 radeon_cp_start(pScrn); 348 349 
r600_set_default_state(pScrn); 350 351 r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 352 r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 353 r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 354 355 /* Shader */ 356 vs_conf.shader_addr = accel_state->vs_mc_addr; 357 vs_conf.shader_size = accel_state->vs_size; 358 vs_conf.num_gprs = 2; 359 vs_conf.stack_size = 0; 360 vs_conf.bo = accel_state->shaders_bo; 361 r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM); 362 363 ps_conf.shader_addr = accel_state->ps_mc_addr; 364 ps_conf.shader_size = accel_state->ps_size; 365 ps_conf.num_gprs = 1; 366 ps_conf.stack_size = 0; 367 ps_conf.uncached_first_inst = 1; 368 ps_conf.clamp_consts = 0; 369 ps_conf.export_mode = 2; 370 ps_conf.bo = accel_state->shaders_bo; 371 r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM); 372 373 /* Texture */ 374 tex_res.id = 0; 375 tex_res.w = accel_state->src_obj[0].width; 376 tex_res.h = accel_state->src_obj[0].height; 377 tex_res.pitch = accel_state->src_obj[0].pitch; 378 tex_res.depth = 0; 379 tex_res.dim = SQ_TEX_DIM_2D; 380 tex_res.base = 0; 381 tex_res.mip_base = 0; 382 tex_res.size = accel_state->src_size[0]; 383 tex_res.bo = accel_state->src_obj[0].bo; 384 tex_res.mip_bo = accel_state->src_obj[0].bo; 385 tex_res.surface = accel_state->src_obj[0].surface; 386 if (accel_state->src_obj[0].bpp == 8) { 387 tex_res.format = FMT_8; 388 tex_res.dst_sel_x = SQ_SEL_1; /* R */ 389 tex_res.dst_sel_y = SQ_SEL_1; /* G */ 390 tex_res.dst_sel_z = SQ_SEL_1; /* B */ 391 tex_res.dst_sel_w = SQ_SEL_X; /* A */ 392 } else if (accel_state->src_obj[0].bpp == 16) { 393 tex_res.format = FMT_5_6_5; 394 tex_res.dst_sel_x = SQ_SEL_Z; /* R */ 395 tex_res.dst_sel_y = SQ_SEL_Y; /* G */ 396 tex_res.dst_sel_z = SQ_SEL_X; /* B */ 397 tex_res.dst_sel_w = SQ_SEL_1; /* A */ 398 } else { 399 tex_res.format = FMT_8_8_8_8; 400 
tex_res.dst_sel_x = SQ_SEL_Z; /* R */ 401 tex_res.dst_sel_y = SQ_SEL_Y; /* G */ 402 tex_res.dst_sel_z = SQ_SEL_X; /* B */ 403 tex_res.dst_sel_w = SQ_SEL_W; /* A */ 404 } 405 406 tex_res.request_size = 1; 407 tex_res.base_level = 0; 408 tex_res.last_level = 0; 409 tex_res.perf_modulation = 0; 410 if (accel_state->src_obj[0].tiling_flags == 0) 411 tex_res.tile_mode = 1; 412 r600_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain); 413 414 tex_samp.id = 0; 415 tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; 416 tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; 417 tex_samp.clamp_z = SQ_TEX_WRAP; 418 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; 419 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; 420 tex_samp.mc_coord_truncate = 1; 421 tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 422 tex_samp.mip_filter = 0; /* no mipmap */ 423 r600_set_tex_sampler(pScrn, &tex_samp); 424 425 cb_conf.id = 0; 426 cb_conf.w = accel_state->dst_obj.pitch; 427 cb_conf.h = accel_state->dst_obj.height; 428 cb_conf.base = 0; 429 cb_conf.bo = accel_state->dst_obj.bo; 430 cb_conf.surface = accel_state->dst_obj.surface; 431 if (accel_state->dst_obj.bpp == 8) { 432 cb_conf.format = COLOR_8; 433 cb_conf.comp_swap = 3; /* A */ 434 } else if (accel_state->dst_obj.bpp == 16) { 435 cb_conf.format = COLOR_5_6_5; 436 cb_conf.comp_swap = 2; /* RGB */ 437 } else { 438 cb_conf.format = COLOR_8_8_8_8; 439 cb_conf.comp_swap = 1; /* ARGB */ 440 } 441 cb_conf.source_format = 1; 442 cb_conf.blend_clamp = 1; 443 444 /* Render setup */ 445 if (accel_state->planemask & 0x000000ff) 446 cb_conf.pmask |= 4; /* B */ 447 if (accel_state->planemask & 0x0000ff00) 448 cb_conf.pmask |= 2; /* G */ 449 if (accel_state->planemask & 0x00ff0000) 450 cb_conf.pmask |= 1; /* R */ 451 if (accel_state->planemask & 0xff000000) 452 cb_conf.pmask |= 8; /* A */ 453 cb_conf.rop = accel_state->rop; 454 if (accel_state->dst_obj.tiling_flags == 0) 455 cb_conf.array_mode = 0; 456 r600_set_render_target(pScrn, &cb_conf, 
accel_state->dst_obj.domain); 457 458 r600_set_spi(pScrn, (1 - 1), 1); 459 460} 461 462static void 463R600DoCopy(ScrnInfoPtr pScrn) 464{ 465 r600_finish_op(pScrn, 16); 466} 467 468static void 469R600DoCopyVline(PixmapPtr pPix) 470{ 471 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); 472 RADEONInfoPtr info = RADEONPTR(pScrn); 473 struct radeon_accel_state *accel_state = info->accel_state; 474 475 if (accel_state->vsync) 476 r600_cp_wait_vline_sync(pScrn, pPix, 477 accel_state->vline_crtc, 478 accel_state->vline_y1, 479 accel_state->vline_y2); 480 481 r600_finish_op(pScrn, 16); 482} 483 484static void 485R600AppendCopyVertex(ScrnInfoPtr pScrn, 486 int srcX, int srcY, 487 int dstX, int dstY, 488 int w, int h) 489{ 490 RADEONInfoPtr info = RADEONPTR(pScrn); 491 struct radeon_accel_state *accel_state = info->accel_state; 492 float *vb; 493 494 vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16); 495 496 vb[0] = (float)dstX; 497 vb[1] = (float)dstY; 498 vb[2] = (float)srcX; 499 vb[3] = (float)srcY; 500 501 vb[4] = (float)dstX; 502 vb[5] = (float)(dstY + h); 503 vb[6] = (float)srcX; 504 vb[7] = (float)(srcY + h); 505 506 vb[8] = (float)(dstX + w); 507 vb[9] = (float)(dstY + h); 508 vb[10] = (float)(srcX + w); 509 vb[11] = (float)(srcY + h); 510 511 radeon_vbo_commit(pScrn, &accel_state->vbo); 512} 513 514static Bool 515R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, 516 int xdir, int ydir, 517 int rop, 518 Pixel planemask) 519{ 520 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen); 521 RADEONInfoPtr info = RADEONPTR(pScrn); 522 struct radeon_accel_state *accel_state = info->accel_state; 523 struct r600_accel_object src_obj, dst_obj; 524 525 if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel)) 526 RADEON_FALLBACK(("R600CheckDatatype src failed\n")); 527 if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel)) 528 RADEON_FALLBACK(("R600CheckDatatype dst failed\n")); 529 if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel)) 530 RADEON_FALLBACK(("Invalid 
planemask\n")); 531 532 dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); 533 src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); 534 535 accel_state->same_surface = FALSE; 536 537 src_obj.bo = radeon_get_pixmap_bo(pSrc); 538 dst_obj.bo = radeon_get_pixmap_bo(pDst); 539 dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst); 540 src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc); 541 src_obj.surface = radeon_get_pixmap_surface(pSrc); 542 dst_obj.surface = radeon_get_pixmap_surface(pDst); 543 if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst)) 544 accel_state->same_surface = TRUE; 545 546 src_obj.width = pSrc->drawable.width; 547 src_obj.height = pSrc->drawable.height; 548 src_obj.bpp = pSrc->drawable.bitsPerPixel; 549 src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; 550 551 dst_obj.width = pDst->drawable.width; 552 dst_obj.height = pDst->drawable.height; 553 dst_obj.bpp = pDst->drawable.bitsPerPixel; 554 if (radeon_get_pixmap_shared(pDst) == TRUE) { 555 dst_obj.domain = RADEON_GEM_DOMAIN_GTT; 556 } else 557 dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 558 559 if (!R600SetAccelState(pScrn, 560 &src_obj, 561 NULL, 562 &dst_obj, 563 accel_state->copy_vs_offset, accel_state->copy_ps_offset, 564 rop, planemask)) 565 return FALSE; 566 567 if (accel_state->same_surface == TRUE) { 568 unsigned long size = accel_state->dst_obj.surface->bo_size; 569 unsigned long align = accel_state->dst_obj.surface->bo_alignment; 570 571 if (accel_state->copy_area_bo) { 572 radeon_bo_unref(accel_state->copy_area_bo); 573 accel_state->copy_area_bo = NULL; 574 } 575 accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, align, 576 RADEON_GEM_DOMAIN_VRAM, 577 0); 578 if (accel_state->copy_area_bo == NULL) 579 RADEON_FALLBACK(("temp copy surface alloc failed\n")); 580 581 radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo, 582 0, RADEON_GEM_DOMAIN_VRAM); 583 if 
(radeon_cs_space_check(info->cs)) { 584 radeon_bo_unref(accel_state->copy_area_bo); 585 accel_state->copy_area_bo = NULL; 586 return FALSE; 587 } 588 accel_state->copy_area = (void*)accel_state->copy_area_bo; 589 } else 590 R600DoPrepareCopy(pScrn); 591 592 if (accel_state->vsync) 593 RADEONVlineHelperClear(pScrn); 594 595 accel_state->dst_pix = pDst; 596 accel_state->src_pix = pSrc; 597 accel_state->xdir = xdir; 598 accel_state->ydir = ydir; 599 600 return TRUE; 601} 602 603static void 604R600DoneCopy(PixmapPtr pDst) 605{ 606 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen); 607 RADEONInfoPtr info = RADEONPTR(pScrn); 608 struct radeon_accel_state *accel_state = info->accel_state; 609 610 if (!accel_state->same_surface) 611 R600DoCopyVline(pDst); 612 613 if (accel_state->copy_area) { 614 accel_state->copy_area = NULL; 615 } 616 617} 618 619static void 620R600Copy(PixmapPtr pDst, 621 int srcX, int srcY, 622 int dstX, int dstY, 623 int w, int h) 624{ 625 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen); 626 RADEONInfoPtr info = RADEONPTR(pScrn); 627 struct radeon_accel_state *accel_state = info->accel_state; 628 629 if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY)) 630 return; 631 632 if (CS_FULL(info->cs)) { 633 R600DoneCopy(info->accel_state->dst_pix); 634 radeon_cs_flush_indirect(pScrn); 635 R600PrepareCopy(accel_state->src_pix, 636 accel_state->dst_pix, 637 accel_state->xdir, 638 accel_state->ydir, 639 accel_state->rop, 640 accel_state->planemask); 641 } 642 643 if (accel_state->vsync) 644 RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h); 645 646 if (accel_state->same_surface && 647 (srcX + w <= dstX || dstX + w <= srcX || srcY + h <= dstY || dstY + h <= srcY)) { 648 R600DoPrepareCopy(pScrn); 649 R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); 650 R600DoCopyVline(pDst); 651 } else if (accel_state->same_surface && accel_state->copy_area) { 652 uint32_t orig_dst_domain = accel_state->dst_obj.domain; 653 
uint32_t orig_src_domain = accel_state->src_obj[0].domain; 654 uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags; 655 uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags; 656 struct radeon_bo *orig_bo = accel_state->dst_obj.bo; 657 int orig_rop = accel_state->rop; 658 659 /* src to tmp */ 660 accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 661 accel_state->dst_obj.bo = accel_state->copy_area_bo; 662 accel_state->dst_obj.tiling_flags = 0; 663 accel_state->rop = 3; 664 R600DoPrepareCopy(pScrn); 665 R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); 666 R600DoCopy(pScrn); 667 668 /* tmp to dst */ 669 accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM; 670 accel_state->src_obj[0].bo = accel_state->copy_area_bo; 671 accel_state->src_obj[0].tiling_flags = 0; 672 accel_state->dst_obj.domain = orig_dst_domain; 673 accel_state->dst_obj.bo = orig_bo; 674 accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags; 675 accel_state->rop = orig_rop; 676 R600DoPrepareCopy(pScrn); 677 R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h); 678 R600DoCopyVline(pDst); 679 680 /* restore state */ 681 accel_state->src_obj[0].domain = orig_src_domain; 682 accel_state->src_obj[0].bo = orig_bo; 683 accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags; 684 } else 685 R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); 686 687} 688 689struct blendinfo { 690 Bool dst_alpha; 691 Bool src_alpha; 692 uint32_t blend_cntl; 693}; 694 695static struct blendinfo R600BlendOp[] = { 696 /* Clear */ 697 {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 698 /* Src */ 699 {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 700 /* Dst */ 701 {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, 702 /* Over */ 703 {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 704 /* OverReverse */ 705 
{1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, 706 /* In */ 707 {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 708 /* InReverse */ 709 {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 710 /* Out */ 711 {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 712 /* OutReverse */ 713 {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 714 /* Atop */ 715 {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 716 /* AtopReverse */ 717 {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 718 /* Xor */ 719 {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 720 /* Add */ 721 {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, 722}; 723 724struct formatinfo { 725 unsigned int fmt; 726 uint32_t card_fmt; 727}; 728 729static struct formatinfo R600TexFormats[] = { 730 {PICT_a8r8g8b8, FMT_8_8_8_8}, 731 {PICT_x8r8g8b8, FMT_8_8_8_8}, 732 {PICT_a8b8g8r8, FMT_8_8_8_8}, 733 {PICT_x8b8g8r8, FMT_8_8_8_8}, 734 {PICT_b8g8r8a8, FMT_8_8_8_8}, 735 {PICT_b8g8r8x8, FMT_8_8_8_8}, 736 {PICT_r5g6b5, FMT_5_6_5}, 737 {PICT_a1r5g5b5, FMT_1_5_5_5}, 738 {PICT_x1r5g5b5, FMT_1_5_5_5}, 739 {PICT_a8, FMT_8}, 740}; 741 742static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format) 743{ 744 uint32_t sblend, dblend; 745 746 sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask; 747 dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask; 748 749 /* If there's no dst alpha channel, adjust the blend op so that we'll treat 750 * it as always 1. 
751 */ 752 if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) { 753 if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift)) 754 sblend = (BLEND_ONE << COLOR_SRCBLEND_shift); 755 else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift)) 756 sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift); 757 } 758 759 /* If the source alpha is being used, then we should only be in a case where 760 * the source blend factor is 0, and the source blend value is the mask 761 * channels multiplied by the source picture's alpha. 762 */ 763 if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) { 764 if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) { 765 dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift); 766 } else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) { 767 dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift); 768 } 769 770 /* With some tricks, we can still accelerate PictOpOver with solid src. 771 * This is commonly used for text rendering, so it's worth the extra 772 * effort. 
773 */ 774 if (sblend == (BLEND_ONE << COLOR_SRCBLEND_shift)) { 775 sblend = (BLEND_CONSTANT_COLOR << COLOR_SRCBLEND_shift); 776 } 777 } 778 779 return sblend | dblend; 780} 781 782static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format) 783{ 784 switch (pDstPicture->format) { 785 case PICT_a8r8g8b8: 786 case PICT_x8r8g8b8: 787 case PICT_a8b8g8r8: 788 case PICT_x8b8g8r8: 789 case PICT_b8g8r8a8: 790 case PICT_b8g8r8x8: 791 *dst_format = COLOR_8_8_8_8; 792 break; 793 case PICT_r5g6b5: 794 *dst_format = COLOR_5_6_5; 795 break; 796 case PICT_a1r5g5b5: 797 case PICT_x1r5g5b5: 798 *dst_format = COLOR_1_5_5_5; 799 break; 800 case PICT_a8: 801 *dst_format = COLOR_8; 802 break; 803 default: 804 RADEON_FALLBACK(("Unsupported dest format 0x%x\n", 805 (int)pDstPicture->format)); 806 } 807 return TRUE; 808} 809 810static Bool R600CheckCompositeTexture(PicturePtr pPict, 811 PicturePtr pDstPict, 812 int op, 813 int unit) 814{ 815 unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone; 816 unsigned int i; 817 818 for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) { 819 if (R600TexFormats[i].fmt == pPict->format) 820 break; 821 } 822 if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0])) 823 RADEON_FALLBACK(("Unsupported picture format 0x%x\n", 824 (int)pPict->format)); 825 826 if (pPict->filter != PictFilterNearest && 827 pPict->filter != PictFilterBilinear) 828 RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter)); 829 830 /* for REPEAT_NONE, Render semantics are that sampling outside the source 831 * picture results in alpha=0 pixels. We can implement this with a border color 832 * *if* our source texture has an alpha channel, otherwise we need to fall 833 * back. If we're not transformed then we hope that upper layers have clipped 834 * rendering to the bounds of the source drawable, in which case it doesn't 835 * matter. I have not, however, verified that the X server always does such 836 * clipping. 
837 */ 838 /* FIXME R6xx */ 839 if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) { 840 if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0))) 841 RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n")); 842 } 843 844 if (!radeon_transform_is_affine_or_scaled(pPict->transform)) 845 RADEON_FALLBACK(("non-affine transforms not supported\n")); 846 847 return TRUE; 848} 849 850static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, 851 int unit) 852{ 853 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); 854 RADEONInfoPtr info = RADEONPTR(pScrn); 855 struct radeon_accel_state *accel_state = info->accel_state; 856 unsigned int repeatType; 857 unsigned int i; 858 tex_resource_t tex_res; 859 tex_sampler_t tex_samp; 860 int pix_r, pix_g, pix_b, pix_a; 861 float vs_alu_consts[8]; 862 863 CLEAR (tex_res); 864 CLEAR (tex_samp); 865 866 for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) { 867 if (R600TexFormats[i].fmt == pPict->format) 868 break; 869 } 870 871 /* Texture */ 872 if (pPict->pDrawable) { 873 tex_res.w = pPict->pDrawable->width; 874 tex_res.h = pPict->pDrawable->height; 875 repeatType = pPict->repeat ? 
pPict->repeatType : RepeatNone; 876 } else { 877 tex_res.w = 1; 878 tex_res.h = 1; 879 repeatType = RepeatNormal; 880 } 881 tex_res.id = unit; 882 tex_res.pitch = accel_state->src_obj[unit].pitch; 883 tex_res.depth = 0; 884 tex_res.dim = SQ_TEX_DIM_2D; 885 tex_res.base = 0; 886 tex_res.mip_base = 0; 887 tex_res.size = accel_state->src_size[unit]; 888 tex_res.format = R600TexFormats[i].card_fmt; 889 tex_res.bo = accel_state->src_obj[unit].bo; 890 tex_res.mip_bo = accel_state->src_obj[unit].bo; 891 tex_res.surface = accel_state->src_obj[unit].surface; 892 tex_res.request_size = 1; 893 894#if X_BYTE_ORDER == X_BIG_ENDIAN 895 switch (accel_state->src_obj[unit].bpp) { 896 case 16: 897 tex_res.endian = SQ_ENDIAN_8IN16; 898 break; 899 case 32: 900 tex_res.endian = SQ_ENDIAN_8IN32; 901 break; 902 default : 903 break; 904 } 905#endif 906 907 /* component swizzles */ 908 switch (pPict->format) { 909 case PICT_a1r5g5b5: 910 case PICT_a8r8g8b8: 911 pix_r = SQ_SEL_Z; /* R */ 912 pix_g = SQ_SEL_Y; /* G */ 913 pix_b = SQ_SEL_X; /* B */ 914 pix_a = SQ_SEL_W; /* A */ 915 break; 916 case PICT_a8b8g8r8: 917 pix_r = SQ_SEL_X; /* R */ 918 pix_g = SQ_SEL_Y; /* G */ 919 pix_b = SQ_SEL_Z; /* B */ 920 pix_a = SQ_SEL_W; /* A */ 921 break; 922 case PICT_x8b8g8r8: 923 pix_r = SQ_SEL_X; /* R */ 924 pix_g = SQ_SEL_Y; /* G */ 925 pix_b = SQ_SEL_Z; /* B */ 926 pix_a = SQ_SEL_1; /* A */ 927 break; 928 case PICT_b8g8r8a8: 929 pix_r = SQ_SEL_Y; /* R */ 930 pix_g = SQ_SEL_Z; /* G */ 931 pix_b = SQ_SEL_W; /* B */ 932 pix_a = SQ_SEL_X; /* A */ 933 break; 934 case PICT_b8g8r8x8: 935 pix_r = SQ_SEL_Y; /* R */ 936 pix_g = SQ_SEL_Z; /* G */ 937 pix_b = SQ_SEL_W; /* B */ 938 pix_a = SQ_SEL_1; /* A */ 939 break; 940 case PICT_x1r5g5b5: 941 case PICT_x8r8g8b8: 942 case PICT_r5g6b5: 943 pix_r = SQ_SEL_Z; /* R */ 944 pix_g = SQ_SEL_Y; /* G */ 945 pix_b = SQ_SEL_X; /* B */ 946 pix_a = SQ_SEL_1; /* A */ 947 break; 948 case PICT_a8: 949 pix_r = SQ_SEL_0; /* R */ 950 pix_g = SQ_SEL_0; /* G */ 951 pix_b = SQ_SEL_0; 
/* B */ 952 pix_a = SQ_SEL_X; /* A */ 953 break; 954 default: 955 RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format)); 956 } 957 958 if (unit == 0) { 959 if (!accel_state->msk_pic) { 960 if (PICT_FORMAT_RGB(pPict->format) == 0) { 961 pix_r = SQ_SEL_0; 962 pix_g = SQ_SEL_0; 963 pix_b = SQ_SEL_0; 964 } 965 966 if (PICT_FORMAT_A(pPict->format) == 0) 967 pix_a = SQ_SEL_1; 968 } else { 969 if (accel_state->component_alpha) { 970 if (accel_state->src_alpha) { 971 if (PICT_FORMAT_A(pPict->format) == 0) { 972 pix_r = SQ_SEL_1; 973 pix_g = SQ_SEL_1; 974 pix_b = SQ_SEL_1; 975 pix_a = SQ_SEL_1; 976 } else { 977 pix_r = pix_a; 978 pix_g = pix_a; 979 pix_b = pix_a; 980 } 981 } else { 982 if (PICT_FORMAT_A(pPict->format) == 0) 983 pix_a = SQ_SEL_1; 984 } 985 } else { 986 if (PICT_FORMAT_RGB(pPict->format) == 0) { 987 pix_r = SQ_SEL_0; 988 pix_g = SQ_SEL_0; 989 pix_b = SQ_SEL_0; 990 } 991 992 if (PICT_FORMAT_A(pPict->format) == 0) 993 pix_a = SQ_SEL_1; 994 } 995 } 996 } else { 997 if (accel_state->component_alpha) { 998 if (PICT_FORMAT_A(pPict->format) == 0) 999 pix_a = SQ_SEL_1; 1000 } else { 1001 if (PICT_FORMAT_A(pPict->format) == 0) { 1002 pix_r = SQ_SEL_1; 1003 pix_g = SQ_SEL_1; 1004 pix_b = SQ_SEL_1; 1005 pix_a = SQ_SEL_1; 1006 } else { 1007 pix_r = pix_a; 1008 pix_g = pix_a; 1009 pix_b = pix_a; 1010 } 1011 } 1012 } 1013 1014 tex_res.dst_sel_x = pix_r; /* R */ 1015 tex_res.dst_sel_y = pix_g; /* G */ 1016 tex_res.dst_sel_z = pix_b; /* B */ 1017 tex_res.dst_sel_w = pix_a; /* A */ 1018 1019 tex_res.base_level = 0; 1020 tex_res.last_level = 0; 1021 tex_res.perf_modulation = 0; 1022 if (accel_state->src_obj[unit].tiling_flags == 0) 1023 tex_res.tile_mode = 1; 1024 r600_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[unit].domain); 1025 1026 tex_samp.id = unit; 1027 tex_samp.border_color = SQ_TEX_BORDER_COLOR_TRANS_BLACK; 1028 1029 switch (repeatType) { 1030 case RepeatNormal: 1031 tex_samp.clamp_x = SQ_TEX_WRAP; 1032 tex_samp.clamp_y = SQ_TEX_WRAP; 1033 break; 1034 case 
RepeatPad: 1035 tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; 1036 tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; 1037 break; 1038 case RepeatReflect: 1039 tex_samp.clamp_x = SQ_TEX_MIRROR; 1040 tex_samp.clamp_y = SQ_TEX_MIRROR; 1041 break; 1042 case RepeatNone: 1043 tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER; 1044 tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER; 1045 break; 1046 default: 1047 RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType)); 1048 } 1049 1050 switch (pPict->filter) { 1051 case PictFilterNearest: 1052 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; 1053 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; 1054 tex_samp.mc_coord_truncate = 1; 1055 break; 1056 case PictFilterBilinear: 1057 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; 1058 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; 1059 break; 1060 default: 1061 RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); 1062 } 1063 1064 tex_samp.clamp_z = SQ_TEX_WRAP; 1065 tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 1066 tex_samp.mip_filter = 0; /* no mipmap */ 1067 r600_set_tex_sampler(pScrn, &tex_samp); 1068 1069 if (pPict->transform != 0) { 1070 accel_state->is_transform[unit] = TRUE; 1071 accel_state->transform[unit] = pPict->transform; 1072 1073 vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]); 1074 vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]); 1075 vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]); 1076 vs_alu_consts[3] = 1.0 / tex_res.w; 1077 1078 vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]); 1079 vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]); 1080 vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]); 1081 vs_alu_consts[7] = 1.0 / tex_res.h; 1082 } else { 1083 accel_state->is_transform[unit] = FALSE; 1084 1085 vs_alu_consts[0] = 1.0; 1086 vs_alu_consts[1] = 0.0; 1087 vs_alu_consts[2] = 0.0; 1088 vs_alu_consts[3] = 1.0 / tex_res.w; 1089 1090 vs_alu_consts[4] = 0.0; 1091 vs_alu_consts[5] = 1.0; 1092 
vs_alu_consts[6] = 0.0; 1093 vs_alu_consts[7] = 1.0 / tex_res.h; 1094 } 1095 1096 /* VS alu constants */ 1097 r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_vs + (unit * 2), 1098 sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts); 1099 1100 return TRUE; 1101} 1102 1103static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, 1104 PicturePtr pDstPicture) 1105{ 1106 uint32_t tmp1; 1107 PixmapPtr pSrcPixmap, pDstPixmap; 1108 1109 /* Check for unsupported compositing operations. */ 1110 if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0]))) 1111 RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op)); 1112 1113 if (pSrcPicture->pDrawable) { 1114 pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable); 1115 1116 if (pSrcPixmap->drawable.width >= 8192 || 1117 pSrcPixmap->drawable.height >= 8192) { 1118 RADEON_FALLBACK(("Source w/h too large (%d,%d).\n", 1119 pSrcPixmap->drawable.width, 1120 pSrcPixmap->drawable.height)); 1121 } 1122 1123 if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0)) 1124 return FALSE; 1125 } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill) 1126 RADEON_FALLBACK(("Gradient pictures not supported yet\n")); 1127 1128 pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable); 1129 1130 if (pDstPixmap->drawable.width >= 8192 || 1131 pDstPixmap->drawable.height >= 8192) { 1132 RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n", 1133 pDstPixmap->drawable.width, 1134 pDstPixmap->drawable.height)); 1135 } 1136 1137 if (pMaskPicture) { 1138 PixmapPtr pMaskPixmap; 1139 1140 if (pMaskPicture->pDrawable) { 1141 pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable); 1142 1143 if (pMaskPixmap->drawable.width >= 8192 || 1144 pMaskPixmap->drawable.height >= 8192) { 1145 RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n", 1146 pMaskPixmap->drawable.width, 1147 pMaskPixmap->drawable.height)); 1148 } 1149 1150 if (pMaskPicture->componentAlpha) { 1151 /* Check if it's component 
alpha that relies on a source alpha and 1152 * on the source value. We can only get one of those into the 1153 * single source value that we get to blend with. 1154 * 1155 * We can cheat a bit if the src is solid, though. PictOpOver 1156 * can use the constant blend color to sneak a second blend 1157 * source in. 1158 */ 1159 if (R600BlendOp[op].src_alpha && 1160 (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) != 1161 (BLEND_ZERO << COLOR_SRCBLEND_shift)) { 1162 if (pSrcPicture->pDrawable || op != PictOpOver) 1163 RADEON_FALLBACK(("Component alpha not supported with source " 1164 "alpha and source value blending.\n")); 1165 } 1166 } 1167 1168 if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1)) 1169 return FALSE; 1170 } else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill) 1171 RADEON_FALLBACK(("Gradient pictures not supported yet\n")); 1172 } 1173 1174 if (!R600GetDestFormat(pDstPicture, &tmp1)) 1175 return FALSE; 1176 1177 return TRUE; 1178 1179} 1180 1181static void R600SetSolidConsts(ScrnInfoPtr pScrn, float *buf, int format, uint32_t fg, int unit) 1182{ 1183 RADEONInfoPtr info = RADEONPTR(pScrn); 1184 struct radeon_accel_state *accel_state = info->accel_state; 1185 float pix_r = 0, pix_g = 0, pix_b = 0, pix_a = 0; 1186 1187 uint32_t w = (fg >> 24) & 0xff; 1188 uint32_t z = (fg >> 16) & 0xff; 1189 uint32_t y = (fg >> 8) & 0xff; 1190 uint32_t x = (fg >> 0) & 0xff; 1191 float xf = (float)x / 255; /* R */ 1192 float yf = (float)y / 255; /* G */ 1193 float zf = (float)z / 255; /* B */ 1194 float wf = (float)w / 255; /* A */ 1195 1196 /* component swizzles */ 1197 switch (format) { 1198 case PICT_a1r5g5b5: 1199 case PICT_a8r8g8b8: 1200 pix_r = zf; /* R */ 1201 pix_g = yf; /* G */ 1202 pix_b = xf; /* B */ 1203 pix_a = wf; /* A */ 1204 break; 1205 case PICT_a8b8g8r8: 1206 pix_r = xf; /* R */ 1207 pix_g = yf; /* G */ 1208 pix_b = zf; /* B */ 1209 pix_a = wf; /* A */ 1210 break; 1211 case PICT_x8b8g8r8: 1212 pix_r = xf; /* R */ 1213 pix_g 
= yf; /* G */ 1214 pix_b = zf; /* B */ 1215 pix_a = 1.0; /* A */ 1216 break; 1217 case PICT_b8g8r8a8: 1218 pix_r = yf; /* R */ 1219 pix_g = zf; /* G */ 1220 pix_b = wf; /* B */ 1221 pix_a = xf; /* A */ 1222 break; 1223 case PICT_b8g8r8x8: 1224 pix_r = yf; /* R */ 1225 pix_g = zf; /* G */ 1226 pix_b = wf; /* B */ 1227 pix_a = 1.0; /* A */ 1228 break; 1229 case PICT_x1r5g5b5: 1230 case PICT_x8r8g8b8: 1231 case PICT_r5g6b5: 1232 pix_r = zf; /* R */ 1233 pix_g = yf; /* G */ 1234 pix_b = xf; /* B */ 1235 pix_a = 1.0; /* A */ 1236 break; 1237 case PICT_a8: 1238 pix_r = 0.0; /* R */ 1239 pix_g = 0.0; /* G */ 1240 pix_b = 0.0; /* B */ 1241 pix_a = xf; /* A */ 1242 break; 1243 default: 1244 ErrorF("Bad format 0x%x\n", format); 1245 } 1246 1247 if (unit == 0) { 1248 if (!accel_state->msk_pic) { 1249 if (PICT_FORMAT_RGB(format) == 0) { 1250 pix_r = 0.0; 1251 pix_g = 0.0; 1252 pix_b = 0.0; 1253 } 1254 1255 if (PICT_FORMAT_A(format) == 0) 1256 pix_a = 1.0; 1257 } else { 1258 if (accel_state->component_alpha) { 1259 if (accel_state->src_alpha) { 1260 /* required for PictOpOver */ 1261 float cblend[4] = { pix_r / pix_a, pix_g / pix_a, 1262 pix_b / pix_a, pix_a / pix_a }; 1263 r600_set_blend_color(pScrn, cblend); 1264 1265 if (PICT_FORMAT_A(format) == 0) { 1266 pix_r = 1.0; 1267 pix_g = 1.0; 1268 pix_b = 1.0; 1269 pix_a = 1.0; 1270 } else { 1271 pix_r = pix_a; 1272 pix_g = pix_a; 1273 pix_b = pix_a; 1274 } 1275 } else { 1276 if (PICT_FORMAT_A(format) == 0) 1277 pix_a = 1.0; 1278 } 1279 } else { 1280 if (PICT_FORMAT_RGB(format) == 0) { 1281 pix_r = 0; 1282 pix_g = 0; 1283 pix_b = 0; 1284 } 1285 1286 if (PICT_FORMAT_A(format) == 0) 1287 pix_a = 1.0; 1288 } 1289 } 1290 } else { 1291 if (accel_state->component_alpha) { 1292 if (PICT_FORMAT_A(format) == 0) 1293 pix_a = 1.0; 1294 } else { 1295 if (PICT_FORMAT_A(format) == 0) { 1296 pix_r = 1.0; 1297 pix_g = 1.0; 1298 pix_b = 1.0; 1299 pix_a = 1.0; 1300 } else { 1301 pix_r = pix_a; 1302 pix_g = pix_a; 1303 pix_b = pix_a; 1304 } 1305 } 
1306 } 1307 1308 buf[0] = pix_r; 1309 buf[1] = pix_g; 1310 buf[2] = pix_b; 1311 buf[3] = pix_a; 1312} 1313 1314static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, 1315 PicturePtr pMaskPicture, PicturePtr pDstPicture, 1316 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) 1317{ 1318 ScreenPtr pScreen = pDst->drawable.pScreen; 1319 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen); 1320 RADEONInfoPtr info = RADEONPTR(pScrn); 1321 struct radeon_accel_state *accel_state = info->accel_state; 1322 uint32_t dst_format; 1323 cb_config_t cb_conf; 1324 shader_config_t vs_conf, ps_conf; 1325 struct r600_accel_object src_obj, mask_obj, dst_obj; 1326 uint32_t ps_bool_consts = 0; 1327 float ps_alu_consts[8]; 1328 1329 if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8)) 1330 return FALSE; 1331 1332 if (pSrc) { 1333 src_obj.bo = radeon_get_pixmap_bo(pSrc); 1334 src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc); 1335 src_obj.surface = radeon_get_pixmap_surface(pSrc); 1336 src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); 1337 src_obj.width = pSrc->drawable.width; 1338 src_obj.height = pSrc->drawable.height; 1339 src_obj.bpp = pSrc->drawable.bitsPerPixel; 1340 src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; 1341 } 1342 1343 dst_obj.bo = radeon_get_pixmap_bo(pDst); 1344 dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst); 1345 dst_obj.surface = radeon_get_pixmap_surface(pDst); 1346 dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); 1347 dst_obj.width = pDst->drawable.width; 1348 dst_obj.height = pDst->drawable.height; 1349 dst_obj.bpp = pDst->drawable.bitsPerPixel; 1350 if (radeon_get_pixmap_shared(pDst) == TRUE) 1351 dst_obj.domain = RADEON_GEM_DOMAIN_GTT; 1352 else 1353 dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 1354 1355 if (pMaskPicture) { 1356 if (pMask) { 1357 mask_obj.bo = radeon_get_pixmap_bo(pMask); 1358 mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask); 1359 
mask_obj.surface = radeon_get_pixmap_surface(pMask); 1360 mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8); 1361 mask_obj.width = pMask->drawable.width; 1362 mask_obj.height = pMask->drawable.height; 1363 mask_obj.bpp = pMask->drawable.bitsPerPixel; 1364 mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; 1365 } 1366 1367 accel_state->msk_pic = pMaskPicture; 1368 if (pMaskPicture->componentAlpha) { 1369 accel_state->component_alpha = TRUE; 1370 if (R600BlendOp[op].src_alpha) 1371 accel_state->src_alpha = TRUE; 1372 else 1373 accel_state->src_alpha = FALSE; 1374 } else { 1375 accel_state->component_alpha = FALSE; 1376 accel_state->src_alpha = FALSE; 1377 } 1378 } else { 1379 accel_state->msk_pic = NULL; 1380 accel_state->component_alpha = FALSE; 1381 accel_state->src_alpha = FALSE; 1382 } 1383 1384 if (!R600SetAccelState(pScrn, 1385 pSrc ? &src_obj : NULL, 1386 (pMaskPicture && pMask) ? &mask_obj : NULL, 1387 &dst_obj, 1388 accel_state->comp_vs_offset, accel_state->comp_ps_offset, 1389 3, 0xffffffff)) 1390 return FALSE; 1391 1392 if (!R600GetDestFormat(pDstPicture, &dst_format)) 1393 return FALSE; 1394 1395 CLEAR (cb_conf); 1396 CLEAR (vs_conf); 1397 CLEAR (ps_conf); 1398 1399 if (pMask) 1400 radeon_vbo_check(pScrn, &accel_state->vbo, 24); 1401 else 1402 radeon_vbo_check(pScrn, &accel_state->vbo, 16); 1403 1404 radeon_cp_start(pScrn); 1405 1406 r600_set_default_state(pScrn); 1407 1408 r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 1409 r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 1410 r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 1411 1412 if (pSrc) { 1413 if (!R600TextureSetup(pSrcPicture, pSrc, 0)) { 1414 R600IBDiscard(pScrn); 1415 return FALSE; 1416 } 1417 } else 1418 accel_state->is_transform[0] = FALSE; 1419 1420 if (pMask) { 1421 if (!R600TextureSetup(pMaskPicture, pMask, 
1)) { 1422 R600IBDiscard(pScrn); 1423 return FALSE; 1424 } 1425 } else 1426 accel_state->is_transform[1] = FALSE; 1427 1428 if (pSrc) 1429 ps_bool_consts |= (1 << 0); 1430 if (pMask) 1431 ps_bool_consts |= (1 << 1); 1432 r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, ps_bool_consts); 1433 1434 if (pMask) { 1435 r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0)); 1436 } else { 1437 r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0)); 1438 } 1439 1440 /* Shader */ 1441 vs_conf.shader_addr = accel_state->vs_mc_addr; 1442 vs_conf.shader_size = accel_state->vs_size; 1443 vs_conf.num_gprs = 5; 1444 vs_conf.stack_size = 1; 1445 vs_conf.bo = accel_state->shaders_bo; 1446 r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM); 1447 1448 ps_conf.shader_addr = accel_state->ps_mc_addr; 1449 ps_conf.shader_size = accel_state->ps_size; 1450 ps_conf.num_gprs = 2; 1451 ps_conf.stack_size = 1; 1452 ps_conf.uncached_first_inst = 1; 1453 ps_conf.clamp_consts = 0; 1454 ps_conf.export_mode = 2; 1455 ps_conf.bo = accel_state->shaders_bo; 1456 r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM); 1457 1458 cb_conf.id = 0; 1459 cb_conf.w = accel_state->dst_obj.pitch; 1460 cb_conf.h = accel_state->dst_obj.height; 1461 cb_conf.base = 0; 1462 cb_conf.format = dst_format; 1463 cb_conf.bo = accel_state->dst_obj.bo; 1464 cb_conf.surface = accel_state->dst_obj.surface; 1465 1466 switch (pDstPicture->format) { 1467 case PICT_a8r8g8b8: 1468 case PICT_x8r8g8b8: 1469 case PICT_a1r5g5b5: 1470 case PICT_x1r5g5b5: 1471 default: 1472 cb_conf.comp_swap = 1; /* ARGB */ 1473 break; 1474 case PICT_a8b8g8r8: 1475 case PICT_x8b8g8r8: 1476 cb_conf.comp_swap = 0; /* ABGR */ 1477 break; 1478 case PICT_b8g8r8a8: 1479 case PICT_b8g8r8x8: 1480 cb_conf.comp_swap = 3; /* BGRA */ 1481 break; 1482 case PICT_r5g6b5: 1483 cb_conf.comp_swap = 2; /* RGB */ 1484 break; 1485 case PICT_a8: 1486 cb_conf.comp_swap = 3; /* A */ 1487 break; 1488 } 1489 cb_conf.source_format = 1; 1490 cb_conf.blend_clamp = 1; 1491 
cb_conf.blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format); 1492 cb_conf.blend_enable = 1; 1493 cb_conf.pmask = 0xf; 1494 cb_conf.rop = 3; 1495 if (accel_state->dst_obj.tiling_flags == 0) 1496 cb_conf.array_mode = 0; 1497#if X_BYTE_ORDER == X_BIG_ENDIAN 1498 switch (dst_obj.bpp) { 1499 case 16: 1500 cb_conf.endian = ENDIAN_8IN16; 1501 break; 1502 case 32: 1503 cb_conf.endian = ENDIAN_8IN32; 1504 break; 1505 default: 1506 break; 1507 } 1508#endif 1509 r600_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain); 1510 1511 if (pMask) 1512 r600_set_spi(pScrn, (2 - 1), 2); 1513 else 1514 r600_set_spi(pScrn, (1 - 1), 1); 1515 1516 if (!pSrc) { 1517 /* solid src color */ 1518 R600SetSolidConsts(pScrn, &ps_alu_consts[0], pSrcPicture->format, 1519 pSrcPicture->pSourcePict->solidFill.color, 0); 1520 } 1521 1522 if (!pMaskPicture) { 1523 /* use identity constant if there is no mask */ 1524 ps_alu_consts[4] = 1.0; 1525 ps_alu_consts[5] = 1.0; 1526 ps_alu_consts[6] = 1.0; 1527 ps_alu_consts[7] = 1.0; 1528 } else if (!pMask) { 1529 /* solid mask color */ 1530 R600SetSolidConsts(pScrn, &ps_alu_consts[4], pMaskPicture->format, 1531 pMaskPicture->pSourcePict->solidFill.color, 1); 1532 } 1533 1534 r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_ps, 1535 sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); 1536 1537 if (accel_state->vsync) 1538 RADEONVlineHelperClear(pScrn); 1539 1540 accel_state->composite_op = op; 1541 accel_state->dst_pic = pDstPicture; 1542 accel_state->src_pic = pSrcPicture; 1543 accel_state->dst_pix = pDst; 1544 accel_state->msk_pix = pMask; 1545 accel_state->src_pix = pSrc; 1546 1547 return TRUE; 1548} 1549 1550static void R600FinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst, 1551 struct radeon_accel_state *accel_state) 1552{ 1553 int vtx_size; 1554 1555 if (accel_state->vsync) 1556 r600_cp_wait_vline_sync(pScrn, pDst, 1557 accel_state->vline_crtc, 1558 accel_state->vline_y1, 1559 accel_state->vline_y2); 1560 1561 vtx_size = 
accel_state->msk_pix ? 24 : 16; 1562 1563 r600_finish_op(pScrn, vtx_size); 1564} 1565 1566static void R600DoneComposite(PixmapPtr pDst) 1567{ 1568 ScreenPtr pScreen = pDst->drawable.pScreen; 1569 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen); 1570 RADEONInfoPtr info = RADEONPTR(pScrn); 1571 struct radeon_accel_state *accel_state = info->accel_state; 1572 1573 R600FinishComposite(pScrn, pDst, accel_state); 1574} 1575 1576static void R600Composite(PixmapPtr pDst, 1577 int srcX, int srcY, 1578 int maskX, int maskY, 1579 int dstX, int dstY, 1580 int w, int h) 1581{ 1582 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen); 1583 RADEONInfoPtr info = RADEONPTR(pScrn); 1584 struct radeon_accel_state *accel_state = info->accel_state; 1585 float *vb; 1586 1587 /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n", 1588 srcX, srcY, maskX, maskY,dstX, dstY, w, h); */ 1589 1590 if (CS_FULL(info->cs)) { 1591 R600FinishComposite(pScrn, pDst, info->accel_state); 1592 radeon_cs_flush_indirect(pScrn); 1593 R600PrepareComposite(info->accel_state->composite_op, 1594 info->accel_state->src_pic, 1595 info->accel_state->msk_pic, 1596 info->accel_state->dst_pic, 1597 info->accel_state->src_pix, 1598 info->accel_state->msk_pix, 1599 info->accel_state->dst_pix); 1600 } 1601 1602 if (accel_state->vsync) 1603 RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h); 1604 1605 if (accel_state->msk_pix) { 1606 1607 vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24); 1608 1609 vb[0] = (float)dstX; 1610 vb[1] = (float)dstY; 1611 vb[2] = (float)srcX; 1612 vb[3] = (float)srcY; 1613 vb[4] = (float)maskX; 1614 vb[5] = (float)maskY; 1615 1616 vb[6] = (float)dstX; 1617 vb[7] = (float)(dstY + h); 1618 vb[8] = (float)srcX; 1619 vb[9] = (float)(srcY + h); 1620 vb[10] = (float)maskX; 1621 vb[11] = (float)(maskY + h); 1622 1623 vb[12] = (float)(dstX + w); 1624 vb[13] = (float)(dstY + h); 1625 vb[14] = (float)(srcX + w); 1626 vb[15] = (float)(srcY + h); 1627 vb[16] = (float)(maskX + w); 
1628 vb[17] = (float)(maskY + h); 1629 1630 radeon_vbo_commit(pScrn, &accel_state->vbo); 1631 1632 } else { 1633 1634 vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16); 1635 1636 vb[0] = (float)dstX; 1637 vb[1] = (float)dstY; 1638 vb[2] = (float)srcX; 1639 vb[3] = (float)srcY; 1640 1641 vb[4] = (float)dstX; 1642 vb[5] = (float)(dstY + h); 1643 vb[6] = (float)srcX; 1644 vb[7] = (float)(srcY + h); 1645 1646 vb[8] = (float)(dstX + w); 1647 vb[9] = (float)(dstY + h); 1648 vb[10] = (float)(srcX + w); 1649 vb[11] = (float)(srcY + h); 1650 1651 radeon_vbo_commit(pScrn, &accel_state->vbo); 1652 } 1653 1654 1655} 1656 1657static Bool 1658R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h, 1659 char *src, int src_pitch) 1660{ 1661 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen); 1662 RADEONInfoPtr info = RADEONPTR(pScrn); 1663 struct radeon_accel_state *accel_state = info->accel_state; 1664 struct radeon_exa_pixmap_priv *driver_priv; 1665 struct radeon_bo *scratch = NULL; 1666 struct radeon_bo *copy_dst; 1667 unsigned char *dst; 1668 unsigned size; 1669 uint32_t dst_domain; 1670 int bpp = pDst->drawable.bitsPerPixel; 1671 uint32_t scratch_pitch; 1672 uint32_t copy_pitch; 1673 uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8); 1674 int ret; 1675 Bool flush = TRUE; 1676 Bool r; 1677 int i; 1678 struct r600_accel_object src_obj, dst_obj; 1679 uint32_t height, base_align; 1680 1681 if (bpp < 8) 1682 return FALSE; 1683 1684 driver_priv = exaGetPixmapDriverPrivate(pDst); 1685 if (!driver_priv || !driver_priv->bo) 1686 return FALSE; 1687 1688 /* If we know the BO won't be busy / in VRAM, don't bother with a scratch */ 1689 copy_dst = driver_priv->bo; 1690 copy_pitch = pDst->devKind; 1691 if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) { 1692 if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) { 1693 flush = FALSE; 1694 if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain) && 1695 !(dst_domain & 
RADEON_GEM_DOMAIN_VRAM)) 1696 goto copy; 1697 } 1698 /* use cpu copy for fast fb access */ 1699 if (info->is_fast_fb) 1700 goto copy; 1701 } 1702 1703 scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0)); 1704 height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0)); 1705 base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0); 1706 size = scratch_pitch * height * (bpp / 8); 1707 scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0); 1708 if (scratch == NULL) { 1709 goto copy; 1710 } 1711 1712 src_obj.pitch = scratch_pitch; 1713 src_obj.width = w; 1714 src_obj.height = h; 1715 src_obj.bpp = bpp; 1716 src_obj.domain = RADEON_GEM_DOMAIN_GTT; 1717 src_obj.bo = scratch; 1718 src_obj.tiling_flags = 0; 1719 src_obj.surface = NULL; 1720 1721 dst_obj.pitch = dst_pitch_hw; 1722 dst_obj.width = pDst->drawable.width; 1723 dst_obj.height = pDst->drawable.height; 1724 dst_obj.bpp = bpp; 1725 dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 1726 dst_obj.bo = radeon_get_pixmap_bo(pDst); 1727 dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst); 1728 dst_obj.surface = radeon_get_pixmap_surface(pDst); 1729 1730 if (!R600SetAccelState(pScrn, 1731 &src_obj, 1732 NULL, 1733 &dst_obj, 1734 accel_state->copy_vs_offset, accel_state->copy_ps_offset, 1735 3, 0xffffffff)) { 1736 goto copy; 1737 } 1738 copy_dst = scratch; 1739 copy_pitch = scratch_pitch * (bpp / 8); 1740 flush = FALSE; 1741 1742copy: 1743 if (flush) 1744 radeon_cs_flush_indirect(pScrn); 1745 1746 ret = radeon_bo_map(copy_dst, 0); 1747 if (ret) { 1748 r = FALSE; 1749 goto out; 1750 } 1751 r = TRUE; 1752 size = w * bpp / 8; 1753 dst = copy_dst->ptr; 1754 if (copy_dst == driver_priv->bo) 1755 dst += y * copy_pitch + x * bpp / 8; 1756 for (i = 0; i < h; i++) { 1757 memcpy(dst + i * copy_pitch, src, size); 1758 src += src_pitch; 1759 } 1760 radeon_bo_unmap(copy_dst); 1761 1762 if (copy_dst == scratch) { 1763 if (info->accel_state->vsync) 1764 RADEONVlineHelperSet(pScrn, x, y, 
x + w, y + h); 1765 1766 /* blit from gart to vram */ 1767 R600DoPrepareCopy(pScrn); 1768 R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h); 1769 R600DoCopyVline(pDst); 1770 } 1771 1772out: 1773 if (scratch) 1774 radeon_bo_unref(scratch); 1775 return r; 1776} 1777 1778static Bool 1779R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w, 1780 int h, char *dst, int dst_pitch) 1781{ 1782 ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen); 1783 RADEONInfoPtr info = RADEONPTR(pScrn); 1784 struct radeon_accel_state *accel_state = info->accel_state; 1785 struct radeon_exa_pixmap_priv *driver_priv; 1786 struct radeon_bo *scratch = NULL; 1787 struct radeon_bo *copy_src; 1788 unsigned size; 1789 uint32_t src_domain = 0; 1790 int bpp = pSrc->drawable.bitsPerPixel; 1791 uint32_t scratch_pitch; 1792 uint32_t copy_pitch; 1793 uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8); 1794 int ret; 1795 Bool flush = FALSE; 1796 Bool r; 1797 struct r600_accel_object src_obj, dst_obj; 1798 uint32_t height, base_align; 1799 1800 if (bpp < 8) 1801 return FALSE; 1802 1803 driver_priv = exaGetPixmapDriverPrivate(pSrc); 1804 if (!driver_priv || !driver_priv->bo) 1805 return FALSE; 1806 1807 /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */ 1808 copy_src = driver_priv->bo; 1809 copy_pitch = pSrc->devKind; 1810 if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) { 1811 if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) { 1812 src_domain = radeon_bo_get_src_domain(driver_priv->bo); 1813 if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 1814 (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) 1815 src_domain = 0; 1816 else /* A write may be scheduled */ 1817 flush = TRUE; 1818 } 1819 1820 if (!src_domain) 1821 radeon_bo_is_busy(driver_priv->bo, &src_domain); 1822 1823 if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM) 1824 goto copy; 1825 } 1826 1827 scratch_pitch = RADEON_ALIGN(w, 
drmmode_get_pitch_align(pScrn, (bpp / 8), 0)); 1828 height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0)); 1829 base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0); 1830 size = scratch_pitch * height * (bpp / 8); 1831 scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0); 1832 if (scratch == NULL) { 1833 goto copy; 1834 } 1835 radeon_cs_space_reset_bos(info->cs); 1836 radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo, 1837 RADEON_GEM_DOMAIN_VRAM, 0); 1838 accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM; 1839 radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0); 1840 accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT; 1841 radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain); 1842 ret = radeon_cs_space_check(info->cs); 1843 if (ret) { 1844 goto copy; 1845 } 1846 1847 src_obj.pitch = src_pitch_hw; 1848 src_obj.width = pSrc->drawable.width; 1849 src_obj.height = pSrc->drawable.height; 1850 src_obj.bpp = bpp; 1851 src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; 1852 src_obj.bo = radeon_get_pixmap_bo(pSrc); 1853 src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc); 1854 src_obj.surface = radeon_get_pixmap_surface(pSrc); 1855 1856 dst_obj.pitch = scratch_pitch; 1857 dst_obj.width = w; 1858 dst_obj.height = h; 1859 dst_obj.bo = scratch; 1860 dst_obj.bpp = bpp; 1861 dst_obj.domain = RADEON_GEM_DOMAIN_GTT; 1862 dst_obj.tiling_flags = 0; 1863 dst_obj.surface = NULL; 1864 1865 if (!R600SetAccelState(pScrn, 1866 &src_obj, 1867 NULL, 1868 &dst_obj, 1869 accel_state->copy_vs_offset, accel_state->copy_ps_offset, 1870 3, 0xffffffff)) { 1871 goto copy; 1872 } 1873 1874 /* blit from vram to gart */ 1875 R600DoPrepareCopy(pScrn); 1876 R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h); 1877 R600DoCopy(pScrn); 1878 copy_src = scratch; 1879 copy_pitch = scratch_pitch * (bpp / 8); 1880 flush = TRUE; 1881 1882copy: 1883 if 
(flush) 1884 radeon_cs_flush_indirect(pScrn); 1885 1886 ret = radeon_bo_map(copy_src, 0); 1887 if (ret) { 1888 ErrorF("failed to map pixmap: %d\n", ret); 1889 r = FALSE; 1890 goto out; 1891 } 1892 r = TRUE; 1893 w *= bpp / 8; 1894 if (copy_src == driver_priv->bo) 1895 size = y * copy_pitch + x * bpp / 8; 1896 else 1897 size = 0; 1898 while (h--) { 1899 memcpy(dst, copy_src->ptr + size, w); 1900 size += copy_pitch; 1901 dst += dst_pitch; 1902 } 1903 radeon_bo_unmap(copy_src); 1904out: 1905 if (scratch) 1906 radeon_bo_unref(scratch); 1907 return r; 1908} 1909 1910static int 1911R600MarkSync(ScreenPtr pScreen) 1912{ 1913 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen); 1914 RADEONInfoPtr info = RADEONPTR(pScrn); 1915 struct radeon_accel_state *accel_state = info->accel_state; 1916 1917 return ++accel_state->exaSyncMarker; 1918 1919} 1920 1921static void 1922R600Sync(ScreenPtr pScreen, int marker) 1923{ 1924 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen); 1925 RADEONInfoPtr info = RADEONPTR(pScrn); 1926 struct radeon_accel_state *accel_state = info->accel_state; 1927 1928 if (accel_state->exaMarkerSynced != marker) { 1929 accel_state->exaMarkerSynced = marker; 1930 } 1931 1932} 1933 1934static Bool 1935R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) 1936{ 1937 RADEONInfoPtr info = RADEONPTR(pScrn); 1938 struct radeon_accel_state *accel_state = info->accel_state; 1939 1940 /* 512 bytes per shader for now */ 1941 int size = 512 * 9; 1942 1943 accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0, 1944 RADEON_GEM_DOMAIN_VRAM, 0); 1945 if (accel_state->shaders_bo == NULL) { 1946 ErrorF("Allocating shader failed\n"); 1947 return FALSE; 1948 } 1949 return TRUE; 1950} 1951 1952Bool 1953R600LoadShaders(ScrnInfoPtr pScrn) 1954{ 1955 RADEONInfoPtr info = RADEONPTR(pScrn); 1956 struct radeon_accel_state *accel_state = info->accel_state; 1957 RADEONChipFamily ChipSet = info->ChipFamily; 1958 uint32_t *shader; 1959 int ret; 1960 1961 ret = 
radeon_bo_map(accel_state->shaders_bo, 1); 1962 if (ret) { 1963 FatalError("failed to map shader %d\n", ret); 1964 return FALSE; 1965 } 1966 shader = accel_state->shaders_bo->ptr; 1967 1968 /* solid vs --------------------------------------- */ 1969 accel_state->solid_vs_offset = 0; 1970 R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4); 1971 1972 /* solid ps --------------------------------------- */ 1973 accel_state->solid_ps_offset = 512; 1974 R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4); 1975 1976 /* copy vs --------------------------------------- */ 1977 accel_state->copy_vs_offset = 1024; 1978 R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4); 1979 1980 /* copy ps --------------------------------------- */ 1981 accel_state->copy_ps_offset = 1536; 1982 R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4); 1983 1984 /* comp vs --------------------------------------- */ 1985 accel_state->comp_vs_offset = 2048; 1986 R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4); 1987 1988 /* comp ps --------------------------------------- */ 1989 accel_state->comp_ps_offset = 2560; 1990 R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4); 1991 1992 /* xv vs --------------------------------------- */ 1993 accel_state->xv_vs_offset = 3072; 1994 R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4); 1995 1996 /* xv ps --------------------------------------- */ 1997 accel_state->xv_ps_offset = 3584; 1998 R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4); 1999 2000 radeon_bo_unmap(accel_state->shaders_bo); 2001 return TRUE; 2002} 2003 2004Bool 2005R600DrawInit(ScreenPtr pScreen) 2006{ 2007 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen); 2008 RADEONInfoPtr info = RADEONPTR(pScrn); 2009 2010 if (info->accel_state->exa == NULL) { 2011 xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n"); 2012 return FALSE; 2013 } 2014 2015 info->accel_state->exa->exa_major = 
EXA_VERSION_MAJOR; 2016 info->accel_state->exa->exa_minor = EXA_VERSION_MINOR; 2017 2018 info->accel_state->exa->PrepareSolid = R600PrepareSolid; 2019 info->accel_state->exa->Solid = R600Solid; 2020 info->accel_state->exa->DoneSolid = R600DoneSolid; 2021 2022 info->accel_state->exa->PrepareCopy = R600PrepareCopy; 2023 info->accel_state->exa->Copy = R600Copy; 2024 info->accel_state->exa->DoneCopy = R600DoneCopy; 2025 2026 info->accel_state->exa->MarkSync = R600MarkSync; 2027 info->accel_state->exa->WaitMarker = R600Sync; 2028 2029 info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap; 2030 info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap; 2031 info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen; 2032 info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS; 2033 info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS; 2034 info->accel_state->exa->UploadToScreen = R600UploadToScreenCS; 2035 info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS; 2036 info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2; 2037#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 6) 2038 info->accel_state->exa->SharePixmapBacking = RADEONEXASharePixmapBacking; 2039 info->accel_state->exa->SetSharedPixmapBacking = RADEONEXASetSharedPixmapBacking; 2040#endif 2041 info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS | EXA_SUPPORTS_PREPARE_AUX | 2042 EXA_HANDLES_PIXMAPS | EXA_MIXED_PIXMAPS; 2043 info->accel_state->exa->pixmapOffsetAlign = 256; 2044 info->accel_state->exa->pixmapPitchAlign = 256; 2045 2046 info->accel_state->exa->CheckComposite = R600CheckComposite; 2047 info->accel_state->exa->PrepareComposite = R600PrepareComposite; 2048 info->accel_state->exa->Composite = R600Composite; 2049 info->accel_state->exa->DoneComposite = R600DoneComposite; 2050 2051 info->accel_state->exa->maxPitchBytes = 32768; 2052 info->accel_state->exa->maxX = 8192; 2053 info->accel_state->exa->maxY = 8192; 2054 2055 /* not supported 
yet */ 2056 if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) { 2057 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n"); 2058 info->accel_state->vsync = TRUE; 2059 } else 2060 info->accel_state->vsync = FALSE; 2061 2062 if (!exaDriverInit(pScreen, info->accel_state->exa)) { 2063 free(info->accel_state->exa); 2064 return FALSE; 2065 } 2066 2067 info->accel_state->XInited3D = FALSE; 2068 info->accel_state->src_obj[0].bo = NULL; 2069 info->accel_state->src_obj[1].bo = NULL; 2070 info->accel_state->dst_obj.bo = NULL; 2071 info->accel_state->copy_area_bo = NULL; 2072 info->accel_state->vbo.vb_start_op = -1; 2073 info->accel_state->finish_op = r600_finish_op; 2074 info->accel_state->vbo.verts_per_op = 3; 2075 RADEONVlineHelperClear(pScrn); 2076 2077 radeon_vbo_init_lists(pScrn); 2078 2079 if (!R600AllocShaders(pScrn, pScreen)) 2080 return FALSE; 2081 2082 if (!R600LoadShaders(pScrn)) 2083 return FALSE; 2084 2085 exaMarkSync(pScreen); 2086 2087 return TRUE; 2088 2089} 2090 2091