1/* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Author: Alex Deucher <alexander.deucher@amd.com> 24 * 25 */ 26 27#ifdef HAVE_CONFIG_H 28#include "config.h" 29#endif 30 31#include "xf86.h" 32 33#include "exa.h" 34 35#include "radeon.h" 36#include "radeon_reg.h" 37#include "r600_shader.h" 38#include "r600_reg.h" 39#include "r600_state.h" 40#include "radeon_exa_shared.h" 41#include "radeon_vbo.h" 42 43/* #define SHOW_VERTEXES */ 44 45Bool 46R600SetAccelState(ScrnInfoPtr pScrn, 47 struct r600_accel_object *src0, 48 struct r600_accel_object *src1, 49 struct r600_accel_object *dst, 50 uint32_t vs_offset, uint32_t ps_offset, 51 int rop, Pixel planemask) 52{ 53 RADEONInfoPtr info = RADEONPTR(pScrn); 54 struct radeon_accel_state *accel_state = info->accel_state; 55 uint32_t pitch_align = 0x7; 56 int ret; 57 58 if (src0) { 59 memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object)); 60 accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8); 61 if (src0->surface) 62 accel_state->src_size[0] = src0->surface->bo_size; 63 64 /* bad pitch */ 65 if (accel_state->src_obj[0].pitch & pitch_align) 66 RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch)); 67 68 } else { 69 memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object)); 70 accel_state->src_size[0] = 0; 71 } 72 73 if (src1) { 74 memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object)); 75 accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8); 76 if (src1->surface) { 77 accel_state->src_size[1] = src1->surface->bo_size; 78 } 79 80 /* bad pitch */ 81 if (accel_state->src_obj[1].pitch & pitch_align) 82 RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch)); 83 84 } else { 85 memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object)); 86 accel_state->src_size[1] = 0; 87 } 88 89 if (dst) { 90 memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object)); 91 accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8); 92 if (dst->surface) { 93 accel_state->dst_size = dst->surface->bo_size; 94 } else 95 { 96 accel_state->dst_obj.tiling_flags = 0; 97 } 98 if (accel_state->dst_obj.pitch & pitch_align) 99 RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch)); 100 101 } else { 102 memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object)); 103 accel_state->dst_size = 0; 104 } 105 106 if (CS_FULL(info->cs)) 107 radeon_cs_flush_indirect(pScrn); 108 109 accel_state->rop = rop; 110 accel_state->planemask = planemask; 111 112 accel_state->vs_size = 512; 113 accel_state->ps_size = 512; 114 accel_state->vs_mc_addr = vs_offset; 115 accel_state->ps_mc_addr = ps_offset; 116 117 radeon_cs_space_reset_bos(info->cs); 118 radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo, 119 RADEON_GEM_DOMAIN_VRAM, 0); 120 if (accel_state->src_obj[0].bo) 121 radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo, 122 accel_state->src_obj[0].domain, 0); 123 if (accel_state->src_obj[1].bo) 124 radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo, 125 accel_state->src_obj[1].domain, 0); 126 if (accel_state->dst_obj.bo) 127 radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo, 128 0, accel_state->dst_obj.domain); 129 ret = radeon_cs_space_check(info->cs); 130 if (ret) 131 RADEON_FALLBACK(("Not enough RAM to hw accel operation\n")); 132 133 return TRUE; 134} 135 136static Bool 137R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) 138{ 139 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); 140 RADEONInfoPtr info = RADEONPTR(pScrn); 141 struct radeon_accel_state *accel_state = info->accel_state; 142 cb_config_t cb_conf; 143 shader_config_t vs_conf, ps_conf; 144 uint32_t a, r, g, b; 145 float ps_alu_consts[4]; 146 struct r600_accel_object dst; 147 148 if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel)) 149 RADEON_FALLBACK(("R600CheckDatatype failed\n")); 150 if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel)) 151 RADEON_FALLBACK(("invalid planemask\n")); 152 153 dst.bo = radeon_get_pixmap_bo(pPix)->bo.radeon; 154 dst.tiling_flags = radeon_get_pixmap_tiling(pPix); 155 dst.surface = radeon_get_pixmap_surface(pPix); 156 157 dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); 158 dst.width = pPix->drawable.width; 159 dst.height = pPix->drawable.height; 160 dst.bpp = pPix->drawable.bitsPerPixel; 161 dst.domain = RADEON_GEM_DOMAIN_VRAM; 162 163 if (!R600SetAccelState(pScrn, 164 NULL, 165 NULL, 166 &dst, 167 accel_state->solid_vs_offset, accel_state->solid_ps_offset, 168 alu, pm)) 169 return FALSE; 170 171 CLEAR (cb_conf); 172 CLEAR (vs_conf); 173 CLEAR (ps_conf); 174 175 radeon_vbo_check(pScrn, &accel_state->vbo, 16); 176 radeon_cp_start(pScrn); 177 178 r600_set_default_state(pScrn); 179 180 r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 181 r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 182 r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 183 184 /* Shader */ 185 vs_conf.shader_addr = accel_state->vs_mc_addr; 186 vs_conf.shader_size = accel_state->vs_size; 187 vs_conf.num_gprs = 2; 188 vs_conf.stack_size = 0; 189 vs_conf.bo = accel_state->shaders_bo; 190 r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM); 191 192 ps_conf.shader_addr = accel_state->ps_mc_addr; 193 ps_conf.shader_size = accel_state->ps_size; 194 ps_conf.num_gprs = 1; 195 ps_conf.stack_size = 0; 196 ps_conf.uncached_first_inst = 1; 197 ps_conf.clamp_consts = 0; 198 ps_conf.export_mode = 2; 199 ps_conf.bo = accel_state->shaders_bo; 200 r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM); 201 202 cb_conf.id = 0; 203 cb_conf.w = accel_state->dst_obj.pitch; 204 cb_conf.h = accel_state->dst_obj.height; 205 cb_conf.base = 0; 206 cb_conf.bo = accel_state->dst_obj.bo; 207 cb_conf.surface = accel_state->dst_obj.surface; 208 209 if (accel_state->dst_obj.bpp == 8) { 210 cb_conf.format = COLOR_8; 211 cb_conf.comp_swap = 3; /* A */ 212 } else if (accel_state->dst_obj.bpp == 16) { 213 cb_conf.format = COLOR_5_6_5; 214 cb_conf.comp_swap = 2; /* RGB */ 215#if X_BYTE_ORDER == X_BIG_ENDIAN 216 cb_conf.endian = ENDIAN_8IN16; 217#endif 218 } else { 219 cb_conf.format = COLOR_8_8_8_8; 220 cb_conf.comp_swap = 1; /* ARGB */ 221#if X_BYTE_ORDER == X_BIG_ENDIAN 222 cb_conf.endian = ENDIAN_8IN32; 223#endif 224 } 225 cb_conf.source_format = 1; 226 cb_conf.blend_clamp = 1; 227 /* Render setup */ 228 if (accel_state->planemask & 0x000000ff) 229 cb_conf.pmask |= 4; /* B */ 230 if (accel_state->planemask & 0x0000ff00) 231 cb_conf.pmask |= 2; /* G */ 232 if (accel_state->planemask & 0x00ff0000) 233 cb_conf.pmask |= 1; /* R */ 234 if (accel_state->planemask & 0xff000000) 235 cb_conf.pmask |= 8; /* A */ 236 cb_conf.rop = accel_state->rop; 237 if (accel_state->dst_obj.tiling_flags == 0) 238 cb_conf.array_mode = 0; 239 r600_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain); 240 241 r600_set_spi(pScrn, 0, 0); 242 243 /* PS alu constants */ 244 if (accel_state->dst_obj.bpp == 16) { 245 r = (fg >> 11) & 0x1f; 246 g = (fg >> 5) & 0x3f; 247 b = (fg >> 0) & 0x1f; 248 ps_alu_consts[0] = (float)r / 31; /* R */ 249 ps_alu_consts[1] = (float)g / 63; /* G */ 250 ps_alu_consts[2] = (float)b / 31; /* B */ 251 ps_alu_consts[3] = 1.0; /* A */ 252 } else if (accel_state->dst_obj.bpp == 8) { 253 a = (fg >> 0) & 0xff; 254 ps_alu_consts[0] = 0.0; /* R */ 255 ps_alu_consts[1] = 0.0; /* G */ 256 ps_alu_consts[2] = 0.0; /* B */ 257 ps_alu_consts[3] = (float)a / 255; /* A */ 258 } else { 259 a = (fg >> 24) & 0xff; 260 r = (fg >> 16) & 0xff; 261 g = (fg >> 8) & 0xff; 262 b = (fg >> 0) & 0xff; 263 ps_alu_consts[0] = (float)r / 255; /* R */ 264 ps_alu_consts[1] = (float)g / 255; /* G */ 265 ps_alu_consts[2] = (float)b / 255; /* B */ 266 ps_alu_consts[3] = (float)a / 255; /* A */ 267 } 268 r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_ps, 269 sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); 270 271 if (accel_state->vsync) 272 RADEONVlineHelperClear(pScrn); 273 274 accel_state->dst_pix = pPix; 275 accel_state->fg = fg; 276 277 return TRUE; 278} 279 280static void 281R600DoneSolid(PixmapPtr pPix) 282{ 283 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); 284 RADEONInfoPtr info = RADEONPTR(pScrn); 285 struct radeon_accel_state *accel_state = info->accel_state; 286 287 if (accel_state->vsync) 288 r600_cp_wait_vline_sync(pScrn, pPix, 289 accel_state->vline_crtc, 290 accel_state->vline_y1, 291 accel_state->vline_y2); 292 293 r600_finish_op(pScrn, 8); 294} 295 296static void 297R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) 298{ 299 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); 300 RADEONInfoPtr info = RADEONPTR(pScrn); 301 struct radeon_accel_state *accel_state = info->accel_state; 302 float *vb; 303 304 if (CS_FULL(info->cs)) { 305 R600DoneSolid(info->accel_state->dst_pix); 306 radeon_cs_flush_indirect(pScrn); 307 R600PrepareSolid(accel_state->dst_pix, 308 accel_state->rop, 309 accel_state->planemask, 310 accel_state->fg); 311 } 312 313 if (accel_state->vsync) 314 RADEONVlineHelperSet(pScrn, x1, y1, x2, y2); 315 316 vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8); 317 318 vb[0] = (float)x1; 319 vb[1] = (float)y1; 320 321 vb[2] = (float)x1; 322 vb[3] = (float)y2; 323 324 vb[4] = (float)x2; 325 vb[5] = (float)y2; 326 327 radeon_vbo_commit(pScrn, &accel_state->vbo); 328} 329 330static void 331R600DoPrepareCopy(ScrnInfoPtr pScrn) 332{ 333 RADEONInfoPtr info = RADEONPTR(pScrn); 334 struct radeon_accel_state *accel_state = info->accel_state; 335 cb_config_t cb_conf; 336 tex_resource_t tex_res; 337 tex_sampler_t tex_samp; 338 shader_config_t vs_conf, ps_conf; 339 340 CLEAR (cb_conf); 341 CLEAR (tex_res); 342 CLEAR (tex_samp); 343 CLEAR (vs_conf); 344 CLEAR (ps_conf); 345 346 radeon_vbo_check(pScrn, &accel_state->vbo, 16); 347 radeon_cp_start(pScrn); 348 349 r600_set_default_state(pScrn); 350 351 r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 352 r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 353 r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 354 355 /* Shader */ 356 vs_conf.shader_addr = accel_state->vs_mc_addr; 357 vs_conf.shader_size = accel_state->vs_size; 358 vs_conf.num_gprs = 2; 359 vs_conf.stack_size = 0; 360 vs_conf.bo = accel_state->shaders_bo; 361 r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM); 362 363 ps_conf.shader_addr = accel_state->ps_mc_addr; 364 ps_conf.shader_size = accel_state->ps_size; 365 ps_conf.num_gprs = 1; 366 ps_conf.stack_size = 0; 367 ps_conf.uncached_first_inst = 1; 368 ps_conf.clamp_consts = 0; 369 ps_conf.export_mode = 2; 370 ps_conf.bo = accel_state->shaders_bo; 371 r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM); 372 373 /* Texture */ 374 tex_res.id = 0; 375 tex_res.w = accel_state->src_obj[0].width; 376 tex_res.h = accel_state->src_obj[0].height; 377 tex_res.pitch = accel_state->src_obj[0].pitch; 378 tex_res.depth = 0; 379 tex_res.dim = SQ_TEX_DIM_2D; 380 tex_res.base = 0; 381 tex_res.mip_base = 0; 382 tex_res.size = accel_state->src_size[0]; 383 tex_res.bo = accel_state->src_obj[0].bo; 384 tex_res.mip_bo = accel_state->src_obj[0].bo; 385 tex_res.surface = accel_state->src_obj[0].surface; 386 if (accel_state->src_obj[0].bpp == 8) { 387 tex_res.format = FMT_8; 388 tex_res.dst_sel_x = SQ_SEL_1; /* R */ 389 tex_res.dst_sel_y = SQ_SEL_1; /* G */ 390 tex_res.dst_sel_z = SQ_SEL_1; /* B */ 391 tex_res.dst_sel_w = SQ_SEL_X; /* A */ 392 } else if (accel_state->src_obj[0].bpp == 16) { 393 tex_res.format = FMT_5_6_5; 394 tex_res.dst_sel_x = SQ_SEL_Z; /* R */ 395 tex_res.dst_sel_y = SQ_SEL_Y; /* G */ 396 tex_res.dst_sel_z = SQ_SEL_X; /* B */ 397 tex_res.dst_sel_w = SQ_SEL_1; /* A */ 398 } else { 399 tex_res.format = FMT_8_8_8_8; 400 tex_res.dst_sel_x = SQ_SEL_Z; /* R */ 401 tex_res.dst_sel_y = SQ_SEL_Y; /* G */ 402 tex_res.dst_sel_z = SQ_SEL_X; /* B */ 403 tex_res.dst_sel_w = SQ_SEL_W; /* A */ 404 } 405 406 tex_res.request_size = 1; 407 tex_res.base_level = 0; 408 tex_res.last_level = 0; 409 tex_res.perf_modulation = 0; 410 if (accel_state->src_obj[0].tiling_flags == 0) 411 tex_res.tile_mode = 1; 412 r600_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain); 413 414 tex_samp.id = 0; 415 tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; 416 tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; 417 tex_samp.clamp_z = SQ_TEX_WRAP; 418 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; 419 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; 420 tex_samp.mc_coord_truncate = 1; 421 tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 422 tex_samp.mip_filter = 0; /* no mipmap */ 423 r600_set_tex_sampler(pScrn, &tex_samp); 424 425 cb_conf.id = 0; 426 cb_conf.w = accel_state->dst_obj.pitch; 427 cb_conf.h = accel_state->dst_obj.height; 428 cb_conf.base = 0; 429 cb_conf.bo = accel_state->dst_obj.bo; 430 cb_conf.surface = accel_state->dst_obj.surface; 431 if (accel_state->dst_obj.bpp == 8) { 432 cb_conf.format = COLOR_8; 433 cb_conf.comp_swap = 3; /* A */ 434 } else if (accel_state->dst_obj.bpp == 16) { 435 cb_conf.format = COLOR_5_6_5; 436 cb_conf.comp_swap = 2; /* RGB */ 437 } else { 438 cb_conf.format = COLOR_8_8_8_8; 439 cb_conf.comp_swap = 1; /* ARGB */ 440 } 441 cb_conf.source_format = 1; 442 cb_conf.blend_clamp = 1; 443 444 /* Render setup */ 445 if (accel_state->planemask & 0x000000ff) 446 cb_conf.pmask |= 4; /* B */ 447 if (accel_state->planemask & 0x0000ff00) 448 cb_conf.pmask |= 2; /* G */ 449 if (accel_state->planemask & 0x00ff0000) 450 cb_conf.pmask |= 1; /* R */ 451 if (accel_state->planemask & 0xff000000) 452 cb_conf.pmask |= 8; /* A */ 453 cb_conf.rop = accel_state->rop; 454 if (accel_state->dst_obj.tiling_flags == 0) 455 cb_conf.array_mode = 0; 456 r600_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain); 457 458 r600_set_spi(pScrn, (1 - 1), 1); 459 460} 461 462static void 463R600DoCopy(ScrnInfoPtr pScrn) 464{ 465 r600_finish_op(pScrn, 16); 466} 467 468static void 469R600DoCopyVline(PixmapPtr pPix) 470{ 471 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); 472 RADEONInfoPtr info = RADEONPTR(pScrn); 473 struct radeon_accel_state *accel_state = info->accel_state; 474 475 if (accel_state->vsync) 476 r600_cp_wait_vline_sync(pScrn, pPix, 477 accel_state->vline_crtc, 478 accel_state->vline_y1, 479 accel_state->vline_y2); 480 481 r600_finish_op(pScrn, 16); 482} 483 484static void 485R600AppendCopyVertex(ScrnInfoPtr pScrn, 486 int srcX, int srcY, 487 int dstX, int dstY, 488 int w, int h) 489{ 490 RADEONInfoPtr info = RADEONPTR(pScrn); 491 struct radeon_accel_state *accel_state = info->accel_state; 492 float *vb; 493 494 vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16); 495 496 vb[0] = (float)dstX; 497 vb[1] = (float)dstY; 498 vb[2] = (float)srcX; 499 vb[3] = (float)srcY; 500 501 vb[4] = (float)dstX; 502 vb[5] = (float)(dstY + h); 503 vb[6] = (float)srcX; 504 vb[7] = (float)(srcY + h); 505 506 vb[8] = (float)(dstX + w); 507 vb[9] = (float)(dstY + h); 508 vb[10] = (float)(srcX + w); 509 vb[11] = (float)(srcY + h); 510 511 radeon_vbo_commit(pScrn, &accel_state->vbo); 512} 513 514static Bool 515R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, 516 int xdir, int ydir, 517 int rop, 518 Pixel planemask) 519{ 520 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen); 521 RADEONInfoPtr info = RADEONPTR(pScrn); 522 struct radeon_accel_state *accel_state = info->accel_state; 523 struct r600_accel_object src_obj, dst_obj; 524 525 if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel)) 526 RADEON_FALLBACK(("R600CheckDatatype src failed\n")); 527 if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel)) 528 RADEON_FALLBACK(("R600CheckDatatype dst failed\n")); 529 if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel)) 530 RADEON_FALLBACK(("Invalid planemask\n")); 531 532 dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); 533 src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); 534 535 accel_state->same_surface = FALSE; 536 537 src_obj.bo = radeon_get_pixmap_bo(pSrc)->bo.radeon; 538 dst_obj.bo = radeon_get_pixmap_bo(pDst)->bo.radeon; 539 dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst); 540 src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc); 541 src_obj.surface = radeon_get_pixmap_surface(pSrc); 542 dst_obj.surface = radeon_get_pixmap_surface(pDst); 543 if (src_obj.bo == dst_obj.bo) 544 accel_state->same_surface = TRUE; 545 546 src_obj.width = pSrc->drawable.width; 547 src_obj.height = pSrc->drawable.height; 548 src_obj.bpp = pSrc->drawable.bitsPerPixel; 549 src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; 550 551 dst_obj.width = pDst->drawable.width; 552 dst_obj.height = pDst->drawable.height; 553 dst_obj.bpp = pDst->drawable.bitsPerPixel; 554 if (radeon_get_pixmap_shared(pDst) == TRUE) { 555 dst_obj.domain = RADEON_GEM_DOMAIN_GTT; 556 } else 557 dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 558 559 if (!R600SetAccelState(pScrn, 560 &src_obj, 561 NULL, 562 &dst_obj, 563 accel_state->copy_vs_offset, accel_state->copy_ps_offset, 564 rop, planemask)) 565 return FALSE; 566 567 if (accel_state->same_surface == TRUE) { 568 unsigned long size = accel_state->dst_obj.surface->bo_size; 569 unsigned long align = accel_state->dst_obj.surface->bo_alignment; 570 571 if (accel_state->copy_area_bo) { 572 radeon_bo_unref(accel_state->copy_area_bo); 573 accel_state->copy_area_bo = NULL; 574 } 575 accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, align, 576 RADEON_GEM_DOMAIN_VRAM, 577 0); 578 if (!accel_state->copy_area_bo) 579 RADEON_FALLBACK(("temp copy surface alloc failed\n")); 580 581 radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo, 582 0, RADEON_GEM_DOMAIN_VRAM); 583 if (radeon_cs_space_check(info->cs)) { 584 radeon_bo_unref(accel_state->copy_area_bo); 585 accel_state->copy_area_bo = NULL; 586 return FALSE; 587 } 588 accel_state->copy_area = (void*)accel_state->copy_area_bo; 589 } else 590 R600DoPrepareCopy(pScrn); 591 592 if (accel_state->vsync) 593 RADEONVlineHelperClear(pScrn); 594 595 accel_state->dst_pix = pDst; 596 accel_state->src_pix = pSrc; 597 accel_state->xdir = xdir; 598 accel_state->ydir = ydir; 599 600 return TRUE; 601} 602 603static void 604R600DoneCopy(PixmapPtr pDst) 605{ 606 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen); 607 RADEONInfoPtr info = RADEONPTR(pScrn); 608 struct radeon_accel_state *accel_state = info->accel_state; 609 610 if (!accel_state->same_surface) 611 R600DoCopyVline(pDst); 612 613 if (accel_state->copy_area) { 614 accel_state->copy_area = NULL; 615 } 616 617} 618 619static void 620R600Copy(PixmapPtr pDst, 621 int srcX, int srcY, 622 int dstX, int dstY, 623 int w, int h) 624{ 625 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen); 626 RADEONInfoPtr info = RADEONPTR(pScrn); 627 struct radeon_accel_state *accel_state = info->accel_state; 628 629 if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY)) 630 return; 631 632 if (CS_FULL(info->cs)) { 633 R600DoneCopy(info->accel_state->dst_pix); 634 radeon_cs_flush_indirect(pScrn); 635 R600PrepareCopy(accel_state->src_pix, 636 accel_state->dst_pix, 637 accel_state->xdir, 638 accel_state->ydir, 639 accel_state->rop, 640 accel_state->planemask); 641 } 642 643 if (accel_state->vsync) 644 RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h); 645 646 if (accel_state->same_surface && 647 (srcX + w <= dstX || dstX + w <= srcX || srcY + h <= dstY || dstY + h <= srcY)) { 648 R600DoPrepareCopy(pScrn); 649 R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); 650 R600DoCopyVline(pDst); 651 } else if (accel_state->same_surface && accel_state->copy_area) { 652 uint32_t orig_dst_domain = accel_state->dst_obj.domain; 653 uint32_t orig_src_domain = accel_state->src_obj[0].domain; 654 uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags; 655 uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags; 656 struct radeon_bo *orig_bo = accel_state->dst_obj.bo; 657 int orig_rop = accel_state->rop; 658 659 /* src to tmp */ 660 accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 661 accel_state->dst_obj.bo = accel_state->copy_area_bo; 662 accel_state->dst_obj.tiling_flags = 0; 663 accel_state->rop = 3; 664 R600DoPrepareCopy(pScrn); 665 R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); 666 R600DoCopy(pScrn); 667 668 /* tmp to dst */ 669 accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM; 670 accel_state->src_obj[0].bo = accel_state->copy_area_bo; 671 accel_state->src_obj[0].tiling_flags = 0; 672 accel_state->dst_obj.domain = orig_dst_domain; 673 accel_state->dst_obj.bo = orig_bo; 674 accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags; 675 accel_state->rop = orig_rop; 676 R600DoPrepareCopy(pScrn); 677 R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h); 678 R600DoCopyVline(pDst); 679 680 /* restore state */ 681 accel_state->src_obj[0].domain = orig_src_domain; 682 accel_state->src_obj[0].bo = orig_bo; 683 accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags; 684 } else 685 R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); 686 687} 688 689struct blendinfo { 690 Bool dst_alpha; 691 Bool src_alpha; 692 uint32_t blend_cntl; 693}; 694 695static struct blendinfo R600BlendOp[] = { 696 /* Clear */ 697 {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 698 /* Src */ 699 {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 700 /* Dst */ 701 {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, 702 /* Over */ 703 {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 704 /* OverReverse */ 705 {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, 706 /* In */ 707 {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 708 /* InReverse */ 709 {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 710 /* Out */ 711 {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)}, 712 /* OutReverse */ 713 {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 714 /* Atop */ 715 {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 716 /* AtopReverse */ 717 {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 718 /* Xor */ 719 {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)}, 720 /* Add */ 721 {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)}, 722}; 723 724struct formatinfo { 725 unsigned int fmt; 726 uint32_t card_fmt; 727}; 728 729static struct formatinfo R600TexFormats[] = { 730 {PICT_a2r10g10b10, FMT_2_10_10_10}, 731 {PICT_x2r10g10b10, FMT_2_10_10_10}, 732 {PICT_a2b10g10r10, FMT_2_10_10_10}, 733 {PICT_x2b10g10r10, FMT_2_10_10_10}, 734 {PICT_a8r8g8b8, FMT_8_8_8_8}, 735 {PICT_x8r8g8b8, FMT_8_8_8_8}, 736 {PICT_a8b8g8r8, FMT_8_8_8_8}, 737 {PICT_x8b8g8r8, FMT_8_8_8_8}, 738 {PICT_b8g8r8a8, FMT_8_8_8_8}, 739 {PICT_b8g8r8x8, FMT_8_8_8_8}, 740 {PICT_r5g6b5, FMT_5_6_5}, 741 {PICT_a1r5g5b5, FMT_1_5_5_5}, 742 {PICT_x1r5g5b5, FMT_1_5_5_5}, 743 {PICT_a8, FMT_8}, 744}; 745 746static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format) 747{ 748 uint32_t sblend, dblend; 749 750 sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask; 751 dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask; 752 753 /* If there's no dst alpha channel, adjust the blend op so that we'll treat 754 * it as always 1. 755 */ 756 if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) { 757 if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift)) 758 sblend = (BLEND_ONE << COLOR_SRCBLEND_shift); 759 else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift)) 760 sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift); 761 } 762 763 /* If the source alpha is being used, then we should only be in a case where 764 * the source blend factor is 0, and the source blend value is the mask 765 * channels multiplied by the source picture's alpha. 766 */ 767 if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) { 768 if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) { 769 dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift); 770 } else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) { 771 dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift); 772 } 773 774 /* With some tricks, we can still accelerate PictOpOver with solid src. 775 * This is commonly used for text rendering, so it's worth the extra 776 * effort. 777 */ 778 if (sblend == (BLEND_ONE << COLOR_SRCBLEND_shift)) { 779 sblend = (BLEND_CONSTANT_COLOR << COLOR_SRCBLEND_shift); 780 } 781 } 782 783 return sblend | dblend; 784} 785 786static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format) 787{ 788 switch (pDstPicture->format) { 789 case PICT_a2r10g10b10: 790 case PICT_x2r10g10b10: 791 case PICT_a2b10g10r10: 792 case PICT_x2b10g10r10: 793 *dst_format = COLOR_2_10_10_10; 794 break; 795 case PICT_a8r8g8b8: 796 case PICT_x8r8g8b8: 797 case PICT_a8b8g8r8: 798 case PICT_x8b8g8r8: 799 case PICT_b8g8r8a8: 800 case PICT_b8g8r8x8: 801 *dst_format = COLOR_8_8_8_8; 802 break; 803 case PICT_r5g6b5: 804 *dst_format = COLOR_5_6_5; 805 break; 806 case PICT_a1r5g5b5: 807 case PICT_x1r5g5b5: 808 *dst_format = COLOR_1_5_5_5; 809 break; 810 case PICT_a8: 811 *dst_format = COLOR_8; 812 break; 813 default: 814 RADEON_FALLBACK(("Unsupported dest format 0x%x\n", 815 (int)pDstPicture->format)); 816 } 817 return TRUE; 818} 819 820static Bool R600CheckCompositeTexture(PicturePtr pPict, 821 PicturePtr pDstPict, 822 int op, 823 int unit) 824{ 825 unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone; 826 unsigned int i; 827 828 for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) { 829 if (R600TexFormats[i].fmt == pPict->format) 830 break; 831 } 832 if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0])) 833 RADEON_FALLBACK(("Unsupported picture format 0x%x\n", 834 (int)pPict->format)); 835 836 if (pPict->filter != PictFilterNearest && 837 pPict->filter != PictFilterBilinear) 838 RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter)); 839 840 /* for REPEAT_NONE, Render semantics are that sampling outside the source 841 * picture results in alpha=0 pixels. We can implement this with a border color 842 * *if* our source texture has an alpha channel, otherwise we need to fall 843 * back. If we're not transformed then we hope that upper layers have clipped 844 * rendering to the bounds of the source drawable, in which case it doesn't 845 * matter. I have not, however, verified that the X server always does such 846 * clipping. 847 */ 848 /* FIXME R6xx */ 849 if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) { 850 if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0))) 851 RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n")); 852 } 853 854 if (!radeon_transform_is_affine_or_scaled(pPict->transform)) 855 RADEON_FALLBACK(("non-affine transforms not supported\n")); 856 857 return TRUE; 858} 859 860static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, 861 int unit) 862{ 863 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); 864 RADEONInfoPtr info = RADEONPTR(pScrn); 865 struct radeon_accel_state *accel_state = info->accel_state; 866 unsigned int repeatType; 867 unsigned int i; 868 tex_resource_t tex_res; 869 tex_sampler_t tex_samp; 870 int pix_r, pix_g, pix_b, pix_a; 871 float vs_alu_consts[8]; 872 873 CLEAR (tex_res); 874 CLEAR (tex_samp); 875 876 for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) { 877 if (R600TexFormats[i].fmt == pPict->format) 878 break; 879 } 880 881 /* Texture */ 882 if (pPict->pDrawable) { 883 tex_res.w = pPict->pDrawable->width; 884 tex_res.h = pPict->pDrawable->height; 885 repeatType = pPict->repeat ? pPict->repeatType : RepeatNone; 886 } else { 887 tex_res.w = 1; 888 tex_res.h = 1; 889 repeatType = RepeatNormal; 890 } 891 tex_res.id = unit; 892 tex_res.pitch = accel_state->src_obj[unit].pitch; 893 tex_res.depth = 0; 894 tex_res.dim = SQ_TEX_DIM_2D; 895 tex_res.base = 0; 896 tex_res.mip_base = 0; 897 tex_res.size = accel_state->src_size[unit]; 898 tex_res.format = R600TexFormats[i].card_fmt; 899 tex_res.bo = accel_state->src_obj[unit].bo; 900 tex_res.mip_bo = accel_state->src_obj[unit].bo; 901 tex_res.surface = accel_state->src_obj[unit].surface; 902 tex_res.request_size = 1; 903 904#if X_BYTE_ORDER == X_BIG_ENDIAN 905 switch (accel_state->src_obj[unit].bpp) { 906 case 16: 907 tex_res.endian = SQ_ENDIAN_8IN16; 908 break; 909 case 32: 910 tex_res.endian = SQ_ENDIAN_8IN32; 911 break; 912 default : 913 break; 914 } 915#endif 916 917 /* component swizzles */ 918 switch (pPict->format) { 919 case PICT_a2r10g10b10: 920 case PICT_a1r5g5b5: 921 case PICT_a8r8g8b8: 922 pix_r = SQ_SEL_Z; /* R */ 923 pix_g = SQ_SEL_Y; /* G */ 924 pix_b = SQ_SEL_X; /* B */ 925 pix_a = SQ_SEL_W; /* A */ 926 break; 927 case PICT_a2b10g10r10: 928 case PICT_a8b8g8r8: 929 pix_r = SQ_SEL_X; /* R */ 930 pix_g = SQ_SEL_Y; /* G */ 931 pix_b = SQ_SEL_Z; /* B */ 932 pix_a = SQ_SEL_W; /* A */ 933 break; 934 case PICT_x2b10g10r10: 935 case PICT_x8b8g8r8: 936 pix_r = SQ_SEL_X; /* R */ 937 pix_g = SQ_SEL_Y; /* G */ 938 pix_b = SQ_SEL_Z; /* B */ 939 pix_a = SQ_SEL_1; /* A */ 940 break; 941 case PICT_b8g8r8a8: 942 pix_r = SQ_SEL_Y; /* R */ 943 pix_g = SQ_SEL_Z; /* G */ 944 pix_b = SQ_SEL_W; /* B */ 945 pix_a = SQ_SEL_X; /* A */ 946 break; 947 case PICT_b8g8r8x8: 948 pix_r = SQ_SEL_Y; /* R */ 949 pix_g = SQ_SEL_Z; /* G */ 950 pix_b = SQ_SEL_W; /* B */ 951 pix_a = SQ_SEL_1; /* A */ 952 break; 953 case PICT_x2r10g10b10: 954 case PICT_x1r5g5b5: 955 case PICT_x8r8g8b8: 956 case PICT_r5g6b5: 957 pix_r = SQ_SEL_Z; /* R */ 958 pix_g = SQ_SEL_Y; /* G */ 959 pix_b = SQ_SEL_X; /* B */ 960 pix_a = SQ_SEL_1; /* A */ 961 break; 962 case PICT_a8: 963 pix_r = SQ_SEL_0; /* R */ 964 pix_g = SQ_SEL_0; /* G */ 965 pix_b = SQ_SEL_0; /* B */ 966 pix_a = SQ_SEL_X; /* A */ 967 break; 968 default: 969 RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format)); 970 } 971 972 if (unit == 0) { 973 if (!accel_state->msk_pic) { 974 if (PICT_FORMAT_RGB(pPict->format) == 0) { 975 pix_r = SQ_SEL_0; 976 pix_g = SQ_SEL_0; 977 pix_b = SQ_SEL_0; 978 } 979 980 if (PICT_FORMAT_A(pPict->format) == 0) 981 pix_a = SQ_SEL_1; 982 } else { 983 if (accel_state->component_alpha) { 984 if (accel_state->src_alpha) { 985 if (PICT_FORMAT_A(pPict->format) == 0) { 986 pix_r = SQ_SEL_1; 987 pix_g = SQ_SEL_1; 988 pix_b = SQ_SEL_1; 989 pix_a = SQ_SEL_1; 990 } else { 991 pix_r = pix_a; 992 pix_g = pix_a; 993 pix_b = pix_a; 994 } 995 } else { 996 if (PICT_FORMAT_A(pPict->format) == 0) 997 pix_a = SQ_SEL_1; 998 } 999 } else { 1000 if (PICT_FORMAT_RGB(pPict->format) == 0) { 1001 pix_r = SQ_SEL_0; 1002 pix_g = SQ_SEL_0; 1003 pix_b = SQ_SEL_0; 1004 } 1005 1006 if (PICT_FORMAT_A(pPict->format) == 0) 1007 pix_a = SQ_SEL_1; 1008 } 1009 } 1010 } else { 1011 if (accel_state->component_alpha) { 1012 if (PICT_FORMAT_A(pPict->format) == 0) 1013 pix_a = SQ_SEL_1; 1014 } else { 1015 if (PICT_FORMAT_A(pPict->format) == 0) { 1016 pix_r = SQ_SEL_1; 1017 pix_g = SQ_SEL_1; 1018 pix_b = SQ_SEL_1; 1019 pix_a = SQ_SEL_1; 1020 } else { 1021 pix_r = pix_a; 1022 pix_g = pix_a; 1023 pix_b = pix_a; 1024 } 1025 } 1026 } 1027 1028 tex_res.dst_sel_x = pix_r; /* R */ 1029 tex_res.dst_sel_y = pix_g; /* G */ 1030 tex_res.dst_sel_z = pix_b; /* B */ 1031 tex_res.dst_sel_w = pix_a; /* A */ 1032 1033 tex_res.base_level = 0; 1034 tex_res.last_level = 0; 1035 tex_res.perf_modulation = 0; 1036 if (accel_state->src_obj[unit].tiling_flags == 0) 1037 tex_res.tile_mode = 1; 1038 r600_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[unit].domain); 1039 1040 tex_samp.id = unit; 1041 tex_samp.border_color = SQ_TEX_BORDER_COLOR_TRANS_BLACK; 1042 1043 switch (repeatType) { 1044 case RepeatNormal: 1045 tex_samp.clamp_x = SQ_TEX_WRAP; 1046 tex_samp.clamp_y = SQ_TEX_WRAP; 1047 break; 1048 case RepeatPad: 1049 tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; 1050 tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; 1051 break; 1052 case RepeatReflect: 1053 tex_samp.clamp_x = SQ_TEX_MIRROR; 1054 tex_samp.clamp_y = SQ_TEX_MIRROR; 1055 break; 1056 case RepeatNone: 1057 tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER; 1058 tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER; 1059 break; 1060 default: 1061 RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType)); 1062 } 1063 1064 switch (pPict->filter) { 1065 case PictFilterNearest: 1066 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT; 1067 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT; 1068 tex_samp.mc_coord_truncate = 1; 1069 break; 1070 case PictFilterBilinear: 1071 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; 1072 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; 1073 break; 1074 default: 1075 RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); 1076 } 1077 1078 tex_samp.clamp_z = SQ_TEX_WRAP; 1079 tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; 1080 tex_samp.mip_filter = 0; /* no mipmap */ 1081 r600_set_tex_sampler(pScrn, &tex_samp); 1082 1083 if (pPict->transform != 0) { 1084 accel_state->is_transform[unit] = TRUE; 1085 accel_state->transform[unit] = pPict->transform; 1086 1087 vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]); 1088 vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]); 1089 vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]); 1090 vs_alu_consts[3] = 1.0 / tex_res.w; 1091 1092 vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]); 1093 vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]); 1094 vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]); 1095 vs_alu_consts[7] = 1.0 / tex_res.h; 1096 } else { 1097 accel_state->is_transform[unit] = FALSE; 1098 1099 vs_alu_consts[0] = 1.0; 1100 vs_alu_consts[1] = 0.0; 1101 vs_alu_consts[2] = 0.0; 1102 vs_alu_consts[3] = 1.0 / tex_res.w; 1103 1104 vs_alu_consts[4] = 0.0; 1105 vs_alu_consts[5] = 1.0; 1106 vs_alu_consts[6] = 0.0; 1107 vs_alu_consts[7] = 1.0 / tex_res.h; 1108 } 1109 1110 /* VS alu constants */ 1111 r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_vs + (unit * 2), 1112 sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts); 1113 1114 return TRUE; 1115} 1116 1117static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, 1118 PicturePtr pDstPicture) 1119{ 1120 uint32_t tmp1; 1121 PixmapPtr pSrcPixmap, pDstPixmap; 1122 1123 /* Check for unsupported compositing operations. */ 1124 if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0]))) 1125 RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op)); 1126 1127 if (pSrcPicture->pDrawable) { 1128 pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable); 1129 1130 if (pSrcPixmap->drawable.width >= 8192 || 1131 pSrcPixmap->drawable.height >= 8192) { 1132 RADEON_FALLBACK(("Source w/h too large (%d,%d).\n", 1133 pSrcPixmap->drawable.width, 1134 pSrcPixmap->drawable.height)); 1135 } 1136 1137 if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0)) 1138 return FALSE; 1139 } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill) 1140 RADEON_FALLBACK(("Gradient pictures not supported yet\n")); 1141 1142 pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable); 1143 1144 if (pDstPixmap->drawable.width >= 8192 || 1145 pDstPixmap->drawable.height >= 8192) { 1146 RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n", 1147 pDstPixmap->drawable.width, 1148 pDstPixmap->drawable.height)); 1149 } 1150 1151 if (pMaskPicture) { 1152 PixmapPtr pMaskPixmap; 1153 1154 if (pMaskPicture->pDrawable) { 1155 pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable); 1156 1157 if (pMaskPixmap->drawable.width >= 8192 || 1158 pMaskPixmap->drawable.height >= 8192) { 1159 RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n", 1160 pMaskPixmap->drawable.width, 1161 pMaskPixmap->drawable.height)); 1162 } 1163 1164 if (pMaskPicture->componentAlpha) { 1165 /* Check if it's component alpha that relies on a source alpha and 1166 * on the source value. We can only get one of those into the 1167 * single source value that we get to blend with. 1168 * 1169 * We can cheat a bit if the src is solid, though. PictOpOver 1170 * can use the constant blend color to sneak a second blend 1171 * source in. 1172 */ 1173 if (R600BlendOp[op].src_alpha && 1174 (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) != 1175 (BLEND_ZERO << COLOR_SRCBLEND_shift)) { 1176 if (pSrcPicture->pDrawable || op != PictOpOver) 1177 RADEON_FALLBACK(("Component alpha not supported with source " 1178 "alpha and source value blending.\n")); 1179 } 1180 } 1181 1182 if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1)) 1183 return FALSE; 1184 } else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill) 1185 RADEON_FALLBACK(("Gradient pictures not supported yet\n")); 1186 } 1187 1188 if (!R600GetDestFormat(pDstPicture, &tmp1)) 1189 return FALSE; 1190 1191 return TRUE; 1192 1193} 1194 1195static void R600SetSolidConsts(ScrnInfoPtr pScrn, float *buf, int format, uint32_t fg, int unit) 1196{ 1197 RADEONInfoPtr info = RADEONPTR(pScrn); 1198 struct radeon_accel_state *accel_state = info->accel_state; 1199 float pix_r = 0, pix_g = 0, pix_b = 0, pix_a = 0; 1200 1201 uint32_t w = (fg >> 24) & 0xff; 1202 uint32_t z = (fg >> 16) & 0xff; 1203 uint32_t y = (fg >> 8) & 0xff; 1204 uint32_t x = (fg >> 0) & 0xff; 1205 float xf = (float)x / 255; /* R */ 1206 float yf = (float)y / 255; /* G */ 1207 float zf = (float)z / 255; /* B */ 1208 float wf = (float)w / 255; /* A */ 1209 1210 /* component swizzles */ 1211 switch (format) { 1212 case PICT_a1r5g5b5: 1213 case PICT_a8r8g8b8: 1214 pix_r = zf; /* R */ 1215 pix_g = yf; /* G */ 1216 pix_b = xf; /* B */ 1217 pix_a = wf; /* A */ 1218 break; 1219 case PICT_a8b8g8r8: 1220 pix_r = xf; /* R */ 1221 pix_g = yf; /* G */ 1222 pix_b = zf; /* B */ 1223 pix_a = wf; /* A */ 1224 break; 1225 case PICT_x8b8g8r8: 1226 pix_r = xf; /* R */ 1227 pix_g = yf; /* G */ 1228 pix_b = zf; /* B */ 1229 pix_a = 1.0; /* A */ 1230 break; 1231 case PICT_b8g8r8a8: 1232 pix_r = yf; /* R */ 1233 pix_g = zf; /* G */ 1234 pix_b = wf; /* B */ 1235 pix_a = xf; /* A */ 1236 break; 1237 case PICT_b8g8r8x8: 1238 pix_r = yf; /* R */ 1239 pix_g = zf; /* G */ 1240 pix_b = wf; /* B */ 1241 pix_a = 1.0; /* A */ 1242 break; 1243 case PICT_x1r5g5b5: 1244 case PICT_x8r8g8b8: 1245 case PICT_r5g6b5: 1246 pix_r = zf; /* R */ 1247 pix_g = yf; /* G */ 1248 pix_b = xf; /* B */ 1249 pix_a = 1.0; /* A */ 1250 break; 1251 case PICT_a8: 1252 pix_r = 0.0; /* R */ 1253 pix_g = 0.0; /* G */ 1254 pix_b = 0.0; /* B */ 1255 pix_a = xf; /* A */ 1256 break; 1257 default: 1258 ErrorF("Bad format 0x%x\n", format); 1259 } 1260 1261 if (unit == 0) { 1262 if (!accel_state->msk_pic) { 1263 if (PICT_FORMAT_RGB(format) == 0) { 1264 pix_r = 0.0; 1265 pix_g = 0.0; 1266 pix_b = 0.0; 1267 } 1268 1269 if (PICT_FORMAT_A(format) == 0) 1270 pix_a = 1.0; 1271 } else { 1272 if (accel_state->component_alpha) { 1273 if (accel_state->src_alpha) { 1274 /* required for PictOpOver */ 1275 float cblend[4] = { pix_r / pix_a, pix_g / pix_a, 1276 pix_b / pix_a, pix_a / pix_a }; 1277 r600_set_blend_color(pScrn, cblend); 1278 1279 if (PICT_FORMAT_A(format) == 0) { 1280 pix_r = 1.0; 1281 pix_g = 1.0; 1282 pix_b = 1.0; 1283 pix_a = 1.0; 1284 } else { 1285 pix_r = pix_a; 1286 pix_g = pix_a; 1287 pix_b = pix_a; 1288 } 1289 } else { 1290 if (PICT_FORMAT_A(format) == 0) 1291 pix_a = 1.0; 1292 } 1293 } else { 1294 if (PICT_FORMAT_RGB(format) == 0) { 1295 pix_r = 0; 1296 pix_g = 0; 1297 pix_b = 0; 1298 } 1299 1300 if (PICT_FORMAT_A(format) == 0) 1301 pix_a = 1.0; 1302 } 1303 } 1304 } else { 1305 if (accel_state->component_alpha) { 1306 if (PICT_FORMAT_A(format) == 0) 1307 pix_a = 1.0; 1308 } else { 1309 if (PICT_FORMAT_A(format) == 0) { 1310 pix_r = 1.0; 1311 pix_g = 1.0; 1312 pix_b = 1.0; 1313 pix_a = 1.0; 1314 } else { 1315 pix_r = pix_a; 1316 pix_g = pix_a; 1317 pix_b = pix_a; 1318 } 1319 } 1320 } 1321 1322 buf[0] = pix_r; 1323 buf[1] = pix_g; 1324 buf[2] = pix_b; 1325 buf[3] = pix_a; 1326} 1327 1328static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, 1329 PicturePtr pMaskPicture, PicturePtr pDstPicture, 1330 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) 1331{ 1332 ScreenPtr pScreen = pDst->drawable.pScreen; 1333 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen); 1334 RADEONInfoPtr info = RADEONPTR(pScrn); 1335 struct radeon_accel_state *accel_state = info->accel_state; 1336 uint32_t dst_format; 1337 cb_config_t cb_conf; 1338 shader_config_t vs_conf, ps_conf; 1339 struct r600_accel_object src_obj, mask_obj, dst_obj; 1340 uint32_t ps_bool_consts = 0; 1341 float ps_alu_consts[8]; 1342 1343 if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8)) 1344 return FALSE; 1345 1346 if (pSrc) { 1347 src_obj.bo = radeon_get_pixmap_bo(pSrc)->bo.radeon; 1348 src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc); 1349 src_obj.surface = radeon_get_pixmap_surface(pSrc); 1350 src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); 1351 src_obj.width = pSrc->drawable.width; 1352 src_obj.height = pSrc->drawable.height; 1353 src_obj.bpp = pSrc->drawable.bitsPerPixel; 1354 src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; 1355 } 1356 1357 dst_obj.bo = radeon_get_pixmap_bo(pDst)->bo.radeon; 1358 dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst); 1359 dst_obj.surface = radeon_get_pixmap_surface(pDst); 1360 dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); 1361 dst_obj.width = pDst->drawable.width; 1362 dst_obj.height = pDst->drawable.height; 1363 dst_obj.bpp = pDst->drawable.bitsPerPixel; 1364 if (radeon_get_pixmap_shared(pDst) == TRUE) 1365 dst_obj.domain = RADEON_GEM_DOMAIN_GTT; 1366 else 1367 dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 1368 1369 if (pMaskPicture) { 1370 if (pMask) { 1371 mask_obj.bo = radeon_get_pixmap_bo(pMask)->bo.radeon; 1372 mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask); 1373 mask_obj.surface = radeon_get_pixmap_surface(pMask); 1374 mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8); 1375 mask_obj.width = pMask->drawable.width; 1376 mask_obj.height = pMask->drawable.height; 1377 mask_obj.bpp = pMask->drawable.bitsPerPixel; 1378 mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; 1379 } 1380 1381 accel_state->msk_pic = pMaskPicture; 1382 if (pMaskPicture->componentAlpha) { 1383 accel_state->component_alpha = TRUE; 1384 if (R600BlendOp[op].src_alpha) 1385 accel_state->src_alpha = TRUE; 1386 else 1387 accel_state->src_alpha = FALSE; 1388 } else { 1389 accel_state->component_alpha = FALSE; 1390 accel_state->src_alpha = FALSE; 1391 } 1392 } else { 1393 accel_state->msk_pic = NULL; 1394 accel_state->component_alpha = FALSE; 1395 accel_state->src_alpha = FALSE; 1396 } 1397 1398 if (!R600SetAccelState(pScrn, 1399 pSrc ? &src_obj : NULL, 1400 (pMaskPicture && pMask) ? &mask_obj : NULL, 1401 &dst_obj, 1402 accel_state->comp_vs_offset, accel_state->comp_ps_offset, 1403 3, 0xffffffff)) 1404 return FALSE; 1405 1406 if (!R600GetDestFormat(pDstPicture, &dst_format)) 1407 return FALSE; 1408 1409 CLEAR (cb_conf); 1410 CLEAR (vs_conf); 1411 CLEAR (ps_conf); 1412 1413 if (pMask) 1414 radeon_vbo_check(pScrn, &accel_state->vbo, 24); 1415 else 1416 radeon_vbo_check(pScrn, &accel_state->vbo, 16); 1417 1418 radeon_cp_start(pScrn); 1419 1420 r600_set_default_state(pScrn); 1421 1422 r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 1423 r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 1424 r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); 1425 1426 if (pSrc) { 1427 if (!R600TextureSetup(pSrcPicture, pSrc, 0)) { 1428 R600IBDiscard(pScrn); 1429 return FALSE; 1430 } 1431 } else 1432 accel_state->is_transform[0] = FALSE; 1433 1434 if (pMask) { 1435 if (!R600TextureSetup(pMaskPicture, pMask, 1)) { 1436 R600IBDiscard(pScrn); 1437 return FALSE; 1438 } 1439 } else 1440 accel_state->is_transform[1] = FALSE; 1441 1442 if (pSrc) 1443 ps_bool_consts |= (1 << 0); 1444 if (pMask) 1445 ps_bool_consts |= (1 << 1); 1446 r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, ps_bool_consts); 1447 1448 if (pMask) { 1449 r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0)); 1450 } else { 1451 r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0)); 1452 } 1453 1454 /* Shader */ 1455 vs_conf.shader_addr = accel_state->vs_mc_addr; 1456 vs_conf.shader_size = accel_state->vs_size; 1457 vs_conf.num_gprs = 5; 1458 vs_conf.stack_size = 1; 1459 vs_conf.bo = accel_state->shaders_bo; 1460 r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM); 1461 1462 ps_conf.shader_addr = accel_state->ps_mc_addr; 1463 ps_conf.shader_size = accel_state->ps_size; 1464 ps_conf.num_gprs = 2; 1465 ps_conf.stack_size = 1; 1466 ps_conf.uncached_first_inst = 1; 1467 ps_conf.clamp_consts = 0; 1468 ps_conf.export_mode = 2; 1469 ps_conf.bo = accel_state->shaders_bo; 1470 r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM); 1471 1472 cb_conf.id = 0; 1473 cb_conf.w = accel_state->dst_obj.pitch; 1474 cb_conf.h = accel_state->dst_obj.height; 1475 cb_conf.base = 0; 1476 cb_conf.format = dst_format; 1477 cb_conf.bo = accel_state->dst_obj.bo; 1478 cb_conf.surface = accel_state->dst_obj.surface; 1479 1480 switch (pDstPicture->format) { 1481 case PICT_a2r10g10b10: 1482 case PICT_x2r10g10b10: 1483 case PICT_a8r8g8b8: 1484 case PICT_x8r8g8b8: 1485 case PICT_a1r5g5b5: 1486 case PICT_x1r5g5b5: 1487 default: 1488 cb_conf.comp_swap = 1; /* ARGB */ 1489 break; 1490 case PICT_a2b10g10r10: 1491 case PICT_x2b10g10r10: 1492 case PICT_a8b8g8r8: 1493 case PICT_x8b8g8r8: 1494 cb_conf.comp_swap = 0; /* ABGR */ 1495 break; 1496 case PICT_b8g8r8a8: 1497 case PICT_b8g8r8x8: 1498 cb_conf.comp_swap = 3; /* BGRA */ 1499 break; 1500 case PICT_r5g6b5: 1501 cb_conf.comp_swap = 2; /* RGB */ 1502 break; 1503 case PICT_a8: 1504 cb_conf.comp_swap = 3; /* A */ 1505 break; 1506 } 1507 cb_conf.source_format = 1; 1508 cb_conf.blend_clamp = 1; 1509 cb_conf.blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format); 1510 cb_conf.blend_enable = 1; 1511 cb_conf.pmask = 0xf; 1512 cb_conf.rop = 3; 1513 if (accel_state->dst_obj.tiling_flags == 0) 1514 cb_conf.array_mode = 0; 1515#if X_BYTE_ORDER == X_BIG_ENDIAN 1516 switch (dst_obj.bpp) { 1517 case 16: 1518 cb_conf.endian = ENDIAN_8IN16; 1519 break; 1520 case 32: 1521 cb_conf.endian = ENDIAN_8IN32; 1522 break; 1523 default: 1524 break; 1525 } 1526#endif 1527 r600_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain); 1528 1529 if (pMask) 1530 r600_set_spi(pScrn, (2 - 1), 2); 1531 else 1532 r600_set_spi(pScrn, (1 - 1), 1); 1533 1534 if (!pSrc) { 1535 /* solid src color */ 1536 R600SetSolidConsts(pScrn, &ps_alu_consts[0], pSrcPicture->format, 1537 pSrcPicture->pSourcePict->solidFill.color, 0); 1538 } 1539 1540 if (!pMaskPicture) { 1541 /* use identity constant if there is no mask */ 1542 ps_alu_consts[4] = 1.0; 1543 ps_alu_consts[5] = 1.0; 1544 ps_alu_consts[6] = 1.0; 1545 ps_alu_consts[7] = 1.0; 1546 } else if (!pMask) { 1547 /* solid mask color */ 1548 R600SetSolidConsts(pScrn, &ps_alu_consts[4], pMaskPicture->format, 1549 pMaskPicture->pSourcePict->solidFill.color, 1); 1550 } 1551 1552 r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_ps, 1553 sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); 1554 1555 if (accel_state->vsync) 1556 RADEONVlineHelperClear(pScrn); 1557 1558 accel_state->composite_op = op; 1559 accel_state->dst_pic = pDstPicture; 1560 accel_state->src_pic = pSrcPicture; 1561 accel_state->dst_pix = pDst; 1562 accel_state->msk_pix = pMask; 1563 accel_state->src_pix = pSrc; 1564 1565 return TRUE; 1566} 1567 1568static void R600FinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst, 1569 struct radeon_accel_state *accel_state) 1570{ 1571 int vtx_size; 1572 1573 if (accel_state->vsync) 1574 r600_cp_wait_vline_sync(pScrn, pDst, 1575 accel_state->vline_crtc, 1576 accel_state->vline_y1, 1577 accel_state->vline_y2); 1578 1579 vtx_size = accel_state->msk_pix ? 24 : 16; 1580 1581 r600_finish_op(pScrn, vtx_size); 1582} 1583 1584static void R600DoneComposite(PixmapPtr pDst) 1585{ 1586 ScreenPtr pScreen = pDst->drawable.pScreen; 1587 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen); 1588 RADEONInfoPtr info = RADEONPTR(pScrn); 1589 struct radeon_accel_state *accel_state = info->accel_state; 1590 1591 R600FinishComposite(pScrn, pDst, accel_state); 1592} 1593 1594static void R600Composite(PixmapPtr pDst, 1595 int srcX, int srcY, 1596 int maskX, int maskY, 1597 int dstX, int dstY, 1598 int w, int h) 1599{ 1600 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen); 1601 RADEONInfoPtr info = RADEONPTR(pScrn); 1602 struct radeon_accel_state *accel_state = info->accel_state; 1603 float *vb; 1604 1605 /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n", 1606 srcX, srcY, maskX, maskY,dstX, dstY, w, h); */ 1607 1608 if (CS_FULL(info->cs)) { 1609 R600FinishComposite(pScrn, pDst, info->accel_state); 1610 radeon_cs_flush_indirect(pScrn); 1611 R600PrepareComposite(info->accel_state->composite_op, 1612 info->accel_state->src_pic, 1613 info->accel_state->msk_pic, 1614 info->accel_state->dst_pic, 1615 info->accel_state->src_pix, 1616 info->accel_state->msk_pix, 1617 info->accel_state->dst_pix); 1618 } 1619 1620 if (accel_state->vsync) 1621 RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h); 1622 1623 if (accel_state->msk_pix) { 1624 1625 vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24); 1626 1627 vb[0] = (float)dstX; 1628 vb[1] = (float)dstY; 1629 vb[2] = (float)srcX; 1630 vb[3] = (float)srcY; 1631 vb[4] = (float)maskX; 1632 vb[5] = (float)maskY; 1633 1634 vb[6] = (float)dstX; 1635 vb[7] = (float)(dstY + h); 1636 vb[8] = (float)srcX; 1637 vb[9] = (float)(srcY + h); 1638 vb[10] = (float)maskX; 1639 vb[11] = (float)(maskY + h); 1640 1641 vb[12] = (float)(dstX + w); 1642 vb[13] = (float)(dstY + h); 1643 vb[14] = (float)(srcX + w); 1644 vb[15] = (float)(srcY + h); 1645 vb[16] = (float)(maskX + w); 1646 vb[17] = (float)(maskY + h); 1647 1648 radeon_vbo_commit(pScrn, &accel_state->vbo); 1649 1650 } else { 1651 1652 vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16); 1653 1654 vb[0] = (float)dstX; 1655 vb[1] = (float)dstY; 1656 vb[2] = (float)srcX; 1657 vb[3] = (float)srcY; 1658 1659 vb[4] = (float)dstX; 1660 vb[5] = (float)(dstY + h); 1661 vb[6] = (float)srcX; 1662 vb[7] = (float)(srcY + h); 1663 1664 vb[8] = (float)(dstX + w); 1665 vb[9] = (float)(dstY + h); 1666 vb[10] = (float)(srcX + w); 1667 vb[11] = (float)(srcY + h); 1668 1669 radeon_vbo_commit(pScrn, &accel_state->vbo); 1670 } 1671 1672 1673} 1674 1675static Bool 1676R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h, 1677 char *src, int src_pitch) 1678{ 1679 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen); 1680 RADEONInfoPtr info = RADEONPTR(pScrn); 1681 struct radeon_accel_state *accel_state = info->accel_state; 1682 struct radeon_exa_pixmap_priv *driver_priv; 1683 struct radeon_bo *scratch = NULL; 1684 struct radeon_bo *copy_dst; 1685 unsigned char *dst; 1686 unsigned size; 1687 uint32_t dst_domain; 1688 int bpp = pDst->drawable.bitsPerPixel; 1689 uint32_t scratch_pitch; 1690 uint32_t copy_pitch; 1691 uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8); 1692 int ret; 1693 Bool flush = TRUE; 1694 Bool r; 1695 int i; 1696 struct r600_accel_object src_obj, dst_obj; 1697 uint32_t height, base_align; 1698 1699 if (bpp < 8) 1700 return FALSE; 1701 1702 driver_priv = exaGetPixmapDriverPrivate(pDst); 1703 if (!driver_priv || !driver_priv->bo->bo.radeon) 1704 return FALSE; 1705 1706 /* If we know the BO won't be busy / in VRAM, don't bother with a scratch */ 1707 copy_dst = driver_priv->bo->bo.radeon; 1708 copy_pitch = pDst->devKind; 1709 if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) { 1710 if (!radeon_bo_is_referenced_by_cs(driver_priv->bo->bo.radeon, info->cs)) { 1711 flush = FALSE; 1712 if (!radeon_bo_is_busy(driver_priv->bo->bo.radeon, &dst_domain) && 1713 !(dst_domain & RADEON_GEM_DOMAIN_VRAM)) 1714 goto copy; 1715 } 1716 /* use cpu copy for fast fb access */ 1717 if (info->is_fast_fb) 1718 goto copy; 1719 } 1720 1721 scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0)); 1722 height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0)); 1723 base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0); 1724 size = scratch_pitch * height * (bpp / 8); 1725 scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0); 1726 if (!scratch) { 1727 goto copy; 1728 } 1729 1730 src_obj.pitch = scratch_pitch; 1731 src_obj.width = w; 1732 src_obj.height = h; 1733 src_obj.bpp = bpp; 1734 src_obj.domain = RADEON_GEM_DOMAIN_GTT; 1735 src_obj.bo = scratch; 1736 src_obj.tiling_flags = 0; 1737 src_obj.surface = NULL; 1738 1739 dst_obj.pitch = dst_pitch_hw; 1740 dst_obj.width = pDst->drawable.width; 1741 dst_obj.height = pDst->drawable.height; 1742 dst_obj.bpp = bpp; 1743 dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; 1744 dst_obj.bo = radeon_get_pixmap_bo(pDst)->bo.radeon; 1745 dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst); 1746 dst_obj.surface = radeon_get_pixmap_surface(pDst); 1747 1748 if (!R600SetAccelState(pScrn, 1749 &src_obj, 1750 NULL, 1751 &dst_obj, 1752 accel_state->copy_vs_offset, accel_state->copy_ps_offset, 1753 3, 0xffffffff)) { 1754 goto copy; 1755 } 1756 copy_dst = scratch; 1757 copy_pitch = scratch_pitch * (bpp / 8); 1758 flush = FALSE; 1759 1760copy: 1761 if (flush) 1762 radeon_cs_flush_indirect(pScrn); 1763 1764 ret = radeon_bo_map(copy_dst, 0); 1765 if (ret) { 1766 r = FALSE; 1767 goto out; 1768 } 1769 r = TRUE; 1770 size = w * bpp / 8; 1771 dst = copy_dst->ptr; 1772 if (copy_dst == driver_priv->bo->bo.radeon) 1773 dst += y * copy_pitch + x * bpp / 8; 1774 for (i = 0; i < h; i++) { 1775 memcpy(dst + i * copy_pitch, src, size); 1776 src += src_pitch; 1777 } 1778 radeon_bo_unmap(copy_dst); 1779 1780 if (copy_dst == scratch) { 1781 if (info->accel_state->vsync) 1782 RADEONVlineHelperSet(pScrn, x, y, x + w, y + h); 1783 1784 /* blit from gart to vram */ 1785 R600DoPrepareCopy(pScrn); 1786 R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h); 1787 R600DoCopyVline(pDst); 1788 } 1789 1790out: 1791 if (scratch) 1792 radeon_bo_unref(scratch); 1793 return r; 1794} 1795 1796static Bool 1797R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w, 1798 int h, char *dst, int dst_pitch) 1799{ 1800 ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen); 1801 RADEONInfoPtr info = RADEONPTR(pScrn); 1802 struct radeon_accel_state *accel_state = info->accel_state; 1803 struct radeon_exa_pixmap_priv *driver_priv; 1804 struct radeon_bo *scratch = NULL; 1805 struct radeon_bo *copy_src; 1806 unsigned size; 1807 uint32_t src_domain = 0; 1808 int bpp = pSrc->drawable.bitsPerPixel; 1809 uint32_t scratch_pitch; 1810 uint32_t copy_pitch; 1811 uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8); 1812 int ret; 1813 Bool flush = FALSE; 1814 Bool r; 1815 struct r600_accel_object src_obj, dst_obj; 1816 uint32_t height, base_align; 1817 1818 if (bpp < 8) 1819 return FALSE; 1820 1821 driver_priv = exaGetPixmapDriverPrivate(pSrc); 1822 if (!driver_priv || !driver_priv->bo->bo.radeon) 1823 return FALSE; 1824 1825 /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */ 1826 copy_src = driver_priv->bo->bo.radeon; 1827 copy_pitch = pSrc->devKind; 1828 if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) { 1829 if (radeon_bo_is_referenced_by_cs(driver_priv->bo->bo.radeon, info->cs)) { 1830 src_domain = radeon_bo_get_src_domain(driver_priv->bo->bo.radeon); 1831 if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 1832 (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) 1833 src_domain = 0; 1834 else /* A write may be scheduled */ 1835 flush = TRUE; 1836 } 1837 1838 if (!src_domain) 1839 radeon_bo_is_busy(driver_priv->bo->bo.radeon, &src_domain); 1840 1841 if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM) 1842 goto copy; 1843 } 1844 1845 scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0)); 1846 height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0)); 1847 base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0); 1848 size = scratch_pitch * height * (bpp / 8); 1849 scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0); 1850 if (!scratch) { 1851 goto copy; 1852 } 1853 radeon_cs_space_reset_bos(info->cs); 1854 radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo, 1855 RADEON_GEM_DOMAIN_VRAM, 0); 1856 accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM; 1857 radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0); 1858 accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT; 1859 radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain); 1860 ret = radeon_cs_space_check(info->cs); 1861 if (ret) { 1862 goto copy; 1863 } 1864 1865 src_obj.pitch = src_pitch_hw; 1866 src_obj.width = pSrc->drawable.width; 1867 src_obj.height = pSrc->drawable.height; 1868 src_obj.bpp = bpp; 1869 src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; 1870 src_obj.bo = radeon_get_pixmap_bo(pSrc)->bo.radeon; 1871 src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc); 1872 src_obj.surface = radeon_get_pixmap_surface(pSrc); 1873 1874 dst_obj.pitch = scratch_pitch; 1875 dst_obj.width = w; 1876 dst_obj.height = h; 1877 dst_obj.bo = scratch; 1878 dst_obj.bpp = bpp; 1879 dst_obj.domain = RADEON_GEM_DOMAIN_GTT; 1880 dst_obj.tiling_flags = 0; 1881 dst_obj.surface = NULL; 1882 1883 if (!R600SetAccelState(pScrn, 1884 &src_obj, 1885 NULL, 1886 &dst_obj, 1887 accel_state->copy_vs_offset, accel_state->copy_ps_offset, 1888 3, 0xffffffff)) { 1889 goto copy; 1890 } 1891 1892 /* blit from vram to gart */ 1893 R600DoPrepareCopy(pScrn); 1894 R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h); 1895 R600DoCopy(pScrn); 1896 copy_src = scratch; 1897 copy_pitch = scratch_pitch * (bpp / 8); 1898 flush = TRUE; 1899 1900copy: 1901 if (flush) 1902 radeon_cs_flush_indirect(pScrn); 1903 1904 ret = radeon_bo_map(copy_src, 0); 1905 if (ret) { 1906 ErrorF("failed to map pixmap: %d\n", ret); 1907 r = FALSE; 1908 goto out; 1909 } 1910 r = TRUE; 1911 w *= bpp / 8; 1912 if (copy_src == driver_priv->bo->bo.radeon) 1913 size = y * copy_pitch + x * bpp / 8; 1914 else 1915 size = 0; 1916 while (h--) { 1917 memcpy(dst, copy_src->ptr + size, w); 1918 size += copy_pitch; 1919 dst += dst_pitch; 1920 } 1921 radeon_bo_unmap(copy_src); 1922out: 1923 if (scratch) 1924 radeon_bo_unref(scratch); 1925 return r; 1926} 1927 1928static int 1929R600MarkSync(ScreenPtr pScreen) 1930{ 1931 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen); 1932 RADEONInfoPtr info = RADEONPTR(pScrn); 1933 struct radeon_accel_state *accel_state = info->accel_state; 1934 1935 return ++accel_state->exaSyncMarker; 1936 1937} 1938 1939static void 1940R600Sync(ScreenPtr pScreen, int marker) 1941{ 1942 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen); 1943 RADEONInfoPtr info = RADEONPTR(pScrn); 1944 struct radeon_accel_state *accel_state = info->accel_state; 1945 1946 if (accel_state->exaMarkerSynced != marker) { 1947 accel_state->exaMarkerSynced = marker; 1948 } 1949 1950} 1951 1952static Bool 1953R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) 1954{ 1955 RADEONInfoPtr info = RADEONPTR(pScrn); 1956 struct radeon_accel_state *accel_state = info->accel_state; 1957 1958 /* 512 bytes per shader for now */ 1959 int size = 512 * 9; 1960 1961 accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0, 1962 RADEON_GEM_DOMAIN_VRAM, 0); 1963 if (!accel_state->shaders_bo) { 1964 ErrorF("Allocating shader failed\n"); 1965 return FALSE; 1966 } 1967 return TRUE; 1968} 1969 1970Bool 1971R600LoadShaders(ScrnInfoPtr pScrn) 1972{ 1973 RADEONInfoPtr info = RADEONPTR(pScrn); 1974 struct radeon_accel_state *accel_state = info->accel_state; 1975 RADEONChipFamily ChipSet = info->ChipFamily; 1976 uint32_t *shader; 1977 int ret; 1978 1979 ret = radeon_bo_map(accel_state->shaders_bo, 1); 1980 if (ret) { 1981 FatalError("failed to map shader %d\n", ret); 1982 return FALSE; 1983 } 1984 shader = accel_state->shaders_bo->ptr; 1985 1986 /* solid vs --------------------------------------- */ 1987 accel_state->solid_vs_offset = 0; 1988 R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4); 1989 1990 /* solid ps --------------------------------------- */ 1991 accel_state->solid_ps_offset = 512; 1992 R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4); 1993 1994 /* copy vs --------------------------------------- */ 1995 accel_state->copy_vs_offset = 1024; 1996 R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4); 1997 1998 /* copy ps --------------------------------------- */ 1999 accel_state->copy_ps_offset = 1536; 2000 R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4); 2001 2002 /* comp vs --------------------------------------- */ 2003 accel_state->comp_vs_offset = 2048; 2004 R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4); 2005 2006 /* comp ps --------------------------------------- */ 2007 accel_state->comp_ps_offset = 2560; 2008 R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4); 2009 2010 /* xv vs --------------------------------------- */ 2011 accel_state->xv_vs_offset = 3072; 2012 R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4); 2013 2014 /* xv ps --------------------------------------- */ 2015 accel_state->xv_ps_offset = 3584; 2016 R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4); 2017 2018 radeon_bo_unmap(accel_state->shaders_bo); 2019 return TRUE; 2020} 2021 2022Bool 2023R600DrawInit(ScreenPtr pScreen) 2024{ 2025 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen); 2026 RADEONInfoPtr info = RADEONPTR(pScrn); 2027 2028 if (!info->accel_state->exa) { 2029 xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n"); 2030 return FALSE; 2031 } 2032 2033 info->accel_state->exa->exa_major = EXA_VERSION_MAJOR; 2034 info->accel_state->exa->exa_minor = EXA_VERSION_MINOR; 2035 2036 info->accel_state->exa->PrepareSolid = R600PrepareSolid; 2037 info->accel_state->exa->Solid = R600Solid; 2038 info->accel_state->exa->DoneSolid = R600DoneSolid; 2039 2040 info->accel_state->exa->PrepareCopy = R600PrepareCopy; 2041 info->accel_state->exa->Copy = R600Copy; 2042 info->accel_state->exa->DoneCopy = R600DoneCopy; 2043 2044 info->accel_state->exa->MarkSync = R600MarkSync; 2045 info->accel_state->exa->WaitMarker = R600Sync; 2046 2047 info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap; 2048 info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen; 2049 info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS; 2050 info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS; 2051 info->accel_state->exa->UploadToScreen = R600UploadToScreenCS; 2052 info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS; 2053 info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2; 2054 info->accel_state->exa->SharePixmapBacking = RADEONEXASharePixmapBacking; 2055 info->accel_state->exa->SetSharedPixmapBacking = RADEONEXASetSharedPixmapBacking; 2056 info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS | EXA_SUPPORTS_PREPARE_AUX | 2057 EXA_HANDLES_PIXMAPS | EXA_MIXED_PIXMAPS; 2058 info->accel_state->exa->pixmapOffsetAlign = 256; 2059 info->accel_state->exa->pixmapPitchAlign = 256; 2060 2061 info->accel_state->exa->CheckComposite = R600CheckComposite; 2062 info->accel_state->exa->PrepareComposite = R600PrepareComposite; 2063 info->accel_state->exa->Composite = R600Composite; 2064 info->accel_state->exa->DoneComposite = R600DoneComposite; 2065 2066 info->accel_state->exa->maxPitchBytes = 32768; 2067 info->accel_state->exa->maxX = 8192; 2068 info->accel_state->exa->maxY = 8192; 2069 2070 /* not supported yet */ 2071 if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) { 2072 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n"); 2073 info->accel_state->vsync = TRUE; 2074 } else 2075 info->accel_state->vsync = FALSE; 2076 2077 if (!exaDriverInit(pScreen, info->accel_state->exa)) { 2078 free(info->accel_state->exa); 2079 return FALSE; 2080 } 2081 2082 info->accel_state->XInited3D = FALSE; 2083 info->accel_state->src_obj[0].bo = NULL; 2084 info->accel_state->src_obj[1].bo = NULL; 2085 info->accel_state->dst_obj.bo = NULL; 2086 info->accel_state->copy_area_bo = NULL; 2087 info->accel_state->vbo.vb_start_op = -1; 2088 info->accel_state->finish_op = r600_finish_op; 2089 info->accel_state->vbo.verts_per_op = 3; 2090 RADEONVlineHelperClear(pScrn); 2091 2092 radeon_vbo_init_lists(pScrn); 2093 2094 if (!R600AllocShaders(pScrn, pScreen)) 2095 return FALSE; 2096 2097 if (!R600LoadShaders(pScrn)) 2098 return FALSE; 2099 2100 exaMarkSync(pScreen); 2101 2102 return TRUE; 2103 2104} 2105 2106