/*
 * Copyright © 2006,2008 Intel Corporation
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Wang Zhenyu <zhenyu.z.wang@intel.com>
 *    Eric Anholt <eric@anholt.net>
 *    Carl Worth <cworth@redhat.com>
 *    Keith Packard <keithp@keithp.com>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <assert.h>
#include "xf86.h"
#include "i830.h"
#include "i915_reg.h"

/* bring in brw structs */
#include "brw_defines.h"
#include "brw_structs.h"

/* 24 = 4 vertices/composite * 3 texcoords/vertex * 2 floats/texcoord
 *
 * This is an upper-bound based on the case of a non-affine
 * transformation and with a mask, but useful for sizing all cases for
 * simplicity.
 */
#define VERTEX_FLOATS_PER_COMPOSITE 24
#define VERTEX_BUFFER_SIZE (256 * VERTEX_FLOATS_PER_COMPOSITE)

/* Description of how one Render composite operator maps onto the hardware
 * blender: whether the operator's factors read destination alpha, whether
 * they read source alpha, and the BRW_BLENDFACTOR_* codes for the source
 * and destination blend factors.
 */
struct blendinfo {
    Bool dst_alpha;
    Bool src_alpha;
    uint32_t src_blend;
    uint32_t dst_blend;
};

/* One entry mapping a Render picture format to the matching hardware
 * surface format for sampling. */
struct formatinfo {
    int fmt;
    uint32_t card_fmt;
};

// refer vol2, 3d rasterization 3.8.1

/* defined in brw_defines.h */
/* Indexed directly by the Render composite operator code
 * (PictOpClear .. PictOpAdd). */
static struct blendinfo i965_blend_op[] = {
    /* Clear */
    {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ZERO},
    /* Src */
    {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO},
    /* Dst */
    {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ONE},
    /* Over */
    {0, 1, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_INV_SRC_ALPHA},
    /* OverReverse */
    {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE},
    /* In */
    {1, 0, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_ZERO},
    /* InReverse */
    {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_SRC_ALPHA},
    /* Out */
    {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO},
    /* OutReverse */
    {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_INV_SRC_ALPHA},
    /* Atop */
    {1, 1, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA},
    /* AtopReverse */
    {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA},
    /* Xor */
    {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA},
    /* Add */
    {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ONE},
};
/**
 * Highest-valued BLENDFACTOR used in i965_blend_op.
 * This leaves out BRW_BLENDFACTOR_INV_DST_COLOR,
 * BRW_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * BRW_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define BRW_BLENDFACTOR_COUNT (BRW_BLENDFACTOR_INV_DST_ALPHA + 1)

/* FIXME: surface format defined in brw_defines.h, shared Sampling engine
 * 1.7.2
 */
/* Render picture formats we can sample from, and the matching hardware
 * surface format codes. */
static struct formatinfo i965_tex_formats[] = {
    {PICT_a8r8g8b8, BRW_SURFACEFORMAT_B8G8R8A8_UNORM },
    {PICT_x8r8g8b8, BRW_SURFACEFORMAT_B8G8R8X8_UNORM },
    {PICT_a8b8g8r8, BRW_SURFACEFORMAT_R8G8B8A8_UNORM },
    {PICT_x8b8g8r8, BRW_SURFACEFORMAT_R8G8B8X8_UNORM },
    {PICT_r5g6b5,   BRW_SURFACEFORMAT_B5G6R5_UNORM   },
    {PICT_a1r5g5b5, BRW_SURFACEFORMAT_B5G5R5A1_UNORM },
    {PICT_a8,       BRW_SURFACEFORMAT_A8_UNORM	     },
};

/* Translate a Render composite operator into hardware source/destination
 * blend factors, fixing up factors that reference channels the actual
 * pictures do not have (missing destination alpha, component-alpha masks).
 *
 * op: Render operator, used to index i965_blend_op (caller must have
 *     validated the range — see i965_check_composite).
 * pMask: mask picture, or NULL; only its componentAlpha/format are read.
 * dst_format: hardware surface format chosen for the destination.
 * sblend/dblend: out parameters receiving BRW_BLENDFACTOR_* codes.
 */
static void i965_get_blend_cntl(int op, PicturePtr pMask, uint32_t dst_format,
				uint32_t *sblend, uint32_t *dblend)
{

    *sblend = i965_blend_op[op].src_blend;
    *dblend = i965_blend_op[op].dst_blend;

    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
     * it as always 1.
     */
    if (PICT_FORMAT_A(dst_format) == 0 && i965_blend_op[op].dst_alpha) {
	if (*sblend == BRW_BLENDFACTOR_DST_ALPHA)
	    *sblend = BRW_BLENDFACTOR_ONE;
	else if (*sblend == BRW_BLENDFACTOR_INV_DST_ALPHA)
	    *sblend = BRW_BLENDFACTOR_ZERO;
    }

    /* If the source alpha is being used, then we should only be in a case where
     * the source blend factor is 0, and the source blend value is the mask
     * channels multiplied by the source picture's alpha.
     */
    if (pMask && pMask->componentAlpha && PICT_FORMAT_RGB(pMask->format)
	&& i965_blend_op[op].src_alpha) {
	if (*dblend == BRW_BLENDFACTOR_SRC_ALPHA) {
	    *dblend = BRW_BLENDFACTOR_SRC_COLOR;
	} else if (*dblend == BRW_BLENDFACTOR_INV_SRC_ALPHA) {
	    *dblend = BRW_BLENDFACTOR_INV_SRC_COLOR;
	}
    }

}

/* Map the destination picture's format to a hardware render-target surface
 * format in *dst_format.  Returns TRUE on success; on an unsupported format
 * I830FALLBACK logs and returns FALSE (the macro uses the local pScrn).
 */
static Bool i965_get_dest_format(PicturePtr pDstPicture, uint32_t *dst_format)
{
    ScrnInfoPtr pScrn = xf86Screens[pDstPicture->pDrawable->pScreen->myNum];

    switch (pDstPicture->format) {
    case PICT_a8r8g8b8:
    case PICT_x8r8g8b8:
	/* x8r8g8b8 is written as if it had alpha; the X channel is ignored */
	*dst_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
	break;
    case PICT_r5g6b5:
	*dst_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
	break;
    case PICT_a1r5g5b5:
	*dst_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
	break;
    case PICT_x1r5g5b5:
	*dst_format = BRW_SURFACEFORMAT_B5G5R5X1_UNORM;
	break;
    case PICT_a8:
	*dst_format = BRW_SURFACEFORMAT_A8_UNORM;
	break;
    case PICT_a4r4g4b4:
    case PICT_x4r4g4b4:
	*dst_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
	break;
    default:
	I830FALLBACK("Unsupported dest format 0x%x\n",
		     (int)pDstPicture->format);
    }

    return TRUE;
}

/* Validate one source/mask picture for compositing: repeat mode, filter,
 * maximum 2D surface size (8192x8192 on gen4-class hardware), and sampleable
 * format.  Solid/gradient pictures (no pDrawable) skip the size/format
 * checks.  Returns TRUE if usable; FALSE (via I830FALLBACK) otherwise.
 * The unit parameter is currently unused.
 */
static Bool i965_check_composite_texture(ScrnInfoPtr pScrn, PicturePtr pPict, int unit)
{
    if (pPict->repeatType > RepeatReflect)
	I830FALLBACK("extended repeat (%d) not supported\n",
		     pPict->repeatType);

    if (pPict->filter != PictFilterNearest &&
	pPict->filter != PictFilterBilinear)
    {
	I830FALLBACK("Unsupported filter 0x%x\n", pPict->filter);
    }

    if (pPict->pDrawable)
    {
	int w, h, i;

	w = pPict->pDrawable->width;
	h = pPict->pDrawable->height;
	if ((w > 8192) || (h > 8192))
	    I830FALLBACK("Picture w/h too large (%dx%d)\n", w, h);

	/* linear search of the supported-format table */
	for (i = 0; i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]);
	     i++)
	{
	    if (i965_tex_formats[i].fmt == pPict->format)
		break;
	}
	if (i == sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]))
	    I830FALLBACK("Unsupported picture format 0x%x\n",
			 (int)pPict->format);
    }

    return TRUE;
}

/* EXA CheckComposite hook: decide whether the given operator and pictures
 * can be accelerated by this engine.  Returns TRUE when supported; FALSE
 * (logging via I830FALLBACK) when software fallback is required.
 */
Bool
i965_check_composite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
		     PicturePtr pDstPicture)
{
    ScrnInfoPtr pScrn = xf86Screens[pDstPicture->pDrawable->pScreen->myNum];
    uint32_t tmp1;

    /* Check for unsupported compositing operations. */
    /* (signed op is converted to unsigned here, so negative values also
     * fail the bounds check and fall back) */
    if (op >= sizeof(i965_blend_op) / sizeof(i965_blend_op[0]))
	I830FALLBACK("Unsupported Composite op 0x%x\n", op);

    if (pMaskPicture && pMaskPicture->componentAlpha &&
	PICT_FORMAT_RGB(pMaskPicture->format)) {
	/* Check if it's component alpha that relies on a source alpha and on
	 * the source value.  We can only get one of those into the single
	 * source value that we get to blend with.
	 */
	if (i965_blend_op[op].src_alpha &&
	    (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO))
	{
	    I830FALLBACK("Component alpha not supported with source "
			 "alpha and source value blending.\n");
	}
    }

    if (!i965_check_composite_texture(pScrn, pSrcPicture, 0))
	I830FALLBACK("Check Src picture texture\n");
    if (pMaskPicture != NULL && !i965_check_composite_texture(pScrn, pMaskPicture, 1))
	I830FALLBACK("Check Mask picture texture\n");

    if (!i965_get_dest_format(pDstPicture, &tmp1))
	I830FALLBACK("Get Color buffer format\n");

    return TRUE;

}

/* Convert a GRF register count into the 16-register-block encoding used by
 * the thread0.grf_reg_count hardware fields. */
#define BRW_GRF_BLOCKS(nreg)    ((nreg + 15) / 16 - 1)

/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 */
#define URB_CS_ENTRY_SIZE     0
#define URB_CS_ENTRIES	      0

#define URB_VS_ENTRY_SIZE     1	// each 512-bit row
#define URB_VS_ENTRIES	      8	// we needs at least 8 entries

#define URB_GS_ENTRY_SIZE     0
#define URB_GS_ENTRIES	      0

#define URB_CLIP_ENTRY_SIZE   0
#define URB_CLIP_ENTRIES      0

#define URB_SF_ENTRY_SIZE     2
#define URB_SF_ENTRIES	      1

/* System Instruction Pointer kernel: a single wait instruction followed by
 * nop padding, executed on exceptions.  Raw gen4 instruction words. */
static const uint32_t sip_kernel_static[][4] = {
/*    wait (1) a0<1>UW a145<0,1,0>UW { align1 +  } */
    { 0x00000030, 0x20000108, 0x00001220, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
};

/*
 * this program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate.  It was extracted from the Mesa driver
 */

#define SF_KERNEL_NUM_GRF  16
#define SF_MAX_THREADS	   2

/* The .g4b files below are pre-assembled gen4 shader binaries generated at
 * build time; each #include expands to rows of { uint32, uint32, uint32,
 * uint32 } instruction words. */
static const uint32_t sf_kernel_static[][4] = {
#include "exa_sf.g4b"
};

static const uint32_t sf_kernel_mask_static[][4] = {
#include "exa_sf_mask.g4b"
};

/* ps kernels */
#define PS_KERNEL_NUM_GRF   32
#define PS_MAX_THREADS	    48

/* Pixel-shader kernels: one variant per combination of
 * {no mask, mask with component alpha, mask with src alpha, mask without
 * component alpha} x {affine, projective} transform. */
static const uint32_t ps_kernel_nomask_affine_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_nomask_projective_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_maskca_affine_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_maskca_projective_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_maskca_srcalpha_affine_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_a.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca_srcalpha.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_maskca_srcalpha_projective_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_a.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca_srcalpha.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_masknoca_affine_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_a.g4b"
#include "exa_wm_noca.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_masknoca_projective_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_a.g4b"
#include "exa_wm_noca.g4b"
#include "exa_wm_write.g4b"
};

/* new programs for IGDNG */
/* Same kernel matrix, re-assembled for gen5 (Ironlake) instruction
 * encoding. */
static const uint32_t sf_kernel_static_gen5[][4] = {
#include "exa_sf.g4b.gen5"
};

static const uint32_t sf_kernel_mask_static_gen5[][4] = {
#include "exa_sf_mask.g4b.gen5"
};

static const uint32_t ps_kernel_nomask_affine_static_gen5 [][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_nomask_projective_static_gen5 [][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_affine_static_gen5 [][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_projective_static_gen5 [][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen5 [][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_a.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca_srcalpha.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen5 [][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_a.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca_srcalpha.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_masknoca_affine_static_gen5 [][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_a.g4b.gen5"
#include "exa_wm_noca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_masknoca_projective_static_gen5 [][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_a.g4b.gen5"
#include "exa_wm_noca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

/* Declare a 4-D array of WM unit state, one element per combination of
 * source filter/extend and mask filter/extend. */
#define WM_STATE_DECL(kernel) \
    struct brw_wm_unit_state wm_state_ ## kernel[SAMPLER_STATE_FILTER_COUNT] \
						[SAMPLER_STATE_EXTEND_COUNT] \
						[SAMPLER_STATE_FILTER_COUNT] \
						[SAMPLER_STATE_EXTEND_COUNT]

/* Many of the fields in the state structure must be aligned to a
 * 64-byte boundary, (or a 32-byte boundary, but 64 is good enough for
 * those too).
 */
#define PAD64_MULTI(previous, idx, factor) char previous ## _pad ## idx [(64 - (sizeof(struct previous) * (factor)) % 64) % 64]
#define PAD64(previous, idx) PAD64_MULTI(previous, idx, 1)

typedef enum {
    SAMPLER_STATE_FILTER_NEAREST,
    SAMPLER_STATE_FILTER_BILINEAR,
    SAMPLER_STATE_FILTER_COUNT
} sampler_state_filter_t;

typedef enum {
    SAMPLER_STATE_EXTEND_NONE,
    SAMPLER_STATE_EXTEND_REPEAT,
    SAMPLER_STATE_EXTEND_PAD,
    SAMPLER_STATE_EXTEND_REFLECT,
    SAMPLER_STATE_EXTEND_COUNT
} sampler_state_extend_t;

typedef enum {
    WM_KERNEL_NOMASK_AFFINE,
    WM_KERNEL_NOMASK_PROJECTIVE,
    WM_KERNEL_MASKCA_AFFINE,
    WM_KERNEL_MASKCA_PROJECTIVE,
    WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
    WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
    WM_KERNEL_MASKNOCA_AFFINE,
    WM_KERNEL_MASKNOCA_PROJECTIVE,
    WM_KERNEL_COUNT
} wm_kernel_t;

/* Designated-initializer helper for the kernel tables below; __UNCONST
 * strips const so the table can hold a plain void *. */
#define KERNEL(kernel_enum, kernel, masked) \
    [kernel_enum] = {__UNCONST(&kernel), sizeof(kernel), masked}
/* One pixel-shader program: its instruction words, byte size, and whether
 * it samples a mask picture. */
struct wm_kernel_info {
    void *data;
    unsigned int size;
    Bool has_mask;
};

/* Gen4 kernel table, indexed by wm_kernel_t. */
static struct wm_kernel_info wm_kernels[] = {
    KERNEL(WM_KERNEL_NOMASK_AFFINE,
	   ps_kernel_nomask_affine_static, FALSE),
    KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	   ps_kernel_nomask_projective_static, FALSE),
    KERNEL(WM_KERNEL_MASKCA_AFFINE,
	   ps_kernel_maskca_affine_static, TRUE),
    KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	   ps_kernel_maskca_projective_static, TRUE),
    KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	   ps_kernel_maskca_srcalpha_affine_static, TRUE),
    KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	   ps_kernel_maskca_srcalpha_projective_static, TRUE),
    KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	   ps_kernel_masknoca_affine_static, TRUE),
    KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	   ps_kernel_masknoca_projective_static, TRUE),
};

/* Gen5 (Ironlake) kernel table, same layout as wm_kernels. */
static struct wm_kernel_info wm_kernels_gen5[] = {
    KERNEL(WM_KERNEL_NOMASK_AFFINE,
	   ps_kernel_nomask_affine_static_gen5, FALSE),
    KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	   ps_kernel_nomask_projective_static_gen5, FALSE),
    KERNEL(WM_KERNEL_MASKCA_AFFINE,
	   ps_kernel_maskca_affine_static_gen5, TRUE),
    KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	   ps_kernel_maskca_projective_static_gen5, TRUE),
    KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	   ps_kernel_maskca_srcalpha_affine_static_gen5, TRUE),
    KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	   ps_kernel_maskca_srcalpha_projective_static_gen5, TRUE),
    KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	   ps_kernel_masknoca_affine_static_gen5, TRUE),
    KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	   ps_kernel_masknoca_projective_static_gen5, TRUE),
};
#undef KERNEL

/* CC unit state padded out to the 64-byte alignment the hardware requires
 * for state-base pointers. */
typedef struct _brw_cc_unit_state_padded {
    struct brw_cc_unit_state state;
    char pad[64 - sizeof (struct brw_cc_unit_state)];
} brw_cc_unit_state_padded;

/* Surface state padded to 32 bytes so consecutive entries in a binding
 * table stay correctly aligned. */
typedef struct brw_surface_state_padded {
    struct brw_surface_state state;
    char pad[32 - sizeof (struct brw_surface_state)];
} brw_surface_state_padded;

struct gen4_cc_unit_state {
    /* Index by [src_blend][dst_blend] */
    brw_cc_unit_state_padded cc_state[BRW_BLENDFACTOR_COUNT]
				     [BRW_BLENDFACTOR_COUNT];
};

typedef float gen4_vertex_buffer[VERTEX_BUFFER_SIZE];

/* Everything describing the composite operation currently being rendered,
 * captured at prepare time and consumed when rectangles are emitted. */
typedef struct gen4_composite_op {
    int		op;
    PicturePtr	source_picture;
    PicturePtr	mask_picture;
    PicturePtr	dest_picture;
    PixmapPtr	source;
    PixmapPtr	mask;
    PixmapPtr	dest;
    drm_intel_bo *binding_table_bo;
    sampler_state_filter_t src_filter;
    sampler_state_filter_t mask_filter;
    sampler_state_extend_t src_extend;
    sampler_state_extend_t mask_extend;
    Bool is_affine;
    wm_kernel_t wm_kernel;
} gen4_composite_op;

/** Private data for gen4 render accel implementation.
 */
struct gen4_render_state {
    drm_intel_bo *vs_state_bo;
    drm_intel_bo *sf_state_bo;
    drm_intel_bo *sf_mask_state_bo;
    drm_intel_bo *cc_state_bo;
    /* WM state per kernel and per src/mask sampler configuration,
     * allocated lazily as combinations are first used */
    drm_intel_bo *wm_state_bo[WM_KERNEL_COUNT]
			     [SAMPLER_STATE_FILTER_COUNT]
			     [SAMPLER_STATE_EXTEND_COUNT]
			     [SAMPLER_STATE_FILTER_COUNT]
			     [SAMPLER_STATE_EXTEND_COUNT];
    drm_intel_bo *wm_kernel_bo[WM_KERNEL_COUNT];

    drm_intel_bo *sip_kernel_bo;
    dri_bo* vertex_buffer_bo;

    gen4_composite_op composite_op;

    /* current write offset (in floats) into the vertex buffer */
    int vb_offset;
    /* floats per vertex for the current operation */
    int vertex_size;

    Bool needs_state_emit;
};

/**
 * Sets up the SF state pointing at an SF kernel.
 *
 * The SF kernel does coord interp: for each attribute,
 * calculate dA/dx and dA/dy.  Hand these interpolation coefficients
 * back to SF which then hands pixels off to WM.
 */
static drm_intel_bo *
gen4_create_sf_state(ScrnInfoPtr scrn, drm_intel_bo *kernel_bo)
{
    I830Ptr pI830 = I830PTR(scrn);
    struct brw_sf_unit_state *sf_state;
    drm_intel_bo *sf_state_bo;

    sf_state_bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 SF state",
				     sizeof(*sf_state), 4096);
    drm_intel_bo_map(sf_state_bo, TRUE);
    sf_state = sf_state_bo->virtual;

    memset(sf_state, 0, sizeof(*sf_state));
    sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    /* kernel pointer is stored in units of 64 bytes, hence the >> 6;
     * the reloc delta re-packs grf_reg_count, which shares the dword */
    sf_state->thread0.kernel_start_pointer =
	intel_emit_reloc(sf_state_bo,
			 offsetof(struct brw_sf_unit_state, thread0),
			 kernel_bo, sf_state->thread0.grf_reg_count << 1,
			 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6;
    sf_state->sf1.single_program_flow = 1;
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;
    sf_state->thread2.per_thread_scratch_space = 0;
    /* scratch space is not used in our kernel */
    sf_state->thread2.scratch_space_base_pointer = 0;
    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    /* don't smash vertex header, read start from dw8 */
    sf_state->thread3.urb_entry_read_offset = 1;
    sf_state->thread3.dispatch_grf_start_reg = 3;
    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;
    sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
    sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;
    sf_state->sf7.trifan_pv = 2;
    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    drm_intel_bo_unmap(sf_state_bo);

    return sf_state_bo;
}

/* Allocate a BO holding the sampler border color (always transparent
 * black), shared by every sampler state we build. */
static drm_intel_bo *
sampler_border_color_create(ScrnInfoPtr scrn)
{
    struct brw_sampler_legacy_border_color sampler_border_color;

    /* Set up the sampler border color (always transparent black) */
    memset(&sampler_border_color, 0, sizeof(sampler_border_color));
    sampler_border_color.color[0] = 0; /* R */
    sampler_border_color.color[1] = 0; /* G */
    sampler_border_color.color[2] = 0; /* B */
    sampler_border_color.color[3] = 0; /* A */

    return intel_bo_alloc_for_data(scrn,
				   &sampler_border_color,
				   sizeof(sampler_border_color),
				   "gen4 render sampler border color");
}

/* Fill in one sampler-state entry inside an already-mapped sampler state
 * BO, translating the EXA filter/extend enums into hardware map filters and
 * texcoord wrap modes.  The BO must remain mapped by the caller. */
static void
sampler_state_init (drm_intel_bo *sampler_state_bo,
		    struct brw_sampler_state *sampler_state,
		    sampler_state_filter_t filter,
		    sampler_state_extend_t extend,
		    drm_intel_bo *border_color_bo)
{
    uint32_t sampler_state_offset;

    /* offset of this entry within the BO, needed for the reloc below */
    sampler_state_offset = (char *)sampler_state -
	(char *)sampler_state_bo->virtual;

    /* PS kernel use this sampler */
    memset(sampler_state, 0, sizeof(*sampler_state));

    sampler_state->ss0.lod_preclamp = 1; /* GL mode */

    /* We use the legacy mode to get the semantics specified by
     * the Render extension. */
    sampler_state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY;

    switch(filter) {
    default:
    case SAMPLER_STATE_FILTER_NEAREST:
	sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
	sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
	break;
    case SAMPLER_STATE_FILTER_BILINEAR:
	sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
	sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
	break;
    }

    switch (extend) {
    default:
    case SAMPLER_STATE_EXTEND_NONE:
	/* RepeatNone: sample the (transparent black) border outside */
	sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
	sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
	sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
	break;
    case SAMPLER_STATE_EXTEND_REPEAT:
	sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
	sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
	sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
	break;
    case SAMPLER_STATE_EXTEND_PAD:
	sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
	sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
	sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
	break;
    case SAMPLER_STATE_EXTEND_REFLECT:
	sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
	sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
	sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
	break;
    }

    /* border color pointer is in 32-byte units, hence the >> 5 */
    sampler_state->ss2.border_color_pointer =
	intel_emit_reloc(sampler_state_bo, sampler_state_offset +
			 offsetof(struct brw_sampler_state, ss2),
			 border_color_bo, 0,
			 I915_GEM_DOMAIN_SAMPLER, 0) >> 5;

    sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
}

/* Build a BO holding two sampler states: entry 0 for the source picture,
 * entry 1 for the mask picture. */
static drm_intel_bo *
gen4_create_sampler_state(ScrnInfoPtr scrn,
			  sampler_state_filter_t src_filter,
			  sampler_state_extend_t src_extend,
			  sampler_state_filter_t mask_filter,
			  sampler_state_extend_t mask_extend,
			  drm_intel_bo *border_color_bo)
{
    I830Ptr pI830 = I830PTR(scrn);
    drm_intel_bo *sampler_state_bo;
    struct brw_sampler_state *sampler_state;

    sampler_state_bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 sampler state",
					  sizeof(struct brw_sampler_state) * 2,
					  4096);
    drm_intel_bo_map(sampler_state_bo, TRUE);
    sampler_state = sampler_state_bo->virtual;

    sampler_state_init(sampler_state_bo,
		       &sampler_state[0],
		       src_filter, src_extend,
		       border_color_bo);
    sampler_state_init(sampler_state_bo,
		       &sampler_state[1],
		       mask_filter, mask_extend,
		       border_color_bo);

    drm_intel_bo_unmap(sampler_state_bo);

    return sampler_state_bo;
}

/* Initialize one CC (color calculator) unit state entry at the given offset
 * in an already-mapped BO: blending enabled with the given BRW_BLENDFACTOR_*
 * source/destination factors, everything else (stencil, depth, logic op,
 * alpha test, dither) disabled. */
static void
cc_state_init (drm_intel_bo *cc_state_bo,
	       uint32_t cc_state_offset,
	       int src_blend,
	       int dst_blend,
	       drm_intel_bo *cc_vp_bo)
{
    struct brw_cc_unit_state *cc_state;

    cc_state = (struct brw_cc_unit_state *)((char *)cc_state_bo->virtual +
					    cc_state_offset);

    memset(cc_state, 0, sizeof(*cc_state));
    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha same as colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */

    /* viewport pointer is in 32-byte units, hence the >> 5 */
    cc_state->cc4.cc_viewport_state_offset =
	intel_emit_reloc(cc_state_bo, cc_state_offset +
			 offsetof(struct brw_cc_unit_state, cc4),
			 cc_vp_bo, 0,
			 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;

    /* Fill in alpha blend factors same as color, for the future. */
    cc_state->cc5.ia_src_blend_factor = src_blend;
    cc_state->cc5.ia_dest_blend_factor = dst_blend;

    cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
    cc_state->cc6.clamp_post_alpha_blend = 1;
    cc_state->cc6.clamp_pre_alpha_blend = 1;
    cc_state->cc6.clamp_range = 0;      /* clamp range [0,1] */

    cc_state->cc6.src_blend_factor = src_blend;
    cc_state->cc6.dest_blend_factor = dst_blend;
}

/* Build a WM (windower/masker, i.e. pixel shader) unit state BO pointing at
 * the given kernel and sampler state.  has_mask selects the URB read length
 * and binding-table entry count for one vs. two textures. */
static drm_intel_bo *
gen4_create_wm_state(ScrnInfoPtr scrn,
		     Bool has_mask, drm_intel_bo *kernel_bo,
		     drm_intel_bo *sampler_bo)
{
    I830Ptr pI830 = I830PTR(scrn);
    struct brw_wm_unit_state *wm_state;
    drm_intel_bo *wm_state_bo;

    wm_state_bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 WM state",
				     sizeof(*wm_state), 4096);
    drm_intel_bo_map(wm_state_bo, TRUE);
    wm_state = wm_state_bo->virtual;

    memset(wm_state, 0, sizeof (*wm_state));
    wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    /* kernel pointer in 64-byte units; reloc delta re-packs grf_reg_count */
    wm_state->thread0.kernel_start_pointer =
	intel_emit_reloc(wm_state_bo,
			 offsetof(struct brw_wm_unit_state, thread0),
			 kernel_bo, wm_state->thread0.grf_reg_count << 1,
			 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6;

    wm_state->thread1.single_program_flow = 0;

    /* scratch space is not used in our kernel */
    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0;

    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;

    wm_state->thread3.urb_entry_read_offset = 0;
    /* wm kernel use urb from 3, see wm_program in compiler module */
    wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */

    wm_state->wm4.stats_enable = 1;  /* statistic */

    if (IS_IGDNG(pI830))
	wm_state->wm4.sampler_count = 0; /* hardware requirement */
    else
	wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */

    /* sampler pointer in 32-byte units; the reloc delta re-packs the low
     * wm4 bits (stats_enable | sampler_count) sharing the dword */
    wm_state->wm4.sampler_state_pointer =
	intel_emit_reloc(wm_state_bo, offsetof(struct brw_wm_unit_state, wm4),
			 sampler_bo,
			 wm_state->wm4.stats_enable +
			 (wm_state->wm4.sampler_count << 2),
			 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;
    wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
    wm_state->wm5.transposed_urb_read = 0;
    wm_state->wm5.thread_dispatch_enable = 1;
    /* just use 16-pixel dispatch (4 subspans), don't need to change kernel
     * start point
     */
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    /* Each pair of attributes (src/mask coords) is two URB entries */
    if (has_mask) {
	wm_state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */
	wm_state->thread3.urb_entry_read_length = 4;
    } else {
	wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
	wm_state->thread3.urb_entry_read_length = 2;
    }

    /* binding table entry count is only used for prefetching, and it has to
     * be set 0 for IGDNG
     */
    if (IS_IGDNG(pI830))
	wm_state->thread1.binding_table_entry_count = 0;

    drm_intel_bo_unmap(wm_state_bo);

    return wm_state_bo;
}

/* Build the CC viewport BO with a depth range wide enough to never clip
 * (we don't use depth). */
static drm_intel_bo *
gen4_create_cc_viewport(ScrnInfoPtr scrn)
{
    I830Ptr pI830 = I830PTR(scrn);
    drm_intel_bo *bo;
    struct brw_cc_viewport cc_viewport;

    cc_viewport.min_depth = -1.e35;
    cc_viewport.max_depth = 1.e35;

    bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 render unit state",
			    sizeof(cc_viewport), 4096);
    drm_intel_bo_subdata(bo, 0, sizeof(cc_viewport), &cc_viewport);

    return bo;
}

/* Build the VS unit state BO with the vertex shader disabled
 * (vertices pass straight through to SF). */
static drm_intel_bo *
gen4_create_vs_unit_state(ScrnInfoPtr scrn)
{
    I830Ptr pI830 = I830PTR(scrn);
    struct brw_vs_unit_state vs_state;
    memset(&vs_state, 0, sizeof(vs_state));

    /* Set up the vertex shader to be disabled (passthrough) */
    if (IS_IGDNG(pI830))
	vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; /* hardware requirement */
    else
	vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES;
    vs_state.thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state.vs6.vs_enable = 0;
    vs_state.vs6.vert_cache_disable = 1;

    return intel_bo_alloc_for_data(scrn, &vs_state, sizeof(vs_state),
				   "gen4 render VS state");
}

/**
 * Set up all combinations of cc state: each blendfactor for source and
 * dest.
 */
static drm_intel_bo *
gen4_create_cc_unit_state(ScrnInfoPtr scrn)
{
    I830Ptr pI830 = I830PTR(scrn);
    struct gen4_cc_unit_state *cc_state;
    drm_intel_bo *cc_state_bo, *cc_vp_bo;
    int i, j;

    cc_vp_bo = gen4_create_cc_viewport(scrn);

    cc_state_bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 CC state",
				     sizeof(*cc_state), 4096);
    drm_intel_bo_map(cc_state_bo, TRUE);
    cc_state = cc_state_bo->virtual;
    for (i = 0; i < BRW_BLENDFACTOR_COUNT; i++) {
	for (j = 0; j < BRW_BLENDFACTOR_COUNT; j++) {
	    cc_state_init(cc_state_bo,
			  offsetof(struct gen4_cc_unit_state,
				   cc_state[i][j].state),
			  i, j, cc_vp_bo);
	}
    }
    drm_intel_bo_unmap(cc_state_bo);

    /* each cc4 entry holds its own reference to the viewport BO */
    drm_intel_bo_unreference(cc_vp_bo);

    return cc_state_bo;
}

/* Return the hardware surface format for a source/mask picture.  The
 * format must already have been validated by i965_check_composite_texture;
 * an unknown format trips the assert. */
static uint32_t
i965_get_card_format(PicturePtr pPict)
{
    int i;

    for (i = 0; i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]);
	 i++)
    {
	if (i965_tex_formats[i].fmt == pPict->format)
	    break;
    }
    assert(i != sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]));

    return i965_tex_formats[i].card_fmt;
}

/* Map a Render filter to our sampler filter enum; returns -1 (not a valid
 * enumerator) for unsupported filters — callers treat that as "fall back". */
static sampler_state_filter_t
sampler_state_filter_from_picture (int filter)
{
    switch (filter) {
    case PictFilterNearest:
	return SAMPLER_STATE_FILTER_NEAREST;
    case PictFilterBilinear:
	return SAMPLER_STATE_FILTER_BILINEAR;
    default:
	return -1;
    }
}

/* Map a Render repeat type to our sampler extend enum; returns -1 (not a
 * valid enumerator) for unsupported repeats — callers treat that as
 * "fall back". */
static sampler_state_extend_t
sampler_state_extend_from_picture (int repeat_type)
{
    switch (repeat_type) {
    case RepeatNone:
	return SAMPLER_STATE_EXTEND_NONE;
    case RepeatNormal:
	return SAMPLER_STATE_EXTEND_REPEAT;
    case RepeatPad:
	return SAMPLER_STATE_EXTEND_PAD;
    case RepeatReflect:
	return SAMPLER_STATE_EXTEND_REFLECT;
    default:
	return -1;
    }
}

/**
 * Sets up the common fields for a surface state buffer for the given
 * picture in the given surface state buffer.
 */
static void
i965_set_picture_surface_state(dri_bo *ss_bo, int ss_index,
			       PicturePtr pPicture, PixmapPtr pPixmap,
			       Bool is_dst)
{
    struct brw_surface_state_padded *ss;
    struct brw_surface_state local_ss;
    dri_bo *pixmap_bo = i830_get_pixmap_bo(pPixmap);

    ss = (struct brw_surface_state_padded *)ss_bo->virtual + ss_index;

    /* Since ss is a pointer to WC memory, do all of our bit operations
     * into a local temporary first.
1050 */ 1051 memset(&local_ss, 0, sizeof(local_ss)); 1052 local_ss.ss0.surface_type = BRW_SURFACE_2D; 1053 if (is_dst) { 1054 uint32_t dst_format = 0; 1055 Bool ret = TRUE; 1056 1057 ret = i965_get_dest_format(pPicture, &dst_format); 1058 assert(ret == TRUE); 1059 local_ss.ss0.surface_format = dst_format; 1060 } else { 1061 local_ss.ss0.surface_format = i965_get_card_format(pPicture); 1062 } 1063 1064 local_ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; 1065 local_ss.ss0.writedisable_alpha = 0; 1066 local_ss.ss0.writedisable_red = 0; 1067 local_ss.ss0.writedisable_green = 0; 1068 local_ss.ss0.writedisable_blue = 0; 1069 local_ss.ss0.color_blend = 1; 1070 local_ss.ss0.vert_line_stride = 0; 1071 local_ss.ss0.vert_line_stride_ofs = 0; 1072 local_ss.ss0.mipmap_layout_mode = 0; 1073 local_ss.ss0.render_cache_read_mode = 0; 1074 if (pixmap_bo != NULL) 1075 local_ss.ss1.base_addr = pixmap_bo->offset; 1076 else 1077 local_ss.ss1.base_addr = intel_get_pixmap_offset(pPixmap); 1078 1079 local_ss.ss2.mip_count = 0; 1080 local_ss.ss2.render_target_rotation = 0; 1081 local_ss.ss2.height = pPixmap->drawable.height - 1; 1082 local_ss.ss2.width = pPixmap->drawable.width - 1; 1083 local_ss.ss3.pitch = intel_get_pixmap_pitch(pPixmap) - 1; 1084 local_ss.ss3.tile_walk = 0; /* Tiled X */ 1085 local_ss.ss3.tiled_surface = i830_pixmap_tiled(pPixmap) ? 
1 : 0; 1086 1087 memcpy(ss, &local_ss, sizeof(local_ss)); 1088 1089 if (pixmap_bo != NULL) { 1090 uint32_t write_domain, read_domains; 1091 1092 if (is_dst) { 1093 write_domain = I915_GEM_DOMAIN_RENDER; 1094 read_domains = I915_GEM_DOMAIN_RENDER; 1095 } else { 1096 write_domain = 0; 1097 read_domains = I915_GEM_DOMAIN_SAMPLER; 1098 } 1099 dri_bo_emit_reloc(ss_bo, read_domains, write_domain, 1100 0, 1101 ss_index * sizeof(*ss) + 1102 offsetof(struct brw_surface_state, ss1), 1103 pixmap_bo); 1104 } 1105} 1106 1107static void 1108i965_emit_composite_state(ScrnInfoPtr pScrn) 1109{ 1110 I830Ptr pI830 = I830PTR(pScrn); 1111 struct gen4_render_state *render_state= pI830->gen4_render_state; 1112 gen4_composite_op *composite_op = &render_state->composite_op; 1113 int op = composite_op->op; 1114 PicturePtr pMaskPicture = composite_op->mask_picture; 1115 PicturePtr pDstPicture = composite_op->dest_picture; 1116 PixmapPtr pMask = composite_op->mask; 1117 PixmapPtr pDst = composite_op->dest; 1118 sampler_state_filter_t src_filter = composite_op->src_filter; 1119 sampler_state_filter_t mask_filter = composite_op->mask_filter; 1120 sampler_state_extend_t src_extend = composite_op->src_extend; 1121 sampler_state_extend_t mask_extend = composite_op->mask_extend; 1122 Bool is_affine = composite_op->is_affine; 1123 int urb_vs_start, urb_vs_size; 1124 int urb_gs_start, urb_gs_size; 1125 int urb_clip_start, urb_clip_size; 1126 int urb_sf_start, urb_sf_size; 1127 int urb_cs_start, urb_cs_size; 1128 uint32_t src_blend, dst_blend; 1129 dri_bo *binding_table_bo = composite_op->binding_table_bo; 1130 1131 render_state->needs_state_emit = FALSE; 1132 1133 IntelEmitInvarientState(pScrn); 1134 pI830->last_3d = LAST_3D_RENDER; 1135 1136 urb_vs_start = 0; 1137 urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; 1138 urb_gs_start = urb_vs_start + urb_vs_size; 1139 urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; 1140 urb_clip_start = urb_gs_start + urb_gs_size; 1141 urb_clip_size = 
URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; 1142 urb_sf_start = urb_clip_start + urb_clip_size; 1143 urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; 1144 urb_cs_start = urb_sf_start + urb_sf_size; 1145 urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; 1146 1147 i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format, 1148 &src_blend, &dst_blend); 1149 1150 /* Begin the long sequence of commands needed to set up the 3D 1151 * rendering pipe 1152 */ 1153 { 1154 BEGIN_BATCH(2); 1155 OUT_BATCH(MI_FLUSH | 1156 MI_STATE_INSTRUCTION_CACHE_FLUSH | 1157 BRW_MI_GLOBAL_SNAPSHOT_RESET); 1158 OUT_BATCH(MI_NOOP); 1159 ADVANCE_BATCH(); 1160 } 1161 { 1162 if (IS_IGDNG(pI830)) 1163 BEGIN_BATCH(14); 1164 else 1165 BEGIN_BATCH(12); 1166 1167 /* Match Mesa driver setup */ 1168 if (IS_G4X(pI830) || IS_IGDNG(pI830)) 1169 OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); 1170 else 1171 OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D); 1172 1173 OUT_BATCH(BRW_CS_URB_STATE | 0); 1174 OUT_BATCH((0 << 4) | /* URB Entry Allocation Size */ 1175 (0 << 0)); /* Number of URB Entries */ 1176 1177 /* Zero out the two base address registers so all offsets are 1178 * absolute. 
1179 */ 1180 if (IS_IGDNG(pI830)) { 1181 OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6); 1182 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ 1183 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */ 1184 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ 1185 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Instruction base address */ 1186 /* general state max addr, disabled */ 1187 OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY); 1188 /* media object state max addr, disabled */ 1189 OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY); 1190 /* Instruction max addr, disabled */ 1191 OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY); 1192 } else { 1193 OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4); 1194 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ 1195 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */ 1196 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ 1197 /* general state max addr, disabled */ 1198 OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY); 1199 /* media object state max addr, disabled */ 1200 OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY); 1201 } 1202 /* Set system instruction pointer */ 1203 OUT_BATCH(BRW_STATE_SIP | 0); 1204 OUT_RELOC(render_state->sip_kernel_bo, 1205 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 1206 OUT_BATCH(MI_NOOP); 1207 ADVANCE_BATCH(); 1208 } 1209 { 1210 int pipe_ctrl; 1211 BEGIN_BATCH(26); 1212 /* Pipe control */ 1213 1214 if (IS_IGDNG(pI830)) 1215 pipe_ctrl = BRW_PIPE_CONTROL_NOWRITE; 1216 else 1217 pipe_ctrl = BRW_PIPE_CONTROL_NOWRITE | BRW_PIPE_CONTROL_IS_FLUSH; 1218 1219 OUT_BATCH(BRW_PIPE_CONTROL | pipe_ctrl | 2); 1220 OUT_BATCH(0); /* Destination address */ 1221 OUT_BATCH(0); /* Immediate data low DW */ 1222 OUT_BATCH(0); /* Immediate data high DW */ 1223 1224 /* Binding table pointers */ 1225 OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4); 1226 OUT_BATCH(0); /* vs */ 1227 OUT_BATCH(0); /* gs */ 1228 OUT_BATCH(0); /* clip */ 1229 OUT_BATCH(0); /* sf */ 1230 /* 
Only the PS uses the binding table */ 1231 OUT_RELOC(binding_table_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); 1232 1233 /* The drawing rectangle clipping is always on. Set it to values that 1234 * shouldn't do any clipping. 1235 */ 1236 OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); 1237 OUT_BATCH(0x00000000); /* ymin, xmin */ 1238 OUT_BATCH(DRAW_YMAX(pDst->drawable.height - 1) | 1239 DRAW_XMAX(pDst->drawable.width - 1)); /* ymax, xmax */ 1240 OUT_BATCH(0x00000000); /* yorigin, xorigin */ 1241 1242 /* skip the depth buffer */ 1243 /* skip the polygon stipple */ 1244 /* skip the polygon stipple offset */ 1245 /* skip the line stipple */ 1246 1247 /* Set the pointers to the 3d pipeline state */ 1248 OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5); 1249 OUT_RELOC(render_state->vs_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 1250 OUT_BATCH(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */ 1251 OUT_BATCH(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */ 1252 if (pMask) { 1253 OUT_RELOC(render_state->sf_mask_state_bo, 1254 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 1255 } else { 1256 OUT_RELOC(render_state->sf_state_bo, 1257 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 1258 } 1259 1260 OUT_RELOC(render_state->wm_state_bo[composite_op->wm_kernel] 1261 [src_filter][src_extend] 1262 [mask_filter][mask_extend], 1263 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 1264 1265 OUT_RELOC(render_state->cc_state_bo, 1266 I915_GEM_DOMAIN_INSTRUCTION, 0, 1267 offsetof(struct gen4_cc_unit_state, 1268 cc_state[src_blend][dst_blend])); 1269 1270 /* URB fence */ 1271 OUT_BATCH(BRW_URB_FENCE | 1272 UF0_CS_REALLOC | 1273 UF0_SF_REALLOC | 1274 UF0_CLIP_REALLOC | 1275 UF0_GS_REALLOC | 1276 UF0_VS_REALLOC | 1277 1); 1278 OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | 1279 ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | 1280 ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); 1281 OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | 1282 ((urb_sf_start + 
urb_sf_size) << UF2_SF_FENCE_SHIFT)); 1283 1284 /* Constant buffer state */ 1285 OUT_BATCH(BRW_CS_URB_STATE | 0); 1286 OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) | 1287 (URB_CS_ENTRIES << 0)); 1288 ADVANCE_BATCH(); 1289 } 1290 { 1291 /* 1292 * number of extra parameters per vertex 1293 */ 1294 int nelem = pMask ? 2: 1; 1295 /* 1296 * size of extra parameters: 1297 * 3 for homogenous (xyzw) 1298 * 2 for cartesian (xy) 1299 */ 1300 int selem = is_affine ? 2 : 3; 1301 uint32_t w_component; 1302 uint32_t src_format; 1303 1304 render_state->vertex_size = 4 * (2 + nelem * selem); 1305 1306 if (is_affine) 1307 { 1308 src_format = BRW_SURFACEFORMAT_R32G32_FLOAT; 1309 w_component = BRW_VFCOMPONENT_STORE_1_FLT; 1310 } 1311 else 1312 { 1313 src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; 1314 w_component = BRW_VFCOMPONENT_STORE_SRC; 1315 } 1316 1317 if (IS_IGDNG(pI830)) { 1318 BEGIN_BATCH(pMask?9:7); 1319 /* 1320 * The reason to add this extra vertex element in the header is that 1321 * IGDNG has different vertex header definition and origin method to 1322 * set destination element offset doesn't exist anymore, which means 1323 * hardware requires a predefined vertex element layout. 1324 * 1325 * haihao proposed this approach to fill the first vertex element, so 1326 * origin layout for Gen4 doesn't need to change, and origin shader 1327 * programs behavior is also kept. 1328 * 1329 * I think this is not bad. 
- zhenyu 1330 */ 1331 1332 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * (2 + nelem)) - 1)); 1333 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1334 VE0_VALID | 1335 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1336 (0 << VE0_OFFSET_SHIFT)); 1337 1338 OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | 1339 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | 1340 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | 1341 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); 1342 } else { 1343 BEGIN_BATCH(pMask?7:5); 1344 /* Set up our vertex elements, sourced from the single vertex buffer. 1345 * that will be set up later. 1346 */ 1347 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * (1 + nelem)) - 1)); 1348 } 1349 1350 /* x,y */ 1351 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1352 VE0_VALID | 1353 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1354 (0 << VE0_OFFSET_SHIFT)); 1355 1356 if (IS_IGDNG(pI830)) 1357 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1358 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1359 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 1360 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 1361 else 1362 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1363 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1364 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 1365 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | 1366 (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); 1367 /* u0, v0, w0 */ 1368 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1369 VE0_VALID | 1370 (src_format << VE0_FORMAT_SHIFT) | 1371 ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */ 1372 1373 if (IS_IGDNG(pI830)) 1374 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1375 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1376 (w_component << VE1_VFCOMPONENT_2_SHIFT) | 1377 (BRW_VFCOMPONENT_STORE_1_FLT << 
VE1_VFCOMPONENT_3_SHIFT)); 1378 else 1379 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1380 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1381 (w_component << VE1_VFCOMPONENT_2_SHIFT) | 1382 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | 1383 ((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ 1384 /* u1, v1, w1 */ 1385 if (pMask) { 1386 OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1387 VE0_VALID | 1388 (src_format << VE0_FORMAT_SHIFT) | 1389 (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */ 1390 1391 if (IS_IGDNG(pI830)) 1392 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1393 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1394 (w_component << VE1_VFCOMPONENT_2_SHIFT) | 1395 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 1396 else 1397 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1398 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1399 (w_component << VE1_VFCOMPONENT_2_SHIFT) | 1400 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | 1401 ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ 1402 } 1403 1404 ADVANCE_BATCH(); 1405 } 1406} 1407 1408/** 1409 * Returns whether the current set of composite state plus vertex buffer is 1410 * expected to fit in the aperture. 
1411 */ 1412static Bool 1413i965_composite_check_aperture(ScrnInfoPtr pScrn) 1414{ 1415 I830Ptr pI830 = I830PTR(pScrn); 1416 struct gen4_render_state *render_state= pI830->gen4_render_state; 1417 gen4_composite_op *composite_op = &render_state->composite_op; 1418 drm_intel_bo *bo_table[] = { 1419 pI830->batch_bo, 1420 composite_op->binding_table_bo, 1421 render_state->vertex_buffer_bo, 1422 render_state->vs_state_bo, 1423 render_state->sf_state_bo, 1424 render_state->sf_mask_state_bo, 1425 render_state->wm_state_bo[composite_op->wm_kernel] 1426 [composite_op->src_filter] 1427 [composite_op->src_extend] 1428 [composite_op->mask_filter] 1429 [composite_op->mask_extend], 1430 render_state->cc_state_bo, 1431 render_state->sip_kernel_bo, 1432 }; 1433 1434 return drm_intel_bufmgr_check_aperture_space(bo_table, 1435 ARRAY_SIZE(bo_table)) == 0; 1436} 1437 1438Bool 1439i965_prepare_composite(int op, PicturePtr pSrcPicture, 1440 PicturePtr pMaskPicture, PicturePtr pDstPicture, 1441 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) 1442{ 1443 ScrnInfoPtr pScrn = xf86Screens[pDstPicture->pDrawable->pScreen->myNum]; 1444 I830Ptr pI830 = I830PTR(pScrn); 1445 struct gen4_render_state *render_state= pI830->gen4_render_state; 1446 gen4_composite_op *composite_op = &render_state->composite_op; 1447 uint32_t *binding_table; 1448 drm_intel_bo *binding_table_bo, *surface_state_bo; 1449 1450 if (composite_op->src_filter < 0) 1451 I830FALLBACK("Bad src filter 0x%x\n", pSrcPicture->filter); 1452 composite_op->src_extend = 1453 sampler_state_extend_from_picture(pSrcPicture->repeatType); 1454 if (composite_op->src_extend < 0) 1455 I830FALLBACK("Bad src repeat 0x%x\n", pSrcPicture->repeatType); 1456 1457 if (pMaskPicture) { 1458 composite_op->mask_filter = 1459 sampler_state_filter_from_picture(pMaskPicture->filter); 1460 if (composite_op->mask_filter < 0) 1461 I830FALLBACK("Bad mask filter 0x%x\n", pMaskPicture->filter); 1462 composite_op->mask_extend = 1463 
sampler_state_extend_from_picture(pMaskPicture->repeatType); 1464 if (composite_op->mask_extend < 0) 1465 I830FALLBACK("Bad mask repeat 0x%x\n", pMaskPicture->repeatType); 1466 } else { 1467 composite_op->mask_filter = SAMPLER_STATE_FILTER_NEAREST; 1468 composite_op->mask_extend = SAMPLER_STATE_EXTEND_NONE; 1469 } 1470 1471 /* Set up the surface states. */ 1472 surface_state_bo = dri_bo_alloc(pI830->bufmgr, "surface_state", 1473 3 * sizeof (brw_surface_state_padded), 1474 4096); 1475 if (dri_bo_map(surface_state_bo, 1) != 0) { 1476 dri_bo_unreference(surface_state_bo); 1477 return FALSE; 1478 } 1479 /* Set up the state buffer for the destination surface */ 1480 i965_set_picture_surface_state(surface_state_bo, 0, 1481 pDstPicture, pDst, TRUE); 1482 /* Set up the source surface state buffer */ 1483 i965_set_picture_surface_state(surface_state_bo, 1, 1484 pSrcPicture, pSrc, FALSE); 1485 if (pMask) { 1486 /* Set up the mask surface state buffer */ 1487 i965_set_picture_surface_state(surface_state_bo, 2, 1488 pMaskPicture, pMask, 1489 FALSE); 1490 } 1491 dri_bo_unmap(surface_state_bo); 1492 1493 /* Set up the binding table of surface indices to surface state. 
*/ 1494 binding_table_bo = dri_bo_alloc(pI830->bufmgr, "binding_table", 1495 3 * sizeof(uint32_t), 4096); 1496 if (dri_bo_map (binding_table_bo, 1) != 0) { 1497 dri_bo_unreference(binding_table_bo); 1498 dri_bo_unreference(surface_state_bo); 1499 return FALSE; 1500 } 1501 1502 binding_table = binding_table_bo->virtual; 1503 binding_table[0] = intel_emit_reloc(binding_table_bo, 1504 0 * sizeof(uint32_t), 1505 surface_state_bo, 1506 0 * sizeof(brw_surface_state_padded), 1507 I915_GEM_DOMAIN_INSTRUCTION, 0); 1508 1509 binding_table[1] = intel_emit_reloc(binding_table_bo, 1510 1 * sizeof(uint32_t), 1511 surface_state_bo, 1512 1 * sizeof(brw_surface_state_padded), 1513 I915_GEM_DOMAIN_INSTRUCTION, 0); 1514 1515 if (pMask) { 1516 binding_table[2] = intel_emit_reloc(binding_table_bo, 1517 2 * sizeof(uint32_t), 1518 surface_state_bo, 1519 2 * sizeof(brw_surface_state_padded), 1520 I915_GEM_DOMAIN_INSTRUCTION, 0); 1521 } else { 1522 binding_table[2] = 0; 1523 } 1524 dri_bo_unmap(binding_table_bo); 1525 /* All refs to surface_state are now contained in binding_table_bo. 
*/ 1526 drm_intel_bo_unreference(surface_state_bo); 1527 1528 composite_op->op = op; 1529 composite_op->source_picture = pSrcPicture; 1530 composite_op->mask_picture = pMaskPicture; 1531 composite_op->dest_picture = pDstPicture; 1532 composite_op->source = pSrc; 1533 composite_op->mask = pMask; 1534 composite_op->dest = pDst; 1535 drm_intel_bo_unreference(composite_op->binding_table_bo); 1536 composite_op->binding_table_bo = binding_table_bo; 1537 composite_op->src_filter = 1538 sampler_state_filter_from_picture(pSrcPicture->filter); 1539 1540 pI830->scale_units[0][0] = pSrc->drawable.width; 1541 pI830->scale_units[0][1] = pSrc->drawable.height; 1542 1543 pI830->transform[0] = pSrcPicture->transform; 1544 composite_op->is_affine = 1545 i830_transform_is_affine(pI830->transform[0]); 1546 1547 if (!pMask) { 1548 pI830->transform[1] = NULL; 1549 pI830->scale_units[1][0] = -1; 1550 pI830->scale_units[1][1] = -1; 1551 } else { 1552 pI830->transform[1] = pMaskPicture->transform; 1553 pI830->scale_units[1][0] = pMask->drawable.width; 1554 pI830->scale_units[1][1] = pMask->drawable.height; 1555 composite_op->is_affine |= 1556 i830_transform_is_affine(pI830->transform[1]); 1557 } 1558 1559 1560 if (pMask) { 1561 if (pMaskPicture->componentAlpha && 1562 PICT_FORMAT_RGB(pMaskPicture->format)) 1563 { 1564 if (i965_blend_op[op].src_alpha) { 1565 if (composite_op->is_affine) 1566 composite_op->wm_kernel = WM_KERNEL_MASKCA_SRCALPHA_AFFINE; 1567 else 1568 composite_op->wm_kernel = WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE; 1569 } else { 1570 if (composite_op->is_affine) 1571 composite_op->wm_kernel = WM_KERNEL_MASKCA_AFFINE; 1572 else 1573 composite_op->wm_kernel = WM_KERNEL_MASKCA_PROJECTIVE; 1574 } 1575 } else { 1576 if (composite_op->is_affine) 1577 composite_op->wm_kernel = WM_KERNEL_MASKNOCA_AFFINE; 1578 else 1579 composite_op->wm_kernel = WM_KERNEL_MASKNOCA_PROJECTIVE; 1580 } 1581 } else { 1582 if (composite_op->is_affine) 1583 composite_op->wm_kernel = WM_KERNEL_NOMASK_AFFINE; 
1584 else 1585 composite_op->wm_kernel = WM_KERNEL_NOMASK_PROJECTIVE; 1586 } 1587 1588 if (!i965_composite_check_aperture(pScrn)) { 1589 intel_batch_flush(pScrn, FALSE); 1590 if (!i965_composite_check_aperture(pScrn)) 1591 I830FALLBACK("Couldn't fit render operation in aperture\n"); 1592 } 1593 1594 render_state->needs_state_emit = TRUE; 1595 1596 return TRUE; 1597} 1598 1599static drm_intel_bo * 1600i965_get_vb_space(ScrnInfoPtr pScrn) 1601{ 1602 I830Ptr pI830 = I830PTR(pScrn); 1603 struct gen4_render_state *render_state = pI830->gen4_render_state; 1604 1605 /* If the vertex buffer is too full, then we free the old and a new one 1606 * gets made. 1607 */ 1608 if (render_state->vb_offset + VERTEX_FLOATS_PER_COMPOSITE > 1609 VERTEX_BUFFER_SIZE) { 1610 drm_intel_bo_unreference(render_state->vertex_buffer_bo); 1611 render_state->vertex_buffer_bo = NULL; 1612 } 1613 1614 /* Alloc a new vertex buffer if necessary. */ 1615 if (render_state->vertex_buffer_bo == NULL) { 1616 render_state->vertex_buffer_bo = drm_intel_bo_alloc(pI830->bufmgr, "vb", 1617 sizeof(gen4_vertex_buffer), 1618 4096); 1619 render_state->vb_offset = 0; 1620 } 1621 1622 drm_intel_bo_reference(render_state->vertex_buffer_bo); 1623 return render_state->vertex_buffer_bo; 1624} 1625 1626void 1627i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, 1628 int dstX, int dstY, int w, int h) 1629{ 1630 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1631 I830Ptr pI830 = I830PTR(pScrn); 1632 struct gen4_render_state *render_state = pI830->gen4_render_state; 1633 Bool has_mask; 1634 float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; 1635 int i; 1636 drm_intel_bo *vb_bo; 1637 float vb[18]; 1638 Bool is_affine = render_state->composite_op.is_affine; 1639 1640 if (is_affine) 1641 { 1642 if (!i830_get_transformed_coordinates(srcX, srcY, 1643 pI830->transform[0], 1644 &src_x[0], &src_y[0])) 1645 return; 1646 if (!i830_get_transformed_coordinates(srcX, srcY + h, 1647 
pI830->transform[0], 1648 &src_x[1], &src_y[1])) 1649 return; 1650 if (!i830_get_transformed_coordinates(srcX + w, srcY + h, 1651 pI830->transform[0], 1652 &src_x[2], &src_y[2])) 1653 return; 1654 } 1655 else 1656 { 1657 if (!i830_get_transformed_coordinates_3d(srcX, srcY, 1658 pI830->transform[0], 1659 &src_x[0], &src_y[0], 1660 &src_w[0])) 1661 return; 1662 if (!i830_get_transformed_coordinates_3d(srcX, srcY + h, 1663 pI830->transform[0], 1664 &src_x[1], &src_y[1], 1665 &src_w[1])) 1666 return; 1667 if (!i830_get_transformed_coordinates_3d(srcX + w, srcY + h, 1668 pI830->transform[0], 1669 &src_x[2], &src_y[2], 1670 &src_w[2])) 1671 return; 1672 } 1673 1674 if (pI830->scale_units[1][0] == -1 || pI830->scale_units[1][1] == -1) { 1675 has_mask = FALSE; 1676 } else { 1677 has_mask = TRUE; 1678 if (is_affine) { 1679 if (!i830_get_transformed_coordinates(maskX, maskY, 1680 pI830->transform[1], 1681 &mask_x[0], &mask_y[0])) 1682 return; 1683 if (!i830_get_transformed_coordinates(maskX, maskY + h, 1684 pI830->transform[1], 1685 &mask_x[1], &mask_y[1])) 1686 return; 1687 if (!i830_get_transformed_coordinates(maskX + w, maskY + h, 1688 pI830->transform[1], 1689 &mask_x[2], &mask_y[2])) 1690 return; 1691 } else { 1692 if (!i830_get_transformed_coordinates_3d(maskX, maskY, 1693 pI830->transform[1], 1694 &mask_x[0], &mask_y[0], 1695 &mask_w[0])) 1696 return; 1697 if (!i830_get_transformed_coordinates_3d(maskX, maskY + h, 1698 pI830->transform[1], 1699 &mask_x[1], &mask_y[1], 1700 &mask_w[1])) 1701 return; 1702 if (!i830_get_transformed_coordinates_3d(maskX + w, maskY + h, 1703 pI830->transform[1], 1704 &mask_x[2], &mask_y[2], 1705 &mask_w[2])) 1706 return; 1707 } 1708 } 1709 1710 vb_bo = i965_get_vb_space(pScrn); 1711 if (vb_bo == NULL) 1712 return; 1713 i = 0; 1714 /* rect (x2,y2) */ 1715 vb[i++] = (float)(dstX + w); 1716 vb[i++] = (float)(dstY + h); 1717 vb[i++] = src_x[2] / pI830->scale_units[0][0]; 1718 vb[i++] = src_y[2] / pI830->scale_units[0][1]; 1719 if (!is_affine) 
1720 vb[i++] = src_w[2]; 1721 if (has_mask) { 1722 vb[i++] = mask_x[2] / pI830->scale_units[1][0]; 1723 vb[i++] = mask_y[2] / pI830->scale_units[1][1]; 1724 if (!is_affine) 1725 vb[i++] = mask_w[2]; 1726 } 1727 1728 /* rect (x1,y2) */ 1729 vb[i++] = (float)dstX; 1730 vb[i++] = (float)(dstY + h); 1731 vb[i++] = src_x[1] / pI830->scale_units[0][0]; 1732 vb[i++] = src_y[1] / pI830->scale_units[0][1]; 1733 if (!is_affine) 1734 vb[i++] = src_w[1]; 1735 if (has_mask) { 1736 vb[i++] = mask_x[1] / pI830->scale_units[1][0]; 1737 vb[i++] = mask_y[1] / pI830->scale_units[1][1]; 1738 if (!is_affine) 1739 vb[i++] = mask_w[1]; 1740 } 1741 1742 /* rect (x1,y1) */ 1743 vb[i++] = (float)dstX; 1744 vb[i++] = (float)dstY; 1745 vb[i++] = src_x[0] / pI830->scale_units[0][0]; 1746 vb[i++] = src_y[0] / pI830->scale_units[0][1]; 1747 if (!is_affine) 1748 vb[i++] = src_w[0]; 1749 if (has_mask) { 1750 vb[i++] = mask_x[0] / pI830->scale_units[1][0]; 1751 vb[i++] = mask_y[0] / pI830->scale_units[1][1]; 1752 if (!is_affine) 1753 vb[i++] = mask_w[0]; 1754 } 1755 assert (i <= VERTEX_BUFFER_SIZE); 1756 drm_intel_bo_subdata(vb_bo, render_state->vb_offset * 4, i * 4, vb); 1757 1758 if (!i965_composite_check_aperture(pScrn)) 1759 intel_batch_flush(pScrn, FALSE); 1760 1761 intel_batch_start_atomic(pScrn, 200); 1762 if (render_state->needs_state_emit) 1763 i965_emit_composite_state(pScrn); 1764 1765 BEGIN_BATCH(12); 1766 OUT_BATCH(MI_FLUSH); 1767 /* Set up the pointer to our (single) vertex buffer */ 1768 OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3); 1769 OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) | 1770 VB0_VERTEXDATA | 1771 (render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT)); 1772 OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, render_state->vb_offset * 4); 1773 1774 if (IS_IGDNG(pI830)) 1775 OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, render_state->vb_offset * 4 + i * 4); 1776 else 1777 OUT_BATCH(3); 1778 1779 OUT_BATCH(0); // ignore for VERTEXDATA, but still there 1780 1781 OUT_BATCH(BRW_3DPRIMITIVE | 
1782 BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | 1783 (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | 1784 (0 << 9) | /* CTG - indirect vertex count */ 1785 4); 1786 OUT_BATCH(3); /* vertex count per instance */ 1787 OUT_BATCH(0); /* start vertex offset */ 1788 OUT_BATCH(1); /* single instance */ 1789 OUT_BATCH(0); /* start instance location */ 1790 OUT_BATCH(0); /* index buffer offset, ignored */ 1791 ADVANCE_BATCH(); 1792 1793 render_state->vb_offset += i; 1794 drm_intel_bo_unreference(vb_bo); 1795 1796 intel_batch_end_atomic(pScrn); 1797 1798 i830_debug_sync(pScrn); 1799} 1800 1801void 1802i965_batch_flush_notify(ScrnInfoPtr pScrn) 1803{ 1804 I830Ptr pI830 = I830PTR(pScrn); 1805 struct gen4_render_state *render_state = pI830->gen4_render_state; 1806 1807 /* Once a batch is emitted, we never want to map again any buffer 1808 * object being referenced by that batch, (which would be very 1809 * expensive). */ 1810 if (render_state->vertex_buffer_bo) { 1811 dri_bo_unreference (render_state->vertex_buffer_bo); 1812 render_state->vertex_buffer_bo = NULL; 1813 } 1814 1815 render_state->needs_state_emit = TRUE; 1816} 1817 1818/** 1819 * Called at EnterVT so we can set up our offsets into the state buffer. 
 */
void
gen4_render_state_init(ScrnInfoPtr pScrn)
{
    I830Ptr pI830 = I830PTR(pScrn);
    struct gen4_render_state *render_state;
    int i, j, k, l, m;
    drm_intel_bo *sf_kernel_bo, *sf_kernel_mask_bo;
    drm_intel_bo *border_color_bo;

    /* NOTE(review): calloc result is not checked here; a failure would
     * crash on the dereference below — confirm whether callers guarantee
     * allocation can't fail in this path.
     */
    if (pI830->gen4_render_state == NULL)
	pI830->gen4_render_state = calloc(sizeof(*render_state), 1);

    render_state = pI830->gen4_render_state;
    render_state->vb_offset = 0;

    render_state->vs_state_bo = gen4_create_vs_unit_state(pScrn);

    /* Set up the two SF states (one for blending with a mask, one without) */
    if (IS_IGDNG(pI830)) {
	sf_kernel_bo = intel_bo_alloc_for_data(pScrn,
					       sf_kernel_static_gen5,
					       sizeof(sf_kernel_static_gen5),
					       "sf kernel gen5");
	sf_kernel_mask_bo = intel_bo_alloc_for_data(pScrn,
						    sf_kernel_mask_static_gen5,
						    sizeof(sf_kernel_mask_static_gen5),
						    "sf mask kernel");
    } else {
	sf_kernel_bo = intel_bo_alloc_for_data(pScrn,
					       sf_kernel_static,
					       sizeof(sf_kernel_static),
					       "sf kernel");
	sf_kernel_mask_bo = intel_bo_alloc_for_data(pScrn,
						    sf_kernel_mask_static,
						    sizeof(sf_kernel_mask_static),
						    "sf mask kernel");
    }
    render_state->sf_state_bo = gen4_create_sf_state(pScrn, sf_kernel_bo);
    render_state->sf_mask_state_bo = gen4_create_sf_state(pScrn,
							  sf_kernel_mask_bo);
    /* The SF unit states hold the only remaining references needed */
    drm_intel_bo_unreference(sf_kernel_bo);
    drm_intel_bo_unreference(sf_kernel_mask_bo);

    /* Upload every WM (pixel shader) kernel variant once */
    for (m = 0; m < WM_KERNEL_COUNT; m++) {
	if (IS_IGDNG(pI830))
	    render_state->wm_kernel_bo[m] =
		intel_bo_alloc_for_data(pScrn,
					wm_kernels_gen5[m].data, wm_kernels_gen5[m].size,
					"WM kernel gen5");
	else
	    render_state->wm_kernel_bo[m] =
		intel_bo_alloc_for_data(pScrn,
					wm_kernels[m].data, wm_kernels[m].size,
					"WM kernel");
    }

    /* Set up the WM states: each filter/extend type for source and mask, per
     * kernel.
     */
    border_color_bo = sampler_border_color_create(pScrn);
    for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++) {
	for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++) {
	    for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) {
		for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++) {
		    drm_intel_bo *sampler_state_bo;

		    /* (i,j) = src filter/extend, (k,l) = mask filter/extend */
		    sampler_state_bo =
			gen4_create_sampler_state(pScrn,
						  i, j,
						  k, l,
						  border_color_bo);

		    for (m = 0; m < WM_KERNEL_COUNT; m++) {
			if (IS_IGDNG(pI830))
			    render_state->wm_state_bo[m][i][j][k][l] =
				gen4_create_wm_state(pScrn,
						     wm_kernels_gen5[m].has_mask,
						     render_state->wm_kernel_bo[m],
						     sampler_state_bo);
			else
			    render_state->wm_state_bo[m][i][j][k][l] =
				gen4_create_wm_state(pScrn,
						     wm_kernels[m].has_mask,
						     render_state->wm_kernel_bo[m],
						     sampler_state_bo);
		    }
		    drm_intel_bo_unreference(sampler_state_bo);
		}
	    }
	}
    }
    drm_intel_bo_unreference(border_color_bo);

    render_state->cc_state_bo = gen4_create_cc_unit_state(pScrn);
    render_state->sip_kernel_bo = intel_bo_alloc_for_data(pScrn,
							  sip_kernel_static,
							  sizeof(sip_kernel_static),
							  "sip kernel");
}

/**
 * Called at LeaveVT.
1923 */ 1924void 1925gen4_render_state_cleanup(ScrnInfoPtr pScrn) 1926{ 1927 I830Ptr pI830 = I830PTR(pScrn); 1928 struct gen4_render_state *render_state= pI830->gen4_render_state; 1929 int i, j, k, l, m; 1930 gen4_composite_op *composite_op = &render_state->composite_op; 1931 1932 drm_intel_bo_unreference(composite_op->binding_table_bo); 1933 drm_intel_bo_unreference(render_state->vertex_buffer_bo); 1934 1935 drm_intel_bo_unreference(render_state->vs_state_bo); 1936 drm_intel_bo_unreference(render_state->sf_state_bo); 1937 drm_intel_bo_unreference(render_state->sf_mask_state_bo); 1938 1939 for (i = 0; i < WM_KERNEL_COUNT; i++) 1940 drm_intel_bo_unreference(render_state->wm_kernel_bo[i]); 1941 1942 for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++) 1943 for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++) 1944 for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) 1945 for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++) 1946 for (m = 0; m < WM_KERNEL_COUNT; m++) 1947 drm_intel_bo_unreference(render_state->wm_state_bo[m][i][j][k][l]); 1948 1949 drm_intel_bo_unreference(render_state->cc_state_bo); 1950 drm_intel_bo_unreference(render_state->sip_kernel_bo); 1951 1952 free(pI830->gen4_render_state); 1953 pI830->gen4_render_state = NULL; 1954} 1955