/* i965_render.c — revision 03b705cf */
1/* 2 * Copyright © 2006,2008 Intel Corporation 3 * Copyright © 2007 Red Hat, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 
23 * 24 * Authors: 25 * Wang Zhenyu <zhenyu.z.wang@intel.com> 26 * Eric Anholt <eric@anholt.net> 27 * Carl Worth <cworth@redhat.com> 28 * Keith Packard <keithp@keithp.com> 29 * 30 */ 31 32#ifdef HAVE_CONFIG_H 33#include "config.h" 34#endif 35 36#include <assert.h> 37#include "xf86.h" 38#include "intel.h" 39#include "i830_reg.h" 40#include "i965_reg.h" 41 42/* bring in brw structs */ 43#include "brw_defines.h" 44#include "brw_structs.h" 45 46// refer vol2, 3d rasterization 3.8.1 47 48/* defined in brw_defines.h */ 49static const struct blendinfo { 50 Bool dst_alpha; 51 Bool src_alpha; 52 uint32_t src_blend; 53 uint32_t dst_blend; 54} i965_blend_op[] = { 55 /* Clear */ 56 {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ZERO}, 57 /* Src */ 58 {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO}, 59 /* Dst */ 60 {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ONE}, 61 /* Over */ 62 {0, 1, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_INV_SRC_ALPHA}, 63 /* OverReverse */ 64 {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE}, 65 /* In */ 66 {1, 0, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, 67 /* InReverse */ 68 {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_SRC_ALPHA}, 69 /* Out */ 70 {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, 71 /* OutReverse */ 72 {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_INV_SRC_ALPHA}, 73 /* Atop */ 74 {1, 1, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, 75 /* AtopReverse */ 76 {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA}, 77 /* Xor */ 78 {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, 79 /* Add */ 80 {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ONE}, 81}; 82 83/** 84 * Highest-valued BLENDFACTOR used in i965_blend_op. 
85 * 86 * This leaves out BRW_BLENDFACTOR_INV_DST_COLOR, 87 * BRW_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, 88 * BRW_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} 89 */ 90#define BRW_BLENDFACTOR_COUNT (BRW_BLENDFACTOR_INV_DST_ALPHA + 1) 91 92/* FIXME: surface format defined in brw_defines.h, shared Sampling engine 93 * 1.7.2 94 */ 95static const struct formatinfo { 96 int fmt; 97 uint32_t card_fmt; 98} i965_tex_formats[] = { 99 {PICT_a8, BRW_SURFACEFORMAT_A8_UNORM}, 100 {PICT_a8r8g8b8, BRW_SURFACEFORMAT_B8G8R8A8_UNORM}, 101 {PICT_x8r8g8b8, BRW_SURFACEFORMAT_B8G8R8X8_UNORM}, 102 {PICT_a8b8g8r8, BRW_SURFACEFORMAT_R8G8B8A8_UNORM}, 103 {PICT_x8b8g8r8, BRW_SURFACEFORMAT_R8G8B8X8_UNORM}, 104 {PICT_r8g8b8, BRW_SURFACEFORMAT_R8G8B8_UNORM}, 105 {PICT_r5g6b5, BRW_SURFACEFORMAT_B5G6R5_UNORM}, 106 {PICT_a1r5g5b5, BRW_SURFACEFORMAT_B5G5R5A1_UNORM}, 107#if XORG_VERSION_CURRENT >= 10699900 108 {PICT_a2r10g10b10, BRW_SURFACEFORMAT_B10G10R10A2_UNORM}, 109 {PICT_x2r10g10b10, BRW_SURFACEFORMAT_B10G10R10X2_UNORM}, 110 {PICT_a2b10g10r10, BRW_SURFACEFORMAT_R10G10B10A2_UNORM}, 111 {PICT_x2r10g10b10, BRW_SURFACEFORMAT_B10G10R10X2_UNORM}, 112#endif 113 {PICT_a4r4g4b4, BRW_SURFACEFORMAT_B4G4R4A4_UNORM}, 114}; 115 116static void i965_get_blend_cntl(int op, PicturePtr mask, uint32_t dst_format, 117 uint32_t * sblend, uint32_t * dblend) 118{ 119 120 *sblend = i965_blend_op[op].src_blend; 121 *dblend = i965_blend_op[op].dst_blend; 122 123 /* If there's no dst alpha channel, adjust the blend op so that we'll treat 124 * it as always 1. 125 */ 126 if (PICT_FORMAT_A(dst_format) == 0 && i965_blend_op[op].dst_alpha) { 127 if (*sblend == BRW_BLENDFACTOR_DST_ALPHA) 128 *sblend = BRW_BLENDFACTOR_ONE; 129 else if (*sblend == BRW_BLENDFACTOR_INV_DST_ALPHA) 130 *sblend = BRW_BLENDFACTOR_ZERO; 131 } 132 133 /* If the source alpha is being used, then we should only be in a case where 134 * the source blend factor is 0, and the source blend value is the mask 135 * channels multiplied by the source picture's alpha. 
136 */ 137 if (mask && mask->componentAlpha && PICT_FORMAT_RGB(mask->format) 138 && i965_blend_op[op].src_alpha) { 139 if (*dblend == BRW_BLENDFACTOR_SRC_ALPHA) { 140 *dblend = BRW_BLENDFACTOR_SRC_COLOR; 141 } else if (*dblend == BRW_BLENDFACTOR_INV_SRC_ALPHA) { 142 *dblend = BRW_BLENDFACTOR_INV_SRC_COLOR; 143 } 144 } 145 146} 147 148static uint32_t i965_get_dest_format(PicturePtr dest_picture) 149{ 150 switch (dest_picture->format) { 151 case PICT_a8r8g8b8: 152 case PICT_x8r8g8b8: 153 return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; 154 case PICT_a8b8g8r8: 155 case PICT_x8b8g8r8: 156 return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; 157#if XORG_VERSION_CURRENT >= 10699900 158 case PICT_a2r10g10b10: 159 case PICT_x2r10g10b10: 160 return BRW_SURFACEFORMAT_B10G10R10A2_UNORM; 161#endif 162 case PICT_r5g6b5: 163 return BRW_SURFACEFORMAT_B5G6R5_UNORM; 164 case PICT_x1r5g5b5: 165 case PICT_a1r5g5b5: 166 return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; 167 case PICT_a8: 168 return BRW_SURFACEFORMAT_A8_UNORM; 169 case PICT_a4r4g4b4: 170 case PICT_x4r4g4b4: 171 return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; 172 default: 173 return -1; 174 } 175} 176 177Bool 178i965_check_composite(int op, 179 PicturePtr source_picture, 180 PicturePtr mask_picture, 181 PicturePtr dest_picture, 182 int width, int height) 183{ 184 ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen); 185 186 /* Check for unsupported compositing operations. */ 187 if (op >= sizeof(i965_blend_op) / sizeof(i965_blend_op[0])) { 188 intel_debug_fallback(scrn, 189 "Unsupported Composite op 0x%x\n", op); 190 return FALSE; 191 } 192 193 if (mask_picture && mask_picture->componentAlpha && 194 PICT_FORMAT_RGB(mask_picture->format)) { 195 /* Check if it's component alpha that relies on a source alpha and on 196 * the source value. We can only get one of those into the single 197 * source value that we get to blend with. 
198 */ 199 if (i965_blend_op[op].src_alpha && 200 (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) { 201 intel_debug_fallback(scrn, 202 "Component alpha not supported " 203 "with source alpha and source " 204 "value blending.\n"); 205 return FALSE; 206 } 207 } 208 209 if (i965_get_dest_format(dest_picture) == -1) { 210 intel_debug_fallback(scrn, "Usupported Color buffer format 0x%x\n", 211 (int)dest_picture->format); 212 return FALSE; 213 } 214 215 return TRUE; 216} 217 218Bool 219i965_check_composite_texture(ScreenPtr screen, PicturePtr picture) 220{ 221 if (picture->repeatType > RepeatReflect) { 222 ScrnInfoPtr scrn = xf86ScreenToScrn(screen); 223 intel_debug_fallback(scrn, 224 "extended repeat (%d) not supported\n", 225 picture->repeatType); 226 return FALSE; 227 } 228 229 if (picture->filter != PictFilterNearest && 230 picture->filter != PictFilterBilinear) { 231 ScrnInfoPtr scrn = xf86ScreenToScrn(screen); 232 intel_debug_fallback(scrn, "Unsupported filter 0x%x\n", 233 picture->filter); 234 return FALSE; 235 } 236 237 if (picture->pDrawable) { 238 int w, h, i; 239 240 w = picture->pDrawable->width; 241 h = picture->pDrawable->height; 242 if ((w > 8192) || (h > 8192)) { 243 ScrnInfoPtr scrn = xf86ScreenToScrn(screen); 244 intel_debug_fallback(scrn, 245 "Picture w/h too large (%dx%d)\n", 246 w, h); 247 return FALSE; 248 } 249 250 for (i = 0; 251 i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]); 252 i++) { 253 if (i965_tex_formats[i].fmt == picture->format) 254 break; 255 } 256 if (i == sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0])) 257 { 258 ScrnInfoPtr scrn = xf86ScreenToScrn(screen); 259 intel_debug_fallback(scrn, 260 "Unsupported picture format " 261 "0x%x\n", 262 (int)picture->format); 263 return FALSE; 264 } 265 266 return TRUE; 267 } 268 269 return FALSE; 270} 271 272 273#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) 274 275/* Set up a default static partitioning of the URB, which is supposed to 276 * allow anything we would 
want to do, at potentially lower performance. 277 */ 278#define URB_CS_ENTRY_SIZE 0 279#define URB_CS_ENTRIES 0 280 281#define URB_VS_ENTRY_SIZE 1 // each 512-bit row 282#define URB_VS_ENTRIES 8 // we needs at least 8 entries 283 284#define URB_GS_ENTRY_SIZE 0 285#define URB_GS_ENTRIES 0 286 287#define URB_CLIP_ENTRY_SIZE 0 288#define URB_CLIP_ENTRIES 0 289 290#define URB_SF_ENTRY_SIZE 2 291#define URB_SF_ENTRIES 1 292 293/* 294 * this program computes dA/dx and dA/dy for the texture coordinates along 295 * with the base texture coordinate. It was extracted from the Mesa driver 296 */ 297 298#define SF_KERNEL_NUM_GRF 16 299#define SF_MAX_THREADS 2 300 301static const uint32_t sf_kernel_static[][4] = { 302#include "exa_sf.g4b" 303}; 304 305static const uint32_t sf_kernel_mask_static[][4] = { 306#include "exa_sf_mask.g4b" 307}; 308 309/* ps kernels */ 310#define PS_KERNEL_NUM_GRF 32 311#define PS_MAX_THREADS 48 312 313static const uint32_t ps_kernel_nomask_affine_static[][4] = { 314#include "exa_wm_xy.g4b" 315#include "exa_wm_src_affine.g4b" 316#include "exa_wm_src_sample_argb.g4b" 317#include "exa_wm_write.g4b" 318}; 319 320static const uint32_t ps_kernel_nomask_projective_static[][4] = { 321#include "exa_wm_xy.g4b" 322#include "exa_wm_src_projective.g4b" 323#include "exa_wm_src_sample_argb.g4b" 324#include "exa_wm_write.g4b" 325}; 326 327static const uint32_t ps_kernel_maskca_affine_static[][4] = { 328#include "exa_wm_xy.g4b" 329#include "exa_wm_src_affine.g4b" 330#include "exa_wm_src_sample_argb.g4b" 331#include "exa_wm_mask_affine.g4b" 332#include "exa_wm_mask_sample_argb.g4b" 333#include "exa_wm_ca.g4b" 334#include "exa_wm_write.g4b" 335}; 336 337static const uint32_t ps_kernel_maskca_projective_static[][4] = { 338#include "exa_wm_xy.g4b" 339#include "exa_wm_src_projective.g4b" 340#include "exa_wm_src_sample_argb.g4b" 341#include "exa_wm_mask_projective.g4b" 342#include "exa_wm_mask_sample_argb.g4b" 343#include "exa_wm_ca.g4b" 344#include "exa_wm_write.g4b" 
345}; 346 347static const uint32_t ps_kernel_maskca_srcalpha_affine_static[][4] = { 348#include "exa_wm_xy.g4b" 349#include "exa_wm_src_affine.g4b" 350#include "exa_wm_src_sample_a.g4b" 351#include "exa_wm_mask_affine.g4b" 352#include "exa_wm_mask_sample_argb.g4b" 353#include "exa_wm_ca_srcalpha.g4b" 354#include "exa_wm_write.g4b" 355}; 356 357static const uint32_t ps_kernel_maskca_srcalpha_projective_static[][4] = { 358#include "exa_wm_xy.g4b" 359#include "exa_wm_src_projective.g4b" 360#include "exa_wm_src_sample_a.g4b" 361#include "exa_wm_mask_projective.g4b" 362#include "exa_wm_mask_sample_argb.g4b" 363#include "exa_wm_ca_srcalpha.g4b" 364#include "exa_wm_write.g4b" 365}; 366 367static const uint32_t ps_kernel_masknoca_affine_static[][4] = { 368#include "exa_wm_xy.g4b" 369#include "exa_wm_src_affine.g4b" 370#include "exa_wm_src_sample_argb.g4b" 371#include "exa_wm_mask_affine.g4b" 372#include "exa_wm_mask_sample_a.g4b" 373#include "exa_wm_noca.g4b" 374#include "exa_wm_write.g4b" 375}; 376 377static const uint32_t ps_kernel_masknoca_projective_static[][4] = { 378#include "exa_wm_xy.g4b" 379#include "exa_wm_src_projective.g4b" 380#include "exa_wm_src_sample_argb.g4b" 381#include "exa_wm_mask_projective.g4b" 382#include "exa_wm_mask_sample_a.g4b" 383#include "exa_wm_noca.g4b" 384#include "exa_wm_write.g4b" 385}; 386 387/* new programs for Ironlake */ 388static const uint32_t sf_kernel_static_gen5[][4] = { 389#include "exa_sf.g4b.gen5" 390}; 391 392static const uint32_t sf_kernel_mask_static_gen5[][4] = { 393#include "exa_sf_mask.g4b.gen5" 394}; 395 396static const uint32_t ps_kernel_nomask_affine_static_gen5[][4] = { 397#include "exa_wm_xy.g4b.gen5" 398#include "exa_wm_src_affine.g4b.gen5" 399#include "exa_wm_src_sample_argb.g4b.gen5" 400#include "exa_wm_write.g4b.gen5" 401}; 402 403static const uint32_t ps_kernel_nomask_projective_static_gen5[][4] = { 404#include "exa_wm_xy.g4b.gen5" 405#include "exa_wm_src_projective.g4b.gen5" 406#include 
"exa_wm_src_sample_argb.g4b.gen5" 407#include "exa_wm_write.g4b.gen5" 408}; 409 410static const uint32_t ps_kernel_maskca_affine_static_gen5[][4] = { 411#include "exa_wm_xy.g4b.gen5" 412#include "exa_wm_src_affine.g4b.gen5" 413#include "exa_wm_src_sample_argb.g4b.gen5" 414#include "exa_wm_mask_affine.g4b.gen5" 415#include "exa_wm_mask_sample_argb.g4b.gen5" 416#include "exa_wm_ca.g4b.gen5" 417#include "exa_wm_write.g4b.gen5" 418}; 419 420static const uint32_t ps_kernel_maskca_projective_static_gen5[][4] = { 421#include "exa_wm_xy.g4b.gen5" 422#include "exa_wm_src_projective.g4b.gen5" 423#include "exa_wm_src_sample_argb.g4b.gen5" 424#include "exa_wm_mask_projective.g4b.gen5" 425#include "exa_wm_mask_sample_argb.g4b.gen5" 426#include "exa_wm_ca.g4b.gen5" 427#include "exa_wm_write.g4b.gen5" 428}; 429 430static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen5[][4] = { 431#include "exa_wm_xy.g4b.gen5" 432#include "exa_wm_src_affine.g4b.gen5" 433#include "exa_wm_src_sample_a.g4b.gen5" 434#include "exa_wm_mask_affine.g4b.gen5" 435#include "exa_wm_mask_sample_argb.g4b.gen5" 436#include "exa_wm_ca_srcalpha.g4b.gen5" 437#include "exa_wm_write.g4b.gen5" 438}; 439 440static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen5[][4] = { 441#include "exa_wm_xy.g4b.gen5" 442#include "exa_wm_src_projective.g4b.gen5" 443#include "exa_wm_src_sample_a.g4b.gen5" 444#include "exa_wm_mask_projective.g4b.gen5" 445#include "exa_wm_mask_sample_argb.g4b.gen5" 446#include "exa_wm_ca_srcalpha.g4b.gen5" 447#include "exa_wm_write.g4b.gen5" 448}; 449 450static const uint32_t ps_kernel_masknoca_affine_static_gen5[][4] = { 451#include "exa_wm_xy.g4b.gen5" 452#include "exa_wm_src_affine.g4b.gen5" 453#include "exa_wm_src_sample_argb.g4b.gen5" 454#include "exa_wm_mask_affine.g4b.gen5" 455#include "exa_wm_mask_sample_a.g4b.gen5" 456#include "exa_wm_noca.g4b.gen5" 457#include "exa_wm_write.g4b.gen5" 458}; 459 460static const uint32_t ps_kernel_masknoca_projective_static_gen5[][4] = 
{ 461#include "exa_wm_xy.g4b.gen5" 462#include "exa_wm_src_projective.g4b.gen5" 463#include "exa_wm_src_sample_argb.g4b.gen5" 464#include "exa_wm_mask_projective.g4b.gen5" 465#include "exa_wm_mask_sample_a.g4b.gen5" 466#include "exa_wm_noca.g4b.gen5" 467#include "exa_wm_write.g4b.gen5" 468}; 469 470/* programs for GEN6 */ 471static const uint32_t ps_kernel_nomask_affine_static_gen6[][4] = { 472#include "exa_wm_src_affine.g6b" 473#include "exa_wm_src_sample_argb.g6b" 474#include "exa_wm_write.g6b" 475}; 476 477static const uint32_t ps_kernel_nomask_projective_static_gen6[][4] = { 478#include "exa_wm_src_projective.g6b" 479#include "exa_wm_src_sample_argb.g6b" 480#include "exa_wm_write.g6b" 481}; 482 483static const uint32_t ps_kernel_maskca_affine_static_gen6[][4] = { 484#include "exa_wm_src_affine.g6b" 485#include "exa_wm_src_sample_argb.g6b" 486#include "exa_wm_mask_affine.g6b" 487#include "exa_wm_mask_sample_argb.g6b" 488#include "exa_wm_ca.g6b" 489#include "exa_wm_write.g6b" 490}; 491 492static const uint32_t ps_kernel_maskca_projective_static_gen6[][4] = { 493#include "exa_wm_src_projective.g6b" 494#include "exa_wm_src_sample_argb.g6b" 495#include "exa_wm_mask_projective.g6b" 496#include "exa_wm_mask_sample_argb.g6b" 497#include "exa_wm_ca.g4b.gen5" 498#include "exa_wm_write.g6b" 499}; 500 501static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen6[][4] = { 502#include "exa_wm_src_affine.g6b" 503#include "exa_wm_src_sample_a.g6b" 504#include "exa_wm_mask_affine.g6b" 505#include "exa_wm_mask_sample_argb.g6b" 506#include "exa_wm_ca_srcalpha.g6b" 507#include "exa_wm_write.g6b" 508}; 509 510static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen6[][4] = { 511#include "exa_wm_src_projective.g6b" 512#include "exa_wm_src_sample_a.g6b" 513#include "exa_wm_mask_projective.g6b" 514#include "exa_wm_mask_sample_argb.g6b" 515#include "exa_wm_ca_srcalpha.g6b" 516#include "exa_wm_write.g6b" 517}; 518 519static const uint32_t 
ps_kernel_masknoca_affine_static_gen6[][4] = { 520#include "exa_wm_src_affine.g6b" 521#include "exa_wm_src_sample_argb.g6b" 522#include "exa_wm_mask_affine.g6b" 523#include "exa_wm_mask_sample_a.g6b" 524#include "exa_wm_noca.g6b" 525#include "exa_wm_write.g6b" 526}; 527 528static const uint32_t ps_kernel_masknoca_projective_static_gen6[][4] = { 529#include "exa_wm_src_projective.g6b" 530#include "exa_wm_src_sample_argb.g6b" 531#include "exa_wm_mask_projective.g6b" 532#include "exa_wm_mask_sample_a.g6b" 533#include "exa_wm_noca.g6b" 534#include "exa_wm_write.g6b" 535}; 536 537/* programs for GEN7 */ 538static const uint32_t ps_kernel_nomask_affine_static_gen7[][4] = { 539#include "exa_wm_src_affine.g7b" 540#include "exa_wm_src_sample_argb.g7b" 541#include "exa_wm_write.g7b" 542}; 543 544static const uint32_t ps_kernel_nomask_projective_static_gen7[][4] = { 545#include "exa_wm_src_projective.g7b" 546#include "exa_wm_src_sample_argb.g7b" 547#include "exa_wm_write.g7b" 548}; 549 550static const uint32_t ps_kernel_maskca_affine_static_gen7[][4] = { 551#include "exa_wm_src_affine.g7b" 552#include "exa_wm_src_sample_argb.g7b" 553#include "exa_wm_mask_affine.g7b" 554#include "exa_wm_mask_sample_argb.g7b" 555#include "exa_wm_ca.g6b" 556#include "exa_wm_write.g7b" 557}; 558 559static const uint32_t ps_kernel_maskca_projective_static_gen7[][4] = { 560#include "exa_wm_src_projective.g7b" 561#include "exa_wm_src_sample_argb.g7b" 562#include "exa_wm_mask_projective.g7b" 563#include "exa_wm_mask_sample_argb.g7b" 564#include "exa_wm_ca.g4b.gen5" 565#include "exa_wm_write.g7b" 566}; 567 568static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen7[][4] = { 569#include "exa_wm_src_affine.g7b" 570#include "exa_wm_src_sample_a.g7b" 571#include "exa_wm_mask_affine.g7b" 572#include "exa_wm_mask_sample_argb.g7b" 573#include "exa_wm_ca_srcalpha.g6b" 574#include "exa_wm_write.g7b" 575}; 576 577static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen7[][4] = { 
578#include "exa_wm_src_projective.g7b" 579#include "exa_wm_src_sample_a.g7b" 580#include "exa_wm_mask_projective.g7b" 581#include "exa_wm_mask_sample_argb.g7b" 582#include "exa_wm_ca_srcalpha.g6b" 583#include "exa_wm_write.g7b" 584}; 585 586static const uint32_t ps_kernel_masknoca_affine_static_gen7[][4] = { 587#include "exa_wm_src_affine.g7b" 588#include "exa_wm_src_sample_argb.g7b" 589#include "exa_wm_mask_affine.g7b" 590#include "exa_wm_mask_sample_a.g7b" 591#include "exa_wm_noca.g6b" 592#include "exa_wm_write.g7b" 593}; 594 595static const uint32_t ps_kernel_masknoca_projective_static_gen7[][4] = { 596#include "exa_wm_src_projective.g7b" 597#include "exa_wm_src_sample_argb.g7b" 598#include "exa_wm_mask_projective.g7b" 599#include "exa_wm_mask_sample_a.g7b" 600#include "exa_wm_noca.g6b" 601#include "exa_wm_write.g7b" 602}; 603 604 605typedef enum { 606 SS_INVALID_FILTER = -1, 607 SS_FILTER_NEAREST, 608 SS_FILTER_BILINEAR, 609 FILTER_COUNT, 610} sampler_state_filter_t; 611 612typedef enum { 613 SS_INVALID_EXTEND = -1, 614 SS_EXTEND_NONE, 615 SS_EXTEND_REPEAT, 616 SS_EXTEND_PAD, 617 SS_EXTEND_REFLECT, 618 EXTEND_COUNT, 619} sampler_state_extend_t; 620 621typedef enum { 622 WM_KERNEL_NOMASK_AFFINE, 623 WM_KERNEL_NOMASK_PROJECTIVE, 624 WM_KERNEL_MASKCA_AFFINE, 625 WM_KERNEL_MASKCA_PROJECTIVE, 626 WM_KERNEL_MASKCA_SRCALPHA_AFFINE, 627 WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, 628 WM_KERNEL_MASKNOCA_AFFINE, 629 WM_KERNEL_MASKNOCA_PROJECTIVE, 630 KERNEL_COUNT 631} wm_kernel_t; 632 633#define KERNEL(kernel_enum, kernel, masked) \ 634 [kernel_enum] = {&kernel, sizeof(kernel), masked} 635struct wm_kernel_info { 636 const void *data; 637 unsigned int size; 638 Bool has_mask; 639}; 640 641static const struct wm_kernel_info wm_kernels_gen4[] = { 642 KERNEL(WM_KERNEL_NOMASK_AFFINE, 643 ps_kernel_nomask_affine_static, FALSE), 644 KERNEL(WM_KERNEL_NOMASK_PROJECTIVE, 645 ps_kernel_nomask_projective_static, FALSE), 646 KERNEL(WM_KERNEL_MASKCA_AFFINE, 647 
ps_kernel_maskca_affine_static, TRUE), 648 KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, 649 ps_kernel_maskca_projective_static, TRUE), 650 KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE, 651 ps_kernel_maskca_srcalpha_affine_static, TRUE), 652 KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, 653 ps_kernel_maskca_srcalpha_projective_static, TRUE), 654 KERNEL(WM_KERNEL_MASKNOCA_AFFINE, 655 ps_kernel_masknoca_affine_static, TRUE), 656 KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE, 657 ps_kernel_masknoca_projective_static, TRUE), 658}; 659 660static const struct wm_kernel_info wm_kernels_gen5[] = { 661 KERNEL(WM_KERNEL_NOMASK_AFFINE, 662 ps_kernel_nomask_affine_static_gen5, FALSE), 663 KERNEL(WM_KERNEL_NOMASK_PROJECTIVE, 664 ps_kernel_nomask_projective_static_gen5, FALSE), 665 KERNEL(WM_KERNEL_MASKCA_AFFINE, 666 ps_kernel_maskca_affine_static_gen5, TRUE), 667 KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, 668 ps_kernel_maskca_projective_static_gen5, TRUE), 669 KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE, 670 ps_kernel_maskca_srcalpha_affine_static_gen5, TRUE), 671 KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, 672 ps_kernel_maskca_srcalpha_projective_static_gen5, TRUE), 673 KERNEL(WM_KERNEL_MASKNOCA_AFFINE, 674 ps_kernel_masknoca_affine_static_gen5, TRUE), 675 KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE, 676 ps_kernel_masknoca_projective_static_gen5, TRUE), 677}; 678 679static const struct wm_kernel_info wm_kernels_gen6[] = { 680 KERNEL(WM_KERNEL_NOMASK_AFFINE, 681 ps_kernel_nomask_affine_static_gen6, FALSE), 682 KERNEL(WM_KERNEL_NOMASK_PROJECTIVE, 683 ps_kernel_nomask_projective_static_gen6, FALSE), 684 KERNEL(WM_KERNEL_MASKCA_AFFINE, 685 ps_kernel_maskca_affine_static_gen6, TRUE), 686 KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, 687 ps_kernel_maskca_projective_static_gen6, TRUE), 688 KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE, 689 ps_kernel_maskca_srcalpha_affine_static_gen6, TRUE), 690 KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, 691 ps_kernel_maskca_srcalpha_projective_static_gen6, TRUE), 692 KERNEL(WM_KERNEL_MASKNOCA_AFFINE, 693 
ps_kernel_masknoca_affine_static_gen6, TRUE), 694 KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE, 695 ps_kernel_masknoca_projective_static_gen6, TRUE), 696}; 697 698static const struct wm_kernel_info wm_kernels_gen7[] = { 699 KERNEL(WM_KERNEL_NOMASK_AFFINE, 700 ps_kernel_nomask_affine_static_gen7, FALSE), 701 KERNEL(WM_KERNEL_NOMASK_PROJECTIVE, 702 ps_kernel_nomask_projective_static_gen7, FALSE), 703 KERNEL(WM_KERNEL_MASKCA_AFFINE, 704 ps_kernel_maskca_affine_static_gen7, TRUE), 705 KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, 706 ps_kernel_maskca_projective_static_gen7, TRUE), 707 KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE, 708 ps_kernel_maskca_srcalpha_affine_static_gen7, TRUE), 709 KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, 710 ps_kernel_maskca_srcalpha_projective_static_gen7, TRUE), 711 KERNEL(WM_KERNEL_MASKNOCA_AFFINE, 712 ps_kernel_masknoca_affine_static_gen7, TRUE), 713 KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE, 714 ps_kernel_masknoca_projective_static_gen7, TRUE), 715}; 716 717#undef KERNEL 718 719typedef struct _brw_cc_unit_state_padded { 720 struct brw_cc_unit_state state; 721 char pad[64 - sizeof(struct brw_cc_unit_state)]; 722} brw_cc_unit_state_padded; 723 724#ifndef MAX 725#define MAX(a, b) ((a) > (b) ? (a) : (b)) 726#endif 727#define SURFACE_STATE_PADDED_SIZE ALIGN(MAX(sizeof(struct brw_surface_state), sizeof(struct gen7_surface_state)), 32) 728 729struct gen4_cc_unit_state { 730 /* Index by [src_blend][dst_blend] */ 731 brw_cc_unit_state_padded cc_state[BRW_BLENDFACTOR_COUNT][BRW_BLENDFACTOR_COUNT]; 732}; 733 734typedef struct gen4_composite_op { 735 int op; 736 sampler_state_filter_t src_filter; 737 sampler_state_filter_t mask_filter; 738 sampler_state_extend_t src_extend; 739 sampler_state_extend_t mask_extend; 740 Bool is_affine; 741 wm_kernel_t wm_kernel; 742 int vertex_id; 743} gen4_composite_op; 744 745/** Private data for gen4 render accel implementation. 
*/ 746struct gen4_render_state { 747 drm_intel_bo *vs_state_bo; 748 drm_intel_bo *sf_state_bo; 749 drm_intel_bo *sf_mask_state_bo; 750 drm_intel_bo *cc_state_bo; 751 drm_intel_bo *wm_state_bo[KERNEL_COUNT] 752 [FILTER_COUNT] [EXTEND_COUNT] 753 [FILTER_COUNT] [EXTEND_COUNT]; 754 drm_intel_bo *wm_kernel_bo[KERNEL_COUNT]; 755 756 drm_intel_bo *cc_vp_bo; 757 drm_intel_bo *gen6_blend_bo; 758 drm_intel_bo *gen6_depth_stencil_bo; 759 drm_intel_bo *ps_sampler_state_bo[FILTER_COUNT] 760 [EXTEND_COUNT] 761 [FILTER_COUNT] 762 [EXTEND_COUNT]; 763 gen4_composite_op composite_op; 764}; 765 766static void gen6_emit_composite_state(struct intel_screen_private *intel); 767static void gen6_render_state_init(ScrnInfoPtr scrn); 768 769/** 770 * Sets up the SF state pointing at an SF kernel. 771 * 772 * The SF kernel does coord interp: for each attribute, 773 * calculate dA/dx and dA/dy. Hand these interpolation coefficients 774 * back to SF which then hands pixels off to WM. 775 */ 776static drm_intel_bo *gen4_create_sf_state(intel_screen_private *intel, 777 drm_intel_bo * kernel_bo) 778{ 779 struct brw_sf_unit_state *sf_state; 780 drm_intel_bo *sf_state_bo; 781 int ret; 782 783 sf_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 SF state", 784 sizeof(*sf_state), 4096); 785 assert(sf_state_bo); 786 787 ret = drm_intel_bo_map(sf_state_bo, TRUE); 788 assert(ret == 0); 789 790 sf_state = memset(sf_state_bo->virtual, 0, sizeof(*sf_state)); 791 sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF); 792 sf_state->thread0.kernel_start_pointer = 793 intel_emit_reloc(sf_state_bo, 794 offsetof(struct brw_sf_unit_state, thread0), 795 kernel_bo, sf_state->thread0.grf_reg_count << 1, 796 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; 797 sf_state->sf1.single_program_flow = 1; 798 sf_state->sf1.binding_table_entry_count = 0; 799 sf_state->sf1.thread_priority = 0; 800 sf_state->sf1.floating_point_mode = 0; /* Mesa does this */ 801 sf_state->sf1.illegal_op_exception_enable = 1; 802 
sf_state->sf1.mask_stack_exception_enable = 1; 803 sf_state->sf1.sw_exception_enable = 1; 804 sf_state->thread2.per_thread_scratch_space = 0; 805 /* scratch space is not used in our kernel */ 806 sf_state->thread2.scratch_space_base_pointer = 0; 807 sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */ 808 sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ 809 sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ 810 /* don't smash vertex header, read start from dw8 */ 811 sf_state->thread3.urb_entry_read_offset = 1; 812 sf_state->thread3.dispatch_grf_start_reg = 3; 813 sf_state->thread4.max_threads = SF_MAX_THREADS - 1; 814 sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; 815 sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES; 816 sf_state->sf5.viewport_transform = FALSE; /* skip viewport */ 817 sf_state->sf6.cull_mode = BRW_CULLMODE_NONE; 818 sf_state->sf6.scissor = 0; 819 sf_state->sf7.trifan_pv = 2; 820 sf_state->sf6.dest_org_vbias = 0x8; 821 sf_state->sf6.dest_org_hbias = 0x8; 822 823 drm_intel_bo_unmap(sf_state_bo); 824 825 return sf_state_bo; 826 (void)ret; 827} 828 829static drm_intel_bo *sampler_border_color_create(intel_screen_private *intel) 830{ 831 struct brw_sampler_legacy_border_color sampler_border_color; 832 833 /* Set up the sampler border color (always transparent black) */ 834 memset(&sampler_border_color, 0, sizeof(sampler_border_color)); 835 sampler_border_color.color[0] = 0; /* R */ 836 sampler_border_color.color[1] = 0; /* G */ 837 sampler_border_color.color[2] = 0; /* B */ 838 sampler_border_color.color[3] = 0; /* A */ 839 840 return intel_bo_alloc_for_data(intel, 841 &sampler_border_color, 842 sizeof(sampler_border_color), 843 "gen4 render sampler border color"); 844} 845 846static void 847gen4_sampler_state_init(drm_intel_bo * sampler_state_bo, 848 struct brw_sampler_state *sampler_state, 849 sampler_state_filter_t filter, 850 sampler_state_extend_t extend, 851 
drm_intel_bo * border_color_bo) 852{ 853 uint32_t sampler_state_offset; 854 855 sampler_state_offset = (char *)sampler_state - 856 (char *)sampler_state_bo->virtual; 857 858 /* PS kernel use this sampler */ 859 memset(sampler_state, 0, sizeof(*sampler_state)); 860 861 sampler_state->ss0.lod_preclamp = 1; /* GL mode */ 862 863 /* We use the legacy mode to get the semantics specified by 864 * the Render extension. */ 865 sampler_state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY; 866 867 switch (filter) { 868 default: 869 case SS_FILTER_NEAREST: 870 sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST; 871 sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST; 872 break; 873 case SS_FILTER_BILINEAR: 874 sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; 875 sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; 876 break; 877 } 878 879 switch (extend) { 880 default: 881 case SS_EXTEND_NONE: 882 sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; 883 sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; 884 sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; 885 break; 886 case SS_EXTEND_REPEAT: 887 sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; 888 sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; 889 sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; 890 break; 891 case SS_EXTEND_PAD: 892 sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 893 sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 894 sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 895 break; 896 case SS_EXTEND_REFLECT: 897 sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 898 sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 899 sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 900 break; 901 } 902 903 sampler_state->ss2.border_color_pointer = 904 intel_emit_reloc(sampler_state_bo, sampler_state_offset + 905 offsetof(struct brw_sampler_state, ss2), 906 border_color_bo, 0, 907 
I915_GEM_DOMAIN_SAMPLER, 0) >> 5; 908 909 sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ 910} 911 912static void 913gen7_sampler_state_init(drm_intel_bo * sampler_state_bo, 914 struct gen7_sampler_state *sampler_state, 915 sampler_state_filter_t filter, 916 sampler_state_extend_t extend, 917 drm_intel_bo * border_color_bo) 918{ 919 uint32_t sampler_state_offset; 920 921 sampler_state_offset = (char *)sampler_state - 922 (char *)sampler_state_bo->virtual; 923 924 /* PS kernel use this sampler */ 925 memset(sampler_state, 0, sizeof(*sampler_state)); 926 927 sampler_state->ss0.lod_preclamp = 1; /* GL mode */ 928 929 /* We use the legacy mode to get the semantics specified by 930 * the Render extension. */ 931 sampler_state->ss0.default_color_mode = BRW_BORDER_COLOR_MODE_LEGACY; 932 933 switch (filter) { 934 default: 935 case SS_FILTER_NEAREST: 936 sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST; 937 sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST; 938 break; 939 case SS_FILTER_BILINEAR: 940 sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; 941 sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; 942 break; 943 } 944 945 switch (extend) { 946 default: 947 case SS_EXTEND_NONE: 948 sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; 949 sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; 950 sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; 951 break; 952 case SS_EXTEND_REPEAT: 953 sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; 954 sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; 955 sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; 956 break; 957 case SS_EXTEND_PAD: 958 sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 959 sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 960 sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 961 break; 962 case SS_EXTEND_REFLECT: 963 sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 964 
sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 965 sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 966 break; 967 } 968 969 sampler_state->ss2.default_color_pointer = 970 intel_emit_reloc(sampler_state_bo, sampler_state_offset + 971 offsetof(struct gen7_sampler_state, ss2), 972 border_color_bo, 0, 973 I915_GEM_DOMAIN_SAMPLER, 0) >> 5; 974 975 sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ 976} 977 978 979 980static drm_intel_bo *gen4_create_sampler_state(intel_screen_private *intel, 981 sampler_state_filter_t src_filter, 982 sampler_state_extend_t src_extend, 983 sampler_state_filter_t mask_filter, 984 sampler_state_extend_t mask_extend, 985 drm_intel_bo * border_color_bo) 986{ 987 drm_intel_bo *sampler_state_bo; 988 struct brw_sampler_state *sampler_state; 989 int ret; 990 991 sampler_state_bo = 992 drm_intel_bo_alloc(intel->bufmgr, "gen4 sampler state", 993 sizeof(struct brw_sampler_state) * 2, 4096); 994 assert(sampler_state_bo); 995 996 ret = drm_intel_bo_map(sampler_state_bo, TRUE); 997 assert(ret == 0); 998 999 sampler_state = sampler_state_bo->virtual; 1000 1001 gen4_sampler_state_init(sampler_state_bo, 1002 &sampler_state[0], 1003 src_filter, src_extend, border_color_bo); 1004 gen4_sampler_state_init(sampler_state_bo, 1005 &sampler_state[1], 1006 mask_filter, mask_extend, border_color_bo); 1007 1008 drm_intel_bo_unmap(sampler_state_bo); 1009 1010 return sampler_state_bo; 1011 (void)ret; 1012} 1013 1014static drm_intel_bo * 1015gen7_create_sampler_state(intel_screen_private *intel, 1016 sampler_state_filter_t src_filter, 1017 sampler_state_extend_t src_extend, 1018 sampler_state_filter_t mask_filter, 1019 sampler_state_extend_t mask_extend, 1020 drm_intel_bo * border_color_bo) 1021{ 1022 drm_intel_bo *sampler_state_bo; 1023 struct gen7_sampler_state *sampler_state; 1024 int ret; 1025 1026 sampler_state_bo = 1027 drm_intel_bo_alloc(intel->bufmgr, "gen7 sampler state", 1028 sizeof(struct gen7_sampler_state) * 2, 4096); 
1029 assert(sampler_state_bo); 1030 1031 ret = drm_intel_bo_map(sampler_state_bo, TRUE); 1032 assert(ret == 0); 1033 1034 sampler_state = sampler_state_bo->virtual; 1035 1036 gen7_sampler_state_init(sampler_state_bo, 1037 &sampler_state[0], 1038 src_filter, src_extend, border_color_bo); 1039 gen7_sampler_state_init(sampler_state_bo, 1040 &sampler_state[1], 1041 mask_filter, mask_extend, border_color_bo); 1042 1043 drm_intel_bo_unmap(sampler_state_bo); 1044 1045 return sampler_state_bo; 1046 (void)ret; 1047} 1048 1049static inline drm_intel_bo * 1050i965_create_sampler_state(intel_screen_private *intel, 1051 sampler_state_filter_t src_filter, 1052 sampler_state_extend_t src_extend, 1053 sampler_state_filter_t mask_filter, 1054 sampler_state_extend_t mask_extend, 1055 drm_intel_bo * border_color_bo) 1056{ 1057 if (INTEL_INFO(intel)->gen < 070) 1058 return gen4_create_sampler_state(intel, src_filter, src_extend, 1059 mask_filter, mask_extend, 1060 border_color_bo); 1061 return gen7_create_sampler_state(intel, src_filter, src_extend, 1062 mask_filter, mask_extend, 1063 border_color_bo); 1064} 1065 1066 1067static void 1068cc_state_init(drm_intel_bo * cc_state_bo, 1069 uint32_t cc_state_offset, 1070 int src_blend, int dst_blend, drm_intel_bo * cc_vp_bo) 1071{ 1072 struct brw_cc_unit_state *cc_state; 1073 1074 cc_state = (struct brw_cc_unit_state *)((char *)cc_state_bo->virtual + 1075 cc_state_offset); 1076 1077 memset(cc_state, 0, sizeof(*cc_state)); 1078 cc_state->cc0.stencil_enable = 0; /* disable stencil */ 1079 cc_state->cc2.depth_test = 0; /* disable depth test */ 1080 cc_state->cc2.logicop_enable = 0; /* disable logic op */ 1081 cc_state->cc3.ia_blend_enable = 0; /* blend alpha same as colors */ 1082 cc_state->cc3.blend_enable = 1; /* enable color blend */ 1083 cc_state->cc3.alpha_test = 0; /* disable alpha test */ 1084 1085 cc_state->cc4.cc_viewport_state_offset = 1086 intel_emit_reloc(cc_state_bo, cc_state_offset + 1087 offsetof(struct brw_cc_unit_state, cc4), 
1088 cc_vp_bo, 0, I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; 1089 1090 cc_state->cc5.dither_enable = 0; /* disable dither */ 1091 cc_state->cc5.logicop_func = 0xc; /* COPY */ 1092 cc_state->cc5.statistics_enable = 1; 1093 cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD; 1094 1095 /* Fill in alpha blend factors same as color, for the future. */ 1096 cc_state->cc5.ia_src_blend_factor = src_blend; 1097 cc_state->cc5.ia_dest_blend_factor = dst_blend; 1098 1099 cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD; 1100 cc_state->cc6.clamp_post_alpha_blend = 1; 1101 cc_state->cc6.clamp_pre_alpha_blend = 1; 1102 cc_state->cc6.clamp_range = 0; /* clamp range [0,1] */ 1103 1104 cc_state->cc6.src_blend_factor = src_blend; 1105 cc_state->cc6.dest_blend_factor = dst_blend; 1106} 1107 1108static drm_intel_bo *gen4_create_wm_state(intel_screen_private *intel, 1109 Bool has_mask, 1110 drm_intel_bo * kernel_bo, 1111 drm_intel_bo * sampler_bo) 1112{ 1113 struct brw_wm_unit_state *state; 1114 drm_intel_bo *wm_state_bo; 1115 int ret; 1116 1117 wm_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 WM state", 1118 sizeof(*state), 4096); 1119 assert(wm_state_bo); 1120 1121 ret = drm_intel_bo_map(wm_state_bo, TRUE); 1122 assert(ret == 0); 1123 1124 state = memset(wm_state_bo->virtual, 0, sizeof(*state)); 1125 state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); 1126 state->thread0.kernel_start_pointer = 1127 intel_emit_reloc(wm_state_bo, 1128 offsetof(struct brw_wm_unit_state, thread0), 1129 kernel_bo, state->thread0.grf_reg_count << 1, 1130 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; 1131 1132 state->thread1.single_program_flow = 0; 1133 1134 /* scratch space is not used in our kernel */ 1135 state->thread2.scratch_space_base_pointer = 0; 1136 state->thread2.per_thread_scratch_space = 0; 1137 1138 state->thread3.const_urb_entry_read_length = 0; 1139 state->thread3.const_urb_entry_read_offset = 0; 1140 1141 state->thread3.urb_entry_read_offset = 0; 1142 /* wm kernel use urb 
from 3, see wm_program in compiler module */ 1143 state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */ 1144 1145 if (IS_GEN5(intel)) 1146 state->wm4.sampler_count = 0; /* hardware requirement */ 1147 else 1148 state->wm4.sampler_count = 1; /* 1-4 samplers used */ 1149 1150 state->wm4.sampler_state_pointer = 1151 intel_emit_reloc(wm_state_bo, 1152 offsetof(struct brw_wm_unit_state, wm4), 1153 sampler_bo, 1154 state->wm4.sampler_count << 2, 1155 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; 1156 state->wm5.max_threads = PS_MAX_THREADS - 1; 1157 state->wm5.transposed_urb_read = 0; 1158 state->wm5.thread_dispatch_enable = 1; 1159 /* just use 16-pixel dispatch (4 subspans), don't need to change kernel 1160 * start point 1161 */ 1162 state->wm5.enable_16_pix = 1; 1163 state->wm5.enable_8_pix = 0; 1164 state->wm5.early_depth_test = 1; 1165 1166 /* Each pair of attributes (src/mask coords) is two URB entries */ 1167 if (has_mask) { 1168 state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */ 1169 state->thread3.urb_entry_read_length = 4; 1170 } else { 1171 state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ 1172 state->thread3.urb_entry_read_length = 2; 1173 } 1174 1175 /* binding table entry count is only used for prefetching, and it has to 1176 * be set 0 for Ironlake 1177 */ 1178 if (IS_GEN5(intel)) 1179 state->thread1.binding_table_entry_count = 0; 1180 1181 drm_intel_bo_unmap(wm_state_bo); 1182 1183 return wm_state_bo; 1184 (void)ret; 1185} 1186 1187static drm_intel_bo *gen4_create_cc_viewport(intel_screen_private *intel) 1188{ 1189 drm_intel_bo *bo; 1190 struct brw_cc_viewport vp; 1191 int ret; 1192 1193 vp.min_depth = -1.e35; 1194 vp.max_depth = 1.e35; 1195 1196 bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 render unit state", 1197 sizeof(vp), 4096); 1198 assert(bo); 1199 1200 ret = drm_intel_bo_subdata(bo, 0, sizeof(vp), &vp); 1201 assert(ret == 0); 1202 1203 return bo; 1204 (void)ret; 1205} 1206 1207static drm_intel_bo 
*gen4_create_vs_unit_state(intel_screen_private *intel) 1208{ 1209 struct brw_vs_unit_state vs_state; 1210 memset(&vs_state, 0, sizeof(vs_state)); 1211 1212 /* Set up the vertex shader to be disabled (passthrough) */ 1213 if (IS_GEN5(intel)) 1214 vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; /* hardware requirement */ 1215 else 1216 vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES; 1217 vs_state.thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; 1218 vs_state.vs6.vs_enable = 0; 1219 vs_state.vs6.vert_cache_disable = 1; 1220 1221 return intel_bo_alloc_for_data(intel, &vs_state, sizeof(vs_state), 1222 "gen4 render VS state"); 1223} 1224 1225/** 1226 * Set up all combinations of cc state: each blendfactor for source and 1227 * dest. 1228 */ 1229static drm_intel_bo *gen4_create_cc_unit_state(intel_screen_private *intel) 1230{ 1231 drm_intel_bo *cc_state_bo, *cc_vp_bo; 1232 int i, j, ret; 1233 1234 cc_vp_bo = gen4_create_cc_viewport(intel); 1235 1236 cc_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 CC state", 1237 sizeof(struct gen4_cc_unit_state), 1238 4096); 1239 assert(cc_state_bo); 1240 1241 ret = drm_intel_bo_map(cc_state_bo, TRUE); 1242 assert(ret == 0); 1243 1244 for (i = 0; i < BRW_BLENDFACTOR_COUNT; i++) { 1245 for (j = 0; j < BRW_BLENDFACTOR_COUNT; j++) { 1246 cc_state_init(cc_state_bo, 1247 offsetof(struct gen4_cc_unit_state, 1248 cc_state[i][j].state), 1249 i, j, cc_vp_bo); 1250 } 1251 } 1252 drm_intel_bo_unmap(cc_state_bo); 1253 1254 drm_intel_bo_unreference(cc_vp_bo); 1255 1256 return cc_state_bo; 1257 (void)ret; 1258} 1259 1260static uint32_t i965_get_card_format(PicturePtr picture) 1261{ 1262 int i; 1263 1264 for (i = 0; i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]); 1265 i++) { 1266 if (i965_tex_formats[i].fmt == picture->format) 1267 break; 1268 } 1269 assert(i != sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0])); 1270 1271 return i965_tex_formats[i].card_fmt; 1272} 1273 1274static sampler_state_filter_t 
sampler_state_filter_from_picture(int filter) 1275{ 1276 switch (filter) { 1277 case PictFilterNearest: 1278 return SS_FILTER_NEAREST; 1279 case PictFilterBilinear: 1280 return SS_FILTER_BILINEAR; 1281 default: 1282 return SS_INVALID_FILTER; 1283 } 1284} 1285 1286static sampler_state_extend_t sampler_state_extend_from_picture(int repeat_type) 1287{ 1288 switch (repeat_type) { 1289 case RepeatNone: 1290 return SS_EXTEND_NONE; 1291 case RepeatNormal: 1292 return SS_EXTEND_REPEAT; 1293 case RepeatPad: 1294 return SS_EXTEND_PAD; 1295 case RepeatReflect: 1296 return SS_EXTEND_REFLECT; 1297 default: 1298 return SS_INVALID_EXTEND; 1299 } 1300} 1301 1302/** 1303 * Sets up the common fields for a surface state buffer for the given 1304 * picture in the given surface state buffer. 1305 */ 1306static int 1307gen4_set_picture_surface_state(intel_screen_private *intel, 1308 PicturePtr picture, PixmapPtr pixmap, 1309 Bool is_dst) 1310{ 1311 struct intel_pixmap *priv = intel_get_pixmap_private(pixmap); 1312 struct brw_surface_state *ss; 1313 uint32_t write_domain, read_domains; 1314 int offset; 1315 1316 if (is_dst) { 1317 write_domain = I915_GEM_DOMAIN_RENDER; 1318 read_domains = I915_GEM_DOMAIN_RENDER; 1319 } else { 1320 write_domain = 0; 1321 read_domains = I915_GEM_DOMAIN_SAMPLER; 1322 } 1323 intel_batch_mark_pixmap_domains(intel, priv, 1324 read_domains, write_domain); 1325 ss = (struct brw_surface_state *) 1326 (intel->surface_data + intel->surface_used); 1327 1328 memset(ss, 0, sizeof(*ss)); 1329 ss->ss0.surface_type = BRW_SURFACE_2D; 1330 if (is_dst) 1331 ss->ss0.surface_format = i965_get_dest_format(picture); 1332 else 1333 ss->ss0.surface_format = i965_get_card_format(picture); 1334 1335 ss->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; 1336 ss->ss0.color_blend = 1; 1337 ss->ss1.base_addr = priv->bo->offset; 1338 1339 ss->ss2.height = pixmap->drawable.height - 1; 1340 ss->ss2.width = pixmap->drawable.width - 1; 1341 ss->ss3.pitch = intel_pixmap_pitch(pixmap) 
- 1; 1342 ss->ss3.tile_walk = 0; /* Tiled X */ 1343 ss->ss3.tiled_surface = intel_pixmap_tiled(pixmap) ? 1 : 0; 1344 1345 dri_bo_emit_reloc(intel->surface_bo, 1346 read_domains, write_domain, 1347 0, 1348 intel->surface_used + 1349 offsetof(struct brw_surface_state, ss1), 1350 priv->bo); 1351 1352 offset = intel->surface_used; 1353 intel->surface_used += SURFACE_STATE_PADDED_SIZE; 1354 1355 return offset; 1356} 1357 1358static int 1359gen7_set_picture_surface_state(intel_screen_private *intel, 1360 PicturePtr picture, PixmapPtr pixmap, 1361 Bool is_dst) 1362{ 1363 struct intel_pixmap *priv = intel_get_pixmap_private(pixmap); 1364 struct gen7_surface_state *ss; 1365 uint32_t write_domain, read_domains; 1366 int offset; 1367 1368 if (is_dst) { 1369 write_domain = I915_GEM_DOMAIN_RENDER; 1370 read_domains = I915_GEM_DOMAIN_RENDER; 1371 } else { 1372 write_domain = 0; 1373 read_domains = I915_GEM_DOMAIN_SAMPLER; 1374 } 1375 intel_batch_mark_pixmap_domains(intel, priv, 1376 read_domains, write_domain); 1377 ss = (struct gen7_surface_state *) 1378 (intel->surface_data + intel->surface_used); 1379 1380 memset(ss, 0, sizeof(*ss)); 1381 ss->ss0.surface_type = BRW_SURFACE_2D; 1382 if (is_dst) 1383 ss->ss0.surface_format = i965_get_dest_format(picture); 1384 else 1385 ss->ss0.surface_format = i965_get_card_format(picture); 1386 1387 ss->ss0.tile_walk = 0; /* Tiled X */ 1388 ss->ss0.tiled_surface = intel_pixmap_tiled(pixmap) ? 
1 : 0; 1389 ss->ss1.base_addr = priv->bo->offset; 1390 1391 ss->ss2.height = pixmap->drawable.height - 1; 1392 ss->ss2.width = pixmap->drawable.width - 1; 1393 ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1; 1394 1395 if (IS_HSW(intel)) { 1396 ss->ss7.shader_chanel_select_r = HSW_SCS_RED; 1397 ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN; 1398 ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE; 1399 ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA; 1400 } 1401 1402 dri_bo_emit_reloc(intel->surface_bo, 1403 read_domains, write_domain, 1404 0, 1405 intel->surface_used + 1406 offsetof(struct gen7_surface_state, ss1), 1407 priv->bo); 1408 1409 offset = intel->surface_used; 1410 intel->surface_used += SURFACE_STATE_PADDED_SIZE; 1411 1412 return offset; 1413} 1414 1415static inline int 1416i965_set_picture_surface_state(intel_screen_private *intel, 1417 PicturePtr picture, PixmapPtr pixmap, 1418 Bool is_dst) 1419{ 1420 if (INTEL_INFO(intel)->gen < 070) 1421 return gen4_set_picture_surface_state(intel, picture, pixmap, is_dst); 1422 return gen7_set_picture_surface_state(intel, picture, pixmap, is_dst); 1423} 1424 1425static void gen4_composite_vertex_elements(struct intel_screen_private *intel) 1426{ 1427 struct gen4_render_state *render_state = intel->gen4_render_state; 1428 gen4_composite_op *composite_op = &render_state->composite_op; 1429 Bool has_mask = intel->render_mask != NULL; 1430 Bool is_affine = composite_op->is_affine; 1431 /* 1432 * number of extra parameters per vertex 1433 */ 1434 int nelem = has_mask ? 2 : 1; 1435 /* 1436 * size of extra parameters: 1437 * 3 for homogenous (xyzw) 1438 * 2 for cartesian (xy) 1439 */ 1440 int selem = is_affine ? 
2 : 3; 1441 uint32_t w_component; 1442 uint32_t src_format; 1443 int id; 1444 1445 id = has_mask << 1 | is_affine; 1446 1447 if (composite_op->vertex_id == id) 1448 return; 1449 1450 composite_op->vertex_id = id; 1451 1452 if (is_affine) { 1453 src_format = BRW_SURFACEFORMAT_R32G32_FLOAT; 1454 w_component = BRW_VFCOMPONENT_STORE_1_FLT; 1455 } else { 1456 src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; 1457 w_component = BRW_VFCOMPONENT_STORE_SRC; 1458 } 1459 1460 if (IS_GEN5(intel)) { 1461 /* 1462 * The reason to add this extra vertex element in the header is that 1463 * Ironlake has different vertex header definition and origin method to 1464 * set destination element offset doesn't exist anymore, which means 1465 * hardware requires a predefined vertex element layout. 1466 * 1467 * haihao proposed this approach to fill the first vertex element, so 1468 * origin layout for Gen4 doesn't need to change, and origin shader 1469 * programs behavior is also kept. 1470 * 1471 * I think this is not bad. - zhenyu 1472 */ 1473 1474 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 1475 ((2 * (2 + nelem)) - 1)); 1476 OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | 1477 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1478 (0 << VE0_OFFSET_SHIFT)); 1479 1480 OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | 1481 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | 1482 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | 1483 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); 1484 } else { 1485 /* Set up our vertex elements, sourced from the single vertex buffer. 1486 * that will be set up later. 
1487 */ 1488 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 1489 ((2 * (1 + nelem)) - 1)); 1490 } 1491 1492 /* x,y */ 1493 OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | 1494 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1495 (0 << VE0_OFFSET_SHIFT)); 1496 1497 if (IS_GEN5(intel)) 1498 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1499 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1500 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 1501 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 1502 else 1503 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1504 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1505 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 1506 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | 1507 (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); 1508 /* u0, v0, w0 */ 1509 OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | 1510 (src_format << VE0_FORMAT_SHIFT) | 1511 ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */ 1512 1513 if (IS_GEN5(intel)) 1514 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1515 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1516 (w_component << VE1_VFCOMPONENT_2_SHIFT) | 1517 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 1518 else 1519 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1520 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1521 (w_component << VE1_VFCOMPONENT_2_SHIFT) | 1522 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | 1523 ((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ 1524 /* u1, v1, w1 */ 1525 if (has_mask) { 1526 OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | 1527 (src_format << VE0_FORMAT_SHIFT) | 1528 (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */ 1529 1530 if (IS_GEN5(intel)) 1531 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << 
VE1_VFCOMPONENT_0_SHIFT) | 1532 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1533 (w_component << VE1_VFCOMPONENT_2_SHIFT) | 1534 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 1535 else 1536 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1537 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1538 (w_component << VE1_VFCOMPONENT_2_SHIFT) | 1539 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | 1540 ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ 1541 } 1542} 1543 1544static void i965_emit_composite_state(struct intel_screen_private *intel) 1545{ 1546 struct gen4_render_state *render_state = intel->gen4_render_state; 1547 gen4_composite_op *composite_op = &render_state->composite_op; 1548 int op = composite_op->op; 1549 PicturePtr mask_picture = intel->render_mask_picture; 1550 PicturePtr dest_picture = intel->render_dest_picture; 1551 PixmapPtr mask = intel->render_mask; 1552 PixmapPtr dest = intel->render_dest; 1553 sampler_state_filter_t src_filter = composite_op->src_filter; 1554 sampler_state_filter_t mask_filter = composite_op->mask_filter; 1555 sampler_state_extend_t src_extend = composite_op->src_extend; 1556 sampler_state_extend_t mask_extend = composite_op->mask_extend; 1557 uint32_t src_blend, dst_blend; 1558 1559 intel->needs_render_state_emit = FALSE; 1560 1561 /* Begin the long sequence of commands needed to set up the 3D 1562 * rendering pipe 1563 */ 1564 1565 if (intel->needs_3d_invariant) { 1566 if (IS_GEN5(intel)) { 1567 /* Ironlake errata workaround: Before disabling the clipper, 1568 * you have to MI_FLUSH to get the pipeline idle. 
1569 */ 1570 OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); 1571 } 1572 1573 /* Match Mesa driver setup */ 1574 if (INTEL_INFO(intel)->gen >= 045) 1575 OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); 1576 else 1577 OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D); 1578 1579 /* Set system instruction pointer */ 1580 OUT_BATCH(BRW_STATE_SIP | 0); 1581 OUT_BATCH(0); 1582 1583 intel->needs_3d_invariant = FALSE; 1584 } 1585 1586 if (intel->surface_reloc == 0) { 1587 /* Zero out the two base address registers so all offsets are 1588 * absolute. 1589 */ 1590 if (IS_GEN5(intel)) { 1591 OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6); 1592 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ 1593 intel->surface_reloc = intel->batch_used; 1594 intel_batch_emit_dword(intel, 1595 intel->surface_bo->offset | BASE_ADDRESS_MODIFY); 1596 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ 1597 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Instruction base address */ 1598 /* general state max addr, disabled */ 1599 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); 1600 /* media object state max addr, disabled */ 1601 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); 1602 /* Instruction max addr, disabled */ 1603 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); 1604 } else { 1605 OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4); 1606 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ 1607 intel->surface_reloc = intel->batch_used; 1608 intel_batch_emit_dword(intel, 1609 intel->surface_bo->offset | BASE_ADDRESS_MODIFY); 1610 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ 1611 /* general state max addr, disabled */ 1612 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); 1613 /* media object state max addr, disabled */ 1614 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); 1615 } 1616 } 1617 1618 i965_get_blend_cntl(op, mask_picture, dest_picture->format, 1619 &src_blend, &dst_blend); 1620 1621 /* Binding table pointers */ 1622 OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4); 1623 
OUT_BATCH(0); /* vs */ 1624 OUT_BATCH(0); /* gs */ 1625 OUT_BATCH(0); /* clip */ 1626 OUT_BATCH(0); /* sf */ 1627 /* Only the PS uses the binding table */ 1628 OUT_BATCH(intel->surface_table); 1629 1630 /* The drawing rectangle clipping is always on. Set it to values that 1631 * shouldn't do any clipping. 1632 */ 1633 OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); 1634 OUT_BATCH(0x00000000); /* ymin, xmin */ 1635 OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) | 1636 DRAW_XMAX(dest->drawable.width - 1)); /* ymax, xmax */ 1637 OUT_BATCH(0x00000000); /* yorigin, xorigin */ 1638 1639 /* skip the depth buffer */ 1640 /* skip the polygon stipple */ 1641 /* skip the polygon stipple offset */ 1642 /* skip the line stipple */ 1643 1644 /* Set the pointers to the 3d pipeline state */ 1645 OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5); 1646 OUT_RELOC(render_state->vs_state_bo, 1647 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 1648 OUT_BATCH(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */ 1649 OUT_BATCH(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */ 1650 if (mask) { 1651 OUT_RELOC(render_state->sf_mask_state_bo, 1652 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 1653 } else { 1654 OUT_RELOC(render_state->sf_state_bo, 1655 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 1656 } 1657 1658 OUT_RELOC(render_state->wm_state_bo[composite_op->wm_kernel] 1659 [src_filter][src_extend] 1660 [mask_filter][mask_extend], 1661 I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 1662 1663 OUT_RELOC(render_state->cc_state_bo, 1664 I915_GEM_DOMAIN_INSTRUCTION, 0, 1665 offsetof(struct gen4_cc_unit_state, 1666 cc_state[src_blend][dst_blend])); 1667 1668 { 1669 int urb_vs_start, urb_vs_size; 1670 int urb_gs_start, urb_gs_size; 1671 int urb_clip_start, urb_clip_size; 1672 int urb_sf_start, urb_sf_size; 1673 int urb_cs_start, urb_cs_size; 1674 1675 urb_vs_start = 0; 1676 urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; 1677 urb_gs_start = urb_vs_start + urb_vs_size; 1678 urb_gs_size = URB_GS_ENTRIES * 
URB_GS_ENTRY_SIZE; 1679 urb_clip_start = urb_gs_start + urb_gs_size; 1680 urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; 1681 urb_sf_start = urb_clip_start + urb_clip_size; 1682 urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; 1683 urb_cs_start = urb_sf_start + urb_sf_size; 1684 urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; 1685 1686 /* Erratum (Vol 1a, p32): 1687 * URB_FENCE must not cross a cache-line (64 bytes). 1688 */ 1689 if ((intel->batch_used & 15) > (16 - 3)) { 1690 int cnt = 16 - (intel->batch_used & 15); 1691 while (cnt--) 1692 OUT_BATCH(MI_NOOP); 1693 } 1694 1695 OUT_BATCH(BRW_URB_FENCE | 1696 UF0_CS_REALLOC | 1697 UF0_SF_REALLOC | 1698 UF0_CLIP_REALLOC | 1699 UF0_GS_REALLOC | 1700 UF0_VS_REALLOC | 1701 1); 1702 OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | 1703 ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | 1704 ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); 1705 OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | 1706 ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); 1707 1708 /* Constant buffer state */ 1709 OUT_BATCH(BRW_CS_URB_STATE | 0); 1710 OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) | 1711 (URB_CS_ENTRIES << 0)); 1712 } 1713 1714 gen4_composite_vertex_elements(intel); 1715} 1716 1717/** 1718 * Returns whether the current set of composite state plus vertex buffer is 1719 * expected to fit in the aperture. 
1720 */ 1721static Bool i965_composite_check_aperture(intel_screen_private *intel) 1722{ 1723 struct gen4_render_state *render_state = intel->gen4_render_state; 1724 gen4_composite_op *composite_op = &render_state->composite_op; 1725 drm_intel_bo *bo_table[] = { 1726 intel->batch_bo, 1727 intel->vertex_bo, 1728 intel->surface_bo, 1729 render_state->vs_state_bo, 1730 render_state->sf_state_bo, 1731 render_state->sf_mask_state_bo, 1732 render_state->wm_state_bo[composite_op->wm_kernel] 1733 [composite_op->src_filter] 1734 [composite_op->src_extend] 1735 [composite_op->mask_filter] 1736 [composite_op->mask_extend], 1737 render_state->cc_state_bo, 1738 }; 1739 drm_intel_bo *gen6_bo_table[] = { 1740 intel->batch_bo, 1741 intel->vertex_bo, 1742 intel->surface_bo, 1743 render_state->wm_kernel_bo[composite_op->wm_kernel], 1744 render_state->ps_sampler_state_bo[composite_op->src_filter] 1745 [composite_op->src_extend] 1746 [composite_op->mask_filter] 1747 [composite_op->mask_extend], 1748 render_state->cc_vp_bo, 1749 render_state->cc_state_bo, 1750 render_state->gen6_blend_bo, 1751 render_state->gen6_depth_stencil_bo, 1752 }; 1753 1754 if (INTEL_INFO(intel)->gen >= 060) 1755 return drm_intel_bufmgr_check_aperture_space(gen6_bo_table, 1756 ARRAY_SIZE(gen6_bo_table)) == 0; 1757 else 1758 return drm_intel_bufmgr_check_aperture_space(bo_table, 1759 ARRAY_SIZE(bo_table)) == 0; 1760} 1761 1762static void i965_surface_flush(struct intel_screen_private *intel) 1763{ 1764 int ret; 1765 1766 ret = drm_intel_bo_subdata(intel->surface_bo, 1767 0, intel->surface_used, 1768 intel->surface_data); 1769 assert(ret == 0); 1770 intel->surface_used = 0; 1771 1772 assert (intel->surface_reloc != 0); 1773 drm_intel_bo_emit_reloc(intel->batch_bo, 1774 intel->surface_reloc * 4, 1775 intel->surface_bo, BASE_ADDRESS_MODIFY, 1776 I915_GEM_DOMAIN_INSTRUCTION, 0); 1777 intel->surface_reloc = 0; 1778 1779 drm_intel_bo_unreference(intel->surface_bo); 1780 intel->surface_bo = 1781 
drm_intel_bo_alloc(intel->bufmgr, "surface data", 1782 sizeof(intel->surface_data), 4096); 1783 assert(intel->surface_bo); 1784 1785 return; 1786 (void)ret; 1787} 1788 1789static void 1790i965_emit_composite_primitive_identity_source(intel_screen_private *intel, 1791 int srcX, int srcY, 1792 int maskX, int maskY, 1793 int dstX, int dstY, 1794 int w, int h) 1795{ 1796 OUT_VERTEX(dstX + w); 1797 OUT_VERTEX(dstY + h); 1798 OUT_VERTEX((srcX + w) * intel->scale_units[0][0]); 1799 OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); 1800 1801 OUT_VERTEX(dstX); 1802 OUT_VERTEX(dstY + h); 1803 OUT_VERTEX(srcX * intel->scale_units[0][0]); 1804 OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); 1805 1806 OUT_VERTEX(dstX); 1807 OUT_VERTEX(dstY); 1808 OUT_VERTEX(srcX * intel->scale_units[0][0]); 1809 OUT_VERTEX(srcY * intel->scale_units[0][1]); 1810} 1811 1812static void 1813i965_emit_composite_primitive_affine_source(intel_screen_private *intel, 1814 int srcX, int srcY, 1815 int maskX, int maskY, 1816 int dstX, int dstY, 1817 int w, int h) 1818{ 1819 float src_x[3], src_y[3]; 1820 1821 if (!intel_get_transformed_coordinates(srcX, srcY, 1822 intel->transform[0], 1823 &src_x[0], 1824 &src_y[0])) 1825 return; 1826 1827 if (!intel_get_transformed_coordinates(srcX, srcY + h, 1828 intel->transform[0], 1829 &src_x[1], 1830 &src_y[1])) 1831 return; 1832 1833 if (!intel_get_transformed_coordinates(srcX + w, srcY + h, 1834 intel->transform[0], 1835 &src_x[2], 1836 &src_y[2])) 1837 return; 1838 1839 OUT_VERTEX(dstX + w); 1840 OUT_VERTEX(dstY + h); 1841 OUT_VERTEX(src_x[2] * intel->scale_units[0][0]); 1842 OUT_VERTEX(src_y[2] * intel->scale_units[0][1]); 1843 1844 OUT_VERTEX(dstX); 1845 OUT_VERTEX(dstY + h); 1846 OUT_VERTEX(src_x[1] * intel->scale_units[0][0]); 1847 OUT_VERTEX(src_y[1] * intel->scale_units[0][1]); 1848 1849 OUT_VERTEX(dstX); 1850 OUT_VERTEX(dstY); 1851 OUT_VERTEX(src_x[0] * intel->scale_units[0][0]); 1852 OUT_VERTEX(src_y[0] * intel->scale_units[0][1]); 1853} 1854 
1855static void 1856i965_emit_composite_primitive_identity_source_mask(intel_screen_private *intel, 1857 int srcX, int srcY, 1858 int maskX, int maskY, 1859 int dstX, int dstY, 1860 int w, int h) 1861{ 1862 OUT_VERTEX(dstX + w); 1863 OUT_VERTEX(dstY + h); 1864 OUT_VERTEX((srcX + w) * intel->scale_units[0][0]); 1865 OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); 1866 OUT_VERTEX((maskX + w) * intel->scale_units[1][0]); 1867 OUT_VERTEX((maskY + h) * intel->scale_units[1][1]); 1868 1869 OUT_VERTEX(dstX); 1870 OUT_VERTEX(dstY + h); 1871 OUT_VERTEX(srcX * intel->scale_units[0][0]); 1872 OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); 1873 OUT_VERTEX(maskX * intel->scale_units[1][0]); 1874 OUT_VERTEX((maskY + h) * intel->scale_units[1][1]); 1875 1876 OUT_VERTEX(dstX); 1877 OUT_VERTEX(dstY); 1878 OUT_VERTEX(srcX * intel->scale_units[0][0]); 1879 OUT_VERTEX(srcY * intel->scale_units[0][1]); 1880 OUT_VERTEX(maskX * intel->scale_units[1][0]); 1881 OUT_VERTEX(maskY * intel->scale_units[1][1]); 1882} 1883 1884static void 1885i965_emit_composite_primitive(intel_screen_private *intel, 1886 int srcX, int srcY, 1887 int maskX, int maskY, 1888 int dstX, int dstY, 1889 int w, int h) 1890{ 1891 float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; 1892 Bool is_affine = intel->gen4_render_state->composite_op.is_affine; 1893 1894 if (is_affine) { 1895 if (!intel_get_transformed_coordinates(srcX, srcY, 1896 intel->transform[0], 1897 &src_x[0], 1898 &src_y[0])) 1899 return; 1900 1901 if (!intel_get_transformed_coordinates(srcX, srcY + h, 1902 intel->transform[0], 1903 &src_x[1], 1904 &src_y[1])) 1905 return; 1906 1907 if (!intel_get_transformed_coordinates(srcX + w, srcY + h, 1908 intel->transform[0], 1909 &src_x[2], 1910 &src_y[2])) 1911 return; 1912 } else { 1913 if (!intel_get_transformed_coordinates_3d(srcX, srcY, 1914 intel->transform[0], 1915 &src_x[0], 1916 &src_y[0], 1917 &src_w[0])) 1918 return; 1919 1920 if (!intel_get_transformed_coordinates_3d(srcX, srcY 
+ h, 1921 intel->transform[0], 1922 &src_x[1], 1923 &src_y[1], 1924 &src_w[1])) 1925 return; 1926 1927 if (!intel_get_transformed_coordinates_3d(srcX + w, srcY + h, 1928 intel->transform[0], 1929 &src_x[2], 1930 &src_y[2], 1931 &src_w[2])) 1932 return; 1933 } 1934 1935 if (intel->render_mask) { 1936 if (is_affine) { 1937 if (!intel_get_transformed_coordinates(maskX, maskY, 1938 intel->transform[1], 1939 &mask_x[0], 1940 &mask_y[0])) 1941 return; 1942 1943 if (!intel_get_transformed_coordinates(maskX, maskY + h, 1944 intel->transform[1], 1945 &mask_x[1], 1946 &mask_y[1])) 1947 return; 1948 1949 if (!intel_get_transformed_coordinates(maskX + w, maskY + h, 1950 intel->transform[1], 1951 &mask_x[2], 1952 &mask_y[2])) 1953 return; 1954 } else { 1955 if (!intel_get_transformed_coordinates_3d(maskX, maskY, 1956 intel->transform[1], 1957 &mask_x[0], 1958 &mask_y[0], 1959 &mask_w[0])) 1960 return; 1961 1962 if (!intel_get_transformed_coordinates_3d(maskX, maskY + h, 1963 intel->transform[1], 1964 &mask_x[1], 1965 &mask_y[1], 1966 &mask_w[1])) 1967 return; 1968 1969 if (!intel_get_transformed_coordinates_3d(maskX + w, maskY + h, 1970 intel->transform[1], 1971 &mask_x[2], 1972 &mask_y[2], 1973 &mask_w[2])) 1974 return; 1975 } 1976 } 1977 1978 OUT_VERTEX(dstX + w); 1979 OUT_VERTEX(dstY + h); 1980 OUT_VERTEX(src_x[2] * intel->scale_units[0][0]); 1981 OUT_VERTEX(src_y[2] * intel->scale_units[0][1]); 1982 if (!is_affine) 1983 OUT_VERTEX(src_w[2]); 1984 if (intel->render_mask) { 1985 OUT_VERTEX(mask_x[2] * intel->scale_units[1][0]); 1986 OUT_VERTEX(mask_y[2] * intel->scale_units[1][1]); 1987 if (!is_affine) 1988 OUT_VERTEX(mask_w[2]); 1989 } 1990 1991 OUT_VERTEX(dstX); 1992 OUT_VERTEX(dstY + h); 1993 OUT_VERTEX(src_x[1] * intel->scale_units[0][0]); 1994 OUT_VERTEX(src_y[1] * intel->scale_units[0][1]); 1995 if (!is_affine) 1996 OUT_VERTEX(src_w[1]); 1997 if (intel->render_mask) { 1998 OUT_VERTEX(mask_x[1] * intel->scale_units[1][0]); 1999 OUT_VERTEX(mask_y[1] * 
intel->scale_units[1][1]); 2000 if (!is_affine) 2001 OUT_VERTEX(mask_w[1]); 2002 } 2003 2004 OUT_VERTEX(dstX); 2005 OUT_VERTEX(dstY); 2006 OUT_VERTEX(src_x[0] * intel->scale_units[0][0]); 2007 OUT_VERTEX(src_y[0] * intel->scale_units[0][1]); 2008 if (!is_affine) 2009 OUT_VERTEX(src_w[0]); 2010 if (intel->render_mask) { 2011 OUT_VERTEX(mask_x[0] * intel->scale_units[1][0]); 2012 OUT_VERTEX(mask_y[0] * intel->scale_units[1][1]); 2013 if (!is_affine) 2014 OUT_VERTEX(mask_w[0]); 2015 } 2016} 2017 2018Bool 2019i965_prepare_composite(int op, PicturePtr source_picture, 2020 PicturePtr mask_picture, PicturePtr dest_picture, 2021 PixmapPtr source, PixmapPtr mask, PixmapPtr dest) 2022{ 2023 ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen); 2024 intel_screen_private *intel = intel_get_screen_private(scrn); 2025 struct gen4_render_state *render_state = intel->gen4_render_state; 2026 gen4_composite_op *composite_op = &render_state->composite_op; 2027 2028 composite_op->src_filter = 2029 sampler_state_filter_from_picture(source_picture->filter); 2030 if (composite_op->src_filter == SS_INVALID_FILTER) { 2031 intel_debug_fallback(scrn, "Bad src filter 0x%x\n", 2032 source_picture->filter); 2033 return FALSE; 2034 } 2035 composite_op->src_extend = 2036 sampler_state_extend_from_picture(source_picture->repeatType); 2037 if (composite_op->src_extend == SS_INVALID_EXTEND) { 2038 intel_debug_fallback(scrn, "Bad src repeat 0x%x\n", 2039 source_picture->repeatType); 2040 return FALSE; 2041 } 2042 2043 if (mask_picture) { 2044 if (mask_picture->componentAlpha && 2045 PICT_FORMAT_RGB(mask_picture->format)) { 2046 /* Check if it's component alpha that relies on a source alpha and on 2047 * the source value. We can only get one of those into the single 2048 * source value that we get to blend with. 
2049 */ 2050 if (i965_blend_op[op].src_alpha && 2051 (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) { 2052 intel_debug_fallback(scrn, 2053 "Component alpha not supported " 2054 "with source alpha and source " 2055 "value blending.\n"); 2056 return FALSE; 2057 } 2058 } 2059 2060 composite_op->mask_filter = 2061 sampler_state_filter_from_picture(mask_picture->filter); 2062 if (composite_op->mask_filter == SS_INVALID_FILTER) { 2063 intel_debug_fallback(scrn, "Bad mask filter 0x%x\n", 2064 mask_picture->filter); 2065 return FALSE; 2066 } 2067 composite_op->mask_extend = 2068 sampler_state_extend_from_picture(mask_picture->repeatType); 2069 if (composite_op->mask_extend == SS_INVALID_EXTEND) { 2070 intel_debug_fallback(scrn, "Bad mask repeat 0x%x\n", 2071 mask_picture->repeatType); 2072 return FALSE; 2073 } 2074 } else { 2075 composite_op->mask_filter = SS_FILTER_NEAREST; 2076 composite_op->mask_extend = SS_EXTEND_NONE; 2077 } 2078 2079 /* Flush any pending writes prior to relocating the textures. */ 2080 if (intel_pixmap_is_dirty(source) || intel_pixmap_is_dirty(mask)) 2081 intel_batch_emit_flush(scrn); 2082 2083 composite_op->op = op; 2084 intel->render_source_picture = source_picture; 2085 intel->render_mask_picture = mask_picture; 2086 intel->render_dest_picture = dest_picture; 2087 intel->render_source = source; 2088 intel->render_mask = mask; 2089 intel->render_dest = dest; 2090 2091 intel->scale_units[0][0] = 1. / source->drawable.width; 2092 intel->scale_units[0][1] = 1. / source->drawable.height; 2093 2094 intel->transform[0] = source_picture->transform; 2095 composite_op->is_affine = intel_transform_is_affine(intel->transform[0]); 2096 2097 if (mask_picture == NULL) { 2098 intel->transform[1] = NULL; 2099 intel->scale_units[1][0] = -1; 2100 intel->scale_units[1][1] = -1; 2101 } else { 2102 assert(mask != NULL); 2103 intel->transform[1] = mask_picture->transform; 2104 intel->scale_units[1][0] = 1. 
/ mask->drawable.width; 2105 intel->scale_units[1][1] = 1. / mask->drawable.height; 2106 composite_op->is_affine &= 2107 intel_transform_is_affine(intel->transform[1]); 2108 } 2109 2110 if (mask) { 2111 if (mask_picture->componentAlpha && 2112 PICT_FORMAT_RGB(mask_picture->format)) { 2113 if (i965_blend_op[op].src_alpha) { 2114 if (composite_op->is_affine) 2115 composite_op->wm_kernel = 2116 WM_KERNEL_MASKCA_SRCALPHA_AFFINE; 2117 else 2118 composite_op->wm_kernel = 2119 WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE; 2120 } else { 2121 if (composite_op->is_affine) 2122 composite_op->wm_kernel = 2123 WM_KERNEL_MASKCA_AFFINE; 2124 else 2125 composite_op->wm_kernel = 2126 WM_KERNEL_MASKCA_PROJECTIVE; 2127 } 2128 } else { 2129 if (composite_op->is_affine) 2130 composite_op->wm_kernel = 2131 WM_KERNEL_MASKNOCA_AFFINE; 2132 else 2133 composite_op->wm_kernel = 2134 WM_KERNEL_MASKNOCA_PROJECTIVE; 2135 } 2136 } else { 2137 if (composite_op->is_affine) 2138 composite_op->wm_kernel = WM_KERNEL_NOMASK_AFFINE; 2139 else 2140 composite_op->wm_kernel = WM_KERNEL_NOMASK_PROJECTIVE; 2141 } 2142 2143 intel->prim_emit = i965_emit_composite_primitive; 2144 if (!mask) { 2145 if (intel->transform[0] == NULL) 2146 intel->prim_emit = i965_emit_composite_primitive_identity_source; 2147 else if (composite_op->is_affine) 2148 intel->prim_emit = i965_emit_composite_primitive_affine_source; 2149 } else { 2150 if (intel->transform[0] == NULL && intel->transform[1] == NULL) 2151 intel->prim_emit = i965_emit_composite_primitive_identity_source_mask; 2152 } 2153 2154 intel->floats_per_vertex = 2155 2 + (mask ? 2 : 1) * (composite_op->is_affine ? 
2: 3); 2156 2157 if (!i965_composite_check_aperture(intel)) { 2158 intel_batch_submit(scrn); 2159 if (!i965_composite_check_aperture(intel)) { 2160 intel_debug_fallback(scrn, 2161 "Couldn't fit render operation " 2162 "in aperture\n"); 2163 return FALSE; 2164 } 2165 } 2166 2167 if (sizeof(intel->surface_data) - intel->surface_used < 2168 4 * SURFACE_STATE_PADDED_SIZE) 2169 i965_surface_flush(intel); 2170 2171 intel->needs_render_state_emit = TRUE; 2172 2173 return TRUE; 2174} 2175 2176static void i965_select_vertex_buffer(struct intel_screen_private *intel) 2177{ 2178 int id = intel->gen4_render_state->composite_op.vertex_id; 2179 int modifyenable = 0; 2180 2181 if (intel->vertex_id & (1 << id)) 2182 return; 2183 2184 if (INTEL_INFO(intel)->gen >= 070) 2185 modifyenable = GEN7_VB0_ADDRESS_MODIFYENABLE; 2186 2187 /* Set up the pointer to our (single) vertex buffer */ 2188 OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3); 2189 2190 /* XXX could use multiple vbo to reduce relocations if 2191 * frequently switching between vertex sizes, like rgb10text. 
2192 */ 2193 if (INTEL_INFO(intel)->gen >= 060) { 2194 OUT_BATCH((id << GEN6_VB0_BUFFER_INDEX_SHIFT) | 2195 GEN6_VB0_VERTEXDATA | 2196 modifyenable | 2197 (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); 2198 } else { 2199 OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | 2200 VB0_VERTEXDATA | 2201 (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); 2202 } 2203 OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); 2204 if (INTEL_INFO(intel)->gen >= 050) 2205 OUT_RELOC(intel->vertex_bo, 2206 I915_GEM_DOMAIN_VERTEX, 0, 2207 sizeof(intel->vertex_ptr) - 1); 2208 else 2209 OUT_BATCH(0); 2210 OUT_BATCH(0); // ignore for VERTEXDATA, but still there 2211 2212 intel->vertex_id |= 1 << id; 2213} 2214 2215static void i965_bind_surfaces(struct intel_screen_private *intel) 2216{ 2217 uint32_t *binding_table; 2218 2219 assert(intel->surface_used + 4 * SURFACE_STATE_PADDED_SIZE <= sizeof(intel->surface_data)); 2220 2221 binding_table = (uint32_t*) (intel->surface_data + intel->surface_used); 2222 intel->surface_table = intel->surface_used; 2223 intel->surface_used += SURFACE_STATE_PADDED_SIZE; 2224 2225 binding_table[0] = 2226 i965_set_picture_surface_state(intel, 2227 intel->render_dest_picture, 2228 intel->render_dest, 2229 TRUE); 2230 binding_table[1] = 2231 i965_set_picture_surface_state(intel, 2232 intel->render_source_picture, 2233 intel->render_source, 2234 FALSE); 2235 if (intel->render_mask) { 2236 binding_table[2] = 2237 i965_set_picture_surface_state(intel, 2238 intel->render_mask_picture, 2239 intel->render_mask, 2240 FALSE); 2241 } 2242} 2243 2244void 2245i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, 2246 int dstX, int dstY, int w, int h) 2247{ 2248 ScrnInfoPtr scrn = xf86ScreenToScrn(dest->drawable.pScreen); 2249 intel_screen_private *intel = intel_get_screen_private(scrn); 2250 2251 intel_batch_start_atomic(scrn, 200); 2252 if (intel->needs_render_state_emit) { 2253 i965_bind_surfaces(intel); 2254 2255 if 
(INTEL_INFO(intel)->gen >= 060) 2256 gen6_emit_composite_state(intel); 2257 else 2258 i965_emit_composite_state(intel); 2259 } 2260 2261 if (intel->floats_per_vertex != intel->last_floats_per_vertex) { 2262 intel->vertex_index = (intel->vertex_used + intel->floats_per_vertex - 1) / intel->floats_per_vertex; 2263 intel->vertex_used = intel->vertex_index * intel->floats_per_vertex; 2264 intel->last_floats_per_vertex = intel->floats_per_vertex; 2265 } 2266 if (intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) { 2267 i965_vertex_flush(intel); 2268 intel_next_vertex(intel); 2269 intel->vertex_index = 0; 2270 } 2271 i965_select_vertex_buffer(intel); 2272 2273 if (intel->vertex_offset == 0) { 2274 if (INTEL_INFO(intel)->gen >= 070) { 2275 OUT_BATCH(BRW_3DPRIMITIVE | (7 - 2)); 2276 OUT_BATCH(BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | 2277 _3DPRIM_RECTLIST); 2278 } else { 2279 OUT_BATCH(BRW_3DPRIMITIVE | 2280 BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | 2281 (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | 2282 (0 << 9) | 2283 4); 2284 } 2285 intel->vertex_offset = intel->batch_used; 2286 OUT_BATCH(0); /* vertex count, to be filled in later */ 2287 OUT_BATCH(intel->vertex_index); 2288 OUT_BATCH(1); /* single instance */ 2289 OUT_BATCH(0); /* start instance location */ 2290 OUT_BATCH(0); /* index buffer offset, ignored */ 2291 intel->vertex_count = intel->vertex_index; 2292 } 2293 2294 intel->prim_emit(intel, 2295 srcX, srcY, 2296 maskX, maskY, 2297 dstX, dstY, 2298 w, h); 2299 intel->vertex_index += 3; 2300 2301 if (INTEL_INFO(intel)->gen < 050) { 2302 /* XXX OMG! 
*/ 2303 i965_vertex_flush(intel); 2304 OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); 2305 } 2306 2307 intel_batch_end_atomic(scrn); 2308} 2309 2310void i965_batch_commit_notify(intel_screen_private *intel) 2311{ 2312 intel->needs_render_state_emit = TRUE; 2313 intel->needs_3d_invariant = TRUE; 2314 intel->last_floats_per_vertex = 0; 2315 intel->vertex_index = 0; 2316 2317 intel->gen4_render_state->composite_op.vertex_id = -1; 2318 2319 intel->gen6_render_state.num_sf_outputs = 0; 2320 intel->gen6_render_state.samplers = NULL; 2321 intel->gen6_render_state.blend = -1; 2322 intel->gen6_render_state.kernel = NULL; 2323 intel->gen6_render_state.drawrect = -1; 2324 2325 assert(intel->surface_reloc == 0); 2326} 2327 2328/** 2329 * Called at EnterVT so we can set up our offsets into the state buffer. 2330 */ 2331void gen4_render_state_init(ScrnInfoPtr scrn) 2332{ 2333 intel_screen_private *intel = intel_get_screen_private(scrn); 2334 struct gen4_render_state *render; 2335 const struct wm_kernel_info *wm_kernels; 2336 sampler_state_filter_t src_filter; 2337 sampler_state_extend_t src_extend; 2338 sampler_state_filter_t mask_filter; 2339 sampler_state_extend_t mask_extend; 2340 drm_intel_bo *sf_kernel_bo, *sf_kernel_mask_bo; 2341 drm_intel_bo *border_color_bo; 2342 int m; 2343 2344 intel->needs_3d_invariant = TRUE; 2345 2346 intel->surface_bo = 2347 drm_intel_bo_alloc(intel->bufmgr, "surface data", 2348 sizeof(intel->surface_data), 4096); 2349 assert(intel->surface_bo); 2350 2351 intel->surface_used = 0; 2352 2353 if (intel->gen4_render_state == NULL) { 2354 intel->gen4_render_state = calloc(1, sizeof(*render)); 2355 assert(intel->gen4_render_state != NULL); 2356 } 2357 2358 if (INTEL_INFO(intel)->gen >= 060) 2359 return gen6_render_state_init(scrn); 2360 2361 render = intel->gen4_render_state; 2362 render->composite_op.vertex_id = -1; 2363 2364 render->vs_state_bo = gen4_create_vs_unit_state(intel); 2365 2366 /* Set up the two SF states (one for blending with a 
mask, one without) */ 2367 if (IS_GEN5(intel)) { 2368 sf_kernel_bo = intel_bo_alloc_for_data(intel, 2369 sf_kernel_static_gen5, 2370 sizeof 2371 (sf_kernel_static_gen5), 2372 "sf kernel gen5"); 2373 sf_kernel_mask_bo = 2374 intel_bo_alloc_for_data(intel, sf_kernel_mask_static_gen5, 2375 sizeof(sf_kernel_mask_static_gen5), 2376 "sf mask kernel"); 2377 } else { 2378 sf_kernel_bo = intel_bo_alloc_for_data(intel, 2379 sf_kernel_static, 2380 sizeof(sf_kernel_static), 2381 "sf kernel"); 2382 sf_kernel_mask_bo = intel_bo_alloc_for_data(intel, 2383 sf_kernel_mask_static, 2384 sizeof 2385 (sf_kernel_mask_static), 2386 "sf mask kernel"); 2387 } 2388 render->sf_state_bo = gen4_create_sf_state(intel, sf_kernel_bo); 2389 render->sf_mask_state_bo = gen4_create_sf_state(intel, sf_kernel_mask_bo); 2390 drm_intel_bo_unreference(sf_kernel_bo); 2391 drm_intel_bo_unreference(sf_kernel_mask_bo); 2392 2393 wm_kernels = IS_GEN5(intel) ? wm_kernels_gen5 : wm_kernels_gen4; 2394 for (m = 0; m < KERNEL_COUNT; m++) { 2395 render->wm_kernel_bo[m] = 2396 intel_bo_alloc_for_data(intel, 2397 wm_kernels[m].data, 2398 wm_kernels[m].size, 2399 "WM kernel"); 2400 } 2401 2402 /* Set up the WM states: each filter/extend type for source and mask, per 2403 * kernel. 2404 */ 2405 border_color_bo = sampler_border_color_create(intel); 2406 for (src_filter = 0; src_filter < FILTER_COUNT; src_filter++) { 2407 for (src_extend = 0; src_extend < EXTEND_COUNT; src_extend++) { 2408 for (mask_filter = 0; mask_filter < FILTER_COUNT; mask_filter++) { 2409 for (mask_extend = 0; mask_extend < EXTEND_COUNT; mask_extend++) { 2410 drm_intel_bo *sampler_state_bo; 2411 2412 sampler_state_bo = 2413 i965_create_sampler_state(intel, 2414 src_filter, src_extend, 2415 mask_filter, mask_extend, 2416 border_color_bo); 2417 2418 for (m = 0; m < KERNEL_COUNT; m++) { 2419 render->wm_state_bo[m][src_filter][src_extend][mask_filter][mask_extend] = 2420 gen4_create_wm_state 2421 (intel, 2422 wm_kernels[m]. 
has_mask, 2423 render->wm_kernel_bo[m], 2424 sampler_state_bo); 2425 } 2426 drm_intel_bo_unreference(sampler_state_bo); 2427 } 2428 } 2429 } 2430 } 2431 drm_intel_bo_unreference(border_color_bo); 2432 2433 render->cc_state_bo = gen4_create_cc_unit_state(intel); 2434} 2435 2436/** 2437 * Called at LeaveVT. 2438 */ 2439void gen4_render_state_cleanup(ScrnInfoPtr scrn) 2440{ 2441 intel_screen_private *intel = intel_get_screen_private(scrn); 2442 struct gen4_render_state *render_state = intel->gen4_render_state; 2443 int i, j, k, l, m; 2444 2445 drm_intel_bo_unreference(intel->surface_bo); 2446 drm_intel_bo_unreference(render_state->vs_state_bo); 2447 drm_intel_bo_unreference(render_state->sf_state_bo); 2448 drm_intel_bo_unreference(render_state->sf_mask_state_bo); 2449 2450 for (i = 0; i < KERNEL_COUNT; i++) 2451 drm_intel_bo_unreference(render_state->wm_kernel_bo[i]); 2452 2453 for (i = 0; i < FILTER_COUNT; i++) 2454 for (j = 0; j < EXTEND_COUNT; j++) 2455 for (k = 0; k < FILTER_COUNT; k++) 2456 for (l = 0; l < EXTEND_COUNT; l++) 2457 for (m = 0; m < KERNEL_COUNT; m++) 2458 drm_intel_bo_unreference 2459 (render_state-> 2460 wm_state_bo[m][i][j][k] 2461 [l]); 2462 2463 for (i = 0; i < FILTER_COUNT; i++) 2464 for (j = 0; j < EXTEND_COUNT; j++) 2465 for (k = 0; k < FILTER_COUNT; k++) 2466 for (l = 0; l < EXTEND_COUNT; l++) 2467 drm_intel_bo_unreference(render_state->ps_sampler_state_bo[i][j][k][l]); 2468 2469 drm_intel_bo_unreference(render_state->cc_state_bo); 2470 2471 drm_intel_bo_unreference(render_state->cc_vp_bo); 2472 drm_intel_bo_unreference(render_state->gen6_blend_bo); 2473 drm_intel_bo_unreference(render_state->gen6_depth_stencil_bo); 2474 2475 free(intel->gen4_render_state); 2476 intel->gen4_render_state = NULL; 2477} 2478 2479/* 2480 * for GEN6+ 2481 */ 2482#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64) 2483 2484static drm_intel_bo * 2485gen6_composite_create_cc_state(intel_screen_private *intel) 2486{ 2487 struct 
gen6_color_calc_state *state; 2488 drm_intel_bo *cc_bo; 2489 int ret; 2490 2491 cc_bo = drm_intel_bo_alloc(intel->bufmgr, 2492 "gen6 CC state", 2493 sizeof(*state), 2494 4096); 2495 assert(cc_bo); 2496 2497 ret = drm_intel_bo_map(cc_bo, TRUE); 2498 assert(ret == 0); 2499 2500 state = memset(cc_bo->virtual, 0, sizeof(*state)); 2501 state->constant_r = 1.0; 2502 state->constant_g = 0.0; 2503 state->constant_b = 1.0; 2504 state->constant_a = 1.0; 2505 drm_intel_bo_unmap(cc_bo); 2506 2507 return cc_bo; 2508 (void)ret; 2509} 2510 2511static drm_intel_bo * 2512gen6_composite_create_blend_state(intel_screen_private *intel) 2513{ 2514 drm_intel_bo *blend_bo; 2515 int src, dst, ret; 2516 2517 blend_bo = drm_intel_bo_alloc(intel->bufmgr, 2518 "gen6 BLEND state", 2519 BRW_BLENDFACTOR_COUNT * BRW_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE, 2520 4096); 2521 assert(blend_bo); 2522 2523 ret = drm_intel_bo_map(blend_bo, TRUE); 2524 assert(ret == 0); 2525 2526 memset(blend_bo->virtual, 0, blend_bo->size); 2527 for (src = 0; src < BRW_BLENDFACTOR_COUNT; src++) { 2528 for (dst = 0; dst < BRW_BLENDFACTOR_COUNT; dst++) { 2529 uint32_t blend_state_offset = (src * BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE; 2530 struct gen6_blend_state *blend; 2531 2532 blend = (struct gen6_blend_state *)((char *)blend_bo->virtual + blend_state_offset); 2533 blend->blend0.dest_blend_factor = dst; 2534 blend->blend0.source_blend_factor = src; 2535 blend->blend0.blend_func = BRW_BLENDFUNCTION_ADD; 2536 blend->blend0.blend_enable = 1; 2537 2538 blend->blend1.post_blend_clamp_enable = 1; 2539 blend->blend1.pre_blend_clamp_enable = 1; 2540 } 2541 } 2542 2543 drm_intel_bo_unmap(blend_bo); 2544 return blend_bo; 2545 (void)ret; 2546} 2547 2548static drm_intel_bo * 2549gen6_composite_create_depth_stencil_state(intel_screen_private *intel) 2550{ 2551 drm_intel_bo *depth_stencil_bo; 2552 int ret; 2553 2554 depth_stencil_bo = 2555 drm_intel_bo_alloc(intel->bufmgr, 2556 "gen6 DEPTH_STENCIL 
state", 2557 sizeof(struct gen6_depth_stencil_state), 2558 4096); 2559 assert(depth_stencil_bo); 2560 2561 ret = drm_intel_bo_map(depth_stencil_bo, TRUE); 2562 assert(ret == 0); 2563 2564 memset(depth_stencil_bo->virtual, 0, 2565 sizeof(struct gen6_depth_stencil_state)); 2566 drm_intel_bo_unmap(depth_stencil_bo); 2567 2568 return depth_stencil_bo; 2569 (void)ret; 2570} 2571 2572static void 2573gen6_composite_state_base_address(intel_screen_private *intel) 2574{ 2575 OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2)); 2576 OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */ 2577 intel->surface_reloc = intel->batch_used; 2578 intel_batch_emit_dword(intel, 2579 intel->surface_bo->offset | BASE_ADDRESS_MODIFY); 2580 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */ 2581 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */ 2582 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */ 2583 OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state upper bound */ 2584 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ 2585 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ 2586 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ 2587} 2588 2589static void 2590gen6_composite_cc_state_pointers(intel_screen_private *intel, 2591 uint32_t blend_offset) 2592{ 2593 struct gen4_render_state *render_state = intel->gen4_render_state; 2594 drm_intel_bo *cc_bo = NULL; 2595 drm_intel_bo *depth_stencil_bo = NULL; 2596 2597 if (intel->gen6_render_state.blend == blend_offset) 2598 return; 2599 2600 if (intel->gen6_render_state.blend == -1) { 2601 cc_bo = render_state->cc_state_bo; 2602 depth_stencil_bo = render_state->gen6_depth_stencil_bo; 2603 } 2604 if (INTEL_INFO(intel)->gen >= 070) { 2605 gen7_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset); 2606 } else { 2607 gen6_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, 
blend_offset); 2608 } 2609 2610 intel->gen6_render_state.blend = blend_offset; 2611} 2612 2613static void 2614gen6_composite_sampler_state_pointers(intel_screen_private *intel, 2615 drm_intel_bo *bo) 2616{ 2617 if (intel->gen6_render_state.samplers == bo) 2618 return; 2619 2620 intel->gen6_render_state.samplers = bo; 2621 2622 if (INTEL_INFO(intel)->gen >= 070) 2623 gen7_upload_sampler_state_pointers(intel, bo); 2624 else 2625 gen6_upload_sampler_state_pointers(intel, bo); 2626} 2627 2628static void 2629gen6_composite_wm_constants(intel_screen_private *intel) 2630{ 2631 Bool ivb = INTEL_INFO(intel)->gen >= 070; 2632 /* disable WM constant buffer */ 2633 OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | ((ivb ? 7 : 5) - 2)); 2634 OUT_BATCH(0); 2635 OUT_BATCH(0); 2636 OUT_BATCH(0); 2637 OUT_BATCH(0); 2638 if (ivb) { 2639 OUT_BATCH(0); 2640 OUT_BATCH(0); 2641 } 2642} 2643 2644static void 2645gen6_composite_sf_state(intel_screen_private *intel, 2646 Bool has_mask) 2647{ 2648 int num_sf_outputs = has_mask ? 2 : 1; 2649 2650 if (intel->gen6_render_state.num_sf_outputs == num_sf_outputs) 2651 return; 2652 2653 intel->gen6_render_state.num_sf_outputs = num_sf_outputs; 2654 2655 if (INTEL_INFO(intel)->gen >= 070) 2656 gen7_upload_sf_state(intel, num_sf_outputs, 1); 2657 else 2658 gen6_upload_sf_state(intel, num_sf_outputs, 1); 2659} 2660 2661static void 2662gen6_composite_wm_state(intel_screen_private *intel, 2663 Bool has_mask, 2664 drm_intel_bo *bo) 2665{ 2666 int num_surfaces = has_mask ? 3 : 2; 2667 int num_sf_outputs = has_mask ? 
2 : 1; 2668 2669 if (intel->gen6_render_state.kernel == bo) 2670 return; 2671 2672 intel->gen6_render_state.kernel = bo; 2673 2674 OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); 2675 OUT_RELOC(bo, 2676 I915_GEM_DOMAIN_INSTRUCTION, 0, 2677 0); 2678 OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | 2679 (num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); 2680 OUT_BATCH(0); 2681 OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ 2682 OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | 2683 GEN6_3DSTATE_WM_DISPATCH_ENABLE | 2684 GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); 2685 OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | 2686 GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 2687 OUT_BATCH(0); 2688 OUT_BATCH(0); 2689} 2690 2691static void 2692gen7_composite_wm_state(intel_screen_private *intel, 2693 Bool has_mask, 2694 drm_intel_bo *bo) 2695{ 2696 int num_surfaces = has_mask ? 3 : 2; 2697 unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB; 2698 unsigned int num_samples = 0; 2699 2700 if (IS_HSW(intel)) { 2701 max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW; 2702 num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW; 2703 } 2704 2705 if (intel->gen6_render_state.kernel == bo) 2706 return; 2707 2708 intel->gen6_render_state.kernel = bo; 2709 2710 OUT_BATCH(GEN6_3DSTATE_WM | (3 - 2)); 2711 OUT_BATCH(GEN7_WM_DISPATCH_ENABLE | 2712 GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 2713 OUT_BATCH(0); 2714 2715 OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2)); 2716 OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 2717 OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | 2718 (num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); 2719 OUT_BATCH(0); /* scratch space base offset */ 2720 OUT_BATCH(((48 - 1) << max_threads_shift) | num_samples | 2721 GEN7_PS_ATTRIBUTE_ENABLE | 2722 GEN7_PS_16_DISPATCH_ENABLE); 2723 OUT_BATCH((6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0)); 2724 OUT_BATCH(0); /* kernel 1 pointer */ 2725 OUT_BATCH(0); /* kernel 
2 pointer */ 2726} 2727 2728 2729static void 2730gen6_composite_drawing_rectangle(intel_screen_private *intel, 2731 PixmapPtr dest) 2732{ 2733 uint32_t dw = 2734 DRAW_YMAX(dest->drawable.height - 1) | 2735 DRAW_XMAX(dest->drawable.width - 1); 2736 2737 /* XXX cacomposite depends upon the implicit non-pipelined flush */ 2738 if (0 && intel->gen6_render_state.drawrect == dw) 2739 return; 2740 intel->gen6_render_state.drawrect = dw; 2741 2742 OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); 2743 OUT_BATCH(0x00000000); /* ymin, xmin */ 2744 OUT_BATCH(dw); /* ymax, xmax */ 2745 OUT_BATCH(0x00000000); /* yorigin, xorigin */ 2746} 2747 2748static void 2749gen6_composite_vertex_element_state(intel_screen_private *intel, 2750 Bool has_mask, 2751 Bool is_affine) 2752{ 2753 /* 2754 * vertex data in vertex buffer 2755 * position: (x, y) 2756 * texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0) 2757 * texture coordinate 1 if (has_mask is TRUE): same as above 2758 */ 2759 gen4_composite_op *composite_op = &intel->gen4_render_state->composite_op; 2760 int nelem = has_mask ? 2 : 1; 2761 int selem = is_affine ? 2 : 3; 2762 uint32_t w_component; 2763 uint32_t src_format; 2764 int id; 2765 2766 id = has_mask << 1 | is_affine; 2767 2768 if (composite_op->vertex_id == id) 2769 return; 2770 2771 composite_op->vertex_id = id; 2772 2773 if (is_affine) { 2774 src_format = BRW_SURFACEFORMAT_R32G32_FLOAT; 2775 w_component = BRW_VFCOMPONENT_STORE_1_FLT; 2776 } else { 2777 src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; 2778 w_component = BRW_VFCOMPONENT_STORE_SRC; 2779 } 2780 2781 /* The VUE layout 2782 * dword 0-3: pad (0.0, 0.0, 0.0. 
0.0) 2783 * dword 4-7: position (x, y, 1.0, 1.0), 2784 * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) 2785 * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) 2786 * 2787 * dword 4-15 are fetched from vertex buffer 2788 */ 2789 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 2790 ((2 * (2 + nelem)) + 1 - 2)); 2791 2792 OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | 2793 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 2794 (0 << VE0_OFFSET_SHIFT)); 2795 OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | 2796 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | 2797 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | 2798 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); 2799 2800 /* x,y */ 2801 OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | 2802 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 2803 (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */ 2804 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 2805 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 2806 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 2807 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 2808 2809 /* u0, v0, w0 */ 2810 OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | 2811 (src_format << VE0_FORMAT_SHIFT) | 2812 ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */ 2813 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 2814 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 2815 (w_component << VE1_VFCOMPONENT_2_SHIFT) | 2816 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 2817 2818 /* u1, v1, w1 */ 2819 if (has_mask) { 2820 OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | 2821 GEN6_VE0_VALID | 2822 (src_format << VE0_FORMAT_SHIFT) | 2823 (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */ 2824 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 2825 
(BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 2826 (w_component << VE1_VFCOMPONENT_2_SHIFT) | 2827 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 2828 } 2829} 2830 2831static void 2832gen6_emit_composite_state(struct intel_screen_private *intel) 2833{ 2834 struct gen4_render_state *render = intel->gen4_render_state; 2835 gen4_composite_op *composite_op = &render->composite_op; 2836 sampler_state_filter_t src_filter = composite_op->src_filter; 2837 sampler_state_filter_t mask_filter = composite_op->mask_filter; 2838 sampler_state_extend_t src_extend = composite_op->src_extend; 2839 sampler_state_extend_t mask_extend = composite_op->mask_extend; 2840 Bool is_affine = composite_op->is_affine; 2841 Bool has_mask = intel->render_mask != NULL; 2842 Bool ivb = INTEL_INFO(intel)->gen >= 070; 2843 uint32_t src, dst; 2844 drm_intel_bo *ps_sampler_state_bo = render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend]; 2845 2846 intel->needs_render_state_emit = FALSE; 2847 if (intel->needs_3d_invariant) { 2848 gen6_upload_invariant_states(intel); 2849 2850 if (ivb) { 2851 gen7_upload_viewport_state_pointers(intel, render->cc_vp_bo); 2852 gen7_upload_urb(intel); 2853 gen7_upload_bypass_states(intel); 2854 gen7_upload_depth_buffer_state(intel); 2855 } else { 2856 gen6_upload_invariant_states(intel); 2857 gen6_upload_viewport_state_pointers(intel, render->cc_vp_bo); 2858 gen6_upload_urb(intel); 2859 2860 gen6_upload_gs_state(intel); 2861 gen6_upload_depth_buffer_state(intel); 2862 } 2863 gen6_composite_wm_constants(intel); 2864 gen6_upload_vs_state(intel); 2865 gen6_upload_clip_state(intel); 2866 2867 intel->needs_3d_invariant = FALSE; 2868 } 2869 2870 i965_get_blend_cntl(composite_op->op, 2871 intel->render_mask_picture, 2872 intel->render_dest_picture->format, 2873 &src, &dst); 2874 2875 if (intel->surface_reloc == 0) 2876 gen6_composite_state_base_address(intel); 2877 2878 gen6_composite_cc_state_pointers(intel, 2879 (src * 
BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE); 2880 gen6_composite_sampler_state_pointers(intel, ps_sampler_state_bo); 2881 gen6_composite_sf_state(intel, has_mask); 2882 if (ivb) { 2883 gen7_composite_wm_state(intel, has_mask, 2884 render->wm_kernel_bo[composite_op->wm_kernel]); 2885 gen7_upload_binding_table(intel, intel->surface_table); 2886 } else { 2887 gen6_composite_wm_state(intel, has_mask, 2888 render->wm_kernel_bo[composite_op->wm_kernel]); 2889 gen6_upload_binding_table(intel, intel->surface_table); 2890 } 2891 gen6_composite_drawing_rectangle(intel, intel->render_dest); 2892 gen6_composite_vertex_element_state(intel, has_mask, is_affine); 2893} 2894 2895static void 2896gen6_render_state_init(ScrnInfoPtr scrn) 2897{ 2898 intel_screen_private *intel = intel_get_screen_private(scrn); 2899 struct gen4_render_state *render; 2900 sampler_state_filter_t src_filter; 2901 sampler_state_filter_t mask_filter; 2902 sampler_state_extend_t src_extend; 2903 sampler_state_extend_t mask_extend; 2904 int m; 2905 drm_intel_bo *border_color_bo; 2906 const struct wm_kernel_info *wm_kernels; 2907 2908 render= intel->gen4_render_state; 2909 render->composite_op.vertex_id = -1; 2910 2911 intel->gen6_render_state.num_sf_outputs = 0; 2912 intel->gen6_render_state.samplers = NULL; 2913 intel->gen6_render_state.blend = -1; 2914 intel->gen6_render_state.kernel = NULL; 2915 intel->gen6_render_state.drawrect = -1; 2916 2917 wm_kernels = IS_GEN7(intel) ? 
wm_kernels_gen7 : wm_kernels_gen6; 2918 for (m = 0; m < KERNEL_COUNT; m++) { 2919 render->wm_kernel_bo[m] = 2920 intel_bo_alloc_for_data(intel, 2921 wm_kernels[m].data, 2922 wm_kernels[m].size, 2923 "WM kernel gen6/7"); 2924 } 2925 2926 border_color_bo = sampler_border_color_create(intel); 2927 2928 for (src_filter = 0; src_filter < FILTER_COUNT; src_filter++) { 2929 for (src_extend = 0; src_extend < EXTEND_COUNT; src_extend++) { 2930 for (mask_filter = 0; mask_filter < FILTER_COUNT; mask_filter++) { 2931 for (mask_extend = 0; mask_extend < EXTEND_COUNT; mask_extend++) { 2932 render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend] = 2933 i965_create_sampler_state(intel, 2934 src_filter, src_extend, 2935 mask_filter, mask_extend, 2936 border_color_bo); 2937 } 2938 } 2939 } 2940 } 2941 2942 drm_intel_bo_unreference(border_color_bo); 2943 render->cc_vp_bo = gen4_create_cc_viewport(intel); 2944 render->cc_state_bo = gen6_composite_create_cc_state(intel); 2945 render->gen6_blend_bo = gen6_composite_create_blend_state(intel); 2946 render->gen6_depth_stencil_bo = gen6_composite_create_depth_stencil_state(intel); 2947} 2948 2949void i965_vertex_flush(struct intel_screen_private *intel) 2950{ 2951 if (intel->vertex_offset) { 2952 intel->batch_ptr[intel->vertex_offset] = 2953 intel->vertex_index - intel->vertex_count; 2954 intel->vertex_offset = 0; 2955 } 2956} 2957 2958void i965_batch_flush(struct intel_screen_private *intel) 2959{ 2960 if (intel->surface_used) 2961 i965_surface_flush(intel); 2962} 2963