i965_render.c revision 42542f5f
1/* 2 * Copyright © 2006,2008 Intel Corporation 3 * Copyright © 2007 Red Hat, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 
23 * 24 * Authors: 25 * Wang Zhenyu <zhenyu.z.wang@intel.com> 26 * Eric Anholt <eric@anholt.net> 27 * Carl Worth <cworth@redhat.com> 28 * Keith Packard <keithp@keithp.com> 29 * 30 */ 31 32#ifdef HAVE_CONFIG_H 33#include "config.h" 34#endif 35 36#include <assert.h> 37#include "xorg-server.h" 38#include "xf86.h" 39#include "intel.h" 40#include "i830_reg.h" 41#include "i965_reg.h" 42 43/* bring in brw structs */ 44#include "brw_defines.h" 45#include "brw_structs.h" 46 47// refer vol2, 3d rasterization 3.8.1 48 49/* defined in brw_defines.h */ 50static const struct blendinfo { 51 Bool dst_alpha; 52 Bool src_alpha; 53 uint32_t src_blend; 54 uint32_t dst_blend; 55} i965_blend_op[] = { 56 /* Clear */ 57 {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ZERO}, 58 /* Src */ 59 {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO}, 60 /* Dst */ 61 {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ONE}, 62 /* Over */ 63 {0, 1, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_INV_SRC_ALPHA}, 64 /* OverReverse */ 65 {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE}, 66 /* In */ 67 {1, 0, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, 68 /* InReverse */ 69 {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_SRC_ALPHA}, 70 /* Out */ 71 {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, 72 /* OutReverse */ 73 {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_INV_SRC_ALPHA}, 74 /* Atop */ 75 {1, 1, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, 76 /* AtopReverse */ 77 {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA}, 78 /* Xor */ 79 {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, 80 /* Add */ 81 {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ONE}, 82}; 83 84/** 85 * Highest-valued BLENDFACTOR used in i965_blend_op. 
86 * 87 * This leaves out BRW_BLENDFACTOR_INV_DST_COLOR, 88 * BRW_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, 89 * BRW_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} 90 */ 91#define BRW_BLENDFACTOR_COUNT (BRW_BLENDFACTOR_INV_DST_ALPHA + 1) 92 93/* FIXME: surface format defined in brw_defines.h, shared Sampling engine 94 * 1.7.2 95 */ 96static const struct formatinfo { 97 int fmt; 98 uint32_t card_fmt; 99} i965_tex_formats[] = { 100 {PICT_a8, BRW_SURFACEFORMAT_A8_UNORM}, 101 {PICT_a8r8g8b8, BRW_SURFACEFORMAT_B8G8R8A8_UNORM}, 102 {PICT_x8r8g8b8, BRW_SURFACEFORMAT_B8G8R8X8_UNORM}, 103 {PICT_a8b8g8r8, BRW_SURFACEFORMAT_R8G8B8A8_UNORM}, 104 {PICT_x8b8g8r8, BRW_SURFACEFORMAT_R8G8B8X8_UNORM}, 105 {PICT_r8g8b8, BRW_SURFACEFORMAT_R8G8B8_UNORM}, 106 {PICT_r5g6b5, BRW_SURFACEFORMAT_B5G6R5_UNORM}, 107 {PICT_a1r5g5b5, BRW_SURFACEFORMAT_B5G5R5A1_UNORM}, 108#if XORG_VERSION_CURRENT >= 10699900 109 {PICT_a2r10g10b10, BRW_SURFACEFORMAT_B10G10R10A2_UNORM}, 110 {PICT_x2r10g10b10, BRW_SURFACEFORMAT_B10G10R10X2_UNORM}, 111 {PICT_a2b10g10r10, BRW_SURFACEFORMAT_R10G10B10A2_UNORM}, 112 {PICT_x2r10g10b10, BRW_SURFACEFORMAT_B10G10R10X2_UNORM}, 113#endif 114 {PICT_a4r4g4b4, BRW_SURFACEFORMAT_B4G4R4A4_UNORM}, 115}; 116 117static void i965_get_blend_cntl(int op, PicturePtr mask, uint32_t dst_format, 118 uint32_t * sblend, uint32_t * dblend) 119{ 120 121 *sblend = i965_blend_op[op].src_blend; 122 *dblend = i965_blend_op[op].dst_blend; 123 124 /* If there's no dst alpha channel, adjust the blend op so that we'll treat 125 * it as always 1. 126 */ 127 if (PICT_FORMAT_A(dst_format) == 0 && i965_blend_op[op].dst_alpha) { 128 if (*sblend == BRW_BLENDFACTOR_DST_ALPHA) 129 *sblend = BRW_BLENDFACTOR_ONE; 130 else if (*sblend == BRW_BLENDFACTOR_INV_DST_ALPHA) 131 *sblend = BRW_BLENDFACTOR_ZERO; 132 } 133 134 /* If the source alpha is being used, then we should only be in a case where 135 * the source blend factor is 0, and the source blend value is the mask 136 * channels multiplied by the source picture's alpha. 
137 */ 138 if (mask && mask->componentAlpha && PICT_FORMAT_RGB(mask->format) 139 && i965_blend_op[op].src_alpha) { 140 if (*dblend == BRW_BLENDFACTOR_SRC_ALPHA) { 141 *dblend = BRW_BLENDFACTOR_SRC_COLOR; 142 } else if (*dblend == BRW_BLENDFACTOR_INV_SRC_ALPHA) { 143 *dblend = BRW_BLENDFACTOR_INV_SRC_COLOR; 144 } 145 } 146 147} 148 149static uint32_t i965_get_dest_format(PicturePtr dest_picture) 150{ 151 switch (dest_picture->format) { 152 case PICT_a8r8g8b8: 153 case PICT_x8r8g8b8: 154 return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; 155 case PICT_a8b8g8r8: 156 case PICT_x8b8g8r8: 157 return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; 158#if XORG_VERSION_CURRENT >= 10699900 159 case PICT_a2r10g10b10: 160 case PICT_x2r10g10b10: 161 return BRW_SURFACEFORMAT_B10G10R10A2_UNORM; 162#endif 163 case PICT_r5g6b5: 164 return BRW_SURFACEFORMAT_B5G6R5_UNORM; 165 case PICT_x1r5g5b5: 166 case PICT_a1r5g5b5: 167 return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; 168 case PICT_a8: 169 return BRW_SURFACEFORMAT_A8_UNORM; 170 case PICT_a4r4g4b4: 171 case PICT_x4r4g4b4: 172 return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; 173 default: 174 return -1; 175 } 176} 177 178Bool 179i965_check_composite(int op, 180 PicturePtr source_picture, 181 PicturePtr mask_picture, 182 PicturePtr dest_picture, 183 int width, int height) 184{ 185 ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen); 186 187 /* Check for unsupported compositing operations. */ 188 if (op >= sizeof(i965_blend_op) / sizeof(i965_blend_op[0])) { 189 intel_debug_fallback(scrn, 190 "Unsupported Composite op 0x%x\n", op); 191 return FALSE; 192 } 193 194 if (mask_picture && mask_picture->componentAlpha && 195 PICT_FORMAT_RGB(mask_picture->format)) { 196 /* Check if it's component alpha that relies on a source alpha and on 197 * the source value. We can only get one of those into the single 198 * source value that we get to blend with. 
199 */ 200 if (i965_blend_op[op].src_alpha && 201 (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) { 202 intel_debug_fallback(scrn, 203 "Component alpha not supported " 204 "with source alpha and source " 205 "value blending.\n"); 206 return FALSE; 207 } 208 } 209 210 if (i965_get_dest_format(dest_picture) == -1) { 211 intel_debug_fallback(scrn, "Usupported Color buffer format 0x%x\n", 212 (int)dest_picture->format); 213 return FALSE; 214 } 215 216 return TRUE; 217} 218 219Bool 220i965_check_composite_texture(ScreenPtr screen, PicturePtr picture) 221{ 222 if (picture->repeatType > RepeatReflect) { 223 ScrnInfoPtr scrn = xf86ScreenToScrn(screen); 224 intel_debug_fallback(scrn, 225 "extended repeat (%d) not supported\n", 226 picture->repeatType); 227 return FALSE; 228 } 229 230 if (picture->filter != PictFilterNearest && 231 picture->filter != PictFilterBilinear) { 232 ScrnInfoPtr scrn = xf86ScreenToScrn(screen); 233 intel_debug_fallback(scrn, "Unsupported filter 0x%x\n", 234 picture->filter); 235 return FALSE; 236 } 237 238 if (picture->pDrawable) { 239 int w, h, i; 240 241 w = picture->pDrawable->width; 242 h = picture->pDrawable->height; 243 if ((w > 8192) || (h > 8192)) { 244 ScrnInfoPtr scrn = xf86ScreenToScrn(screen); 245 intel_debug_fallback(scrn, 246 "Picture w/h too large (%dx%d)\n", 247 w, h); 248 return FALSE; 249 } 250 251 for (i = 0; 252 i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]); 253 i++) { 254 if (i965_tex_formats[i].fmt == picture->format) 255 break; 256 } 257 if (i == sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0])) 258 { 259 ScrnInfoPtr scrn = xf86ScreenToScrn(screen); 260 intel_debug_fallback(scrn, 261 "Unsupported picture format " 262 "0x%x\n", 263 (int)picture->format); 264 return FALSE; 265 } 266 267 return TRUE; 268 } 269 270 return FALSE; 271} 272 273 274#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) 275 276/* Set up a default static partitioning of the URB, which is supposed to 277 * allow anything we would 
want to do, at potentially lower performance. 278 */ 279#define URB_CS_ENTRY_SIZE 0 280#define URB_CS_ENTRIES 0 281 282#define URB_VS_ENTRY_SIZE 1 // each 512-bit row 283#define URB_VS_ENTRIES 8 // we needs at least 8 entries 284 285#define URB_GS_ENTRY_SIZE 0 286#define URB_GS_ENTRIES 0 287 288#define URB_CLIP_ENTRY_SIZE 0 289#define URB_CLIP_ENTRIES 0 290 291#define URB_SF_ENTRY_SIZE 2 292#define URB_SF_ENTRIES 1 293 294/* 295 * this program computes dA/dx and dA/dy for the texture coordinates along 296 * with the base texture coordinate. It was extracted from the Mesa driver 297 */ 298 299#define SF_KERNEL_NUM_GRF 16 300#define SF_MAX_THREADS 2 301 302static const uint32_t sf_kernel_static[][4] = { 303#include "exa_sf.g4b" 304}; 305 306static const uint32_t sf_kernel_mask_static[][4] = { 307#include "exa_sf_mask.g4b" 308}; 309 310/* ps kernels */ 311#define PS_KERNEL_NUM_GRF 32 312#define PS_MAX_THREADS 48 313 314static const uint32_t ps_kernel_nomask_affine_static[][4] = { 315#include "exa_wm_xy.g4b" 316#include "exa_wm_src_affine.g4b" 317#include "exa_wm_src_sample_argb.g4b" 318#include "exa_wm_write.g4b" 319}; 320 321static const uint32_t ps_kernel_nomask_projective_static[][4] = { 322#include "exa_wm_xy.g4b" 323#include "exa_wm_src_projective.g4b" 324#include "exa_wm_src_sample_argb.g4b" 325#include "exa_wm_write.g4b" 326}; 327 328static const uint32_t ps_kernel_maskca_affine_static[][4] = { 329#include "exa_wm_xy.g4b" 330#include "exa_wm_src_affine.g4b" 331#include "exa_wm_src_sample_argb.g4b" 332#include "exa_wm_mask_affine.g4b" 333#include "exa_wm_mask_sample_argb.g4b" 334#include "exa_wm_ca.g4b" 335#include "exa_wm_write.g4b" 336}; 337 338static const uint32_t ps_kernel_maskca_projective_static[][4] = { 339#include "exa_wm_xy.g4b" 340#include "exa_wm_src_projective.g4b" 341#include "exa_wm_src_sample_argb.g4b" 342#include "exa_wm_mask_projective.g4b" 343#include "exa_wm_mask_sample_argb.g4b" 344#include "exa_wm_ca.g4b" 345#include "exa_wm_write.g4b" 
346}; 347 348static const uint32_t ps_kernel_maskca_srcalpha_affine_static[][4] = { 349#include "exa_wm_xy.g4b" 350#include "exa_wm_src_affine.g4b" 351#include "exa_wm_src_sample_a.g4b" 352#include "exa_wm_mask_affine.g4b" 353#include "exa_wm_mask_sample_argb.g4b" 354#include "exa_wm_ca_srcalpha.g4b" 355#include "exa_wm_write.g4b" 356}; 357 358static const uint32_t ps_kernel_maskca_srcalpha_projective_static[][4] = { 359#include "exa_wm_xy.g4b" 360#include "exa_wm_src_projective.g4b" 361#include "exa_wm_src_sample_a.g4b" 362#include "exa_wm_mask_projective.g4b" 363#include "exa_wm_mask_sample_argb.g4b" 364#include "exa_wm_ca_srcalpha.g4b" 365#include "exa_wm_write.g4b" 366}; 367 368static const uint32_t ps_kernel_masknoca_affine_static[][4] = { 369#include "exa_wm_xy.g4b" 370#include "exa_wm_src_affine.g4b" 371#include "exa_wm_src_sample_argb.g4b" 372#include "exa_wm_mask_affine.g4b" 373#include "exa_wm_mask_sample_a.g4b" 374#include "exa_wm_noca.g4b" 375#include "exa_wm_write.g4b" 376}; 377 378static const uint32_t ps_kernel_masknoca_projective_static[][4] = { 379#include "exa_wm_xy.g4b" 380#include "exa_wm_src_projective.g4b" 381#include "exa_wm_src_sample_argb.g4b" 382#include "exa_wm_mask_projective.g4b" 383#include "exa_wm_mask_sample_a.g4b" 384#include "exa_wm_noca.g4b" 385#include "exa_wm_write.g4b" 386}; 387 388/* new programs for Ironlake */ 389static const uint32_t sf_kernel_static_gen5[][4] = { 390#include "exa_sf.g4b.gen5" 391}; 392 393static const uint32_t sf_kernel_mask_static_gen5[][4] = { 394#include "exa_sf_mask.g4b.gen5" 395}; 396 397static const uint32_t ps_kernel_nomask_affine_static_gen5[][4] = { 398#include "exa_wm_xy.g4b.gen5" 399#include "exa_wm_src_affine.g4b.gen5" 400#include "exa_wm_src_sample_argb.g4b.gen5" 401#include "exa_wm_write.g4b.gen5" 402}; 403 404static const uint32_t ps_kernel_nomask_projective_static_gen5[][4] = { 405#include "exa_wm_xy.g4b.gen5" 406#include "exa_wm_src_projective.g4b.gen5" 407#include 
"exa_wm_src_sample_argb.g4b.gen5" 408#include "exa_wm_write.g4b.gen5" 409}; 410 411static const uint32_t ps_kernel_maskca_affine_static_gen5[][4] = { 412#include "exa_wm_xy.g4b.gen5" 413#include "exa_wm_src_affine.g4b.gen5" 414#include "exa_wm_src_sample_argb.g4b.gen5" 415#include "exa_wm_mask_affine.g4b.gen5" 416#include "exa_wm_mask_sample_argb.g4b.gen5" 417#include "exa_wm_ca.g4b.gen5" 418#include "exa_wm_write.g4b.gen5" 419}; 420 421static const uint32_t ps_kernel_maskca_projective_static_gen5[][4] = { 422#include "exa_wm_xy.g4b.gen5" 423#include "exa_wm_src_projective.g4b.gen5" 424#include "exa_wm_src_sample_argb.g4b.gen5" 425#include "exa_wm_mask_projective.g4b.gen5" 426#include "exa_wm_mask_sample_argb.g4b.gen5" 427#include "exa_wm_ca.g4b.gen5" 428#include "exa_wm_write.g4b.gen5" 429}; 430 431static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen5[][4] = { 432#include "exa_wm_xy.g4b.gen5" 433#include "exa_wm_src_affine.g4b.gen5" 434#include "exa_wm_src_sample_a.g4b.gen5" 435#include "exa_wm_mask_affine.g4b.gen5" 436#include "exa_wm_mask_sample_argb.g4b.gen5" 437#include "exa_wm_ca_srcalpha.g4b.gen5" 438#include "exa_wm_write.g4b.gen5" 439}; 440 441static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen5[][4] = { 442#include "exa_wm_xy.g4b.gen5" 443#include "exa_wm_src_projective.g4b.gen5" 444#include "exa_wm_src_sample_a.g4b.gen5" 445#include "exa_wm_mask_projective.g4b.gen5" 446#include "exa_wm_mask_sample_argb.g4b.gen5" 447#include "exa_wm_ca_srcalpha.g4b.gen5" 448#include "exa_wm_write.g4b.gen5" 449}; 450 451static const uint32_t ps_kernel_masknoca_affine_static_gen5[][4] = { 452#include "exa_wm_xy.g4b.gen5" 453#include "exa_wm_src_affine.g4b.gen5" 454#include "exa_wm_src_sample_argb.g4b.gen5" 455#include "exa_wm_mask_affine.g4b.gen5" 456#include "exa_wm_mask_sample_a.g4b.gen5" 457#include "exa_wm_noca.g4b.gen5" 458#include "exa_wm_write.g4b.gen5" 459}; 460 461static const uint32_t ps_kernel_masknoca_projective_static_gen5[][4] = 
{ 462#include "exa_wm_xy.g4b.gen5" 463#include "exa_wm_src_projective.g4b.gen5" 464#include "exa_wm_src_sample_argb.g4b.gen5" 465#include "exa_wm_mask_projective.g4b.gen5" 466#include "exa_wm_mask_sample_a.g4b.gen5" 467#include "exa_wm_noca.g4b.gen5" 468#include "exa_wm_write.g4b.gen5" 469}; 470 471/* programs for GEN6 */ 472static const uint32_t ps_kernel_nomask_affine_static_gen6[][4] = { 473#include "exa_wm_src_affine.g6b" 474#include "exa_wm_src_sample_argb.g6b" 475#include "exa_wm_write.g6b" 476}; 477 478static const uint32_t ps_kernel_nomask_projective_static_gen6[][4] = { 479#include "exa_wm_src_projective.g6b" 480#include "exa_wm_src_sample_argb.g6b" 481#include "exa_wm_write.g6b" 482}; 483 484static const uint32_t ps_kernel_maskca_affine_static_gen6[][4] = { 485#include "exa_wm_src_affine.g6b" 486#include "exa_wm_src_sample_argb.g6b" 487#include "exa_wm_mask_affine.g6b" 488#include "exa_wm_mask_sample_argb.g6b" 489#include "exa_wm_ca.g6b" 490#include "exa_wm_write.g6b" 491}; 492 493static const uint32_t ps_kernel_maskca_projective_static_gen6[][4] = { 494#include "exa_wm_src_projective.g6b" 495#include "exa_wm_src_sample_argb.g6b" 496#include "exa_wm_mask_projective.g6b" 497#include "exa_wm_mask_sample_argb.g6b" 498#include "exa_wm_ca.g4b.gen5" 499#include "exa_wm_write.g6b" 500}; 501 502static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen6[][4] = { 503#include "exa_wm_src_affine.g6b" 504#include "exa_wm_src_sample_a.g6b" 505#include "exa_wm_mask_affine.g6b" 506#include "exa_wm_mask_sample_argb.g6b" 507#include "exa_wm_ca_srcalpha.g6b" 508#include "exa_wm_write.g6b" 509}; 510 511static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen6[][4] = { 512#include "exa_wm_src_projective.g6b" 513#include "exa_wm_src_sample_a.g6b" 514#include "exa_wm_mask_projective.g6b" 515#include "exa_wm_mask_sample_argb.g6b" 516#include "exa_wm_ca_srcalpha.g6b" 517#include "exa_wm_write.g6b" 518}; 519 520static const uint32_t 
ps_kernel_masknoca_affine_static_gen6[][4] = { 521#include "exa_wm_src_affine.g6b" 522#include "exa_wm_src_sample_argb.g6b" 523#include "exa_wm_mask_affine.g6b" 524#include "exa_wm_mask_sample_a.g6b" 525#include "exa_wm_noca.g6b" 526#include "exa_wm_write.g6b" 527}; 528 529static const uint32_t ps_kernel_masknoca_projective_static_gen6[][4] = { 530#include "exa_wm_src_projective.g6b" 531#include "exa_wm_src_sample_argb.g6b" 532#include "exa_wm_mask_projective.g6b" 533#include "exa_wm_mask_sample_a.g6b" 534#include "exa_wm_noca.g6b" 535#include "exa_wm_write.g6b" 536}; 537 538/* programs for GEN7 */ 539static const uint32_t ps_kernel_nomask_affine_static_gen7[][4] = { 540#include "exa_wm_src_affine.g7b" 541#include "exa_wm_src_sample_argb.g7b" 542#include "exa_wm_write.g7b" 543}; 544 545static const uint32_t ps_kernel_nomask_projective_static_gen7[][4] = { 546#include "exa_wm_src_projective.g7b" 547#include "exa_wm_src_sample_argb.g7b" 548#include "exa_wm_write.g7b" 549}; 550 551static const uint32_t ps_kernel_maskca_affine_static_gen7[][4] = { 552#include "exa_wm_src_affine.g7b" 553#include "exa_wm_src_sample_argb.g7b" 554#include "exa_wm_mask_affine.g7b" 555#include "exa_wm_mask_sample_argb.g7b" 556#include "exa_wm_ca.g6b" 557#include "exa_wm_write.g7b" 558}; 559 560static const uint32_t ps_kernel_maskca_projective_static_gen7[][4] = { 561#include "exa_wm_src_projective.g7b" 562#include "exa_wm_src_sample_argb.g7b" 563#include "exa_wm_mask_projective.g7b" 564#include "exa_wm_mask_sample_argb.g7b" 565#include "exa_wm_ca.g4b.gen5" 566#include "exa_wm_write.g7b" 567}; 568 569static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen7[][4] = { 570#include "exa_wm_src_affine.g7b" 571#include "exa_wm_src_sample_a.g7b" 572#include "exa_wm_mask_affine.g7b" 573#include "exa_wm_mask_sample_argb.g7b" 574#include "exa_wm_ca_srcalpha.g6b" 575#include "exa_wm_write.g7b" 576}; 577 578static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen7[][4] = { 
#include "exa_wm_src_projective.g7b"
#include "exa_wm_src_sample_a.g7b"
#include "exa_wm_mask_projective.g7b"
#include "exa_wm_mask_sample_argb.g7b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_masknoca_affine_static_gen7[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_mask_affine.g7b"
#include "exa_wm_mask_sample_a.g7b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_masknoca_projective_static_gen7[][4] = {
#include "exa_wm_src_projective.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_mask_projective.g7b"
#include "exa_wm_mask_sample_a.g7b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g7b"
};


/* Sampler min/mag filter selector; the *_COUNT value sizes the
 * per-filter caches in gen4_render_state below. */
typedef enum {
	SS_INVALID_FILTER = -1,
	SS_FILTER_NEAREST,
	SS_FILTER_BILINEAR,
	FILTER_COUNT,
} sampler_state_filter_t;

/* Sampler coordinate wrap mode, mirroring the Render repeat modes. */
typedef enum {
	SS_INVALID_EXTEND = -1,
	SS_EXTEND_NONE,
	SS_EXTEND_REPEAT,
	SS_EXTEND_PAD,
	SS_EXTEND_REFLECT,
	EXTEND_COUNT,
} sampler_state_extend_t;

/* Which pixel-shader program variant to run: mask/no-mask, component
 * alpha handling, and affine vs projective coordinate interpolation. */
typedef enum {
	WM_KERNEL_NOMASK_AFFINE,
	WM_KERNEL_NOMASK_PROJECTIVE,
	WM_KERNEL_MASKCA_AFFINE,
	WM_KERNEL_MASKCA_PROJECTIVE,
	WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	WM_KERNEL_MASKNOCA_AFFINE,
	WM_KERNEL_MASKNOCA_PROJECTIVE,
	KERNEL_COUNT
} wm_kernel_t;

/* Designated-initializer helper so each table below is indexed by the
 * wm_kernel_t enum regardless of entry order. */
#define KERNEL(kernel_enum, kernel, masked) \
    [kernel_enum] = {&kernel, sizeof(kernel), masked}
struct wm_kernel_info {
	const void *data;	/* shader binary (one of the static arrays above) */
	unsigned int size;	/* binary size in bytes */
	Bool has_mask;		/* does this kernel sample a mask picture? */
};

static const struct wm_kernel_info wm_kernels_gen4[] = {
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
	       ps_kernel_nomask_affine_static, FALSE),
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	       ps_kernel_nomask_projective_static, FALSE),
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
	       ps_kernel_maskca_affine_static, TRUE),
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	       ps_kernel_maskca_projective_static, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	       ps_kernel_maskca_srcalpha_affine_static, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	       ps_kernel_maskca_srcalpha_projective_static, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	       ps_kernel_masknoca_affine_static, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	       ps_kernel_masknoca_projective_static, TRUE),
};

static const struct wm_kernel_info wm_kernels_gen5[] = {
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
	       ps_kernel_nomask_affine_static_gen5, FALSE),
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	       ps_kernel_nomask_projective_static_gen5, FALSE),
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
	       ps_kernel_maskca_affine_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	       ps_kernel_maskca_projective_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	       ps_kernel_maskca_srcalpha_affine_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	       ps_kernel_maskca_srcalpha_projective_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	       ps_kernel_masknoca_affine_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	       ps_kernel_masknoca_projective_static_gen5, TRUE),
};

static const struct wm_kernel_info wm_kernels_gen6[] = {
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
	       ps_kernel_nomask_affine_static_gen6, FALSE),
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	       ps_kernel_nomask_projective_static_gen6, FALSE),
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
	       ps_kernel_maskca_affine_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	       ps_kernel_maskca_projective_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	       ps_kernel_maskca_srcalpha_affine_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	       ps_kernel_maskca_srcalpha_projective_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	       ps_kernel_masknoca_affine_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	       ps_kernel_masknoca_projective_static_gen6, TRUE),
};

static const struct wm_kernel_info wm_kernels_gen7[] = {
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
	       ps_kernel_nomask_affine_static_gen7, FALSE),
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	       ps_kernel_nomask_projective_static_gen7, FALSE),
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
	       ps_kernel_maskca_affine_static_gen7, TRUE),
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	       ps_kernel_maskca_projective_static_gen7, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	       ps_kernel_maskca_srcalpha_affine_static_gen7, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	       ps_kernel_maskca_srcalpha_projective_static_gen7, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	       ps_kernel_masknoca_affine_static_gen7, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	       ps_kernel_masknoca_projective_static_gen7, TRUE),
};

#undef KERNEL

/* CC unit state padded to a 64-byte cacheline so an array of these can be
 * indexed directly by blend factors (see gen4_cc_unit_state). */
typedef struct _brw_cc_unit_state_padded {
	struct brw_cc_unit_state state;
	char pad[64 - sizeof(struct brw_cc_unit_state)];
} brw_cc_unit_state_padded;

#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
/* One slot size that can hold either the gen4-6 or the gen7 surface state. */
#define SURFACE_STATE_PADDED_SIZE ALIGN(MAX(sizeof(struct brw_surface_state), sizeof(struct gen7_surface_state)), 32)

struct gen4_cc_unit_state {
	/* Index by [src_blend][dst_blend] */
	brw_cc_unit_state_padded cc_state[BRW_BLENDFACTOR_COUNT][BRW_BLENDFACTOR_COUNT];
};

/* Parameters of the composite operation currently being emitted;
 * filled in at prepare time, consumed while emitting state/vertices. */
typedef struct gen4_composite_op {
	int op;
	sampler_state_filter_t src_filter;
	sampler_state_filter_t mask_filter;
	sampler_state_extend_t src_extend;
	sampler_state_extend_t mask_extend;
	Bool is_affine;
	wm_kernel_t wm_kernel;
	int vertex_id;
} gen4_composite_op;

/** Private data for gen4 render accel implementation.
*/ 747struct gen4_render_state { 748 drm_intel_bo *vs_state_bo; 749 drm_intel_bo *sf_state_bo; 750 drm_intel_bo *sf_mask_state_bo; 751 drm_intel_bo *cc_state_bo; 752 drm_intel_bo *wm_state_bo[KERNEL_COUNT] 753 [FILTER_COUNT] [EXTEND_COUNT] 754 [FILTER_COUNT] [EXTEND_COUNT]; 755 drm_intel_bo *wm_kernel_bo[KERNEL_COUNT]; 756 757 drm_intel_bo *cc_vp_bo; 758 drm_intel_bo *gen6_blend_bo; 759 drm_intel_bo *gen6_depth_stencil_bo; 760 drm_intel_bo *ps_sampler_state_bo[FILTER_COUNT] 761 [EXTEND_COUNT] 762 [FILTER_COUNT] 763 [EXTEND_COUNT]; 764 gen4_composite_op composite_op; 765}; 766 767static void gen6_emit_composite_state(struct intel_screen_private *intel); 768static void gen6_render_state_init(ScrnInfoPtr scrn); 769 770/** 771 * Sets up the SF state pointing at an SF kernel. 772 * 773 * The SF kernel does coord interp: for each attribute, 774 * calculate dA/dx and dA/dy. Hand these interpolation coefficients 775 * back to SF which then hands pixels off to WM. 776 */ 777static drm_intel_bo *gen4_create_sf_state(intel_screen_private *intel, 778 drm_intel_bo * kernel_bo) 779{ 780 struct brw_sf_unit_state *sf_state; 781 drm_intel_bo *sf_state_bo; 782 int ret; 783 784 sf_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 SF state", 785 sizeof(*sf_state), 4096); 786 assert(sf_state_bo); 787 788 ret = drm_intel_bo_map(sf_state_bo, TRUE); 789 assert(ret == 0); 790 791 sf_state = memset(sf_state_bo->virtual, 0, sizeof(*sf_state)); 792 sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF); 793 sf_state->thread0.kernel_start_pointer = 794 intel_emit_reloc(sf_state_bo, 795 offsetof(struct brw_sf_unit_state, thread0), 796 kernel_bo, sf_state->thread0.grf_reg_count << 1, 797 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; 798 sf_state->sf1.single_program_flow = 1; 799 sf_state->sf1.binding_table_entry_count = 0; 800 sf_state->sf1.thread_priority = 0; 801 sf_state->sf1.floating_point_mode = 0; /* Mesa does this */ 802 sf_state->sf1.illegal_op_exception_enable = 1; 803 
sf_state->sf1.mask_stack_exception_enable = 1; 804 sf_state->sf1.sw_exception_enable = 1; 805 sf_state->thread2.per_thread_scratch_space = 0; 806 /* scratch space is not used in our kernel */ 807 sf_state->thread2.scratch_space_base_pointer = 0; 808 sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */ 809 sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ 810 sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ 811 /* don't smash vertex header, read start from dw8 */ 812 sf_state->thread3.urb_entry_read_offset = 1; 813 sf_state->thread3.dispatch_grf_start_reg = 3; 814 sf_state->thread4.max_threads = SF_MAX_THREADS - 1; 815 sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; 816 sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES; 817 sf_state->sf5.viewport_transform = FALSE; /* skip viewport */ 818 sf_state->sf6.cull_mode = BRW_CULLMODE_NONE; 819 sf_state->sf6.scissor = 0; 820 sf_state->sf7.trifan_pv = 2; 821 sf_state->sf6.dest_org_vbias = 0x8; 822 sf_state->sf6.dest_org_hbias = 0x8; 823 824 drm_intel_bo_unmap(sf_state_bo); 825 826 return sf_state_bo; 827 (void)ret; 828} 829 830static drm_intel_bo *sampler_border_color_create(intel_screen_private *intel) 831{ 832 struct brw_sampler_legacy_border_color sampler_border_color; 833 834 /* Set up the sampler border color (always transparent black) */ 835 memset(&sampler_border_color, 0, sizeof(sampler_border_color)); 836 sampler_border_color.color[0] = 0; /* R */ 837 sampler_border_color.color[1] = 0; /* G */ 838 sampler_border_color.color[2] = 0; /* B */ 839 sampler_border_color.color[3] = 0; /* A */ 840 841 return intel_bo_alloc_for_data(intel, 842 &sampler_border_color, 843 sizeof(sampler_border_color), 844 "gen4 render sampler border color"); 845} 846 847static void 848gen4_sampler_state_init(drm_intel_bo * sampler_state_bo, 849 struct brw_sampler_state *sampler_state, 850 sampler_state_filter_t filter, 851 sampler_state_extend_t extend, 852 
			drm_intel_bo * border_color_bo)
{
	uint32_t sampler_state_offset;

	/* Offset of this sampler entry within the (mapped) state BO; used
	 * below as the relocation target for the border color pointer. */
	sampler_state_offset = (char *)sampler_state -
	    (char *)sampler_state_bo->virtual;

	/* PS kernel use this sampler */
	memset(sampler_state, 0, sizeof(*sampler_state));

	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY;

	/* Map Render filters onto hardware min/mag filters (same filter for
	 * both, as Render has no min/mag distinction). */
	switch (filter) {
	default:
	case SS_FILTER_NEAREST:
		sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
		break;
	case SS_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
		break;
	}

	/* Map Render repeat modes onto texture coordinate wrap modes;
	 * RepeatNone becomes clamp-to-border with the transparent-black
	 * border color set below. */
	switch (extend) {
	default:
	case SS_EXTEND_NONE:
		sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SS_EXTEND_REPEAT:
		sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
		sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
		sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
		break;
	case SS_EXTEND_PAD:
		sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
		break;
	case SS_EXTEND_REFLECT:
		sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
		break;
	}

	/* Border color pointer is relocated against border_color_bo; the
	 * hardware field is in 32-byte units, hence the >> 5. */
	sampler_state->ss2.border_color_pointer =
	    intel_emit_reloc(sampler_state_bo, sampler_state_offset +
			     offsetof(struct brw_sampler_state, ss2),
			     border_color_bo, 0,
			     I915_GEM_DOMAIN_SAMPLER, 0) >> 5;

	sampler_state->ss3.chroma_key_enable = 0;	/* disable chromakey */
}

/* gen7 variant of gen4_sampler_state_init: same mapping of Render
 * filter/repeat to sampler state, but the gen7 layout moves the wrap
 * modes to ss3 and renames the border color fields to "default color". */
static void
gen7_sampler_state_init(drm_intel_bo * sampler_state_bo,
			struct gen7_sampler_state *sampler_state,
			sampler_state_filter_t filter,
			sampler_state_extend_t extend,
			drm_intel_bo * border_color_bo)
{
	uint32_t sampler_state_offset;

	sampler_state_offset = (char *)sampler_state -
	    (char *)sampler_state_bo->virtual;

	/* PS kernel use this sampler */
	memset(sampler_state, 0, sizeof(*sampler_state));

	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.default_color_mode = BRW_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SS_FILTER_NEAREST:
		sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
		break;
	case SS_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SS_EXTEND_NONE:
		sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SS_EXTEND_REPEAT:
		sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
		sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
		sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
		break;
	case SS_EXTEND_PAD:
		sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
		sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
		sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
		break;
	case SS_EXTEND_REFLECT:
		sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 966 sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 967 break; 968 } 969 970 sampler_state->ss2.default_color_pointer = 971 intel_emit_reloc(sampler_state_bo, sampler_state_offset + 972 offsetof(struct gen7_sampler_state, ss2), 973 border_color_bo, 0, 974 I915_GEM_DOMAIN_SAMPLER, 0) >> 5; 975 976 sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ 977} 978 979 980 981static drm_intel_bo *gen4_create_sampler_state(intel_screen_private *intel, 982 sampler_state_filter_t src_filter, 983 sampler_state_extend_t src_extend, 984 sampler_state_filter_t mask_filter, 985 sampler_state_extend_t mask_extend, 986 drm_intel_bo * border_color_bo) 987{ 988 drm_intel_bo *sampler_state_bo; 989 struct brw_sampler_state *sampler_state; 990 int ret; 991 992 sampler_state_bo = 993 drm_intel_bo_alloc(intel->bufmgr, "gen4 sampler state", 994 sizeof(struct brw_sampler_state) * 2, 4096); 995 assert(sampler_state_bo); 996 997 ret = drm_intel_bo_map(sampler_state_bo, TRUE); 998 assert(ret == 0); 999 1000 sampler_state = sampler_state_bo->virtual; 1001 1002 gen4_sampler_state_init(sampler_state_bo, 1003 &sampler_state[0], 1004 src_filter, src_extend, border_color_bo); 1005 gen4_sampler_state_init(sampler_state_bo, 1006 &sampler_state[1], 1007 mask_filter, mask_extend, border_color_bo); 1008 1009 drm_intel_bo_unmap(sampler_state_bo); 1010 1011 return sampler_state_bo; 1012 (void)ret; 1013} 1014 1015static drm_intel_bo * 1016gen7_create_sampler_state(intel_screen_private *intel, 1017 sampler_state_filter_t src_filter, 1018 sampler_state_extend_t src_extend, 1019 sampler_state_filter_t mask_filter, 1020 sampler_state_extend_t mask_extend, 1021 drm_intel_bo * border_color_bo) 1022{ 1023 drm_intel_bo *sampler_state_bo; 1024 struct gen7_sampler_state *sampler_state; 1025 int ret; 1026 1027 sampler_state_bo = 1028 drm_intel_bo_alloc(intel->bufmgr, "gen7 sampler state", 1029 sizeof(struct gen7_sampler_state) * 2, 4096); 
1030 assert(sampler_state_bo); 1031 1032 ret = drm_intel_bo_map(sampler_state_bo, TRUE); 1033 assert(ret == 0); 1034 1035 sampler_state = sampler_state_bo->virtual; 1036 1037 gen7_sampler_state_init(sampler_state_bo, 1038 &sampler_state[0], 1039 src_filter, src_extend, border_color_bo); 1040 gen7_sampler_state_init(sampler_state_bo, 1041 &sampler_state[1], 1042 mask_filter, mask_extend, border_color_bo); 1043 1044 drm_intel_bo_unmap(sampler_state_bo); 1045 1046 return sampler_state_bo; 1047 (void)ret; 1048} 1049 1050static inline drm_intel_bo * 1051i965_create_sampler_state(intel_screen_private *intel, 1052 sampler_state_filter_t src_filter, 1053 sampler_state_extend_t src_extend, 1054 sampler_state_filter_t mask_filter, 1055 sampler_state_extend_t mask_extend, 1056 drm_intel_bo * border_color_bo) 1057{ 1058 if (INTEL_INFO(intel)->gen < 070) 1059 return gen4_create_sampler_state(intel, src_filter, src_extend, 1060 mask_filter, mask_extend, 1061 border_color_bo); 1062 return gen7_create_sampler_state(intel, src_filter, src_extend, 1063 mask_filter, mask_extend, 1064 border_color_bo); 1065} 1066 1067 1068static void 1069cc_state_init(drm_intel_bo * cc_state_bo, 1070 uint32_t cc_state_offset, 1071 int src_blend, int dst_blend, drm_intel_bo * cc_vp_bo) 1072{ 1073 struct brw_cc_unit_state *cc_state; 1074 1075 cc_state = (struct brw_cc_unit_state *)((char *)cc_state_bo->virtual + 1076 cc_state_offset); 1077 1078 memset(cc_state, 0, sizeof(*cc_state)); 1079 cc_state->cc0.stencil_enable = 0; /* disable stencil */ 1080 cc_state->cc2.depth_test = 0; /* disable depth test */ 1081 cc_state->cc2.logicop_enable = 0; /* disable logic op */ 1082 cc_state->cc3.ia_blend_enable = 0; /* blend alpha same as colors */ 1083 cc_state->cc3.blend_enable = 1; /* enable color blend */ 1084 cc_state->cc3.alpha_test = 0; /* disable alpha test */ 1085 1086 cc_state->cc4.cc_viewport_state_offset = 1087 intel_emit_reloc(cc_state_bo, cc_state_offset + 1088 offsetof(struct brw_cc_unit_state, cc4), 
1089 cc_vp_bo, 0, I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; 1090 1091 cc_state->cc5.dither_enable = 0; /* disable dither */ 1092 cc_state->cc5.logicop_func = 0xc; /* COPY */ 1093 cc_state->cc5.statistics_enable = 1; 1094 cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD; 1095 1096 /* Fill in alpha blend factors same as color, for the future. */ 1097 cc_state->cc5.ia_src_blend_factor = src_blend; 1098 cc_state->cc5.ia_dest_blend_factor = dst_blend; 1099 1100 cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD; 1101 cc_state->cc6.clamp_post_alpha_blend = 1; 1102 cc_state->cc6.clamp_pre_alpha_blend = 1; 1103 cc_state->cc6.clamp_range = 0; /* clamp range [0,1] */ 1104 1105 cc_state->cc6.src_blend_factor = src_blend; 1106 cc_state->cc6.dest_blend_factor = dst_blend; 1107} 1108 1109static drm_intel_bo *gen4_create_wm_state(intel_screen_private *intel, 1110 Bool has_mask, 1111 drm_intel_bo * kernel_bo, 1112 drm_intel_bo * sampler_bo) 1113{ 1114 struct brw_wm_unit_state *state; 1115 drm_intel_bo *wm_state_bo; 1116 int ret; 1117 1118 wm_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 WM state", 1119 sizeof(*state), 4096); 1120 assert(wm_state_bo); 1121 1122 ret = drm_intel_bo_map(wm_state_bo, TRUE); 1123 assert(ret == 0); 1124 1125 state = memset(wm_state_bo->virtual, 0, sizeof(*state)); 1126 state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); 1127 state->thread0.kernel_start_pointer = 1128 intel_emit_reloc(wm_state_bo, 1129 offsetof(struct brw_wm_unit_state, thread0), 1130 kernel_bo, state->thread0.grf_reg_count << 1, 1131 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; 1132 1133 state->thread1.single_program_flow = 0; 1134 1135 /* scratch space is not used in our kernel */ 1136 state->thread2.scratch_space_base_pointer = 0; 1137 state->thread2.per_thread_scratch_space = 0; 1138 1139 state->thread3.const_urb_entry_read_length = 0; 1140 state->thread3.const_urb_entry_read_offset = 0; 1141 1142 state->thread3.urb_entry_read_offset = 0; 1143 /* wm kernel use urb 
from 3, see wm_program in compiler module */ 1144 state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */ 1145 1146 if (IS_GEN5(intel)) 1147 state->wm4.sampler_count = 0; /* hardware requirement */ 1148 else 1149 state->wm4.sampler_count = 1; /* 1-4 samplers used */ 1150 1151 state->wm4.sampler_state_pointer = 1152 intel_emit_reloc(wm_state_bo, 1153 offsetof(struct brw_wm_unit_state, wm4), 1154 sampler_bo, 1155 state->wm4.sampler_count << 2, 1156 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; 1157 state->wm5.max_threads = PS_MAX_THREADS - 1; 1158 state->wm5.transposed_urb_read = 0; 1159 state->wm5.thread_dispatch_enable = 1; 1160 /* just use 16-pixel dispatch (4 subspans), don't need to change kernel 1161 * start point 1162 */ 1163 state->wm5.enable_16_pix = 1; 1164 state->wm5.enable_8_pix = 0; 1165 state->wm5.early_depth_test = 1; 1166 1167 /* Each pair of attributes (src/mask coords) is two URB entries */ 1168 if (has_mask) { 1169 state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */ 1170 state->thread3.urb_entry_read_length = 4; 1171 } else { 1172 state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ 1173 state->thread3.urb_entry_read_length = 2; 1174 } 1175 1176 /* binding table entry count is only used for prefetching, and it has to 1177 * be set 0 for Ironlake 1178 */ 1179 if (IS_GEN5(intel)) 1180 state->thread1.binding_table_entry_count = 0; 1181 1182 drm_intel_bo_unmap(wm_state_bo); 1183 1184 return wm_state_bo; 1185 (void)ret; 1186} 1187 1188static drm_intel_bo *gen4_create_cc_viewport(intel_screen_private *intel) 1189{ 1190 drm_intel_bo *bo; 1191 struct brw_cc_viewport vp; 1192 int ret; 1193 1194 vp.min_depth = -1.e35; 1195 vp.max_depth = 1.e35; 1196 1197 bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 render unit state", 1198 sizeof(vp), 4096); 1199 assert(bo); 1200 1201 ret = drm_intel_bo_subdata(bo, 0, sizeof(vp), &vp); 1202 assert(ret == 0); 1203 1204 return bo; 1205 (void)ret; 1206} 1207 1208static drm_intel_bo 
*gen4_create_vs_unit_state(intel_screen_private *intel) 1209{ 1210 struct brw_vs_unit_state vs_state; 1211 memset(&vs_state, 0, sizeof(vs_state)); 1212 1213 /* Set up the vertex shader to be disabled (passthrough) */ 1214 if (IS_GEN5(intel)) 1215 vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; /* hardware requirement */ 1216 else 1217 vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES; 1218 vs_state.thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; 1219 vs_state.vs6.vs_enable = 0; 1220 vs_state.vs6.vert_cache_disable = 1; 1221 1222 return intel_bo_alloc_for_data(intel, &vs_state, sizeof(vs_state), 1223 "gen4 render VS state"); 1224} 1225 1226/** 1227 * Set up all combinations of cc state: each blendfactor for source and 1228 * dest. 1229 */ 1230static drm_intel_bo *gen4_create_cc_unit_state(intel_screen_private *intel) 1231{ 1232 drm_intel_bo *cc_state_bo, *cc_vp_bo; 1233 int i, j, ret; 1234 1235 cc_vp_bo = gen4_create_cc_viewport(intel); 1236 1237 cc_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 CC state", 1238 sizeof(struct gen4_cc_unit_state), 1239 4096); 1240 assert(cc_state_bo); 1241 1242 ret = drm_intel_bo_map(cc_state_bo, TRUE); 1243 assert(ret == 0); 1244 1245 for (i = 0; i < BRW_BLENDFACTOR_COUNT; i++) { 1246 for (j = 0; j < BRW_BLENDFACTOR_COUNT; j++) { 1247 cc_state_init(cc_state_bo, 1248 offsetof(struct gen4_cc_unit_state, 1249 cc_state[i][j].state), 1250 i, j, cc_vp_bo); 1251 } 1252 } 1253 drm_intel_bo_unmap(cc_state_bo); 1254 1255 drm_intel_bo_unreference(cc_vp_bo); 1256 1257 return cc_state_bo; 1258 (void)ret; 1259} 1260 1261static uint32_t i965_get_card_format(PicturePtr picture) 1262{ 1263 unsigned i; 1264 1265 for (i = 0; i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]); 1266 i++) 1267 if (i965_tex_formats[i].fmt == picture->format) 1268 return i965_tex_formats[i].card_fmt; 1269 1270 assert(i != sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0])); 1271 1272 return 0; 1273} 1274 1275static sampler_state_filter_t 
sampler_state_filter_from_picture(int filter) 1276{ 1277 switch (filter) { 1278 case PictFilterNearest: 1279 return SS_FILTER_NEAREST; 1280 case PictFilterBilinear: 1281 return SS_FILTER_BILINEAR; 1282 default: 1283 return SS_INVALID_FILTER; 1284 } 1285} 1286 1287static sampler_state_extend_t sampler_state_extend_from_picture(int repeat_type) 1288{ 1289 switch (repeat_type) { 1290 case RepeatNone: 1291 return SS_EXTEND_NONE; 1292 case RepeatNormal: 1293 return SS_EXTEND_REPEAT; 1294 case RepeatPad: 1295 return SS_EXTEND_PAD; 1296 case RepeatReflect: 1297 return SS_EXTEND_REFLECT; 1298 default: 1299 return SS_INVALID_EXTEND; 1300 } 1301} 1302 1303/** 1304 * Sets up the common fields for a surface state buffer for the given 1305 * picture in the given surface state buffer. 1306 */ 1307static int 1308gen4_set_picture_surface_state(intel_screen_private *intel, 1309 PicturePtr picture, PixmapPtr pixmap, 1310 Bool is_dst) 1311{ 1312 struct intel_pixmap *priv = intel_get_pixmap_private(pixmap); 1313 struct brw_surface_state *ss; 1314 uint32_t write_domain, read_domains; 1315 int offset; 1316 1317 if (is_dst) { 1318 write_domain = I915_GEM_DOMAIN_RENDER; 1319 read_domains = I915_GEM_DOMAIN_RENDER; 1320 } else { 1321 write_domain = 0; 1322 read_domains = I915_GEM_DOMAIN_SAMPLER; 1323 } 1324 intel_batch_mark_pixmap_domains(intel, priv, 1325 read_domains, write_domain); 1326 ss = (struct brw_surface_state *) 1327 (intel->surface_data + intel->surface_used); 1328 1329 memset(ss, 0, sizeof(*ss)); 1330 ss->ss0.surface_type = BRW_SURFACE_2D; 1331 if (is_dst) 1332 ss->ss0.surface_format = i965_get_dest_format(picture); 1333 else 1334 ss->ss0.surface_format = i965_get_card_format(picture); 1335 1336 ss->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; 1337 ss->ss0.color_blend = 1; 1338 ss->ss1.base_addr = priv->bo->offset; 1339 1340 ss->ss2.height = pixmap->drawable.height - 1; 1341 ss->ss2.width = pixmap->drawable.width - 1; 1342 ss->ss3.pitch = intel_pixmap_pitch(pixmap) 
- 1; 1343 ss->ss3.tile_walk = 0; /* Tiled X */ 1344 ss->ss3.tiled_surface = intel_pixmap_tiled(pixmap) ? 1 : 0; 1345 1346 dri_bo_emit_reloc(intel->surface_bo, 1347 read_domains, write_domain, 1348 0, 1349 intel->surface_used + 1350 offsetof(struct brw_surface_state, ss1), 1351 priv->bo); 1352 1353 offset = intel->surface_used; 1354 intel->surface_used += SURFACE_STATE_PADDED_SIZE; 1355 1356 return offset; 1357} 1358 1359static int 1360gen7_set_picture_surface_state(intel_screen_private *intel, 1361 PicturePtr picture, PixmapPtr pixmap, 1362 Bool is_dst) 1363{ 1364 struct intel_pixmap *priv = intel_get_pixmap_private(pixmap); 1365 struct gen7_surface_state *ss; 1366 uint32_t write_domain, read_domains; 1367 int offset; 1368 1369 if (is_dst) { 1370 write_domain = I915_GEM_DOMAIN_RENDER; 1371 read_domains = I915_GEM_DOMAIN_RENDER; 1372 } else { 1373 write_domain = 0; 1374 read_domains = I915_GEM_DOMAIN_SAMPLER; 1375 } 1376 intel_batch_mark_pixmap_domains(intel, priv, 1377 read_domains, write_domain); 1378 ss = (struct gen7_surface_state *) 1379 (intel->surface_data + intel->surface_used); 1380 1381 memset(ss, 0, sizeof(*ss)); 1382 ss->ss0.surface_type = BRW_SURFACE_2D; 1383 if (is_dst) 1384 ss->ss0.surface_format = i965_get_dest_format(picture); 1385 else 1386 ss->ss0.surface_format = i965_get_card_format(picture); 1387 1388 ss->ss0.tile_walk = 0; /* Tiled X */ 1389 ss->ss0.tiled_surface = intel_pixmap_tiled(pixmap) ? 
1 : 0; 1390 ss->ss1.base_addr = priv->bo->offset; 1391 1392 ss->ss2.height = pixmap->drawable.height - 1; 1393 ss->ss2.width = pixmap->drawable.width - 1; 1394 ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1; 1395 1396 if (IS_HSW(intel)) { 1397 ss->ss7.shader_chanel_select_r = HSW_SCS_RED; 1398 ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN; 1399 ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE; 1400 ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA; 1401 } 1402 1403 dri_bo_emit_reloc(intel->surface_bo, 1404 read_domains, write_domain, 1405 0, 1406 intel->surface_used + 1407 offsetof(struct gen7_surface_state, ss1), 1408 priv->bo); 1409 1410 offset = intel->surface_used; 1411 intel->surface_used += SURFACE_STATE_PADDED_SIZE; 1412 1413 return offset; 1414} 1415 1416static inline int 1417i965_set_picture_surface_state(intel_screen_private *intel, 1418 PicturePtr picture, PixmapPtr pixmap, 1419 Bool is_dst) 1420{ 1421 if (INTEL_INFO(intel)->gen < 070) 1422 return gen4_set_picture_surface_state(intel, picture, pixmap, is_dst); 1423 return gen7_set_picture_surface_state(intel, picture, pixmap, is_dst); 1424} 1425 1426static void gen4_composite_vertex_elements(struct intel_screen_private *intel) 1427{ 1428 struct gen4_render_state *render_state = intel->gen4_render_state; 1429 gen4_composite_op *composite_op = &render_state->composite_op; 1430 Bool has_mask = intel->render_mask != NULL; 1431 Bool is_affine = composite_op->is_affine; 1432 /* 1433 * number of extra parameters per vertex 1434 */ 1435 int nelem = has_mask ? 2 : 1; 1436 /* 1437 * size of extra parameters: 1438 * 3 for homogenous (xyzw) 1439 * 2 for cartesian (xy) 1440 */ 1441 int selem = is_affine ? 
2 : 3; 1442 uint32_t w_component; 1443 uint32_t src_format; 1444 int id; 1445 1446 id = has_mask << 1 | is_affine; 1447 1448 if (composite_op->vertex_id == id) 1449 return; 1450 1451 composite_op->vertex_id = id; 1452 1453 if (is_affine) { 1454 src_format = BRW_SURFACEFORMAT_R32G32_FLOAT; 1455 w_component = BRW_VFCOMPONENT_STORE_1_FLT; 1456 } else { 1457 src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; 1458 w_component = BRW_VFCOMPONENT_STORE_SRC; 1459 } 1460 1461 if (IS_GEN5(intel)) { 1462 /* 1463 * The reason to add this extra vertex element in the header is that 1464 * Ironlake has different vertex header definition and origin method to 1465 * set destination element offset doesn't exist anymore, which means 1466 * hardware requires a predefined vertex element layout. 1467 * 1468 * haihao proposed this approach to fill the first vertex element, so 1469 * origin layout for Gen4 doesn't need to change, and origin shader 1470 * programs behavior is also kept. 1471 * 1472 * I think this is not bad. - zhenyu 1473 */ 1474 1475 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 1476 ((2 * (2 + nelem)) - 1)); 1477 OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | 1478 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1479 (0 << VE0_OFFSET_SHIFT)); 1480 1481 OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | 1482 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | 1483 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | 1484 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); 1485 } else { 1486 /* Set up our vertex elements, sourced from the single vertex buffer. 1487 * that will be set up later. 
1488 */ 1489 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 1490 ((2 * (1 + nelem)) - 1)); 1491 } 1492 1493 /* x,y */ 1494 OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | 1495 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1496 (0 << VE0_OFFSET_SHIFT)); 1497 1498 if (IS_GEN5(intel)) 1499 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1500 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1501 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 1502 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 1503 else 1504 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1505 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1506 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 1507 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | 1508 (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); 1509 /* u0, v0, w0 */ 1510 OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | 1511 (src_format << VE0_FORMAT_SHIFT) | 1512 ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */ 1513 1514 if (IS_GEN5(intel)) 1515 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1516 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1517 (w_component << VE1_VFCOMPONENT_2_SHIFT) | 1518 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 1519 else 1520 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1521 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1522 (w_component << VE1_VFCOMPONENT_2_SHIFT) | 1523 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | 1524 ((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ 1525 /* u1, v1, w1 */ 1526 if (has_mask) { 1527 OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | 1528 (src_format << VE0_FORMAT_SHIFT) | 1529 (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */ 1530 1531 if (IS_GEN5(intel)) 1532 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << 
							  VE1_VFCOMPONENT_0_SHIFT) |
				  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
				  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
				  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
		else
			OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
				  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
				  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
				  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
				  ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
	}
}

/*
 * Emit the full Gen4/Gen5 3D pipeline setup for the pending composite op:
 * one-time invariant state, state base addresses, binding table and
 * pipelined state pointers, drawing rectangle, URB fences and the vertex
 * element layout.  The OUT_BATCH sequence and dword counts are a hardware
 * contract — do not reorder.
 */
static void i965_emit_composite_state(struct intel_screen_private *intel)
{
	struct gen4_render_state *render_state = intel->gen4_render_state;
	gen4_composite_op *composite_op = &render_state->composite_op;
	int op = composite_op->op;
	PicturePtr mask_picture = intel->render_mask_picture;
	PicturePtr dest_picture = intel->render_dest_picture;
	PixmapPtr mask = intel->render_mask;
	PixmapPtr dest = intel->render_dest;
	sampler_state_filter_t src_filter = composite_op->src_filter;
	sampler_state_filter_t mask_filter = composite_op->mask_filter;
	sampler_state_extend_t src_extend = composite_op->src_extend;
	sampler_state_extend_t mask_extend = composite_op->mask_extend;
	uint32_t src_blend, dst_blend;

	intel->needs_render_state_emit = FALSE;

	/* Begin the long sequence of commands needed to set up the 3D
	 * rendering pipe
	 */

	if (intel->needs_3d_invariant) {
		if (IS_GEN5(intel)) {
			/* Ironlake errata workaround: Before disabling the clipper,
			 * you have to MI_FLUSH to get the pipeline idle.
			 */
			OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
		}

		/* Match Mesa driver setup */
		if (INTEL_INFO(intel)->gen >= 045)
			OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
		else
			OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);

		/* Set system instruction pointer */
		OUT_BATCH(BRW_STATE_SIP | 0);
		OUT_BATCH(0);

		intel->needs_3d_invariant = FALSE;
	}

	if (intel->surface_reloc == 0) {
		/* Zero out the two base address registers so all offsets are
		 * absolute.
		 */
		if (IS_GEN5(intel)) {
			/* Gen5 takes one extra (instruction max) dword. */
			OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6);
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* Generate state base address */
			/* Surface base address is patched in i965_surface_flush(). */
			intel->surface_reloc = intel->batch_used;
			intel_batch_emit_dword(intel,
					       intel->surface_bo->offset | BASE_ADDRESS_MODIFY);
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* media base addr, don't care */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* Instruction base address */
			/* general state max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
			/* media object state max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
			/* Instruction max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
		} else {
			OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4);
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* Generate state base address */
			/* Surface base address is patched in i965_surface_flush(). */
			intel->surface_reloc = intel->batch_used;
			intel_batch_emit_dword(intel,
					       intel->surface_bo->offset | BASE_ADDRESS_MODIFY);
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* media base addr, don't care */
			/* general state max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
			/* media object state max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
		}
	}

	i965_get_blend_cntl(op, mask_picture, dest_picture->format,
			    &src_blend, &dst_blend);

	/* Binding table pointers */
	OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
	OUT_BATCH(0);	/* vs */
	OUT_BATCH(0);	/* gs */
	OUT_BATCH(0);	/* clip */
	OUT_BATCH(0);	/* sf */
	/* Only the PS uses the binding table */
	OUT_BATCH(intel->surface_table);

	/* The drawing rectangle clipping is always on.  Set it to values that
	 * shouldn't do any clipping.
	 */
	OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2);
	OUT_BATCH(0x00000000);	/* ymin, xmin */
	OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) |
		  DRAW_XMAX(dest->drawable.width - 1));	/* ymax, xmax */
	OUT_BATCH(0x00000000);	/* yorigin, xorigin */

	/* skip the depth buffer */
	/* skip the polygon stipple */
	/* skip the polygon stipple offset */
	/* skip the line stipple */

	/* Set the pointers to the 3d pipeline state */
	OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_RELOC(render_state->vs_state_bo,
		  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
	OUT_BATCH(BRW_GS_DISABLE);	/* disable GS, resulting in passthrough */
	OUT_BATCH(BRW_CLIP_DISABLE);	/* disable CLIP, resulting in passthrough */
	if (mask) {
		OUT_RELOC(render_state->sf_mask_state_bo,
			  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
	} else {
		OUT_RELOC(render_state->sf_state_bo,
			  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
	}

	/* WM state is pre-built per (kernel, filter, extend) combination. */
	OUT_RELOC(render_state->wm_state_bo[composite_op->wm_kernel]
		  [src_filter][src_extend]
		  [mask_filter][mask_extend],
		  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

	/* CC state is indexed by the blend factor pair for this op. */
	OUT_RELOC(render_state->cc_state_bo,
		  I915_GEM_DOMAIN_INSTRUCTION, 0,
		  offsetof(struct gen4_cc_unit_state,
			   cc_state[src_blend][dst_blend]));

	{
		int urb_vs_start, urb_vs_size;
		int urb_gs_start, urb_gs_size;
		int urb_clip_start, urb_clip_size;
		int urb_sf_start, urb_sf_size;
		int urb_cs_start, urb_cs_size;

		/* Partition the URB linearly: VS | GS | CLIP | SF | CS. */
		urb_vs_start = 0;
		urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
		urb_gs_start = urb_vs_start + urb_vs_size;
		urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
		urb_clip_start = urb_gs_start + urb_gs_size;
		urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
		urb_sf_start = urb_clip_start + urb_clip_size;
		urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
		urb_cs_start = urb_sf_start + urb_sf_size;
		urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

		/* Erratum (Vol 1a, p32):
		 *   URB_FENCE must not cross a cache-line (64 bytes).
		 */
		if ((intel->batch_used & 15) > (16 - 3)) {
			int cnt = 16 - (intel->batch_used & 15);
			while (cnt--)
				OUT_BATCH(MI_NOOP);
		}

		OUT_BATCH(BRW_URB_FENCE |
			  UF0_CS_REALLOC |
			  UF0_SF_REALLOC |
			  UF0_CLIP_REALLOC |
			  UF0_GS_REALLOC |
			  UF0_VS_REALLOC |
			  1);
		OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
			  ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
			  ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
		OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
			  ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));

		/* Constant buffer state */
		OUT_BATCH(BRW_CS_URB_STATE | 0);
		OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) |
			  (URB_CS_ENTRIES << 0));
	}

	gen4_composite_vertex_elements(intel);
}

/**
 * Returns whether the current set of composite state plus vertex buffer is
 * expected to fit in the aperture.
1721 */ 1722static Bool i965_composite_check_aperture(intel_screen_private *intel) 1723{ 1724 struct gen4_render_state *render_state = intel->gen4_render_state; 1725 gen4_composite_op *composite_op = &render_state->composite_op; 1726 drm_intel_bo *bo_table[] = { 1727 intel->batch_bo, 1728 intel->vertex_bo, 1729 intel->surface_bo, 1730 render_state->vs_state_bo, 1731 render_state->sf_state_bo, 1732 render_state->sf_mask_state_bo, 1733 render_state->wm_state_bo[composite_op->wm_kernel] 1734 [composite_op->src_filter] 1735 [composite_op->src_extend] 1736 [composite_op->mask_filter] 1737 [composite_op->mask_extend], 1738 render_state->cc_state_bo, 1739 }; 1740 drm_intel_bo *gen6_bo_table[] = { 1741 intel->batch_bo, 1742 intel->vertex_bo, 1743 intel->surface_bo, 1744 render_state->wm_kernel_bo[composite_op->wm_kernel], 1745 render_state->ps_sampler_state_bo[composite_op->src_filter] 1746 [composite_op->src_extend] 1747 [composite_op->mask_filter] 1748 [composite_op->mask_extend], 1749 render_state->cc_vp_bo, 1750 render_state->cc_state_bo, 1751 render_state->gen6_blend_bo, 1752 render_state->gen6_depth_stencil_bo, 1753 }; 1754 1755 if (INTEL_INFO(intel)->gen >= 060) 1756 return drm_intel_bufmgr_check_aperture_space(gen6_bo_table, 1757 ARRAY_SIZE(gen6_bo_table)) == 0; 1758 else 1759 return drm_intel_bufmgr_check_aperture_space(bo_table, 1760 ARRAY_SIZE(bo_table)) == 0; 1761} 1762 1763static void i965_surface_flush(struct intel_screen_private *intel) 1764{ 1765 int ret; 1766 1767 ret = drm_intel_bo_subdata(intel->surface_bo, 1768 0, intel->surface_used, 1769 intel->surface_data); 1770 assert(ret == 0); 1771 intel->surface_used = 0; 1772 1773 assert (intel->surface_reloc != 0); 1774 drm_intel_bo_emit_reloc(intel->batch_bo, 1775 intel->surface_reloc * 4, 1776 intel->surface_bo, BASE_ADDRESS_MODIFY, 1777 I915_GEM_DOMAIN_INSTRUCTION, 0); 1778 intel->surface_reloc = 0; 1779 1780 drm_intel_bo_unreference(intel->surface_bo); 1781 intel->surface_bo = 1782 
drm_intel_bo_alloc(intel->bufmgr, "surface data", 1783 sizeof(intel->surface_data), 4096); 1784 assert(intel->surface_bo); 1785 1786 return; 1787 (void)ret; 1788} 1789 1790static void 1791i965_emit_composite_primitive_identity_source(intel_screen_private *intel, 1792 int srcX, int srcY, 1793 int maskX, int maskY, 1794 int dstX, int dstY, 1795 int w, int h) 1796{ 1797 OUT_VERTEX(dstX + w); 1798 OUT_VERTEX(dstY + h); 1799 OUT_VERTEX((srcX + w) * intel->scale_units[0][0]); 1800 OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); 1801 1802 OUT_VERTEX(dstX); 1803 OUT_VERTEX(dstY + h); 1804 OUT_VERTEX(srcX * intel->scale_units[0][0]); 1805 OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); 1806 1807 OUT_VERTEX(dstX); 1808 OUT_VERTEX(dstY); 1809 OUT_VERTEX(srcX * intel->scale_units[0][0]); 1810 OUT_VERTEX(srcY * intel->scale_units[0][1]); 1811} 1812 1813static void 1814i965_emit_composite_primitive_affine_source(intel_screen_private *intel, 1815 int srcX, int srcY, 1816 int maskX, int maskY, 1817 int dstX, int dstY, 1818 int w, int h) 1819{ 1820 float src_x[3], src_y[3]; 1821 1822 if (!intel_get_transformed_coordinates(srcX, srcY, 1823 intel->transform[0], 1824 &src_x[0], 1825 &src_y[0])) 1826 return; 1827 1828 if (!intel_get_transformed_coordinates(srcX, srcY + h, 1829 intel->transform[0], 1830 &src_x[1], 1831 &src_y[1])) 1832 return; 1833 1834 if (!intel_get_transformed_coordinates(srcX + w, srcY + h, 1835 intel->transform[0], 1836 &src_x[2], 1837 &src_y[2])) 1838 return; 1839 1840 OUT_VERTEX(dstX + w); 1841 OUT_VERTEX(dstY + h); 1842 OUT_VERTEX(src_x[2] * intel->scale_units[0][0]); 1843 OUT_VERTEX(src_y[2] * intel->scale_units[0][1]); 1844 1845 OUT_VERTEX(dstX); 1846 OUT_VERTEX(dstY + h); 1847 OUT_VERTEX(src_x[1] * intel->scale_units[0][0]); 1848 OUT_VERTEX(src_y[1] * intel->scale_units[0][1]); 1849 1850 OUT_VERTEX(dstX); 1851 OUT_VERTEX(dstY); 1852 OUT_VERTEX(src_x[0] * intel->scale_units[0][0]); 1853 OUT_VERTEX(src_y[0] * intel->scale_units[0][1]); 1854} 1855 
1856static void 1857i965_emit_composite_primitive_identity_source_mask(intel_screen_private *intel, 1858 int srcX, int srcY, 1859 int maskX, int maskY, 1860 int dstX, int dstY, 1861 int w, int h) 1862{ 1863 OUT_VERTEX(dstX + w); 1864 OUT_VERTEX(dstY + h); 1865 OUT_VERTEX((srcX + w) * intel->scale_units[0][0]); 1866 OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); 1867 OUT_VERTEX((maskX + w) * intel->scale_units[1][0]); 1868 OUT_VERTEX((maskY + h) * intel->scale_units[1][1]); 1869 1870 OUT_VERTEX(dstX); 1871 OUT_VERTEX(dstY + h); 1872 OUT_VERTEX(srcX * intel->scale_units[0][0]); 1873 OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); 1874 OUT_VERTEX(maskX * intel->scale_units[1][0]); 1875 OUT_VERTEX((maskY + h) * intel->scale_units[1][1]); 1876 1877 OUT_VERTEX(dstX); 1878 OUT_VERTEX(dstY); 1879 OUT_VERTEX(srcX * intel->scale_units[0][0]); 1880 OUT_VERTEX(srcY * intel->scale_units[0][1]); 1881 OUT_VERTEX(maskX * intel->scale_units[1][0]); 1882 OUT_VERTEX(maskY * intel->scale_units[1][1]); 1883} 1884 1885static void 1886i965_emit_composite_primitive(intel_screen_private *intel, 1887 int srcX, int srcY, 1888 int maskX, int maskY, 1889 int dstX, int dstY, 1890 int w, int h) 1891{ 1892 float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; 1893 Bool is_affine = intel->gen4_render_state->composite_op.is_affine; 1894 1895 if (is_affine) { 1896 if (!intel_get_transformed_coordinates(srcX, srcY, 1897 intel->transform[0], 1898 &src_x[0], 1899 &src_y[0])) 1900 return; 1901 1902 if (!intel_get_transformed_coordinates(srcX, srcY + h, 1903 intel->transform[0], 1904 &src_x[1], 1905 &src_y[1])) 1906 return; 1907 1908 if (!intel_get_transformed_coordinates(srcX + w, srcY + h, 1909 intel->transform[0], 1910 &src_x[2], 1911 &src_y[2])) 1912 return; 1913 } else { 1914 if (!intel_get_transformed_coordinates_3d(srcX, srcY, 1915 intel->transform[0], 1916 &src_x[0], 1917 &src_y[0], 1918 &src_w[0])) 1919 return; 1920 1921 if (!intel_get_transformed_coordinates_3d(srcX, srcY 
+ h, 1922 intel->transform[0], 1923 &src_x[1], 1924 &src_y[1], 1925 &src_w[1])) 1926 return; 1927 1928 if (!intel_get_transformed_coordinates_3d(srcX + w, srcY + h, 1929 intel->transform[0], 1930 &src_x[2], 1931 &src_y[2], 1932 &src_w[2])) 1933 return; 1934 } 1935 1936 if (intel->render_mask) { 1937 if (is_affine) { 1938 if (!intel_get_transformed_coordinates(maskX, maskY, 1939 intel->transform[1], 1940 &mask_x[0], 1941 &mask_y[0])) 1942 return; 1943 1944 if (!intel_get_transformed_coordinates(maskX, maskY + h, 1945 intel->transform[1], 1946 &mask_x[1], 1947 &mask_y[1])) 1948 return; 1949 1950 if (!intel_get_transformed_coordinates(maskX + w, maskY + h, 1951 intel->transform[1], 1952 &mask_x[2], 1953 &mask_y[2])) 1954 return; 1955 } else { 1956 if (!intel_get_transformed_coordinates_3d(maskX, maskY, 1957 intel->transform[1], 1958 &mask_x[0], 1959 &mask_y[0], 1960 &mask_w[0])) 1961 return; 1962 1963 if (!intel_get_transformed_coordinates_3d(maskX, maskY + h, 1964 intel->transform[1], 1965 &mask_x[1], 1966 &mask_y[1], 1967 &mask_w[1])) 1968 return; 1969 1970 if (!intel_get_transformed_coordinates_3d(maskX + w, maskY + h, 1971 intel->transform[1], 1972 &mask_x[2], 1973 &mask_y[2], 1974 &mask_w[2])) 1975 return; 1976 } 1977 } 1978 1979 OUT_VERTEX(dstX + w); 1980 OUT_VERTEX(dstY + h); 1981 OUT_VERTEX(src_x[2] * intel->scale_units[0][0]); 1982 OUT_VERTEX(src_y[2] * intel->scale_units[0][1]); 1983 if (!is_affine) 1984 OUT_VERTEX(src_w[2]); 1985 if (intel->render_mask) { 1986 OUT_VERTEX(mask_x[2] * intel->scale_units[1][0]); 1987 OUT_VERTEX(mask_y[2] * intel->scale_units[1][1]); 1988 if (!is_affine) 1989 OUT_VERTEX(mask_w[2]); 1990 } 1991 1992 OUT_VERTEX(dstX); 1993 OUT_VERTEX(dstY + h); 1994 OUT_VERTEX(src_x[1] * intel->scale_units[0][0]); 1995 OUT_VERTEX(src_y[1] * intel->scale_units[0][1]); 1996 if (!is_affine) 1997 OUT_VERTEX(src_w[1]); 1998 if (intel->render_mask) { 1999 OUT_VERTEX(mask_x[1] * intel->scale_units[1][0]); 2000 OUT_VERTEX(mask_y[1] * 
intel->scale_units[1][1]); 2001 if (!is_affine) 2002 OUT_VERTEX(mask_w[1]); 2003 } 2004 2005 OUT_VERTEX(dstX); 2006 OUT_VERTEX(dstY); 2007 OUT_VERTEX(src_x[0] * intel->scale_units[0][0]); 2008 OUT_VERTEX(src_y[0] * intel->scale_units[0][1]); 2009 if (!is_affine) 2010 OUT_VERTEX(src_w[0]); 2011 if (intel->render_mask) { 2012 OUT_VERTEX(mask_x[0] * intel->scale_units[1][0]); 2013 OUT_VERTEX(mask_y[0] * intel->scale_units[1][1]); 2014 if (!is_affine) 2015 OUT_VERTEX(mask_w[0]); 2016 } 2017} 2018 2019Bool 2020i965_prepare_composite(int op, PicturePtr source_picture, 2021 PicturePtr mask_picture, PicturePtr dest_picture, 2022 PixmapPtr source, PixmapPtr mask, PixmapPtr dest) 2023{ 2024 ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen); 2025 intel_screen_private *intel = intel_get_screen_private(scrn); 2026 struct gen4_render_state *render_state = intel->gen4_render_state; 2027 gen4_composite_op *composite_op = &render_state->composite_op; 2028 2029 composite_op->src_filter = 2030 sampler_state_filter_from_picture(source_picture->filter); 2031 if (composite_op->src_filter == SS_INVALID_FILTER) { 2032 intel_debug_fallback(scrn, "Bad src filter 0x%x\n", 2033 source_picture->filter); 2034 return FALSE; 2035 } 2036 composite_op->src_extend = 2037 sampler_state_extend_from_picture(source_picture->repeatType); 2038 if (composite_op->src_extend == SS_INVALID_EXTEND) { 2039 intel_debug_fallback(scrn, "Bad src repeat 0x%x\n", 2040 source_picture->repeatType); 2041 return FALSE; 2042 } 2043 2044 if (mask_picture) { 2045 if (mask_picture->componentAlpha && 2046 PICT_FORMAT_RGB(mask_picture->format)) { 2047 /* Check if it's component alpha that relies on a source alpha and on 2048 * the source value. We can only get one of those into the single 2049 * source value that we get to blend with. 
2050 */ 2051 if (i965_blend_op[op].src_alpha && 2052 (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) { 2053 intel_debug_fallback(scrn, 2054 "Component alpha not supported " 2055 "with source alpha and source " 2056 "value blending.\n"); 2057 return FALSE; 2058 } 2059 } 2060 2061 composite_op->mask_filter = 2062 sampler_state_filter_from_picture(mask_picture->filter); 2063 if (composite_op->mask_filter == SS_INVALID_FILTER) { 2064 intel_debug_fallback(scrn, "Bad mask filter 0x%x\n", 2065 mask_picture->filter); 2066 return FALSE; 2067 } 2068 composite_op->mask_extend = 2069 sampler_state_extend_from_picture(mask_picture->repeatType); 2070 if (composite_op->mask_extend == SS_INVALID_EXTEND) { 2071 intel_debug_fallback(scrn, "Bad mask repeat 0x%x\n", 2072 mask_picture->repeatType); 2073 return FALSE; 2074 } 2075 } else { 2076 composite_op->mask_filter = SS_FILTER_NEAREST; 2077 composite_op->mask_extend = SS_EXTEND_NONE; 2078 } 2079 2080 /* Flush any pending writes prior to relocating the textures. */ 2081 if (intel_pixmap_is_dirty(source) || intel_pixmap_is_dirty(mask)) 2082 intel_batch_emit_flush(scrn); 2083 2084 composite_op->op = op; 2085 intel->render_source_picture = source_picture; 2086 intel->render_mask_picture = mask_picture; 2087 intel->render_dest_picture = dest_picture; 2088 intel->render_source = source; 2089 intel->render_mask = mask; 2090 intel->render_dest = dest; 2091 2092 intel->scale_units[0][0] = 1. / source->drawable.width; 2093 intel->scale_units[0][1] = 1. / source->drawable.height; 2094 2095 intel->transform[0] = source_picture->transform; 2096 composite_op->is_affine = intel_transform_is_affine(intel->transform[0]); 2097 2098 if (mask_picture == NULL) { 2099 intel->transform[1] = NULL; 2100 intel->scale_units[1][0] = -1; 2101 intel->scale_units[1][1] = -1; 2102 } else { 2103 assert(mask != NULL); 2104 intel->transform[1] = mask_picture->transform; 2105 intel->scale_units[1][0] = 1. 
/ mask->drawable.width; 2106 intel->scale_units[1][1] = 1. / mask->drawable.height; 2107 composite_op->is_affine &= 2108 intel_transform_is_affine(intel->transform[1]); 2109 } 2110 2111 if (mask) { 2112 assert(mask_picture != NULL); 2113 if (mask_picture->componentAlpha && 2114 PICT_FORMAT_RGB(mask_picture->format)) { 2115 if (i965_blend_op[op].src_alpha) { 2116 if (composite_op->is_affine) 2117 composite_op->wm_kernel = 2118 WM_KERNEL_MASKCA_SRCALPHA_AFFINE; 2119 else 2120 composite_op->wm_kernel = 2121 WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE; 2122 } else { 2123 if (composite_op->is_affine) 2124 composite_op->wm_kernel = 2125 WM_KERNEL_MASKCA_AFFINE; 2126 else 2127 composite_op->wm_kernel = 2128 WM_KERNEL_MASKCA_PROJECTIVE; 2129 } 2130 } else { 2131 if (composite_op->is_affine) 2132 composite_op->wm_kernel = 2133 WM_KERNEL_MASKNOCA_AFFINE; 2134 else 2135 composite_op->wm_kernel = 2136 WM_KERNEL_MASKNOCA_PROJECTIVE; 2137 } 2138 } else { 2139 if (composite_op->is_affine) 2140 composite_op->wm_kernel = WM_KERNEL_NOMASK_AFFINE; 2141 else 2142 composite_op->wm_kernel = WM_KERNEL_NOMASK_PROJECTIVE; 2143 } 2144 2145 intel->prim_emit = i965_emit_composite_primitive; 2146 if (!mask) { 2147 if (intel->transform[0] == NULL) 2148 intel->prim_emit = i965_emit_composite_primitive_identity_source; 2149 else if (composite_op->is_affine) 2150 intel->prim_emit = i965_emit_composite_primitive_affine_source; 2151 } else { 2152 if (intel->transform[0] == NULL && intel->transform[1] == NULL) 2153 intel->prim_emit = i965_emit_composite_primitive_identity_source_mask; 2154 } 2155 2156 intel->floats_per_vertex = 2157 2 + (mask ? 2 : 1) * (composite_op->is_affine ? 
2: 3); 2158 2159 if (!i965_composite_check_aperture(intel)) { 2160 intel_batch_submit(scrn); 2161 if (!i965_composite_check_aperture(intel)) { 2162 intel_debug_fallback(scrn, 2163 "Couldn't fit render operation " 2164 "in aperture\n"); 2165 return FALSE; 2166 } 2167 } 2168 2169 if (sizeof(intel->surface_data) - intel->surface_used < 2170 4 * SURFACE_STATE_PADDED_SIZE) 2171 i965_surface_flush(intel); 2172 2173 intel->needs_render_state_emit = TRUE; 2174 2175 return TRUE; 2176} 2177 2178static void i965_select_vertex_buffer(struct intel_screen_private *intel) 2179{ 2180 int id = intel->gen4_render_state->composite_op.vertex_id; 2181 int modifyenable = 0; 2182 2183 if (intel->vertex_id & (1 << id)) 2184 return; 2185 2186 if (INTEL_INFO(intel)->gen >= 070) 2187 modifyenable = GEN7_VB0_ADDRESS_MODIFYENABLE; 2188 2189 /* Set up the pointer to our (single) vertex buffer */ 2190 OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3); 2191 2192 /* XXX could use multiple vbo to reduce relocations if 2193 * frequently switching between vertex sizes, like rgb10text. 
2194 */ 2195 if (INTEL_INFO(intel)->gen >= 060) { 2196 OUT_BATCH((id << GEN6_VB0_BUFFER_INDEX_SHIFT) | 2197 GEN6_VB0_VERTEXDATA | 2198 modifyenable | 2199 (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); 2200 } else { 2201 OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | 2202 VB0_VERTEXDATA | 2203 (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); 2204 } 2205 OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); 2206 if (INTEL_INFO(intel)->gen >= 050) 2207 OUT_RELOC(intel->vertex_bo, 2208 I915_GEM_DOMAIN_VERTEX, 0, 2209 sizeof(intel->vertex_ptr) - 1); 2210 else 2211 OUT_BATCH(0); 2212 OUT_BATCH(0); // ignore for VERTEXDATA, but still there 2213 2214 intel->vertex_id |= 1 << id; 2215} 2216 2217static void i965_bind_surfaces(struct intel_screen_private *intel) 2218{ 2219 uint32_t *binding_table; 2220 2221 assert(intel->surface_used + 4 * SURFACE_STATE_PADDED_SIZE <= sizeof(intel->surface_data)); 2222 2223 binding_table = (uint32_t*) (intel->surface_data + intel->surface_used); 2224 intel->surface_table = intel->surface_used; 2225 intel->surface_used += SURFACE_STATE_PADDED_SIZE; 2226 2227 binding_table[0] = 2228 i965_set_picture_surface_state(intel, 2229 intel->render_dest_picture, 2230 intel->render_dest, 2231 TRUE); 2232 binding_table[1] = 2233 i965_set_picture_surface_state(intel, 2234 intel->render_source_picture, 2235 intel->render_source, 2236 FALSE); 2237 if (intel->render_mask) { 2238 binding_table[2] = 2239 i965_set_picture_surface_state(intel, 2240 intel->render_mask_picture, 2241 intel->render_mask, 2242 FALSE); 2243 } 2244} 2245 2246void 2247i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, 2248 int dstX, int dstY, int w, int h) 2249{ 2250 ScrnInfoPtr scrn = xf86ScreenToScrn(dest->drawable.pScreen); 2251 intel_screen_private *intel = intel_get_screen_private(scrn); 2252 2253 intel_batch_start_atomic(scrn, 200); 2254 if (intel->needs_render_state_emit) { 2255 i965_bind_surfaces(intel); 2256 2257 if 
(INTEL_INFO(intel)->gen >= 060) 2258 gen6_emit_composite_state(intel); 2259 else 2260 i965_emit_composite_state(intel); 2261 } 2262 2263 if (intel->floats_per_vertex != intel->last_floats_per_vertex) { 2264 intel->vertex_index = (intel->vertex_used + intel->floats_per_vertex - 1) / intel->floats_per_vertex; 2265 intel->vertex_used = intel->vertex_index * intel->floats_per_vertex; 2266 intel->last_floats_per_vertex = intel->floats_per_vertex; 2267 } 2268 if (intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) { 2269 i965_vertex_flush(intel); 2270 intel_next_vertex(intel); 2271 intel->vertex_index = 0; 2272 } 2273 i965_select_vertex_buffer(intel); 2274 2275 if (intel->vertex_offset == 0) { 2276 if (INTEL_INFO(intel)->gen >= 070) { 2277 OUT_BATCH(BRW_3DPRIMITIVE | (7 - 2)); 2278 OUT_BATCH(BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | 2279 _3DPRIM_RECTLIST); 2280 } else { 2281 OUT_BATCH(BRW_3DPRIMITIVE | 2282 BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | 2283 (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | 2284 (0 << 9) | 2285 4); 2286 } 2287 intel->vertex_offset = intel->batch_used; 2288 OUT_BATCH(0); /* vertex count, to be filled in later */ 2289 OUT_BATCH(intel->vertex_index); 2290 OUT_BATCH(1); /* single instance */ 2291 OUT_BATCH(0); /* start instance location */ 2292 OUT_BATCH(0); /* index buffer offset, ignored */ 2293 intel->vertex_count = intel->vertex_index; 2294 } 2295 2296 intel->prim_emit(intel, 2297 srcX, srcY, 2298 maskX, maskY, 2299 dstX, dstY, 2300 w, h); 2301 intel->vertex_index += 3; 2302 2303 if (INTEL_INFO(intel)->gen < 050) { 2304 /* XXX OMG! 
*/ 2305 i965_vertex_flush(intel); 2306 OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); 2307 } 2308 2309 intel_batch_end_atomic(scrn); 2310} 2311 2312void i965_batch_commit_notify(intel_screen_private *intel) 2313{ 2314 intel->needs_render_state_emit = TRUE; 2315 intel->needs_3d_invariant = TRUE; 2316 intel->last_floats_per_vertex = 0; 2317 intel->vertex_index = 0; 2318 2319 intel->gen4_render_state->composite_op.vertex_id = -1; 2320 2321 intel->gen6_render_state.num_sf_outputs = 0; 2322 intel->gen6_render_state.samplers = NULL; 2323 intel->gen6_render_state.blend = -1; 2324 intel->gen6_render_state.kernel = NULL; 2325 intel->gen6_render_state.drawrect = -1; 2326 2327 assert(intel->surface_reloc == 0); 2328} 2329 2330/** 2331 * Called at EnterVT so we can set up our offsets into the state buffer. 2332 */ 2333void gen4_render_state_init(ScrnInfoPtr scrn) 2334{ 2335 intel_screen_private *intel = intel_get_screen_private(scrn); 2336 struct gen4_render_state *render; 2337 const struct wm_kernel_info *wm_kernels; 2338 sampler_state_filter_t src_filter; 2339 sampler_state_extend_t src_extend; 2340 sampler_state_filter_t mask_filter; 2341 sampler_state_extend_t mask_extend; 2342 drm_intel_bo *sf_kernel_bo, *sf_kernel_mask_bo; 2343 drm_intel_bo *border_color_bo; 2344 int m; 2345 2346 intel->needs_3d_invariant = TRUE; 2347 2348 intel->surface_bo = 2349 drm_intel_bo_alloc(intel->bufmgr, "surface data", 2350 sizeof(intel->surface_data), 4096); 2351 assert(intel->surface_bo); 2352 2353 intel->surface_used = 0; 2354 2355 if (intel->gen4_render_state == NULL) { 2356 intel->gen4_render_state = calloc(1, sizeof(*render)); 2357 assert(intel->gen4_render_state != NULL); 2358 } 2359 2360 if (INTEL_INFO(intel)->gen >= 060) 2361 return gen6_render_state_init(scrn); 2362 2363 render = intel->gen4_render_state; 2364 render->composite_op.vertex_id = -1; 2365 2366 render->vs_state_bo = gen4_create_vs_unit_state(intel); 2367 2368 /* Set up the two SF states (one for blending with a 
mask, one without) */ 2369 if (IS_GEN5(intel)) { 2370 sf_kernel_bo = intel_bo_alloc_for_data(intel, 2371 sf_kernel_static_gen5, 2372 sizeof 2373 (sf_kernel_static_gen5), 2374 "sf kernel gen5"); 2375 sf_kernel_mask_bo = 2376 intel_bo_alloc_for_data(intel, sf_kernel_mask_static_gen5, 2377 sizeof(sf_kernel_mask_static_gen5), 2378 "sf mask kernel"); 2379 } else { 2380 sf_kernel_bo = intel_bo_alloc_for_data(intel, 2381 sf_kernel_static, 2382 sizeof(sf_kernel_static), 2383 "sf kernel"); 2384 sf_kernel_mask_bo = intel_bo_alloc_for_data(intel, 2385 sf_kernel_mask_static, 2386 sizeof 2387 (sf_kernel_mask_static), 2388 "sf mask kernel"); 2389 } 2390 render->sf_state_bo = gen4_create_sf_state(intel, sf_kernel_bo); 2391 render->sf_mask_state_bo = gen4_create_sf_state(intel, sf_kernel_mask_bo); 2392 drm_intel_bo_unreference(sf_kernel_bo); 2393 drm_intel_bo_unreference(sf_kernel_mask_bo); 2394 2395 wm_kernels = IS_GEN5(intel) ? wm_kernels_gen5 : wm_kernels_gen4; 2396 for (m = 0; m < KERNEL_COUNT; m++) { 2397 render->wm_kernel_bo[m] = 2398 intel_bo_alloc_for_data(intel, 2399 wm_kernels[m].data, 2400 wm_kernels[m].size, 2401 "WM kernel"); 2402 } 2403 2404 /* Set up the WM states: each filter/extend type for source and mask, per 2405 * kernel. 2406 */ 2407 border_color_bo = sampler_border_color_create(intel); 2408 for (src_filter = 0; src_filter < FILTER_COUNT; src_filter++) { 2409 for (src_extend = 0; src_extend < EXTEND_COUNT; src_extend++) { 2410 for (mask_filter = 0; mask_filter < FILTER_COUNT; mask_filter++) { 2411 for (mask_extend = 0; mask_extend < EXTEND_COUNT; mask_extend++) { 2412 drm_intel_bo *sampler_state_bo; 2413 2414 sampler_state_bo = 2415 i965_create_sampler_state(intel, 2416 src_filter, src_extend, 2417 mask_filter, mask_extend, 2418 border_color_bo); 2419 2420 for (m = 0; m < KERNEL_COUNT; m++) { 2421 render->wm_state_bo[m][src_filter][src_extend][mask_filter][mask_extend] = 2422 gen4_create_wm_state 2423 (intel, 2424 wm_kernels[m]. 
has_mask, 2425 render->wm_kernel_bo[m], 2426 sampler_state_bo); 2427 } 2428 drm_intel_bo_unreference(sampler_state_bo); 2429 } 2430 } 2431 } 2432 } 2433 drm_intel_bo_unreference(border_color_bo); 2434 2435 render->cc_state_bo = gen4_create_cc_unit_state(intel); 2436} 2437 2438/** 2439 * Called at LeaveVT. 2440 */ 2441void gen4_render_state_cleanup(ScrnInfoPtr scrn) 2442{ 2443 intel_screen_private *intel = intel_get_screen_private(scrn); 2444 struct gen4_render_state *render_state = intel->gen4_render_state; 2445 int i, j, k, l, m; 2446 2447 drm_intel_bo_unreference(intel->surface_bo); 2448 drm_intel_bo_unreference(render_state->vs_state_bo); 2449 drm_intel_bo_unreference(render_state->sf_state_bo); 2450 drm_intel_bo_unreference(render_state->sf_mask_state_bo); 2451 2452 for (i = 0; i < KERNEL_COUNT; i++) 2453 drm_intel_bo_unreference(render_state->wm_kernel_bo[i]); 2454 2455 for (i = 0; i < FILTER_COUNT; i++) 2456 for (j = 0; j < EXTEND_COUNT; j++) 2457 for (k = 0; k < FILTER_COUNT; k++) 2458 for (l = 0; l < EXTEND_COUNT; l++) 2459 for (m = 0; m < KERNEL_COUNT; m++) 2460 drm_intel_bo_unreference 2461 (render_state-> 2462 wm_state_bo[m][i][j][k] 2463 [l]); 2464 2465 for (i = 0; i < FILTER_COUNT; i++) 2466 for (j = 0; j < EXTEND_COUNT; j++) 2467 for (k = 0; k < FILTER_COUNT; k++) 2468 for (l = 0; l < EXTEND_COUNT; l++) 2469 drm_intel_bo_unreference(render_state->ps_sampler_state_bo[i][j][k][l]); 2470 2471 drm_intel_bo_unreference(render_state->cc_state_bo); 2472 2473 drm_intel_bo_unreference(render_state->cc_vp_bo); 2474 drm_intel_bo_unreference(render_state->gen6_blend_bo); 2475 drm_intel_bo_unreference(render_state->gen6_depth_stencil_bo); 2476 2477 free(intel->gen4_render_state); 2478 intel->gen4_render_state = NULL; 2479} 2480 2481/* 2482 * for GEN6+ 2483 */ 2484#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64) 2485 2486static drm_intel_bo * 2487gen6_composite_create_cc_state(intel_screen_private *intel) 2488{ 2489 struct 
gen6_color_calc_state *state; 2490 drm_intel_bo *cc_bo; 2491 int ret; 2492 2493 cc_bo = drm_intel_bo_alloc(intel->bufmgr, 2494 "gen6 CC state", 2495 sizeof(*state), 2496 4096); 2497 assert(cc_bo); 2498 2499 ret = drm_intel_bo_map(cc_bo, TRUE); 2500 assert(ret == 0); 2501 2502 state = memset(cc_bo->virtual, 0, sizeof(*state)); 2503 state->constant_r = 1.0; 2504 state->constant_g = 0.0; 2505 state->constant_b = 1.0; 2506 state->constant_a = 1.0; 2507 drm_intel_bo_unmap(cc_bo); 2508 2509 return cc_bo; 2510 (void)ret; 2511} 2512 2513static drm_intel_bo * 2514gen6_composite_create_blend_state(intel_screen_private *intel) 2515{ 2516 drm_intel_bo *blend_bo; 2517 int src, dst, ret; 2518 2519 blend_bo = drm_intel_bo_alloc(intel->bufmgr, 2520 "gen6 BLEND state", 2521 BRW_BLENDFACTOR_COUNT * BRW_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE, 2522 4096); 2523 assert(blend_bo); 2524 2525 ret = drm_intel_bo_map(blend_bo, TRUE); 2526 assert(ret == 0); 2527 2528 memset(blend_bo->virtual, 0, blend_bo->size); 2529 for (src = 0; src < BRW_BLENDFACTOR_COUNT; src++) { 2530 for (dst = 0; dst < BRW_BLENDFACTOR_COUNT; dst++) { 2531 uint32_t blend_state_offset = (src * BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE; 2532 struct gen6_blend_state *blend; 2533 2534 blend = (struct gen6_blend_state *)((char *)blend_bo->virtual + blend_state_offset); 2535 blend->blend0.dest_blend_factor = dst; 2536 blend->blend0.source_blend_factor = src; 2537 blend->blend0.blend_func = BRW_BLENDFUNCTION_ADD; 2538 blend->blend0.blend_enable = 1; 2539 2540 blend->blend1.post_blend_clamp_enable = 1; 2541 blend->blend1.pre_blend_clamp_enable = 1; 2542 } 2543 } 2544 2545 drm_intel_bo_unmap(blend_bo); 2546 return blend_bo; 2547 (void)ret; 2548} 2549 2550static drm_intel_bo * 2551gen6_composite_create_depth_stencil_state(intel_screen_private *intel) 2552{ 2553 drm_intel_bo *depth_stencil_bo; 2554 int ret; 2555 2556 depth_stencil_bo = 2557 drm_intel_bo_alloc(intel->bufmgr, 2558 "gen6 DEPTH_STENCIL 
state", 2559 sizeof(struct gen6_depth_stencil_state), 2560 4096); 2561 assert(depth_stencil_bo); 2562 2563 ret = drm_intel_bo_map(depth_stencil_bo, TRUE); 2564 assert(ret == 0); 2565 2566 memset(depth_stencil_bo->virtual, 0, 2567 sizeof(struct gen6_depth_stencil_state)); 2568 drm_intel_bo_unmap(depth_stencil_bo); 2569 2570 return depth_stencil_bo; 2571 (void)ret; 2572} 2573 2574static void 2575gen6_composite_state_base_address(intel_screen_private *intel) 2576{ 2577 OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2)); 2578 OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */ 2579 intel->surface_reloc = intel->batch_used; 2580 intel_batch_emit_dword(intel, 2581 intel->surface_bo->offset | BASE_ADDRESS_MODIFY); 2582 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */ 2583 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */ 2584 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */ 2585 OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state upper bound */ 2586 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ 2587 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ 2588 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ 2589} 2590 2591static void 2592gen6_composite_cc_state_pointers(intel_screen_private *intel, 2593 uint32_t blend_offset) 2594{ 2595 struct gen4_render_state *render_state = intel->gen4_render_state; 2596 drm_intel_bo *cc_bo = NULL; 2597 drm_intel_bo *depth_stencil_bo = NULL; 2598 2599 if (intel->gen6_render_state.blend == blend_offset) 2600 return; 2601 2602 if (intel->gen6_render_state.blend == -1) { 2603 cc_bo = render_state->cc_state_bo; 2604 depth_stencil_bo = render_state->gen6_depth_stencil_bo; 2605 } 2606 if (INTEL_INFO(intel)->gen >= 070) { 2607 gen7_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset); 2608 } else { 2609 gen6_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, 
blend_offset); 2610 } 2611 2612 intel->gen6_render_state.blend = blend_offset; 2613} 2614 2615static void 2616gen6_composite_sampler_state_pointers(intel_screen_private *intel, 2617 drm_intel_bo *bo) 2618{ 2619 if (intel->gen6_render_state.samplers == bo) 2620 return; 2621 2622 intel->gen6_render_state.samplers = bo; 2623 2624 if (INTEL_INFO(intel)->gen >= 070) 2625 gen7_upload_sampler_state_pointers(intel, bo); 2626 else 2627 gen6_upload_sampler_state_pointers(intel, bo); 2628} 2629 2630static void 2631gen6_composite_wm_constants(intel_screen_private *intel) 2632{ 2633 Bool ivb = INTEL_INFO(intel)->gen >= 070; 2634 /* disable WM constant buffer */ 2635 OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | ((ivb ? 7 : 5) - 2)); 2636 OUT_BATCH(0); 2637 OUT_BATCH(0); 2638 OUT_BATCH(0); 2639 OUT_BATCH(0); 2640 if (ivb) { 2641 OUT_BATCH(0); 2642 OUT_BATCH(0); 2643 } 2644} 2645 2646static void 2647gen6_composite_sf_state(intel_screen_private *intel, 2648 Bool has_mask) 2649{ 2650 int num_sf_outputs = has_mask ? 2 : 1; 2651 2652 if (intel->gen6_render_state.num_sf_outputs == num_sf_outputs) 2653 return; 2654 2655 intel->gen6_render_state.num_sf_outputs = num_sf_outputs; 2656 2657 if (INTEL_INFO(intel)->gen >= 070) 2658 gen7_upload_sf_state(intel, num_sf_outputs, 1); 2659 else 2660 gen6_upload_sf_state(intel, num_sf_outputs, 1); 2661} 2662 2663static void 2664gen6_composite_wm_state(intel_screen_private *intel, 2665 Bool has_mask, 2666 drm_intel_bo *bo) 2667{ 2668 int num_surfaces = has_mask ? 3 : 2; 2669 int num_sf_outputs = has_mask ? 
2 : 1; 2670 2671 if (intel->gen6_render_state.kernel == bo) 2672 return; 2673 2674 intel->gen6_render_state.kernel = bo; 2675 2676 OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); 2677 OUT_RELOC(bo, 2678 I915_GEM_DOMAIN_INSTRUCTION, 0, 2679 0); 2680 OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | 2681 (num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); 2682 OUT_BATCH(0); 2683 OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ 2684 OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | 2685 GEN6_3DSTATE_WM_DISPATCH_ENABLE | 2686 GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); 2687 OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | 2688 GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 2689 OUT_BATCH(0); 2690 OUT_BATCH(0); 2691} 2692 2693static void 2694gen7_composite_wm_state(intel_screen_private *intel, 2695 Bool has_mask, 2696 drm_intel_bo *bo) 2697{ 2698 int num_surfaces = has_mask ? 3 : 2; 2699 unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB; 2700 unsigned int num_samples = 0; 2701 2702 if (IS_HSW(intel)) { 2703 max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW; 2704 num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW; 2705 } 2706 2707 if (intel->gen6_render_state.kernel == bo) 2708 return; 2709 2710 intel->gen6_render_state.kernel = bo; 2711 2712 OUT_BATCH(GEN6_3DSTATE_WM | (3 - 2)); 2713 OUT_BATCH(GEN7_WM_DISPATCH_ENABLE | 2714 GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 2715 OUT_BATCH(0); 2716 2717 OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2)); 2718 OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 2719 OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | 2720 (num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); 2721 OUT_BATCH(0); /* scratch space base offset */ 2722 OUT_BATCH(((48 - 1) << max_threads_shift) | num_samples | 2723 GEN7_PS_ATTRIBUTE_ENABLE | 2724 GEN7_PS_16_DISPATCH_ENABLE); 2725 OUT_BATCH((6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0)); 2726 OUT_BATCH(0); /* kernel 1 pointer */ 2727 OUT_BATCH(0); /* kernel 
2 pointer */ 2728} 2729 2730 2731static void 2732gen6_composite_drawing_rectangle(intel_screen_private *intel, 2733 PixmapPtr dest) 2734{ 2735 uint32_t dw = 2736 DRAW_YMAX(dest->drawable.height - 1) | 2737 DRAW_XMAX(dest->drawable.width - 1); 2738 2739 /* XXX cacomposite depends upon the implicit non-pipelined flush */ 2740 if (0 && intel->gen6_render_state.drawrect == dw) 2741 return; 2742 intel->gen6_render_state.drawrect = dw; 2743 2744 OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); 2745 OUT_BATCH(0x00000000); /* ymin, xmin */ 2746 OUT_BATCH(dw); /* ymax, xmax */ 2747 OUT_BATCH(0x00000000); /* yorigin, xorigin */ 2748} 2749 2750static void 2751gen6_composite_vertex_element_state(intel_screen_private *intel, 2752 Bool has_mask, 2753 Bool is_affine) 2754{ 2755 /* 2756 * vertex data in vertex buffer 2757 * position: (x, y) 2758 * texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0) 2759 * texture coordinate 1 if (has_mask is TRUE): same as above 2760 */ 2761 gen4_composite_op *composite_op = &intel->gen4_render_state->composite_op; 2762 int nelem = has_mask ? 2 : 1; 2763 int selem = is_affine ? 2 : 3; 2764 uint32_t w_component; 2765 uint32_t src_format; 2766 int id; 2767 2768 id = has_mask << 1 | is_affine; 2769 2770 if (composite_op->vertex_id == id) 2771 return; 2772 2773 composite_op->vertex_id = id; 2774 2775 if (is_affine) { 2776 src_format = BRW_SURFACEFORMAT_R32G32_FLOAT; 2777 w_component = BRW_VFCOMPONENT_STORE_1_FLT; 2778 } else { 2779 src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; 2780 w_component = BRW_VFCOMPONENT_STORE_SRC; 2781 } 2782 2783 /* The VUE layout 2784 * dword 0-3: pad (0.0, 0.0, 0.0. 
0.0) 2785 * dword 4-7: position (x, y, 1.0, 1.0), 2786 * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) 2787 * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) 2788 * 2789 * dword 4-15 are fetched from vertex buffer 2790 */ 2791 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 2792 ((2 * (2 + nelem)) + 1 - 2)); 2793 2794 OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | 2795 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 2796 (0 << VE0_OFFSET_SHIFT)); 2797 OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | 2798 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | 2799 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | 2800 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); 2801 2802 /* x,y */ 2803 OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | 2804 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 2805 (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */ 2806 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 2807 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 2808 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 2809 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 2810 2811 /* u0, v0, w0 */ 2812 OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | 2813 (src_format << VE0_FORMAT_SHIFT) | 2814 ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */ 2815 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 2816 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 2817 (w_component << VE1_VFCOMPONENT_2_SHIFT) | 2818 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 2819 2820 /* u1, v1, w1 */ 2821 if (has_mask) { 2822 OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | 2823 GEN6_VE0_VALID | 2824 (src_format << VE0_FORMAT_SHIFT) | 2825 (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */ 2826 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 2827 
(BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 2828 (w_component << VE1_VFCOMPONENT_2_SHIFT) | 2829 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 2830 } 2831} 2832 2833static void 2834gen6_emit_composite_state(struct intel_screen_private *intel) 2835{ 2836 struct gen4_render_state *render = intel->gen4_render_state; 2837 gen4_composite_op *composite_op = &render->composite_op; 2838 sampler_state_filter_t src_filter = composite_op->src_filter; 2839 sampler_state_filter_t mask_filter = composite_op->mask_filter; 2840 sampler_state_extend_t src_extend = composite_op->src_extend; 2841 sampler_state_extend_t mask_extend = composite_op->mask_extend; 2842 Bool is_affine = composite_op->is_affine; 2843 Bool has_mask = intel->render_mask != NULL; 2844 Bool ivb = INTEL_INFO(intel)->gen >= 070; 2845 uint32_t src, dst; 2846 drm_intel_bo *ps_sampler_state_bo = render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend]; 2847 2848 intel->needs_render_state_emit = FALSE; 2849 if (intel->needs_3d_invariant) { 2850 gen6_upload_invariant_states(intel); 2851 2852 if (ivb) { 2853 gen7_upload_viewport_state_pointers(intel, render->cc_vp_bo); 2854 gen7_upload_urb(intel); 2855 gen7_upload_bypass_states(intel); 2856 gen7_upload_depth_buffer_state(intel); 2857 } else { 2858 gen6_upload_invariant_states(intel); 2859 gen6_upload_viewport_state_pointers(intel, render->cc_vp_bo); 2860 gen6_upload_urb(intel); 2861 2862 gen6_upload_gs_state(intel); 2863 gen6_upload_depth_buffer_state(intel); 2864 } 2865 gen6_composite_wm_constants(intel); 2866 gen6_upload_vs_state(intel); 2867 gen6_upload_clip_state(intel); 2868 2869 intel->needs_3d_invariant = FALSE; 2870 } 2871 2872 i965_get_blend_cntl(composite_op->op, 2873 intel->render_mask_picture, 2874 intel->render_dest_picture->format, 2875 &src, &dst); 2876 2877 if (intel->surface_reloc == 0) 2878 gen6_composite_state_base_address(intel); 2879 2880 gen6_composite_cc_state_pointers(intel, 2881 (src * 
BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE); 2882 gen6_composite_sampler_state_pointers(intel, ps_sampler_state_bo); 2883 gen6_composite_sf_state(intel, has_mask); 2884 if (ivb) { 2885 gen7_composite_wm_state(intel, has_mask, 2886 render->wm_kernel_bo[composite_op->wm_kernel]); 2887 gen7_upload_binding_table(intel, intel->surface_table); 2888 } else { 2889 gen6_composite_wm_state(intel, has_mask, 2890 render->wm_kernel_bo[composite_op->wm_kernel]); 2891 gen6_upload_binding_table(intel, intel->surface_table); 2892 } 2893 gen6_composite_drawing_rectangle(intel, intel->render_dest); 2894 gen6_composite_vertex_element_state(intel, has_mask, is_affine); 2895} 2896 2897static void 2898gen6_render_state_init(ScrnInfoPtr scrn) 2899{ 2900 intel_screen_private *intel = intel_get_screen_private(scrn); 2901 struct gen4_render_state *render; 2902 sampler_state_filter_t src_filter; 2903 sampler_state_filter_t mask_filter; 2904 sampler_state_extend_t src_extend; 2905 sampler_state_extend_t mask_extend; 2906 int m; 2907 drm_intel_bo *border_color_bo; 2908 const struct wm_kernel_info *wm_kernels; 2909 2910 render= intel->gen4_render_state; 2911 render->composite_op.vertex_id = -1; 2912 2913 intel->gen6_render_state.num_sf_outputs = 0; 2914 intel->gen6_render_state.samplers = NULL; 2915 intel->gen6_render_state.blend = -1; 2916 intel->gen6_render_state.kernel = NULL; 2917 intel->gen6_render_state.drawrect = -1; 2918 2919 wm_kernels = IS_GEN7(intel) ? 
wm_kernels_gen7 : wm_kernels_gen6; 2920 for (m = 0; m < KERNEL_COUNT; m++) { 2921 render->wm_kernel_bo[m] = 2922 intel_bo_alloc_for_data(intel, 2923 wm_kernels[m].data, 2924 wm_kernels[m].size, 2925 "WM kernel gen6/7"); 2926 } 2927 2928 border_color_bo = sampler_border_color_create(intel); 2929 2930 for (src_filter = 0; src_filter < FILTER_COUNT; src_filter++) { 2931 for (src_extend = 0; src_extend < EXTEND_COUNT; src_extend++) { 2932 for (mask_filter = 0; mask_filter < FILTER_COUNT; mask_filter++) { 2933 for (mask_extend = 0; mask_extend < EXTEND_COUNT; mask_extend++) { 2934 render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend] = 2935 i965_create_sampler_state(intel, 2936 src_filter, src_extend, 2937 mask_filter, mask_extend, 2938 border_color_bo); 2939 } 2940 } 2941 } 2942 } 2943 2944 drm_intel_bo_unreference(border_color_bo); 2945 render->cc_vp_bo = gen4_create_cc_viewport(intel); 2946 render->cc_state_bo = gen6_composite_create_cc_state(intel); 2947 render->gen6_blend_bo = gen6_composite_create_blend_state(intel); 2948 render->gen6_depth_stencil_bo = gen6_composite_create_depth_stencil_state(intel); 2949} 2950 2951void i965_vertex_flush(struct intel_screen_private *intel) 2952{ 2953 if (intel->vertex_offset) { 2954 intel->batch_ptr[intel->vertex_offset] = 2955 intel->vertex_index - intel->vertex_count; 2956 intel->vertex_offset = 0; 2957 } 2958} 2959 2960void i965_batch_flush(struct intel_screen_private *intel) 2961{ 2962 if (intel->surface_used) 2963 i965_surface_flush(intel); 2964} 2965