1/* 2 * Copyright © 2006,2008 Intel Corporation 3 * Copyright © 2007 Red Hat, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 
23 * 24 * Authors: 25 * Wang Zhenyu <zhenyu.z.wang@intel.com> 26 * Eric Anholt <eric@anholt.net> 27 * Carl Worth <cworth@redhat.com> 28 * Keith Packard <keithp@keithp.com> 29 * 30 */ 31 32#ifdef HAVE_CONFIG_H 33#include "config.h" 34#endif 35 36#include <assert.h> 37#include "xorg-server.h" 38#include "xf86.h" 39#include "intel.h" 40#include "intel_uxa.h" 41#include "i830_reg.h" 42#include "i965_reg.h" 43 44/* bring in brw structs */ 45#include "brw_defines.h" 46#include "brw_structs.h" 47 48// refer vol2, 3d rasterization 3.8.1 49 50/* defined in brw_defines.h */ 51static const struct blendinfo { 52 Bool dst_alpha; 53 Bool src_alpha; 54 uint32_t src_blend; 55 uint32_t dst_blend; 56} i965_blend_op[] = { 57 /* Clear */ 58 {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ZERO}, 59 /* Src */ 60 {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO}, 61 /* Dst */ 62 {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ONE}, 63 /* Over */ 64 {0, 1, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_INV_SRC_ALPHA}, 65 /* OverReverse */ 66 {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE}, 67 /* In */ 68 {1, 0, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, 69 /* InReverse */ 70 {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_SRC_ALPHA}, 71 /* Out */ 72 {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, 73 /* OutReverse */ 74 {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_INV_SRC_ALPHA}, 75 /* Atop */ 76 {1, 1, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, 77 /* AtopReverse */ 78 {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA}, 79 /* Xor */ 80 {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, 81 /* Add */ 82 {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ONE}, 83}; 84 85/** 86 * Highest-valued BLENDFACTOR used in i965_blend_op. 
87 * 88 * This leaves out BRW_BLENDFACTOR_INV_DST_COLOR, 89 * BRW_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, 90 * BRW_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} 91 */ 92#define BRW_BLENDFACTOR_COUNT (BRW_BLENDFACTOR_INV_DST_ALPHA + 1) 93 94/* FIXME: surface format defined in brw_defines.h, shared Sampling engine 95 * 1.7.2 96 */ 97static const struct formatinfo { 98 int fmt; 99 uint32_t card_fmt; 100} i965_tex_formats[] = { 101 {PICT_a8, BRW_SURFACEFORMAT_A8_UNORM}, 102 {PICT_a8r8g8b8, BRW_SURFACEFORMAT_B8G8R8A8_UNORM}, 103 {PICT_x8r8g8b8, BRW_SURFACEFORMAT_B8G8R8X8_UNORM}, 104 {PICT_a8b8g8r8, BRW_SURFACEFORMAT_R8G8B8A8_UNORM}, 105 {PICT_x8b8g8r8, BRW_SURFACEFORMAT_R8G8B8X8_UNORM}, 106 {PICT_r8g8b8, BRW_SURFACEFORMAT_R8G8B8_UNORM}, 107 {PICT_r5g6b5, BRW_SURFACEFORMAT_B5G6R5_UNORM}, 108 {PICT_a1r5g5b5, BRW_SURFACEFORMAT_B5G5R5A1_UNORM}, 109#if XORG_VERSION_CURRENT >= 10699900 110 {PICT_a2r10g10b10, BRW_SURFACEFORMAT_B10G10R10A2_UNORM}, 111 {PICT_x2r10g10b10, BRW_SURFACEFORMAT_B10G10R10X2_UNORM}, 112 {PICT_a2b10g10r10, BRW_SURFACEFORMAT_R10G10B10A2_UNORM}, 113 {PICT_x2r10g10b10, BRW_SURFACEFORMAT_B10G10R10X2_UNORM}, 114#endif 115 {PICT_a4r4g4b4, BRW_SURFACEFORMAT_B4G4R4A4_UNORM}, 116}; 117 118static void i965_get_blend_cntl(int op, PicturePtr mask, uint32_t dst_format, 119 uint32_t * sblend, uint32_t * dblend) 120{ 121 122 *sblend = i965_blend_op[op].src_blend; 123 *dblend = i965_blend_op[op].dst_blend; 124 125 /* If there's no dst alpha channel, adjust the blend op so that we'll treat 126 * it as always 1. 127 */ 128 if (PICT_FORMAT_A(dst_format) == 0 && i965_blend_op[op].dst_alpha) { 129 if (*sblend == BRW_BLENDFACTOR_DST_ALPHA) 130 *sblend = BRW_BLENDFACTOR_ONE; 131 else if (*sblend == BRW_BLENDFACTOR_INV_DST_ALPHA) 132 *sblend = BRW_BLENDFACTOR_ZERO; 133 } 134 135 /* If the source alpha is being used, then we should only be in a case where 136 * the source blend factor is 0, and the source blend value is the mask 137 * channels multiplied by the source picture's alpha. 
138 */ 139 if (mask && mask->componentAlpha && PICT_FORMAT_RGB(mask->format) 140 && i965_blend_op[op].src_alpha) { 141 if (*dblend == BRW_BLENDFACTOR_SRC_ALPHA) { 142 *dblend = BRW_BLENDFACTOR_SRC_COLOR; 143 } else if (*dblend == BRW_BLENDFACTOR_INV_SRC_ALPHA) { 144 *dblend = BRW_BLENDFACTOR_INV_SRC_COLOR; 145 } 146 } 147 148} 149 150static uint32_t i965_get_dest_format(PicturePtr dest_picture) 151{ 152 switch (dest_picture->format) { 153 case PICT_a8r8g8b8: 154 case PICT_x8r8g8b8: 155 return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; 156 case PICT_a8b8g8r8: 157 case PICT_x8b8g8r8: 158 return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; 159#if XORG_VERSION_CURRENT >= 10699900 160 case PICT_a2r10g10b10: 161 case PICT_x2r10g10b10: 162 return BRW_SURFACEFORMAT_B10G10R10A2_UNORM; 163#endif 164 case PICT_r5g6b5: 165 return BRW_SURFACEFORMAT_B5G6R5_UNORM; 166 case PICT_x1r5g5b5: 167 case PICT_a1r5g5b5: 168 return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; 169 case PICT_a8: 170 return BRW_SURFACEFORMAT_A8_UNORM; 171 case PICT_a4r4g4b4: 172 case PICT_x4r4g4b4: 173 return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; 174 default: 175 return -1; 176 } 177} 178 179Bool 180i965_check_composite(int op, 181 PicturePtr source_picture, 182 PicturePtr mask_picture, 183 PicturePtr dest_picture, 184 int width, int height) 185{ 186 ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen); 187 188 /* Check for unsupported compositing operations. */ 189 if (op >= sizeof(i965_blend_op) / sizeof(i965_blend_op[0])) { 190 intel_uxa_debug_fallback(scrn, 191 "Unsupported Composite op 0x%x\n", op); 192 return FALSE; 193 } 194 195 if (mask_picture && mask_picture->componentAlpha && 196 PICT_FORMAT_RGB(mask_picture->format)) { 197 /* Check if it's component alpha that relies on a source alpha and on 198 * the source value. We can only get one of those into the single 199 * source value that we get to blend with. 
200 */ 201 if (i965_blend_op[op].src_alpha && 202 (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) { 203 intel_uxa_debug_fallback(scrn, 204 "Component alpha not supported " 205 "with source alpha and source " 206 "value blending.\n"); 207 return FALSE; 208 } 209 } 210 211 if (i965_get_dest_format(dest_picture) == -1) { 212 intel_uxa_debug_fallback(scrn, "Usupported Color buffer format 0x%x\n", 213 (int)dest_picture->format); 214 return FALSE; 215 } 216 217 return TRUE; 218} 219 220Bool 221i965_check_composite_texture(ScreenPtr screen, PicturePtr picture) 222{ 223 if (picture->repeatType > RepeatReflect) { 224 ScrnInfoPtr scrn = xf86ScreenToScrn(screen); 225 intel_uxa_debug_fallback(scrn, 226 "extended repeat (%d) not supported\n", 227 picture->repeatType); 228 return FALSE; 229 } 230 231 if (picture->filter != PictFilterNearest && 232 picture->filter != PictFilterBilinear) { 233 ScrnInfoPtr scrn = xf86ScreenToScrn(screen); 234 intel_uxa_debug_fallback(scrn, "Unsupported filter 0x%x\n", 235 picture->filter); 236 return FALSE; 237 } 238 239 if (picture->pDrawable) { 240 int w, h, i; 241 242 w = picture->pDrawable->width; 243 h = picture->pDrawable->height; 244 if ((w > 8192) || (h > 8192)) { 245 ScrnInfoPtr scrn = xf86ScreenToScrn(screen); 246 intel_uxa_debug_fallback(scrn, 247 "Picture w/h too large (%dx%d)\n", 248 w, h); 249 return FALSE; 250 } 251 252 for (i = 0; 253 i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]); 254 i++) { 255 if (i965_tex_formats[i].fmt == picture->format) 256 break; 257 } 258 if (i == sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0])) 259 { 260 ScrnInfoPtr scrn = xf86ScreenToScrn(screen); 261 intel_uxa_debug_fallback(scrn, 262 "Unsupported picture format " 263 "0x%x\n", 264 (int)picture->format); 265 return FALSE; 266 } 267 268 return TRUE; 269 } 270 271 return FALSE; 272} 273 274 275#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) 276 277/* Set up a default static partitioning of the URB, which is supposed to 278 * 
   allow anything we would want to do, at potentially lower performance.
 */
#define URB_CS_ENTRY_SIZE     0
#define URB_CS_ENTRIES	      0

#define URB_VS_ENTRY_SIZE     1	/* each 512-bit row */
#define URB_VS_ENTRIES	      8	/* we need at least 8 entries */

#define URB_GS_ENTRY_SIZE     0
#define URB_GS_ENTRIES	      0

#define URB_CLIP_ENTRY_SIZE   0
#define URB_CLIP_ENTRIES      0

#define URB_SF_ENTRY_SIZE     2
#define URB_SF_ENTRIES	      1

/*
 * this program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver
 */

#define SF_KERNEL_NUM_GRF  16
#define SF_MAX_THREADS	   2

/* Pre-assembled GPU kernel binaries (.g4b/.g6b/.g7b files are generated
 * dword arrays); each array below concatenates the stages of one shader. */
static const uint32_t sf_kernel_static[][4] = {
#include "exa_sf.g4b"
};

static const uint32_t sf_kernel_mask_static[][4] = {
#include "exa_sf_mask.g4b"
};

/* ps kernels */
#define PS_KERNEL_NUM_GRF   32
#define PS_MAX_THREADS	    48

static const uint32_t ps_kernel_nomask_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_nomask_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_maskca_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_maskca_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_maskca_srcalpha_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_a.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca_srcalpha.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_maskca_srcalpha_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_a.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca_srcalpha.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_masknoca_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_a.g4b"
#include "exa_wm_noca.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_masknoca_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_a.g4b"
#include "exa_wm_noca.g4b"
#include "exa_wm_write.g4b"
};

/* new programs for Ironlake */
static const uint32_t sf_kernel_static_gen5[][4] = {
#include "exa_sf.g4b.gen5"
};

static const uint32_t sf_kernel_mask_static_gen5[][4] = {
#include "exa_sf_mask.g4b.gen5"
};

static const uint32_t ps_kernel_nomask_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_nomask_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_a.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca_srcalpha.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_a.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca_srcalpha.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_masknoca_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_a.g4b.gen5"
#include "exa_wm_noca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_masknoca_projective_static_gen5[][4] =
{
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_a.g4b.gen5"
#include "exa_wm_noca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

/* programs for GEN6 */
static const uint32_t ps_kernel_nomask_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_nomask_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_maskca_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_affine.g6b"
#include "exa_wm_mask_sample_argb.g6b"
#include "exa_wm_ca.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_maskca_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_projective.g6b"
#include "exa_wm_mask_sample_argb.g6b"
/* NOTE(review): this one stage pulls in the gen5 CA combiner binary while
 * every sibling stage is .g6b (the affine variant above uses exa_wm_ca.g6b).
 * Possibly intentional binary reuse — confirm against the kernel sources
 * before "fixing". */
#include "exa_wm_ca.g4b.gen5"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_a.g6b"
#include "exa_wm_mask_affine.g6b"
#include "exa_wm_mask_sample_argb.g6b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_a.g6b"
#include "exa_wm_mask_projective.g6b"
#include "exa_wm_mask_sample_argb.g6b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_masknoca_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_affine.g6b"
#include "exa_wm_mask_sample_a.g6b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_masknoca_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_projective.g6b"
#include "exa_wm_mask_sample_a.g6b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g6b"
};

/* programs for GEN7 */
static const uint32_t ps_kernel_nomask_affine_static_gen7[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_nomask_projective_static_gen7[][4] = {
#include "exa_wm_src_projective.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_maskca_affine_static_gen7[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_mask_affine.g7b"
#include "exa_wm_mask_sample_argb.g7b"
/* NOTE(review): gen6 CA combiner binary in a gen7 kernel — presumably
 * binary-compatible; confirm before changing. */
#include "exa_wm_ca.g6b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_maskca_projective_static_gen7[][4] = {
#include "exa_wm_src_projective.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_mask_projective.g7b"
#include "exa_wm_mask_sample_argb.g7b"
/* NOTE(review): gen5 CA combiner binary in a gen7 kernel (see gen6 note). */
#include "exa_wm_ca.g4b.gen5"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen7[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_a.g7b"
#include "exa_wm_mask_affine.g7b"
#include "exa_wm_mask_sample_argb.g7b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen7[][4] = {
#include "exa_wm_src_projective.g7b"
#include "exa_wm_src_sample_a.g7b"
#include "exa_wm_mask_projective.g7b"
#include "exa_wm_mask_sample_argb.g7b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_masknoca_affine_static_gen7[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_mask_affine.g7b"
#include "exa_wm_mask_sample_a.g7b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_masknoca_projective_static_gen7[][4] = {
#include "exa_wm_src_projective.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_mask_projective.g7b"
#include "exa_wm_mask_sample_a.g7b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g7b"
};


/* Sampler filter selector; indexes the ps_sampler_state_bo cache. */
typedef enum {
	SS_INVALID_FILTER = -1,
	SS_FILTER_NEAREST,
	SS_FILTER_BILINEAR,
	FILTER_COUNT,
} sampler_state_filter_t;

/* Sampler extend/repeat selector; indexes the ps_sampler_state_bo cache. */
typedef enum {
	SS_INVALID_EXTEND = -1,
	SS_EXTEND_NONE,
	SS_EXTEND_REPEAT,
	SS_EXTEND_PAD,
	SS_EXTEND_REFLECT,
	EXTEND_COUNT,
} sampler_state_extend_t;

/* Which pixel-shader kernel variant a composite op needs. */
typedef enum {
	WM_KERNEL_NOMASK_AFFINE,
	WM_KERNEL_NOMASK_PROJECTIVE,
	WM_KERNEL_MASKCA_AFFINE,
	WM_KERNEL_MASKCA_PROJECTIVE,
	WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	WM_KERNEL_MASKNOCA_AFFINE,
	WM_KERNEL_MASKNOCA_PROJECTIVE,
	KERNEL_COUNT
} wm_kernel_t;

/* Designated-initializer helper so each table row sits at its enum index. */
#define KERNEL(kernel_enum, kernel, masked) \
    [kernel_enum] = {&kernel, sizeof(kernel), masked}
struct wm_kernel_info {
	const void *data;	/* kernel binary (one of the arrays above) */
	unsigned int size;	/* size of the binary, in bytes */
	Bool has_mask;		/* does the kernel sample a mask picture? */
};

static const struct wm_kernel_info wm_kernels_gen4[] = {
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
	       ps_kernel_nomask_affine_static, FALSE),
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	       ps_kernel_nomask_projective_static, FALSE),
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
	       ps_kernel_maskca_affine_static, TRUE),
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	       ps_kernel_maskca_projective_static, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	       ps_kernel_maskca_srcalpha_affine_static, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	       ps_kernel_maskca_srcalpha_projective_static, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	       ps_kernel_masknoca_affine_static, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	       ps_kernel_masknoca_projective_static, TRUE),
};

static const struct wm_kernel_info wm_kernels_gen5[] = {
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
	       ps_kernel_nomask_affine_static_gen5, FALSE),
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	       ps_kernel_nomask_projective_static_gen5, FALSE),
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
	       ps_kernel_maskca_affine_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	       ps_kernel_maskca_projective_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	       ps_kernel_maskca_srcalpha_affine_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	       ps_kernel_maskca_srcalpha_projective_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	       ps_kernel_masknoca_affine_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	       ps_kernel_masknoca_projective_static_gen5, TRUE),
};

static const struct wm_kernel_info wm_kernels_gen6[] = {
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
	       ps_kernel_nomask_affine_static_gen6, FALSE),
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	       ps_kernel_nomask_projective_static_gen6, FALSE),
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
	       ps_kernel_maskca_affine_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	       ps_kernel_maskca_projective_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	       ps_kernel_maskca_srcalpha_affine_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	       ps_kernel_maskca_srcalpha_projective_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	       ps_kernel_masknoca_affine_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	       ps_kernel_masknoca_projective_static_gen6, TRUE),
};

static const struct wm_kernel_info wm_kernels_gen7[] = {
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
	       ps_kernel_nomask_affine_static_gen7, FALSE),
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	       ps_kernel_nomask_projective_static_gen7, FALSE),
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
	       ps_kernel_maskca_affine_static_gen7, TRUE),
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	       ps_kernel_maskca_projective_static_gen7, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	       ps_kernel_maskca_srcalpha_affine_static_gen7, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	       ps_kernel_maskca_srcalpha_projective_static_gen7, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	       ps_kernel_masknoca_affine_static_gen7, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	       ps_kernel_masknoca_projective_static_gen7, TRUE),
};

#undef KERNEL

/* CC unit state padded out to the 64-byte alignment the hardware requires
 * when these are packed into one bo, one per blend-factor pair. */
typedef struct _brw_cc_unit_state_padded {
	struct brw_cc_unit_state state;
	char pad[64 - sizeof(struct brw_cc_unit_state)];
} brw_cc_unit_state_padded;

#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
/* One slot big enough for either the gen4-6 or the gen7 surface state
 * layout, 32-byte aligned. */
#define SURFACE_STATE_PADDED_SIZE ALIGN(MAX(sizeof(struct brw_surface_state), sizeof(struct gen7_surface_state)), 32)

struct gen4_cc_unit_state {
	/* Index by [src_blend][dst_blend] */
	brw_cc_unit_state_padded cc_state[BRW_BLENDFACTOR_COUNT][BRW_BLENDFACTOR_COUNT];
};

/* Everything needed to describe the composite currently being emitted. */
typedef struct gen4_composite_op {
	int op;
	sampler_state_filter_t src_filter;
	sampler_state_filter_t mask_filter;
	sampler_state_extend_t src_extend;
	sampler_state_extend_t mask_extend;
	Bool is_affine;
	wm_kernel_t wm_kernel;
	int vertex_id;
} gen4_composite_op;

/** Private data for gen4 render accel implementation.
*/ 748struct gen4_render_state { 749 drm_intel_bo *vs_state_bo; 750 drm_intel_bo *sf_state_bo; 751 drm_intel_bo *sf_mask_state_bo; 752 drm_intel_bo *cc_state_bo; 753 drm_intel_bo *wm_state_bo[KERNEL_COUNT] 754 [FILTER_COUNT] [EXTEND_COUNT] 755 [FILTER_COUNT] [EXTEND_COUNT]; 756 drm_intel_bo *wm_kernel_bo[KERNEL_COUNT]; 757 758 drm_intel_bo *cc_vp_bo; 759 drm_intel_bo *gen6_blend_bo; 760 drm_intel_bo *gen6_depth_stencil_bo; 761 drm_intel_bo *ps_sampler_state_bo[FILTER_COUNT] 762 [EXTEND_COUNT] 763 [FILTER_COUNT] 764 [EXTEND_COUNT]; 765 gen4_composite_op composite_op; 766}; 767 768static void gen6_emit_composite_state(struct intel_screen_private *intel); 769static void gen6_render_state_init(ScrnInfoPtr scrn); 770 771/** 772 * Sets up the SF state pointing at an SF kernel. 773 * 774 * The SF kernel does coord interp: for each attribute, 775 * calculate dA/dx and dA/dy. Hand these interpolation coefficients 776 * back to SF which then hands pixels off to WM. 777 */ 778static drm_intel_bo *gen4_create_sf_state(intel_screen_private *intel, 779 drm_intel_bo * kernel_bo) 780{ 781 struct brw_sf_unit_state *sf_state; 782 drm_intel_bo *sf_state_bo; 783 int ret; 784 785 sf_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 SF state", 786 sizeof(*sf_state), 4096); 787 assert(sf_state_bo); 788 789 ret = drm_intel_bo_map(sf_state_bo, TRUE); 790 assert(ret == 0); 791 792 sf_state = memset(sf_state_bo->virtual, 0, sizeof(*sf_state)); 793 sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF); 794 sf_state->thread0.kernel_start_pointer = 795 intel_uxa_emit_reloc(sf_state_bo, 796 offsetof(struct brw_sf_unit_state, thread0), 797 kernel_bo, sf_state->thread0.grf_reg_count << 1, 798 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; 799 sf_state->sf1.single_program_flow = 1; 800 sf_state->sf1.binding_table_entry_count = 0; 801 sf_state->sf1.thread_priority = 0; 802 sf_state->sf1.floating_point_mode = 0; /* Mesa does this */ 803 sf_state->sf1.illegal_op_exception_enable = 1; 804 
sf_state->sf1.mask_stack_exception_enable = 1; 805 sf_state->sf1.sw_exception_enable = 1; 806 sf_state->thread2.per_thread_scratch_space = 0; 807 /* scratch space is not used in our kernel */ 808 sf_state->thread2.scratch_space_base_pointer = 0; 809 sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */ 810 sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ 811 sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ 812 /* don't smash vertex header, read start from dw8 */ 813 sf_state->thread3.urb_entry_read_offset = 1; 814 sf_state->thread3.dispatch_grf_start_reg = 3; 815 sf_state->thread4.max_threads = SF_MAX_THREADS - 1; 816 sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; 817 sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES; 818 sf_state->sf5.viewport_transform = FALSE; /* skip viewport */ 819 sf_state->sf6.cull_mode = BRW_CULLMODE_NONE; 820 sf_state->sf6.scissor = 0; 821 sf_state->sf7.trifan_pv = 2; 822 sf_state->sf6.dest_org_vbias = 0x8; 823 sf_state->sf6.dest_org_hbias = 0x8; 824 825 drm_intel_bo_unmap(sf_state_bo); 826 827 return sf_state_bo; 828 (void)ret; 829} 830 831static drm_intel_bo *sampler_border_color_create(intel_screen_private *intel) 832{ 833 struct brw_sampler_legacy_border_color sampler_border_color; 834 835 /* Set up the sampler border color (always transparent black) */ 836 memset(&sampler_border_color, 0, sizeof(sampler_border_color)); 837 sampler_border_color.color[0] = 0; /* R */ 838 sampler_border_color.color[1] = 0; /* G */ 839 sampler_border_color.color[2] = 0; /* B */ 840 sampler_border_color.color[3] = 0; /* A */ 841 842 return intel_uxa_bo_alloc_for_data(intel, 843 &sampler_border_color, 844 sizeof(sampler_border_color), 845 "gen4 render sampler border color"); 846} 847 848static void 849gen4_sampler_state_init(drm_intel_bo * sampler_state_bo, 850 struct brw_sampler_state *sampler_state, 851 sampler_state_filter_t filter, 852 sampler_state_extend_t extend, 853 
drm_intel_bo * border_color_bo) 854{ 855 uint32_t sampler_state_offset; 856 857 sampler_state_offset = (char *)sampler_state - 858 (char *)sampler_state_bo->virtual; 859 860 /* PS kernel use this sampler */ 861 memset(sampler_state, 0, sizeof(*sampler_state)); 862 863 sampler_state->ss0.lod_preclamp = 1; /* GL mode */ 864 865 /* We use the legacy mode to get the semantics specified by 866 * the Render extension. */ 867 sampler_state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY; 868 869 switch (filter) { 870 default: 871 case SS_FILTER_NEAREST: 872 sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST; 873 sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST; 874 break; 875 case SS_FILTER_BILINEAR: 876 sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; 877 sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; 878 break; 879 } 880 881 switch (extend) { 882 default: 883 case SS_EXTEND_NONE: 884 sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; 885 sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; 886 sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; 887 break; 888 case SS_EXTEND_REPEAT: 889 sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; 890 sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; 891 sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; 892 break; 893 case SS_EXTEND_PAD: 894 sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 895 sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 896 sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 897 break; 898 case SS_EXTEND_REFLECT: 899 sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 900 sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 901 sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 902 break; 903 } 904 905 sampler_state->ss2.border_color_pointer = 906 intel_uxa_emit_reloc(sampler_state_bo, sampler_state_offset + 907 offsetof(struct brw_sampler_state, ss2), 908 border_color_bo, 0, 909 
I915_GEM_DOMAIN_SAMPLER, 0) >> 5; 910 911 sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ 912} 913 914static void 915gen7_sampler_state_init(drm_intel_bo * sampler_state_bo, 916 struct gen7_sampler_state *sampler_state, 917 sampler_state_filter_t filter, 918 sampler_state_extend_t extend, 919 drm_intel_bo * border_color_bo) 920{ 921 uint32_t sampler_state_offset; 922 923 sampler_state_offset = (char *)sampler_state - 924 (char *)sampler_state_bo->virtual; 925 926 /* PS kernel use this sampler */ 927 memset(sampler_state, 0, sizeof(*sampler_state)); 928 929 sampler_state->ss0.lod_preclamp = 1; /* GL mode */ 930 931 /* We use the legacy mode to get the semantics specified by 932 * the Render extension. */ 933 sampler_state->ss0.default_color_mode = BRW_BORDER_COLOR_MODE_LEGACY; 934 935 switch (filter) { 936 default: 937 case SS_FILTER_NEAREST: 938 sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST; 939 sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST; 940 break; 941 case SS_FILTER_BILINEAR: 942 sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; 943 sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; 944 break; 945 } 946 947 switch (extend) { 948 default: 949 case SS_EXTEND_NONE: 950 sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; 951 sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; 952 sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; 953 break; 954 case SS_EXTEND_REPEAT: 955 sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; 956 sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; 957 sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; 958 break; 959 case SS_EXTEND_PAD: 960 sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 961 sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 962 sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 963 break; 964 case SS_EXTEND_REFLECT: 965 sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 966 
sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 967 sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 968 break; 969 } 970 971 sampler_state->ss2.default_color_pointer = 972 intel_uxa_emit_reloc(sampler_state_bo, sampler_state_offset + 973 offsetof(struct gen7_sampler_state, ss2), 974 border_color_bo, 0, 975 I915_GEM_DOMAIN_SAMPLER, 0) >> 5; 976 977 sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ 978} 979 980 981 982static drm_intel_bo *gen4_create_sampler_state(intel_screen_private *intel, 983 sampler_state_filter_t src_filter, 984 sampler_state_extend_t src_extend, 985 sampler_state_filter_t mask_filter, 986 sampler_state_extend_t mask_extend, 987 drm_intel_bo * border_color_bo) 988{ 989 drm_intel_bo *sampler_state_bo; 990 struct brw_sampler_state *sampler_state; 991 int ret; 992 993 sampler_state_bo = 994 drm_intel_bo_alloc(intel->bufmgr, "gen4 sampler state", 995 sizeof(struct brw_sampler_state) * 2, 4096); 996 assert(sampler_state_bo); 997 998 ret = drm_intel_bo_map(sampler_state_bo, TRUE); 999 assert(ret == 0); 1000 1001 sampler_state = sampler_state_bo->virtual; 1002 1003 gen4_sampler_state_init(sampler_state_bo, 1004 &sampler_state[0], 1005 src_filter, src_extend, border_color_bo); 1006 gen4_sampler_state_init(sampler_state_bo, 1007 &sampler_state[1], 1008 mask_filter, mask_extend, border_color_bo); 1009 1010 drm_intel_bo_unmap(sampler_state_bo); 1011 1012 return sampler_state_bo; 1013 (void)ret; 1014} 1015 1016static drm_intel_bo * 1017gen7_create_sampler_state(intel_screen_private *intel, 1018 sampler_state_filter_t src_filter, 1019 sampler_state_extend_t src_extend, 1020 sampler_state_filter_t mask_filter, 1021 sampler_state_extend_t mask_extend, 1022 drm_intel_bo * border_color_bo) 1023{ 1024 drm_intel_bo *sampler_state_bo; 1025 struct gen7_sampler_state *sampler_state; 1026 int ret; 1027 1028 sampler_state_bo = 1029 drm_intel_bo_alloc(intel->bufmgr, "gen7 sampler state", 1030 sizeof(struct gen7_sampler_state) * 2, 
4096); 1031 assert(sampler_state_bo); 1032 1033 ret = drm_intel_bo_map(sampler_state_bo, TRUE); 1034 assert(ret == 0); 1035 1036 sampler_state = sampler_state_bo->virtual; 1037 1038 gen7_sampler_state_init(sampler_state_bo, 1039 &sampler_state[0], 1040 src_filter, src_extend, border_color_bo); 1041 gen7_sampler_state_init(sampler_state_bo, 1042 &sampler_state[1], 1043 mask_filter, mask_extend, border_color_bo); 1044 1045 drm_intel_bo_unmap(sampler_state_bo); 1046 1047 return sampler_state_bo; 1048 (void)ret; 1049} 1050 1051static inline drm_intel_bo * 1052i965_create_sampler_state(intel_screen_private *intel, 1053 sampler_state_filter_t src_filter, 1054 sampler_state_extend_t src_extend, 1055 sampler_state_filter_t mask_filter, 1056 sampler_state_extend_t mask_extend, 1057 drm_intel_bo * border_color_bo) 1058{ 1059 if (INTEL_INFO(intel)->gen < 070) 1060 return gen4_create_sampler_state(intel, src_filter, src_extend, 1061 mask_filter, mask_extend, 1062 border_color_bo); 1063 return gen7_create_sampler_state(intel, src_filter, src_extend, 1064 mask_filter, mask_extend, 1065 border_color_bo); 1066} 1067 1068 1069static void 1070cc_state_init(drm_intel_bo * cc_state_bo, 1071 uint32_t cc_state_offset, 1072 int src_blend, int dst_blend, drm_intel_bo * cc_vp_bo) 1073{ 1074 struct brw_cc_unit_state *cc_state; 1075 1076 cc_state = (struct brw_cc_unit_state *)((char *)cc_state_bo->virtual + 1077 cc_state_offset); 1078 1079 memset(cc_state, 0, sizeof(*cc_state)); 1080 cc_state->cc0.stencil_enable = 0; /* disable stencil */ 1081 cc_state->cc2.depth_test = 0; /* disable depth test */ 1082 cc_state->cc2.logicop_enable = 0; /* disable logic op */ 1083 cc_state->cc3.ia_blend_enable = 0; /* blend alpha same as colors */ 1084 cc_state->cc3.blend_enable = 1; /* enable color blend */ 1085 cc_state->cc3.alpha_test = 0; /* disable alpha test */ 1086 1087 cc_state->cc4.cc_viewport_state_offset = 1088 intel_uxa_emit_reloc(cc_state_bo, cc_state_offset + 1089 offsetof(struct 
brw_cc_unit_state, cc4), 1090 cc_vp_bo, 0, I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; 1091 1092 cc_state->cc5.dither_enable = 0; /* disable dither */ 1093 cc_state->cc5.logicop_func = 0xc; /* COPY */ 1094 cc_state->cc5.statistics_enable = 1; 1095 cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD; 1096 1097 /* Fill in alpha blend factors same as color, for the future. */ 1098 cc_state->cc5.ia_src_blend_factor = src_blend; 1099 cc_state->cc5.ia_dest_blend_factor = dst_blend; 1100 1101 cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD; 1102 cc_state->cc6.clamp_post_alpha_blend = 1; 1103 cc_state->cc6.clamp_pre_alpha_blend = 1; 1104 cc_state->cc6.clamp_range = 0; /* clamp range [0,1] */ 1105 1106 cc_state->cc6.src_blend_factor = src_blend; 1107 cc_state->cc6.dest_blend_factor = dst_blend; 1108} 1109 1110static drm_intel_bo *gen4_create_wm_state(intel_screen_private *intel, 1111 Bool has_mask, 1112 drm_intel_bo * kernel_bo, 1113 drm_intel_bo * sampler_bo) 1114{ 1115 struct brw_wm_unit_state *state; 1116 drm_intel_bo *wm_state_bo; 1117 int ret; 1118 1119 wm_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 WM state", 1120 sizeof(*state), 4096); 1121 assert(wm_state_bo); 1122 1123 ret = drm_intel_bo_map(wm_state_bo, TRUE); 1124 assert(ret == 0); 1125 1126 state = memset(wm_state_bo->virtual, 0, sizeof(*state)); 1127 state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); 1128 state->thread0.kernel_start_pointer = 1129 intel_uxa_emit_reloc(wm_state_bo, 1130 offsetof(struct brw_wm_unit_state, thread0), 1131 kernel_bo, state->thread0.grf_reg_count << 1, 1132 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; 1133 1134 state->thread1.single_program_flow = 0; 1135 1136 /* scratch space is not used in our kernel */ 1137 state->thread2.scratch_space_base_pointer = 0; 1138 state->thread2.per_thread_scratch_space = 0; 1139 1140 state->thread3.const_urb_entry_read_length = 0; 1141 state->thread3.const_urb_entry_read_offset = 0; 1142 1143 state->thread3.urb_entry_read_offset = 0; 
1144 /* wm kernel use urb from 3, see wm_program in compiler module */ 1145 state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */ 1146 1147 if (IS_GEN5(intel)) 1148 state->wm4.sampler_count = 0; /* hardware requirement */ 1149 else 1150 state->wm4.sampler_count = 1; /* 1-4 samplers used */ 1151 1152 state->wm4.sampler_state_pointer = 1153 intel_uxa_emit_reloc(wm_state_bo, 1154 offsetof(struct brw_wm_unit_state, wm4), 1155 sampler_bo, 1156 state->wm4.sampler_count << 2, 1157 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; 1158 state->wm5.max_threads = PS_MAX_THREADS - 1; 1159 state->wm5.transposed_urb_read = 0; 1160 state->wm5.thread_dispatch_enable = 1; 1161 /* just use 16-pixel dispatch (4 subspans), don't need to change kernel 1162 * start point 1163 */ 1164 state->wm5.enable_16_pix = 1; 1165 state->wm5.enable_8_pix = 0; 1166 state->wm5.early_depth_test = 1; 1167 1168 /* Each pair of attributes (src/mask coords) is two URB entries */ 1169 if (has_mask) { 1170 state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */ 1171 state->thread3.urb_entry_read_length = 4; 1172 } else { 1173 state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ 1174 state->thread3.urb_entry_read_length = 2; 1175 } 1176 1177 /* binding table entry count is only used for prefetching, and it has to 1178 * be set 0 for Ironlake 1179 */ 1180 if (IS_GEN5(intel)) 1181 state->thread1.binding_table_entry_count = 0; 1182 1183 drm_intel_bo_unmap(wm_state_bo); 1184 1185 return wm_state_bo; 1186 (void)ret; 1187} 1188 1189static drm_intel_bo *gen4_create_cc_viewport(intel_screen_private *intel) 1190{ 1191 drm_intel_bo *bo; 1192 struct brw_cc_viewport vp; 1193 int ret; 1194 1195 vp.min_depth = -1.e35; 1196 vp.max_depth = 1.e35; 1197 1198 bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 render unit state", 1199 sizeof(vp), 4096); 1200 assert(bo); 1201 1202 ret = drm_intel_bo_subdata(bo, 0, sizeof(vp), &vp); 1203 assert(ret == 0); 1204 1205 return bo; 1206 (void)ret; 1207} 1208 1209static 
drm_intel_bo *gen4_create_vs_unit_state(intel_screen_private *intel) 1210{ 1211 struct brw_vs_unit_state vs_state; 1212 memset(&vs_state, 0, sizeof(vs_state)); 1213 1214 /* Set up the vertex shader to be disabled (passthrough) */ 1215 if (IS_GEN5(intel)) 1216 vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; /* hardware requirement */ 1217 else 1218 vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES; 1219 vs_state.thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; 1220 vs_state.vs6.vs_enable = 0; 1221 vs_state.vs6.vert_cache_disable = 1; 1222 1223 return intel_uxa_bo_alloc_for_data(intel, &vs_state, sizeof(vs_state), 1224 "gen4 render VS state"); 1225} 1226 1227/** 1228 * Set up all combinations of cc state: each blendfactor for source and 1229 * dest. 1230 */ 1231static drm_intel_bo *gen4_create_cc_unit_state(intel_screen_private *intel) 1232{ 1233 drm_intel_bo *cc_state_bo, *cc_vp_bo; 1234 int i, j, ret; 1235 1236 cc_vp_bo = gen4_create_cc_viewport(intel); 1237 1238 cc_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 CC state", 1239 sizeof(struct gen4_cc_unit_state), 1240 4096); 1241 assert(cc_state_bo); 1242 1243 ret = drm_intel_bo_map(cc_state_bo, TRUE); 1244 assert(ret == 0); 1245 1246 for (i = 0; i < BRW_BLENDFACTOR_COUNT; i++) { 1247 for (j = 0; j < BRW_BLENDFACTOR_COUNT; j++) { 1248 cc_state_init(cc_state_bo, 1249 offsetof(struct gen4_cc_unit_state, 1250 cc_state[i][j].state), 1251 i, j, cc_vp_bo); 1252 } 1253 } 1254 drm_intel_bo_unmap(cc_state_bo); 1255 1256 drm_intel_bo_unreference(cc_vp_bo); 1257 1258 return cc_state_bo; 1259 (void)ret; 1260} 1261 1262static uint32_t i965_get_card_format(PicturePtr picture) 1263{ 1264 unsigned i; 1265 1266 for (i = 0; i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]); 1267 i++) 1268 if (i965_tex_formats[i].fmt == picture->format) 1269 return i965_tex_formats[i].card_fmt; 1270 1271 assert(i != sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0])); 1272 1273 return 0; 1274} 1275 1276static 
sampler_state_filter_t sampler_state_filter_from_picture(int filter) 1277{ 1278 switch (filter) { 1279 case PictFilterNearest: 1280 return SS_FILTER_NEAREST; 1281 case PictFilterBilinear: 1282 return SS_FILTER_BILINEAR; 1283 default: 1284 return SS_INVALID_FILTER; 1285 } 1286} 1287 1288static sampler_state_extend_t sampler_state_extend_from_picture(int repeat_type) 1289{ 1290 switch (repeat_type) { 1291 case RepeatNone: 1292 return SS_EXTEND_NONE; 1293 case RepeatNormal: 1294 return SS_EXTEND_REPEAT; 1295 case RepeatPad: 1296 return SS_EXTEND_PAD; 1297 case RepeatReflect: 1298 return SS_EXTEND_REFLECT; 1299 default: 1300 return SS_INVALID_EXTEND; 1301 } 1302} 1303 1304/** 1305 * Sets up the common fields for a surface state buffer for the given 1306 * picture in the given surface state buffer. 1307 */ 1308static int 1309gen4_set_picture_surface_state(intel_screen_private *intel, 1310 PicturePtr picture, PixmapPtr pixmap, 1311 Bool is_dst) 1312{ 1313 struct intel_uxa_pixmap *priv = intel_uxa_get_pixmap_private(pixmap); 1314 struct brw_surface_state *ss; 1315 uint32_t write_domain, read_domains; 1316 int offset; 1317 1318 if (is_dst) { 1319 write_domain = I915_GEM_DOMAIN_RENDER; 1320 read_domains = I915_GEM_DOMAIN_RENDER; 1321 } else { 1322 write_domain = 0; 1323 read_domains = I915_GEM_DOMAIN_SAMPLER; 1324 } 1325 intel_batch_mark_pixmap_domains(intel, priv, 1326 read_domains, write_domain); 1327 ss = (struct brw_surface_state *) 1328 (intel->surface_data + intel->surface_used); 1329 1330 memset(ss, 0, sizeof(*ss)); 1331 ss->ss0.surface_type = BRW_SURFACE_2D; 1332 if (is_dst) 1333 ss->ss0.surface_format = i965_get_dest_format(picture); 1334 else 1335 ss->ss0.surface_format = i965_get_card_format(picture); 1336 1337 ss->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; 1338 ss->ss0.color_blend = 1; 1339 ss->ss1.base_addr = priv->bo->offset; 1340 1341 ss->ss2.height = pixmap->drawable.height - 1; 1342 ss->ss2.width = pixmap->drawable.width - 1; 1343 
ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1; 1344 ss->ss3.tile_walk = 0; /* Tiled X */ 1345 ss->ss3.tiled_surface = intel_uxa_pixmap_tiled(pixmap) ? 1 : 0; 1346 1347 dri_bo_emit_reloc(intel->surface_bo, 1348 read_domains, write_domain, 1349 0, 1350 intel->surface_used + 1351 offsetof(struct brw_surface_state, ss1), 1352 priv->bo); 1353 1354 offset = intel->surface_used; 1355 intel->surface_used += SURFACE_STATE_PADDED_SIZE; 1356 1357 return offset; 1358} 1359 1360static int 1361gen7_set_picture_surface_state(intel_screen_private *intel, 1362 PicturePtr picture, PixmapPtr pixmap, 1363 Bool is_dst) 1364{ 1365 struct intel_uxa_pixmap *priv = intel_uxa_get_pixmap_private(pixmap); 1366 struct gen7_surface_state *ss; 1367 uint32_t write_domain, read_domains; 1368 int offset; 1369 1370 if (is_dst) { 1371 write_domain = I915_GEM_DOMAIN_RENDER; 1372 read_domains = I915_GEM_DOMAIN_RENDER; 1373 } else { 1374 write_domain = 0; 1375 read_domains = I915_GEM_DOMAIN_SAMPLER; 1376 } 1377 intel_batch_mark_pixmap_domains(intel, priv, 1378 read_domains, write_domain); 1379 ss = (struct gen7_surface_state *) 1380 (intel->surface_data + intel->surface_used); 1381 1382 memset(ss, 0, sizeof(*ss)); 1383 ss->ss0.surface_type = BRW_SURFACE_2D; 1384 if (is_dst) 1385 ss->ss0.surface_format = i965_get_dest_format(picture); 1386 else 1387 ss->ss0.surface_format = i965_get_card_format(picture); 1388 1389 ss->ss0.tile_walk = 0; /* Tiled X */ 1390 ss->ss0.tiled_surface = intel_uxa_pixmap_tiled(pixmap) ? 
	    1 : 0;
	ss->ss1.base_addr = priv->bo->offset;

	ss->ss2.height = pixmap->drawable.height - 1;
	ss->ss2.width = pixmap->drawable.width - 1;
	ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1;

	if (IS_HSW(intel)) {
		/* Haswell: explicit identity shader channel selects. */
		ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
		ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
		ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
		ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
	}

	/* Relocation so ss1.base_addr is fixed up when the surface buffer
	 * is uploaded. */
	dri_bo_emit_reloc(intel->surface_bo,
			  read_domains, write_domain,
			  0,
			  intel->surface_used +
			  offsetof(struct gen7_surface_state, ss1),
			  priv->bo);

	offset = intel->surface_used;
	intel->surface_used += SURFACE_STATE_PADDED_SIZE;

	return offset;
}

/* Dispatch to the generation-specific surface-state writer. */
static inline int
i965_set_picture_surface_state(intel_screen_private *intel,
			       PicturePtr picture, PixmapPtr pixmap,
			       Bool is_dst)
{
	if (INTEL_INFO(intel)->gen < 070)
		return gen4_set_picture_surface_state(intel, picture, pixmap, is_dst);
	return gen7_set_picture_surface_state(intel, picture, pixmap, is_dst);
}

/* Emit 3DSTATE_VERTEX_ELEMENTS for the current composite operation,
 * keyed by (has_mask, is_affine); skipped if the cached vertex_id
 * already matches. */
static void gen4_composite_vertex_elements(struct intel_screen_private *intel)
{
	struct gen4_render_state *render_state = intel->gen4_render_state;
	gen4_composite_op *composite_op = &render_state->composite_op;
	Bool has_mask = intel->render_mask != NULL;
	Bool is_affine = composite_op->is_affine;
	/*
	 * number of extra parameters per vertex
	 */
	int nelem = has_mask ? 2 : 1;
	/*
	 * size of extra parameters:
	 * 3 for homogenous (xyzw)
	 * 2 for cartesian (xy)
	 */
	int selem = is_affine ? 2 : 3;
	uint32_t w_component;
	uint32_t src_format;
	int id;

	id = has_mask << 1 | is_affine;

	if (composite_op->vertex_id == id)
		return;

	composite_op->vertex_id = id;

	if (is_affine) {
		src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
		w_component = BRW_VFCOMPONENT_STORE_1_FLT;
	} else {
		src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
		w_component = BRW_VFCOMPONENT_STORE_SRC;
	}

	if (IS_GEN5(intel)) {
		/*
		 * The reason to add this extra vertex element in the header is that
		 * Ironlake has different vertex header definition and origin method to
		 * set destination element offset doesn't exist anymore, which means
		 * hardware requires a predefined vertex element layout.
		 *
		 * haihao proposed this approach to fill the first vertex element, so
		 * origin layout for Gen4 doesn't need to change, and origin shader
		 * programs behavior is also kept.
		 *
		 * I think this is not bad. - zhenyu
		 */

		OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
			  ((2 * (2 + nelem)) - 1));
		OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
			  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
			  (0 << VE0_OFFSET_SHIFT));

		OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
	} else {
		/* Set up our vertex elements, sourced from the single vertex
		 * buffer that will be set up later.
		 */
		OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
			  ((2 * (1 + nelem)) - 1));
	}

	/* x,y */
	OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
		  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
		  (0 << VE0_OFFSET_SHIFT));

	if (IS_GEN5(intel))
		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
	else
		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
			  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
	/* u0, v0, w0 */
	OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
		  (src_format << VE0_FORMAT_SHIFT) |
		  ((2 * 4) << VE0_OFFSET_SHIFT));	/* offset vb in bytes */

	if (IS_GEN5(intel))
		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
			  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
	else
		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
			  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
			  ((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));	/* VUE offset in dwords */
	/* u1, v1, w1 */
	if (has_mask) {
		OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
			  (src_format << VE0_FORMAT_SHIFT) |
			  (((2 + selem) * 4) << VE0_OFFSET_SHIFT));	/* vb offset in bytes */

		if (IS_GEN5(intel))
			OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
				  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
				  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
				  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
		else
			OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
				  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
				  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
				  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
				  ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));	/* VUE offset in dwords */
	}
}

/* Emit the full 3D pipeline setup for the current composite operation
 * into the batch buffer. */
static void i965_emit_composite_state(struct intel_screen_private *intel)
{
	struct gen4_render_state *render_state = intel->gen4_render_state;
	gen4_composite_op *composite_op = &render_state->composite_op;
	int op = composite_op->op;
	PicturePtr mask_picture = intel->render_mask_picture;
	PicturePtr dest_picture = intel->render_dest_picture;
	PixmapPtr mask = intel->render_mask;
	PixmapPtr dest = intel->render_dest;
	sampler_state_filter_t src_filter = composite_op->src_filter;
	sampler_state_filter_t mask_filter = composite_op->mask_filter;
	sampler_state_extend_t src_extend = composite_op->src_extend;
	sampler_state_extend_t mask_extend = composite_op->mask_extend;
	uint32_t src_blend, dst_blend;

	intel->needs_render_state_emit = FALSE;

	/* Begin the long sequence of commands needed to set up the 3D
	 * rendering pipe
	 */

	if (intel->needs_3d_invariant) {
		if (IS_GEN5(intel)) {
			/* Ironlake errata workaround: Before disabling the clipper,
			 * you have to MI_FLUSH to get the pipeline idle.
			 */
			OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
		}

		/* Match Mesa driver setup */
		if (INTEL_INFO(intel)->gen >= 045)
			OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
		else
			OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);

		/* Set system instruction pointer */
		OUT_BATCH(BRW_STATE_SIP | 0);
		OUT_BATCH(0);

		intel->needs_3d_invariant = FALSE;
	}

	if (intel->surface_reloc == 0) {
		/* Zero out the two base address registers so all offsets are
		 * absolute.
		 */
		if (IS_GEN5(intel)) {
			OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6);
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* Generate state base address */
			/* Remember where the surface base address dword lives so
			 * i965_surface_flush() can attach its relocation. */
			intel->surface_reloc = intel->batch_used;
			intel_batch_emit_dword(intel,
					       intel->surface_bo->offset | BASE_ADDRESS_MODIFY);
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* media base addr, don't care */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* Instruction base address */
			/* general state max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
			/* media object state max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
			/* Instruction max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
		} else {
			OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4);
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* Generate state base address */
			intel->surface_reloc = intel->batch_used;
			intel_batch_emit_dword(intel,
					       intel->surface_bo->offset | BASE_ADDRESS_MODIFY);
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* media base addr, don't care */
			/* general state max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
			/* media object state max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
		}
	}

	i965_get_blend_cntl(op, mask_picture, dest_picture->format,
			    &src_blend, &dst_blend);

	/* Binding table pointers */
	OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
	OUT_BATCH(0);	/* vs */
	OUT_BATCH(0);	/* gs */
	OUT_BATCH(0);	/* clip */
	OUT_BATCH(0);	/* sf */
	/* Only the PS uses the binding table */
	OUT_BATCH(intel->surface_table);

	/* The drawing rectangle clipping is always on.  Set it to values that
	 * shouldn't do any clipping.
	 */
	OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2);
	OUT_BATCH(0x00000000);	/* ymin, xmin */
	OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) |
		  DRAW_XMAX(dest->drawable.width - 1));	/* ymax, xmax */
	OUT_BATCH(0x00000000);	/* yorigin, xorigin */

	/* skip the depth buffer */
	/* skip the polygon stipple */
	/* skip the polygon stipple offset */
	/* skip the line stipple */

	/* Set the pointers to the 3d pipeline state */
	OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_RELOC(render_state->vs_state_bo,
		  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
	OUT_BATCH(BRW_GS_DISABLE);	/* disable GS, resulting in passthrough */
	OUT_BATCH(BRW_CLIP_DISABLE);	/* disable CLIP, resulting in passthrough */
	if (mask) {
		OUT_RELOC(render_state->sf_mask_state_bo,
			  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
	} else {
		OUT_RELOC(render_state->sf_state_bo,
			  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
	}

	/* WM state is selected by kernel and per-texture filter/extend. */
	OUT_RELOC(render_state->wm_state_bo[composite_op->wm_kernel]
		  [src_filter][src_extend]
		  [mask_filter][mask_extend],
		  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

	/* CC state element matching the operation's blend factor pair. */
	OUT_RELOC(render_state->cc_state_bo,
		  I915_GEM_DOMAIN_INSTRUCTION, 0,
		  offsetof(struct gen4_cc_unit_state,
			   cc_state[src_blend][dst_blend]));

	{
		int urb_vs_start, urb_vs_size;
		int urb_gs_start, urb_gs_size;
		int urb_clip_start, urb_clip_size;
		int urb_sf_start, urb_sf_size;
		int urb_cs_start, urb_cs_size;

		/* Carve the URB into back-to-back regions per fixed function. */
		urb_vs_start = 0;
		urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
		urb_gs_start = urb_vs_start + urb_vs_size;
		urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
		urb_clip_start = urb_gs_start + urb_gs_size;
		urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
		urb_sf_start = urb_clip_start + urb_clip_size;
		urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
		urb_cs_start = urb_sf_start + urb_sf_size;
		urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

		/* Erratum (Vol 1a, p32):
		 * URB_FENCE must not cross a cache-line (64 bytes).
		 */
		if ((intel->batch_used & 15) > (16 - 3)) {
			int cnt = 16 - (intel->batch_used & 15);
			while (cnt--)
				OUT_BATCH(MI_NOOP);
		}

		OUT_BATCH(BRW_URB_FENCE |
			  UF0_CS_REALLOC |
			  UF0_SF_REALLOC |
			  UF0_CLIP_REALLOC |
			  UF0_GS_REALLOC |
			  UF0_VS_REALLOC |
			  1);
		OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
			  ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
			  ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
		OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
			  ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));

		/* Constant buffer state */
		OUT_BATCH(BRW_CS_URB_STATE | 0);
		OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) |
			  (URB_CS_ENTRIES << 0));
	}

	gen4_composite_vertex_elements(intel);
}

/**
 * Returns whether the current set of composite state plus vertex buffer is
 * expected to fit in the aperture.
1722 */ 1723static Bool i965_composite_check_aperture(intel_screen_private *intel) 1724{ 1725 struct gen4_render_state *render_state = intel->gen4_render_state; 1726 gen4_composite_op *composite_op = &render_state->composite_op; 1727 drm_intel_bo *bo_table[] = { 1728 intel->batch_bo, 1729 intel->vertex_bo, 1730 intel->surface_bo, 1731 render_state->vs_state_bo, 1732 render_state->sf_state_bo, 1733 render_state->sf_mask_state_bo, 1734 render_state->wm_state_bo[composite_op->wm_kernel] 1735 [composite_op->src_filter] 1736 [composite_op->src_extend] 1737 [composite_op->mask_filter] 1738 [composite_op->mask_extend], 1739 render_state->cc_state_bo, 1740 }; 1741 drm_intel_bo *gen6_bo_table[] = { 1742 intel->batch_bo, 1743 intel->vertex_bo, 1744 intel->surface_bo, 1745 render_state->wm_kernel_bo[composite_op->wm_kernel], 1746 render_state->ps_sampler_state_bo[composite_op->src_filter] 1747 [composite_op->src_extend] 1748 [composite_op->mask_filter] 1749 [composite_op->mask_extend], 1750 render_state->cc_vp_bo, 1751 render_state->cc_state_bo, 1752 render_state->gen6_blend_bo, 1753 render_state->gen6_depth_stencil_bo, 1754 }; 1755 1756 if (INTEL_INFO(intel)->gen >= 060) 1757 return drm_intel_bufmgr_check_aperture_space(gen6_bo_table, 1758 ARRAY_SIZE(gen6_bo_table)) == 0; 1759 else 1760 return drm_intel_bufmgr_check_aperture_space(bo_table, 1761 ARRAY_SIZE(bo_table)) == 0; 1762} 1763 1764static void i965_surface_flush(struct intel_screen_private *intel) 1765{ 1766 int ret; 1767 1768 ret = drm_intel_bo_subdata(intel->surface_bo, 1769 0, intel->surface_used, 1770 intel->surface_data); 1771 assert(ret == 0); 1772 intel->surface_used = 0; 1773 1774 assert (intel->surface_reloc != 0); 1775 drm_intel_bo_emit_reloc(intel->batch_bo, 1776 intel->surface_reloc * 4, 1777 intel->surface_bo, BASE_ADDRESS_MODIFY, 1778 I915_GEM_DOMAIN_INSTRUCTION, 0); 1779 intel->surface_reloc = 0; 1780 1781 drm_intel_bo_unreference(intel->surface_bo); 1782 intel->surface_bo = 1783 
drm_intel_bo_alloc(intel->bufmgr, "surface data", 1784 sizeof(intel->surface_data), 4096); 1785 assert(intel->surface_bo); 1786 1787 return; 1788 (void)ret; 1789} 1790 1791static void 1792i965_emit_composite_primitive_identity_source(intel_screen_private *intel, 1793 int srcX, int srcY, 1794 int maskX, int maskY, 1795 int dstX, int dstY, 1796 int w, int h) 1797{ 1798 OUT_VERTEX(dstX + w); 1799 OUT_VERTEX(dstY + h); 1800 OUT_VERTEX((srcX + w) * intel->scale_units[0][0]); 1801 OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); 1802 1803 OUT_VERTEX(dstX); 1804 OUT_VERTEX(dstY + h); 1805 OUT_VERTEX(srcX * intel->scale_units[0][0]); 1806 OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); 1807 1808 OUT_VERTEX(dstX); 1809 OUT_VERTEX(dstY); 1810 OUT_VERTEX(srcX * intel->scale_units[0][0]); 1811 OUT_VERTEX(srcY * intel->scale_units[0][1]); 1812} 1813 1814static void 1815i965_emit_composite_primitive_affine_source(intel_screen_private *intel, 1816 int srcX, int srcY, 1817 int maskX, int maskY, 1818 int dstX, int dstY, 1819 int w, int h) 1820{ 1821 float src_x[3], src_y[3]; 1822 1823 if (!intel_uxa_get_transformed_coordinates(srcX, srcY, 1824 intel->transform[0], 1825 &src_x[0], 1826 &src_y[0])) 1827 return; 1828 1829 if (!intel_uxa_get_transformed_coordinates(srcX, srcY + h, 1830 intel->transform[0], 1831 &src_x[1], 1832 &src_y[1])) 1833 return; 1834 1835 if (!intel_uxa_get_transformed_coordinates(srcX + w, srcY + h, 1836 intel->transform[0], 1837 &src_x[2], 1838 &src_y[2])) 1839 return; 1840 1841 OUT_VERTEX(dstX + w); 1842 OUT_VERTEX(dstY + h); 1843 OUT_VERTEX(src_x[2] * intel->scale_units[0][0]); 1844 OUT_VERTEX(src_y[2] * intel->scale_units[0][1]); 1845 1846 OUT_VERTEX(dstX); 1847 OUT_VERTEX(dstY + h); 1848 OUT_VERTEX(src_x[1] * intel->scale_units[0][0]); 1849 OUT_VERTEX(src_y[1] * intel->scale_units[0][1]); 1850 1851 OUT_VERTEX(dstX); 1852 OUT_VERTEX(dstY); 1853 OUT_VERTEX(src_x[0] * intel->scale_units[0][0]); 1854 OUT_VERTEX(src_y[0] * intel->scale_units[0][1]); 1855} 

/* Emit one rectangle with untransformed source and mask textures. */
static void
i965_emit_composite_primitive_identity_source_mask(intel_screen_private *intel,
						   int srcX, int srcY,
						   int maskX, int maskY,
						   int dstX, int dstY,
						   int w, int h)
{
	OUT_VERTEX(dstX + w);
	OUT_VERTEX(dstY + h);
	OUT_VERTEX((srcX + w) * intel->scale_units[0][0]);
	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
	OUT_VERTEX((maskX + w) * intel->scale_units[1][0]);
	OUT_VERTEX((maskY + h) * intel->scale_units[1][1]);

	OUT_VERTEX(dstX);
	OUT_VERTEX(dstY + h);
	OUT_VERTEX(srcX * intel->scale_units[0][0]);
	OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
	OUT_VERTEX(maskX * intel->scale_units[1][0]);
	OUT_VERTEX((maskY + h) * intel->scale_units[1][1]);

	OUT_VERTEX(dstX);
	OUT_VERTEX(dstY);
	OUT_VERTEX(srcX * intel->scale_units[0][0]);
	OUT_VERTEX(srcY * intel->scale_units[0][1]);
	OUT_VERTEX(maskX * intel->scale_units[1][0]);
	OUT_VERTEX(maskY * intel->scale_units[1][1]);
}

/* General-case rectangle emission: handles affine or projective source
 * (and optionally mask) transforms; silently drops the rectangle when
 * any coordinate transform fails. */
static void
i965_emit_composite_primitive(intel_screen_private *intel,
			      int srcX, int srcY,
			      int maskX, int maskY,
			      int dstX, int dstY,
			      int w, int h)
{
	float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
	Bool is_affine = intel->gen4_render_state->composite_op.is_affine;

	if (is_affine) {
		if (!intel_uxa_get_transformed_coordinates(srcX, srcY,
							   intel->transform[0],
							   &src_x[0],
							   &src_y[0]))
			return;

		if (!intel_uxa_get_transformed_coordinates(srcX, srcY + h,
							   intel->transform[0],
							   &src_x[1],
							   &src_y[1]))
			return;

		if (!intel_uxa_get_transformed_coordinates(srcX + w, srcY + h,
							   intel->transform[0],
							   &src_x[2],
							   &src_y[2]))
			return;
	} else {
		/* Projective transform: also need the w coordinate. */
		if (!intel_uxa_get_transformed_coordinates_3d(srcX, srcY,
							      intel->transform[0],
							      &src_x[0],
							      &src_y[0],
							      &src_w[0]))
			return;

		if (!intel_uxa_get_transformed_coordinates_3d(srcX, srcY + h,
							      intel->transform[0],
							      &src_x[1],
							      &src_y[1],
							      &src_w[1]))
			return;

		if (!intel_uxa_get_transformed_coordinates_3d(srcX + w, srcY + h,
							      intel->transform[0],
							      &src_x[2],
							      &src_y[2],
							      &src_w[2]))
			return;
	}

	if (intel->render_mask) {
		if (is_affine) {
			if (!intel_uxa_get_transformed_coordinates(maskX, maskY,
								   intel->transform[1],
								   &mask_x[0],
								   &mask_y[0]))
				return;

			if (!intel_uxa_get_transformed_coordinates(maskX, maskY + h,
								   intel->transform[1],
								   &mask_x[1],
								   &mask_y[1]))
				return;

			if (!intel_uxa_get_transformed_coordinates(maskX + w, maskY + h,
								   intel->transform[1],
								   &mask_x[2],
								   &mask_y[2]))
				return;
		} else {
			if (!intel_uxa_get_transformed_coordinates_3d(maskX, maskY,
								      intel->transform[1],
								      &mask_x[0],
								      &mask_y[0],
								      &mask_w[0]))
				return;

			if (!intel_uxa_get_transformed_coordinates_3d(maskX, maskY + h,
								      intel->transform[1],
								      &mask_x[1],
								      &mask_y[1],
								      &mask_w[1]))
				return;

			if (!intel_uxa_get_transformed_coordinates_3d(maskX + w, maskY + h,
								      intel->transform[1],
								      &mask_x[2],
								      &mask_y[2],
								      &mask_w[2]))
				return;
		}
	}

	/* Vertex layout must match gen4_composite_vertex_elements():
	 * x, y, then src (u, v[, w]), then optional mask (u, v[, w]). */
	OUT_VERTEX(dstX + w);
	OUT_VERTEX(dstY + h);
	OUT_VERTEX(src_x[2] * intel->scale_units[0][0]);
	OUT_VERTEX(src_y[2] * intel->scale_units[0][1]);
	if (!is_affine)
		OUT_VERTEX(src_w[2]);
	if (intel->render_mask) {
		OUT_VERTEX(mask_x[2] * intel->scale_units[1][0]);
		OUT_VERTEX(mask_y[2] * intel->scale_units[1][1]);
		if (!is_affine)
			OUT_VERTEX(mask_w[2]);
	}

	OUT_VERTEX(dstX);
	OUT_VERTEX(dstY + h);
	OUT_VERTEX(src_x[1] * intel->scale_units[0][0]);
	OUT_VERTEX(src_y[1] * intel->scale_units[0][1]);
	if (!is_affine)
		OUT_VERTEX(src_w[1]);
	if (intel->render_mask) {
		OUT_VERTEX(mask_x[1] * intel->scale_units[1][0]);
		OUT_VERTEX(mask_y[1] * intel->scale_units[1][1]);
		if (!is_affine)
			OUT_VERTEX(mask_w[1]);
	}

	OUT_VERTEX(dstX);
	OUT_VERTEX(dstY);
	OUT_VERTEX(src_x[0] * intel->scale_units[0][0]);
	OUT_VERTEX(src_y[0] * intel->scale_units[0][1]);
	if (!is_affine)
		OUT_VERTEX(src_w[0]);
	if (intel->render_mask) {
		OUT_VERTEX(mask_x[0] * intel->scale_units[1][0]);
		OUT_VERTEX(mask_y[0] * intel->scale_units[1][1]);
		if (!is_affine)
			OUT_VERTEX(mask_w[0]);
	}
}

Bool
i965_prepare_composite(int op, PicturePtr source_picture,
		       PicturePtr mask_picture, PicturePtr dest_picture,
		       PixmapPtr source, PixmapPtr mask, PixmapPtr dest)
{
	ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen);
	intel_screen_private *intel = intel_get_screen_private(scrn);
	struct gen4_render_state *render_state = intel->gen4_render_state;
	gen4_composite_op *composite_op = &render_state->composite_op;

	composite_op->src_filter =
	    sampler_state_filter_from_picture(source_picture->filter);
	if (composite_op->src_filter == SS_INVALID_FILTER) {
		intel_uxa_debug_fallback(scrn, "Bad src filter 0x%x\n",
					 source_picture->filter);
		return FALSE;
	}
	composite_op->src_extend =
	    sampler_state_extend_from_picture(source_picture->repeatType);
	if (composite_op->src_extend == SS_INVALID_EXTEND) {
		intel_uxa_debug_fallback(scrn, "Bad src repeat 0x%x\n",
					 source_picture->repeatType);
		return FALSE;
	}

	if (mask_picture) {
		if (mask_picture->componentAlpha &&
		    PICT_FORMAT_RGB(mask_picture->format)) {
			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value. We can only get one of those into the single
			 * source value that we get to blend with.
2051 */ 2052 if (i965_blend_op[op].src_alpha && 2053 (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) { 2054 intel_uxa_debug_fallback(scrn, 2055 "Component alpha not supported " 2056 "with source alpha and source " 2057 "value blending.\n"); 2058 return FALSE; 2059 } 2060 } 2061 2062 composite_op->mask_filter = 2063 sampler_state_filter_from_picture(mask_picture->filter); 2064 if (composite_op->mask_filter == SS_INVALID_FILTER) { 2065 intel_uxa_debug_fallback(scrn, "Bad mask filter 0x%x\n", 2066 mask_picture->filter); 2067 return FALSE; 2068 } 2069 composite_op->mask_extend = 2070 sampler_state_extend_from_picture(mask_picture->repeatType); 2071 if (composite_op->mask_extend == SS_INVALID_EXTEND) { 2072 intel_uxa_debug_fallback(scrn, "Bad mask repeat 0x%x\n", 2073 mask_picture->repeatType); 2074 return FALSE; 2075 } 2076 } else { 2077 composite_op->mask_filter = SS_FILTER_NEAREST; 2078 composite_op->mask_extend = SS_EXTEND_NONE; 2079 } 2080 2081 /* Flush any pending writes prior to relocating the textures. */ 2082 if (intel_uxa_pixmap_is_dirty(source) || intel_uxa_pixmap_is_dirty(mask)) 2083 intel_batch_emit_flush(scrn); 2084 2085 composite_op->op = op; 2086 intel->render_source_picture = source_picture; 2087 intel->render_mask_picture = mask_picture; 2088 intel->render_dest_picture = dest_picture; 2089 intel->render_source = source; 2090 intel->render_mask = mask; 2091 intel->render_dest = dest; 2092 2093 intel->scale_units[0][0] = 1. / source->drawable.width; 2094 intel->scale_units[0][1] = 1. / source->drawable.height; 2095 2096 intel->transform[0] = source_picture->transform; 2097 composite_op->is_affine = intel_uxa_transform_is_affine(intel->transform[0]); 2098 2099 if (mask_picture == NULL) { 2100 intel->transform[1] = NULL; 2101 intel->scale_units[1][0] = -1; 2102 intel->scale_units[1][1] = -1; 2103 } else { 2104 assert(mask != NULL); 2105 intel->transform[1] = mask_picture->transform; 2106 intel->scale_units[1][0] = 1. 
/ mask->drawable.width; 2107 intel->scale_units[1][1] = 1. / mask->drawable.height; 2108 composite_op->is_affine &= 2109 intel_uxa_transform_is_affine(intel->transform[1]); 2110 } 2111 2112 if (mask) { 2113 assert(mask_picture != NULL); 2114 if (mask_picture->componentAlpha && 2115 PICT_FORMAT_RGB(mask_picture->format)) { 2116 if (i965_blend_op[op].src_alpha) { 2117 if (composite_op->is_affine) 2118 composite_op->wm_kernel = 2119 WM_KERNEL_MASKCA_SRCALPHA_AFFINE; 2120 else 2121 composite_op->wm_kernel = 2122 WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE; 2123 } else { 2124 if (composite_op->is_affine) 2125 composite_op->wm_kernel = 2126 WM_KERNEL_MASKCA_AFFINE; 2127 else 2128 composite_op->wm_kernel = 2129 WM_KERNEL_MASKCA_PROJECTIVE; 2130 } 2131 } else { 2132 if (composite_op->is_affine) 2133 composite_op->wm_kernel = 2134 WM_KERNEL_MASKNOCA_AFFINE; 2135 else 2136 composite_op->wm_kernel = 2137 WM_KERNEL_MASKNOCA_PROJECTIVE; 2138 } 2139 } else { 2140 if (composite_op->is_affine) 2141 composite_op->wm_kernel = WM_KERNEL_NOMASK_AFFINE; 2142 else 2143 composite_op->wm_kernel = WM_KERNEL_NOMASK_PROJECTIVE; 2144 } 2145 2146 intel->prim_emit = i965_emit_composite_primitive; 2147 if (!mask) { 2148 if (intel->transform[0] == NULL) 2149 intel->prim_emit = i965_emit_composite_primitive_identity_source; 2150 else if (composite_op->is_affine) 2151 intel->prim_emit = i965_emit_composite_primitive_affine_source; 2152 } else { 2153 if (intel->transform[0] == NULL && intel->transform[1] == NULL) 2154 intel->prim_emit = i965_emit_composite_primitive_identity_source_mask; 2155 } 2156 2157 intel->floats_per_vertex = 2158 2 + (mask ? 2 : 1) * (composite_op->is_affine ? 
2: 3); 2159 2160 if (!i965_composite_check_aperture(intel)) { 2161 intel_batch_submit(scrn); 2162 if (!i965_composite_check_aperture(intel)) { 2163 intel_uxa_debug_fallback(scrn, 2164 "Couldn't fit render operation " 2165 "in aperture\n"); 2166 return FALSE; 2167 } 2168 } 2169 2170 if (sizeof(intel->surface_data) - intel->surface_used < 2171 4 * SURFACE_STATE_PADDED_SIZE) 2172 i965_surface_flush(intel); 2173 2174 intel->needs_render_state_emit = TRUE; 2175 2176 return TRUE; 2177} 2178 2179static void i965_select_vertex_buffer(struct intel_screen_private *intel) 2180{ 2181 int id = intel->gen4_render_state->composite_op.vertex_id; 2182 int modifyenable = 0; 2183 2184 if (intel->vertex_id & (1 << id)) 2185 return; 2186 2187 if (INTEL_INFO(intel)->gen >= 070) 2188 modifyenable = GEN7_VB0_ADDRESS_MODIFYENABLE; 2189 2190 /* Set up the pointer to our (single) vertex buffer */ 2191 OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3); 2192 2193 /* XXX could use multiple vbo to reduce relocations if 2194 * frequently switching between vertex sizes, like rgb10text. 
2195 */ 2196 if (INTEL_INFO(intel)->gen >= 060) { 2197 OUT_BATCH((id << GEN6_VB0_BUFFER_INDEX_SHIFT) | 2198 GEN6_VB0_VERTEXDATA | 2199 modifyenable | 2200 (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); 2201 } else { 2202 OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | 2203 VB0_VERTEXDATA | 2204 (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); 2205 } 2206 OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); 2207 if (INTEL_INFO(intel)->gen >= 050) 2208 OUT_RELOC(intel->vertex_bo, 2209 I915_GEM_DOMAIN_VERTEX, 0, 2210 sizeof(intel->vertex_ptr) - 1); 2211 else 2212 OUT_BATCH(0); 2213 OUT_BATCH(0); // ignore for VERTEXDATA, but still there 2214 2215 intel->vertex_id |= 1 << id; 2216} 2217 2218static void i965_bind_surfaces(struct intel_screen_private *intel) 2219{ 2220 uint32_t *binding_table; 2221 2222 assert(intel->surface_used + 4 * SURFACE_STATE_PADDED_SIZE <= sizeof(intel->surface_data)); 2223 2224 binding_table = (uint32_t*) (intel->surface_data + intel->surface_used); 2225 intel->surface_table = intel->surface_used; 2226 intel->surface_used += SURFACE_STATE_PADDED_SIZE; 2227 2228 binding_table[0] = 2229 i965_set_picture_surface_state(intel, 2230 intel->render_dest_picture, 2231 intel->render_dest, 2232 TRUE); 2233 binding_table[1] = 2234 i965_set_picture_surface_state(intel, 2235 intel->render_source_picture, 2236 intel->render_source, 2237 FALSE); 2238 if (intel->render_mask) { 2239 binding_table[2] = 2240 i965_set_picture_surface_state(intel, 2241 intel->render_mask_picture, 2242 intel->render_mask, 2243 FALSE); 2244 } 2245} 2246 2247void 2248i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, 2249 int dstX, int dstY, int w, int h) 2250{ 2251 ScrnInfoPtr scrn = xf86ScreenToScrn(dest->drawable.pScreen); 2252 intel_screen_private *intel = intel_get_screen_private(scrn); 2253 2254 intel_batch_start_atomic(scrn, 200); 2255 if (intel->needs_render_state_emit) { 2256 i965_bind_surfaces(intel); 2257 2258 if 
(INTEL_INFO(intel)->gen >= 060) 2259 gen6_emit_composite_state(intel); 2260 else 2261 i965_emit_composite_state(intel); 2262 } 2263 2264 if (intel->floats_per_vertex != intel->last_floats_per_vertex) { 2265 intel->vertex_index = (intel->vertex_used + intel->floats_per_vertex - 1) / intel->floats_per_vertex; 2266 intel->vertex_used = intel->vertex_index * intel->floats_per_vertex; 2267 intel->last_floats_per_vertex = intel->floats_per_vertex; 2268 } 2269 if (intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) { 2270 i965_vertex_flush(intel); 2271 intel_next_vertex(intel); 2272 intel->vertex_index = 0; 2273 } 2274 i965_select_vertex_buffer(intel); 2275 2276 if (intel->vertex_offset == 0) { 2277 if (INTEL_INFO(intel)->gen >= 070) { 2278 OUT_BATCH(BRW_3DPRIMITIVE | (7 - 2)); 2279 OUT_BATCH(BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | 2280 _3DPRIM_RECTLIST); 2281 } else { 2282 OUT_BATCH(BRW_3DPRIMITIVE | 2283 BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | 2284 (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | 2285 (0 << 9) | 2286 4); 2287 } 2288 intel->vertex_offset = intel->batch_used; 2289 OUT_BATCH(0); /* vertex count, to be filled in later */ 2290 OUT_BATCH(intel->vertex_index); 2291 OUT_BATCH(1); /* single instance */ 2292 OUT_BATCH(0); /* start instance location */ 2293 OUT_BATCH(0); /* index buffer offset, ignored */ 2294 intel->vertex_count = intel->vertex_index; 2295 } 2296 2297 intel->prim_emit(intel, 2298 srcX, srcY, 2299 maskX, maskY, 2300 dstX, dstY, 2301 w, h); 2302 intel->vertex_index += 3; 2303 2304 if (INTEL_INFO(intel)->gen < 050) { 2305 /* XXX OMG! 
*/ 2306 i965_vertex_flush(intel); 2307 OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); 2308 } 2309 2310 intel_batch_end_atomic(scrn); 2311} 2312 2313void i965_batch_commit_notify(intel_screen_private *intel) 2314{ 2315 intel->needs_render_state_emit = TRUE; 2316 intel->needs_3d_invariant = TRUE; 2317 intel->last_floats_per_vertex = 0; 2318 intel->vertex_index = 0; 2319 2320 intel->gen4_render_state->composite_op.vertex_id = -1; 2321 2322 intel->gen6_render_state.num_sf_outputs = 0; 2323 intel->gen6_render_state.samplers = NULL; 2324 intel->gen6_render_state.blend = -1; 2325 intel->gen6_render_state.kernel = NULL; 2326 intel->gen6_render_state.drawrect = -1; 2327 2328 assert(intel->surface_reloc == 0); 2329} 2330 2331/** 2332 * Called at EnterVT so we can set up our offsets into the state buffer. 2333 */ 2334void gen4_render_state_init(ScrnInfoPtr scrn) 2335{ 2336 intel_screen_private *intel = intel_get_screen_private(scrn); 2337 struct gen4_render_state *render; 2338 const struct wm_kernel_info *wm_kernels; 2339 sampler_state_filter_t src_filter; 2340 sampler_state_extend_t src_extend; 2341 sampler_state_filter_t mask_filter; 2342 sampler_state_extend_t mask_extend; 2343 drm_intel_bo *sf_kernel_bo, *sf_kernel_mask_bo; 2344 drm_intel_bo *border_color_bo; 2345 int m; 2346 2347 intel->needs_3d_invariant = TRUE; 2348 2349 intel->surface_bo = 2350 drm_intel_bo_alloc(intel->bufmgr, "surface data", 2351 sizeof(intel->surface_data), 4096); 2352 assert(intel->surface_bo); 2353 2354 intel->surface_used = 0; 2355 2356 if (intel->gen4_render_state == NULL) { 2357 intel->gen4_render_state = calloc(1, sizeof(*render)); 2358 assert(intel->gen4_render_state != NULL); 2359 } 2360 2361 if (INTEL_INFO(intel)->gen >= 060) 2362 return gen6_render_state_init(scrn); 2363 2364 render = intel->gen4_render_state; 2365 render->composite_op.vertex_id = -1; 2366 2367 render->vs_state_bo = gen4_create_vs_unit_state(intel); 2368 2369 /* Set up the two SF states (one for blending with a 
mask, one without) */ 2370 if (IS_GEN5(intel)) { 2371 sf_kernel_bo = intel_uxa_bo_alloc_for_data(intel, 2372 sf_kernel_static_gen5, 2373 sizeof 2374 (sf_kernel_static_gen5), 2375 "sf kernel gen5"); 2376 sf_kernel_mask_bo = 2377 intel_uxa_bo_alloc_for_data(intel, sf_kernel_mask_static_gen5, 2378 sizeof(sf_kernel_mask_static_gen5), 2379 "sf mask kernel"); 2380 } else { 2381 sf_kernel_bo = intel_uxa_bo_alloc_for_data(intel, 2382 sf_kernel_static, 2383 sizeof(sf_kernel_static), 2384 "sf kernel"); 2385 sf_kernel_mask_bo = intel_uxa_bo_alloc_for_data(intel, 2386 sf_kernel_mask_static, 2387 sizeof 2388 (sf_kernel_mask_static), 2389 "sf mask kernel"); 2390 } 2391 render->sf_state_bo = gen4_create_sf_state(intel, sf_kernel_bo); 2392 render->sf_mask_state_bo = gen4_create_sf_state(intel, sf_kernel_mask_bo); 2393 drm_intel_bo_unreference(sf_kernel_bo); 2394 drm_intel_bo_unreference(sf_kernel_mask_bo); 2395 2396 wm_kernels = IS_GEN5(intel) ? wm_kernels_gen5 : wm_kernels_gen4; 2397 for (m = 0; m < KERNEL_COUNT; m++) { 2398 render->wm_kernel_bo[m] = 2399 intel_uxa_bo_alloc_for_data(intel, 2400 wm_kernels[m].data, 2401 wm_kernels[m].size, 2402 "WM kernel"); 2403 } 2404 2405 /* Set up the WM states: each filter/extend type for source and mask, per 2406 * kernel. 2407 */ 2408 border_color_bo = sampler_border_color_create(intel); 2409 for (src_filter = 0; src_filter < FILTER_COUNT; src_filter++) { 2410 for (src_extend = 0; src_extend < EXTEND_COUNT; src_extend++) { 2411 for (mask_filter = 0; mask_filter < FILTER_COUNT; mask_filter++) { 2412 for (mask_extend = 0; mask_extend < EXTEND_COUNT; mask_extend++) { 2413 drm_intel_bo *sampler_state_bo; 2414 2415 sampler_state_bo = 2416 i965_create_sampler_state(intel, 2417 src_filter, src_extend, 2418 mask_filter, mask_extend, 2419 border_color_bo); 2420 2421 for (m = 0; m < KERNEL_COUNT; m++) { 2422 render->wm_state_bo[m][src_filter][src_extend][mask_filter][mask_extend] = 2423 gen4_create_wm_state 2424 (intel, 2425 wm_kernels[m]. 
has_mask, 2426 render->wm_kernel_bo[m], 2427 sampler_state_bo); 2428 } 2429 drm_intel_bo_unreference(sampler_state_bo); 2430 } 2431 } 2432 } 2433 } 2434 drm_intel_bo_unreference(border_color_bo); 2435 2436 render->cc_state_bo = gen4_create_cc_unit_state(intel); 2437} 2438 2439/** 2440 * Called at LeaveVT. 2441 */ 2442void gen4_render_state_cleanup(ScrnInfoPtr scrn) 2443{ 2444 intel_screen_private *intel = intel_get_screen_private(scrn); 2445 struct gen4_render_state *render_state = intel->gen4_render_state; 2446 int i, j, k, l, m; 2447 2448 drm_intel_bo_unreference(intel->surface_bo); 2449 drm_intel_bo_unreference(render_state->vs_state_bo); 2450 drm_intel_bo_unreference(render_state->sf_state_bo); 2451 drm_intel_bo_unreference(render_state->sf_mask_state_bo); 2452 2453 for (i = 0; i < KERNEL_COUNT; i++) 2454 drm_intel_bo_unreference(render_state->wm_kernel_bo[i]); 2455 2456 for (i = 0; i < FILTER_COUNT; i++) 2457 for (j = 0; j < EXTEND_COUNT; j++) 2458 for (k = 0; k < FILTER_COUNT; k++) 2459 for (l = 0; l < EXTEND_COUNT; l++) 2460 for (m = 0; m < KERNEL_COUNT; m++) 2461 drm_intel_bo_unreference 2462 (render_state-> 2463 wm_state_bo[m][i][j][k] 2464 [l]); 2465 2466 for (i = 0; i < FILTER_COUNT; i++) 2467 for (j = 0; j < EXTEND_COUNT; j++) 2468 for (k = 0; k < FILTER_COUNT; k++) 2469 for (l = 0; l < EXTEND_COUNT; l++) 2470 drm_intel_bo_unreference(render_state->ps_sampler_state_bo[i][j][k][l]); 2471 2472 drm_intel_bo_unreference(render_state->cc_state_bo); 2473 2474 drm_intel_bo_unreference(render_state->cc_vp_bo); 2475 drm_intel_bo_unreference(render_state->gen6_blend_bo); 2476 drm_intel_bo_unreference(render_state->gen6_depth_stencil_bo); 2477 2478 free(intel->gen4_render_state); 2479 intel->gen4_render_state = NULL; 2480} 2481 2482/* 2483 * for GEN6+ 2484 */ 2485#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64) 2486 2487static drm_intel_bo * 2488gen6_composite_create_cc_state(intel_screen_private *intel) 2489{ 2490 struct 
gen6_color_calc_state *state; 2491 drm_intel_bo *cc_bo; 2492 int ret; 2493 2494 cc_bo = drm_intel_bo_alloc(intel->bufmgr, 2495 "gen6 CC state", 2496 sizeof(*state), 2497 4096); 2498 assert(cc_bo); 2499 2500 ret = drm_intel_bo_map(cc_bo, TRUE); 2501 assert(ret == 0); 2502 2503 state = memset(cc_bo->virtual, 0, sizeof(*state)); 2504 state->constant_r = 1.0; 2505 state->constant_g = 0.0; 2506 state->constant_b = 1.0; 2507 state->constant_a = 1.0; 2508 drm_intel_bo_unmap(cc_bo); 2509 2510 return cc_bo; 2511 (void)ret; 2512} 2513 2514static drm_intel_bo * 2515gen6_composite_create_blend_state(intel_screen_private *intel) 2516{ 2517 drm_intel_bo *blend_bo; 2518 int src, dst, ret; 2519 2520 blend_bo = drm_intel_bo_alloc(intel->bufmgr, 2521 "gen6 BLEND state", 2522 BRW_BLENDFACTOR_COUNT * BRW_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE, 2523 4096); 2524 assert(blend_bo); 2525 2526 ret = drm_intel_bo_map(blend_bo, TRUE); 2527 assert(ret == 0); 2528 2529 memset(blend_bo->virtual, 0, blend_bo->size); 2530 for (src = 0; src < BRW_BLENDFACTOR_COUNT; src++) { 2531 for (dst = 0; dst < BRW_BLENDFACTOR_COUNT; dst++) { 2532 uint32_t blend_state_offset = (src * BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE; 2533 struct gen6_blend_state *blend; 2534 2535 blend = (struct gen6_blend_state *)((char *)blend_bo->virtual + blend_state_offset); 2536 blend->blend0.dest_blend_factor = dst; 2537 blend->blend0.source_blend_factor = src; 2538 blend->blend0.blend_func = BRW_BLENDFUNCTION_ADD; 2539 blend->blend0.blend_enable = 1; 2540 2541 blend->blend1.post_blend_clamp_enable = 1; 2542 blend->blend1.pre_blend_clamp_enable = 1; 2543 } 2544 } 2545 2546 drm_intel_bo_unmap(blend_bo); 2547 return blend_bo; 2548 (void)ret; 2549} 2550 2551static drm_intel_bo * 2552gen6_composite_create_depth_stencil_state(intel_screen_private *intel) 2553{ 2554 drm_intel_bo *depth_stencil_bo; 2555 int ret; 2556 2557 depth_stencil_bo = 2558 drm_intel_bo_alloc(intel->bufmgr, 2559 "gen6 DEPTH_STENCIL 
state", 2560 sizeof(struct gen6_depth_stencil_state), 2561 4096); 2562 assert(depth_stencil_bo); 2563 2564 ret = drm_intel_bo_map(depth_stencil_bo, TRUE); 2565 assert(ret == 0); 2566 2567 memset(depth_stencil_bo->virtual, 0, 2568 sizeof(struct gen6_depth_stencil_state)); 2569 drm_intel_bo_unmap(depth_stencil_bo); 2570 2571 return depth_stencil_bo; 2572 (void)ret; 2573} 2574 2575static void 2576gen6_composite_state_base_address(intel_screen_private *intel) 2577{ 2578 OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2)); 2579 OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */ 2580 intel->surface_reloc = intel->batch_used; 2581 intel_batch_emit_dword(intel, 2582 intel->surface_bo->offset | BASE_ADDRESS_MODIFY); 2583 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */ 2584 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */ 2585 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */ 2586 OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state upper bound */ 2587 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ 2588 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ 2589 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ 2590} 2591 2592static void 2593gen6_composite_cc_state_pointers(intel_screen_private *intel, 2594 uint32_t blend_offset) 2595{ 2596 struct gen4_render_state *render_state = intel->gen4_render_state; 2597 drm_intel_bo *cc_bo = NULL; 2598 drm_intel_bo *depth_stencil_bo = NULL; 2599 2600 if (intel->gen6_render_state.blend == blend_offset) 2601 return; 2602 2603 if (intel->gen6_render_state.blend == -1) { 2604 cc_bo = render_state->cc_state_bo; 2605 depth_stencil_bo = render_state->gen6_depth_stencil_bo; 2606 } 2607 if (INTEL_INFO(intel)->gen >= 070) { 2608 gen7_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset); 2609 } else { 2610 gen6_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, 
blend_offset); 2611 } 2612 2613 intel->gen6_render_state.blend = blend_offset; 2614} 2615 2616static void 2617gen6_composite_sampler_state_pointers(intel_screen_private *intel, 2618 drm_intel_bo *bo) 2619{ 2620 if (intel->gen6_render_state.samplers == bo) 2621 return; 2622 2623 intel->gen6_render_state.samplers = bo; 2624 2625 if (INTEL_INFO(intel)->gen >= 070) 2626 gen7_upload_sampler_state_pointers(intel, bo); 2627 else 2628 gen6_upload_sampler_state_pointers(intel, bo); 2629} 2630 2631static void 2632gen6_composite_wm_constants(intel_screen_private *intel) 2633{ 2634 Bool ivb = INTEL_INFO(intel)->gen >= 070; 2635 /* disable WM constant buffer */ 2636 OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | ((ivb ? 7 : 5) - 2)); 2637 OUT_BATCH(0); 2638 OUT_BATCH(0); 2639 OUT_BATCH(0); 2640 OUT_BATCH(0); 2641 if (ivb) { 2642 OUT_BATCH(0); 2643 OUT_BATCH(0); 2644 } 2645} 2646 2647static void 2648gen6_composite_sf_state(intel_screen_private *intel, 2649 Bool has_mask) 2650{ 2651 int num_sf_outputs = has_mask ? 2 : 1; 2652 2653 if (intel->gen6_render_state.num_sf_outputs == num_sf_outputs) 2654 return; 2655 2656 intel->gen6_render_state.num_sf_outputs = num_sf_outputs; 2657 2658 if (INTEL_INFO(intel)->gen >= 070) 2659 gen7_upload_sf_state(intel, num_sf_outputs, 1); 2660 else 2661 gen6_upload_sf_state(intel, num_sf_outputs, 1); 2662} 2663 2664static void 2665gen6_composite_wm_state(intel_screen_private *intel, 2666 Bool has_mask, 2667 drm_intel_bo *bo) 2668{ 2669 int num_surfaces = has_mask ? 3 : 2; 2670 int num_sf_outputs = has_mask ? 
2 : 1; 2671 2672 if (intel->gen6_render_state.kernel == bo) 2673 return; 2674 2675 intel->gen6_render_state.kernel = bo; 2676 2677 OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); 2678 OUT_RELOC(bo, 2679 I915_GEM_DOMAIN_INSTRUCTION, 0, 2680 0); 2681 OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | 2682 (num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); 2683 OUT_BATCH(0); 2684 OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ 2685 OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | 2686 GEN6_3DSTATE_WM_DISPATCH_ENABLE | 2687 GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); 2688 OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | 2689 GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 2690 OUT_BATCH(0); 2691 OUT_BATCH(0); 2692} 2693 2694static void 2695gen7_composite_wm_state(intel_screen_private *intel, 2696 Bool has_mask, 2697 drm_intel_bo *bo) 2698{ 2699 int num_surfaces = has_mask ? 3 : 2; 2700 unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB; 2701 unsigned int num_samples = 0; 2702 2703 if (IS_HSW(intel)) { 2704 max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW; 2705 num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW; 2706 } 2707 2708 if (intel->gen6_render_state.kernel == bo) 2709 return; 2710 2711 intel->gen6_render_state.kernel = bo; 2712 2713 OUT_BATCH(GEN6_3DSTATE_WM | (3 - 2)); 2714 OUT_BATCH(GEN7_WM_DISPATCH_ENABLE | 2715 GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 2716 OUT_BATCH(0); 2717 2718 OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2)); 2719 OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 2720 OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | 2721 (num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); 2722 OUT_BATCH(0); /* scratch space base offset */ 2723 OUT_BATCH(((48 - 1) << max_threads_shift) | num_samples | 2724 GEN7_PS_ATTRIBUTE_ENABLE | 2725 GEN7_PS_16_DISPATCH_ENABLE); 2726 OUT_BATCH((6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0)); 2727 OUT_BATCH(0); /* kernel 1 pointer */ 2728 OUT_BATCH(0); /* kernel 
2 pointer */ 2729} 2730 2731 2732static void 2733gen6_composite_drawing_rectangle(intel_screen_private *intel, 2734 PixmapPtr dest) 2735{ 2736 uint32_t dw = 2737 DRAW_YMAX(dest->drawable.height - 1) | 2738 DRAW_XMAX(dest->drawable.width - 1); 2739 2740 /* XXX cacomposite depends upon the implicit non-pipelined flush */ 2741 if (0 && intel->gen6_render_state.drawrect == dw) 2742 return; 2743 intel->gen6_render_state.drawrect = dw; 2744 2745 OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); 2746 OUT_BATCH(0x00000000); /* ymin, xmin */ 2747 OUT_BATCH(dw); /* ymax, xmax */ 2748 OUT_BATCH(0x00000000); /* yorigin, xorigin */ 2749} 2750 2751static void 2752gen6_composite_vertex_element_state(intel_screen_private *intel, 2753 Bool has_mask, 2754 Bool is_affine) 2755{ 2756 /* 2757 * vertex data in vertex buffer 2758 * position: (x, y) 2759 * texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0) 2760 * texture coordinate 1 if (has_mask is TRUE): same as above 2761 */ 2762 gen4_composite_op *composite_op = &intel->gen4_render_state->composite_op; 2763 int nelem = has_mask ? 2 : 1; 2764 int selem = is_affine ? 2 : 3; 2765 uint32_t w_component; 2766 uint32_t src_format; 2767 int id; 2768 2769 id = has_mask << 1 | is_affine; 2770 2771 if (composite_op->vertex_id == id) 2772 return; 2773 2774 composite_op->vertex_id = id; 2775 2776 if (is_affine) { 2777 src_format = BRW_SURFACEFORMAT_R32G32_FLOAT; 2778 w_component = BRW_VFCOMPONENT_STORE_1_FLT; 2779 } else { 2780 src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; 2781 w_component = BRW_VFCOMPONENT_STORE_SRC; 2782 } 2783 2784 /* The VUE layout 2785 * dword 0-3: pad (0.0, 0.0, 0.0. 
0.0) 2786 * dword 4-7: position (x, y, 1.0, 1.0), 2787 * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) 2788 * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) 2789 * 2790 * dword 4-15 are fetched from vertex buffer 2791 */ 2792 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 2793 ((2 * (2 + nelem)) + 1 - 2)); 2794 2795 OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | 2796 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 2797 (0 << VE0_OFFSET_SHIFT)); 2798 OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | 2799 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | 2800 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | 2801 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); 2802 2803 /* x,y */ 2804 OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | 2805 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 2806 (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */ 2807 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 2808 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 2809 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 2810 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 2811 2812 /* u0, v0, w0 */ 2813 OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | 2814 (src_format << VE0_FORMAT_SHIFT) | 2815 ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */ 2816 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 2817 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 2818 (w_component << VE1_VFCOMPONENT_2_SHIFT) | 2819 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 2820 2821 /* u1, v1, w1 */ 2822 if (has_mask) { 2823 OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | 2824 GEN6_VE0_VALID | 2825 (src_format << VE0_FORMAT_SHIFT) | 2826 (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */ 2827 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 2828 
(BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 2829 (w_component << VE1_VFCOMPONENT_2_SHIFT) | 2830 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 2831 } 2832} 2833 2834static void 2835gen6_emit_composite_state(struct intel_screen_private *intel) 2836{ 2837 struct gen4_render_state *render = intel->gen4_render_state; 2838 gen4_composite_op *composite_op = &render->composite_op; 2839 sampler_state_filter_t src_filter = composite_op->src_filter; 2840 sampler_state_filter_t mask_filter = composite_op->mask_filter; 2841 sampler_state_extend_t src_extend = composite_op->src_extend; 2842 sampler_state_extend_t mask_extend = composite_op->mask_extend; 2843 Bool is_affine = composite_op->is_affine; 2844 Bool has_mask = intel->render_mask != NULL; 2845 Bool ivb = INTEL_INFO(intel)->gen >= 070; 2846 uint32_t src, dst; 2847 drm_intel_bo *ps_sampler_state_bo = render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend]; 2848 2849 intel->needs_render_state_emit = FALSE; 2850 if (intel->needs_3d_invariant) { 2851 gen6_upload_invariant_states(intel); 2852 2853 if (ivb) { 2854 gen7_upload_viewport_state_pointers(intel, render->cc_vp_bo); 2855 gen7_upload_urb(intel); 2856 gen7_upload_bypass_states(intel); 2857 gen7_upload_depth_buffer_state(intel); 2858 } else { 2859 gen6_upload_invariant_states(intel); 2860 gen6_upload_viewport_state_pointers(intel, render->cc_vp_bo); 2861 gen6_upload_urb(intel); 2862 2863 gen6_upload_gs_state(intel); 2864 gen6_upload_depth_buffer_state(intel); 2865 } 2866 gen6_composite_wm_constants(intel); 2867 gen6_upload_vs_state(intel); 2868 gen6_upload_clip_state(intel); 2869 2870 intel->needs_3d_invariant = FALSE; 2871 } 2872 2873 i965_get_blend_cntl(composite_op->op, 2874 intel->render_mask_picture, 2875 intel->render_dest_picture->format, 2876 &src, &dst); 2877 2878 if (intel->surface_reloc == 0) 2879 gen6_composite_state_base_address(intel); 2880 2881 gen6_composite_cc_state_pointers(intel, 2882 (src * 
BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE); 2883 gen6_composite_sampler_state_pointers(intel, ps_sampler_state_bo); 2884 gen6_composite_sf_state(intel, has_mask); 2885 if (ivb) { 2886 gen7_composite_wm_state(intel, has_mask, 2887 render->wm_kernel_bo[composite_op->wm_kernel]); 2888 gen7_upload_binding_table(intel, intel->surface_table); 2889 } else { 2890 gen6_composite_wm_state(intel, has_mask, 2891 render->wm_kernel_bo[composite_op->wm_kernel]); 2892 gen6_upload_binding_table(intel, intel->surface_table); 2893 } 2894 gen6_composite_drawing_rectangle(intel, intel->render_dest); 2895 gen6_composite_vertex_element_state(intel, has_mask, is_affine); 2896} 2897 2898static void 2899gen6_render_state_init(ScrnInfoPtr scrn) 2900{ 2901 intel_screen_private *intel = intel_get_screen_private(scrn); 2902 struct gen4_render_state *render; 2903 sampler_state_filter_t src_filter; 2904 sampler_state_filter_t mask_filter; 2905 sampler_state_extend_t src_extend; 2906 sampler_state_extend_t mask_extend; 2907 int m; 2908 drm_intel_bo *border_color_bo; 2909 const struct wm_kernel_info *wm_kernels; 2910 2911 render= intel->gen4_render_state; 2912 render->composite_op.vertex_id = -1; 2913 2914 intel->gen6_render_state.num_sf_outputs = 0; 2915 intel->gen6_render_state.samplers = NULL; 2916 intel->gen6_render_state.blend = -1; 2917 intel->gen6_render_state.kernel = NULL; 2918 intel->gen6_render_state.drawrect = -1; 2919 2920 wm_kernels = IS_GEN7(intel) ? 
wm_kernels_gen7 : wm_kernels_gen6; 2921 for (m = 0; m < KERNEL_COUNT; m++) { 2922 render->wm_kernel_bo[m] = 2923 intel_uxa_bo_alloc_for_data(intel, 2924 wm_kernels[m].data, 2925 wm_kernels[m].size, 2926 "WM kernel gen6/7"); 2927 } 2928 2929 border_color_bo = sampler_border_color_create(intel); 2930 2931 for (src_filter = 0; src_filter < FILTER_COUNT; src_filter++) { 2932 for (src_extend = 0; src_extend < EXTEND_COUNT; src_extend++) { 2933 for (mask_filter = 0; mask_filter < FILTER_COUNT; mask_filter++) { 2934 for (mask_extend = 0; mask_extend < EXTEND_COUNT; mask_extend++) { 2935 render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend] = 2936 i965_create_sampler_state(intel, 2937 src_filter, src_extend, 2938 mask_filter, mask_extend, 2939 border_color_bo); 2940 } 2941 } 2942 } 2943 } 2944 2945 drm_intel_bo_unreference(border_color_bo); 2946 render->cc_vp_bo = gen4_create_cc_viewport(intel); 2947 render->cc_state_bo = gen6_composite_create_cc_state(intel); 2948 render->gen6_blend_bo = gen6_composite_create_blend_state(intel); 2949 render->gen6_depth_stencil_bo = gen6_composite_create_depth_stencil_state(intel); 2950} 2951 2952void i965_vertex_flush(struct intel_screen_private *intel) 2953{ 2954 if (intel->vertex_offset) { 2955 intel->batch_ptr[intel->vertex_offset] = 2956 intel->vertex_index - intel->vertex_count; 2957 intel->vertex_offset = 0; 2958 } 2959} 2960 2961void i965_batch_flush(struct intel_screen_private *intel) 2962{ 2963 if (intel->surface_used) 2964 i965_surface_flush(intel); 2965} 2966