Home | History | Annotate | Line # | Download | only in src
      1 /*
      2  * Copyright  2006,2008 Intel Corporation
      3  * Copyright  2007 Red Hat, Inc.
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the "Software"),
      7  * to deal in the Software without restriction, including without limitation
      8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9  * and/or sell copies of the Software, and to permit persons to whom the
     10  * Software is furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice (including the next
     13  * paragraph) shall be included in all copies or substantial portions of the
     14  * Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     22  * SOFTWARE.
     23  *
     24  * Authors:
     25  *    Wang Zhenyu <zhenyu.z.wang (at) intel.com>
     26  *    Eric Anholt <eric (at) anholt.net>
     27  *    Carl Worth <cworth (at) redhat.com>
     28  *    Keith Packard <keithp (at) keithp.com>
     29  *
     30  */
     31 
     32 #ifdef HAVE_CONFIG_H
     33 #include "config.h"
     34 #endif
     35 
     36 #include <assert.h>
     37 #include "xf86.h"
     38 #include "i830.h"
     39 #include "i915_reg.h"
     40 
     41 /* bring in brw structs */
     42 #include "brw_defines.h"
     43 #include "brw_structs.h"
     44 
/* 24 = 4 vertices/composite * 3 texcoords/vertex * 2 floats/texcoord
 *
 * This is an upper bound based on the case of a non-affine
 * transformation with a mask, but it is used for sizing all cases for
 * simplicity.
 *
 * VERTEX_BUFFER_SIZE is a count of floats (it is the element count of the
 * gen4_vertex_buffer float array), giving room for 256 composites of the
 * worst-case size.
 */
#define VERTEX_FLOATS_PER_COMPOSITE	24
#define VERTEX_BUFFER_SIZE		(256 * VERTEX_FLOATS_PER_COMPOSITE)
     53 
/* Blend description for one Render operator; element type of i965_blend_op. */
struct blendinfo {
    Bool dst_alpha;		/* non-zero when the factors read destination alpha */
    Bool src_alpha;		/* non-zero when the factors read source alpha */
    uint32_t src_blend;		/* BRW_BLENDFACTOR_* applied to the source */
    uint32_t dst_blend;		/* BRW_BLENDFACTOR_* applied to the destination */
};
     60 
/* Pairs a Render picture format with the matching hardware surface format. */
struct formatinfo {
    int fmt;			/* Render PICT_* format code */
    uint32_t card_fmt;		/* BRW_SURFACEFORMAT_* value */
};
     65 
/*
 * Blend factors for each Render compositing operator, indexed by the
 * operator number (see i965_get_blend_cntl() and i965_check_composite()).
 * Each entry is { dst_alpha, src_alpha, src_blend, dst_blend }.
 *
 * The BRW_BLENDFACTOR_* values are defined in brw_defines.h.
 * Refer to vol2, 3D rasterization, 3.8.1.
 */
static struct blendinfo i965_blend_op[] = {
    /* Clear */
    {0, 0, BRW_BLENDFACTOR_ZERO,          BRW_BLENDFACTOR_ZERO},
    /* Src */
    {0, 0, BRW_BLENDFACTOR_ONE,           BRW_BLENDFACTOR_ZERO},
    /* Dst */
    {0, 0, BRW_BLENDFACTOR_ZERO,          BRW_BLENDFACTOR_ONE},
    /* Over */
    {0, 1, BRW_BLENDFACTOR_ONE,           BRW_BLENDFACTOR_INV_SRC_ALPHA},
    /* OverReverse */
    {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE},
    /* In */
    {1, 0, BRW_BLENDFACTOR_DST_ALPHA,     BRW_BLENDFACTOR_ZERO},
    /* InReverse */
    {0, 1, BRW_BLENDFACTOR_ZERO,          BRW_BLENDFACTOR_SRC_ALPHA},
    /* Out */
    {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO},
    /* OutReverse */
    {0, 1, BRW_BLENDFACTOR_ZERO,          BRW_BLENDFACTOR_INV_SRC_ALPHA},
    /* Atop */
    {1, 1, BRW_BLENDFACTOR_DST_ALPHA,     BRW_BLENDFACTOR_INV_SRC_ALPHA},
    /* AtopReverse */
    {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA},
    /* Xor */
    {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA},
    /* Add */
    {0, 0, BRW_BLENDFACTOR_ONE,           BRW_BLENDFACTOR_ONE},
};
/**
 * One more than the highest-valued BLENDFACTOR used in i965_blend_op, i.e.
 * the array dimension needed to index by any blend factor from that table
 * (see struct gen4_cc_unit_state).
 *
 * This leaves out BRW_BLENDFACTOR_INV_DST_COLOR,
 * BRW_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * BRW_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define BRW_BLENDFACTOR_COUNT (BRW_BLENDFACTOR_INV_DST_ALPHA + 1)
    105 
/*
 * Render picture formats accepted as textures, each paired with its
 * hardware surface format.  i965_check_composite_texture() falls back for
 * any drawable-backed picture whose format is not listed here.
 *
 * FIXME: surface format defined in brw_defines.h, shared Sampling engine
 * 1.7.2
 */
static struct formatinfo i965_tex_formats[] = {
    {PICT_a8r8g8b8, BRW_SURFACEFORMAT_B8G8R8A8_UNORM },
    {PICT_x8r8g8b8, BRW_SURFACEFORMAT_B8G8R8X8_UNORM },
    {PICT_a8b8g8r8, BRW_SURFACEFORMAT_R8G8B8A8_UNORM },
    {PICT_x8b8g8r8, BRW_SURFACEFORMAT_R8G8B8X8_UNORM },
    {PICT_r5g6b5,   BRW_SURFACEFORMAT_B5G6R5_UNORM   },
    {PICT_a1r5g5b5, BRW_SURFACEFORMAT_B5G5R5A1_UNORM },
    {PICT_a8,       BRW_SURFACEFORMAT_A8_UNORM	 },
};
    118 
    119 static void i965_get_blend_cntl(int op, PicturePtr pMask, uint32_t dst_format,
    120 				uint32_t *sblend, uint32_t *dblend)
    121 {
    122 
    123     *sblend = i965_blend_op[op].src_blend;
    124     *dblend = i965_blend_op[op].dst_blend;
    125 
    126     /* If there's no dst alpha channel, adjust the blend op so that we'll treat
    127      * it as always 1.
    128      */
    129     if (PICT_FORMAT_A(dst_format) == 0 && i965_blend_op[op].dst_alpha) {
    130         if (*sblend == BRW_BLENDFACTOR_DST_ALPHA)
    131             *sblend = BRW_BLENDFACTOR_ONE;
    132         else if (*sblend == BRW_BLENDFACTOR_INV_DST_ALPHA)
    133             *sblend = BRW_BLENDFACTOR_ZERO;
    134     }
    135 
    136     /* If the source alpha is being used, then we should only be in a case where
    137      * the source blend factor is 0, and the source blend value is the mask
    138      * channels multiplied by the source picture's alpha.
    139      */
    140     if (pMask && pMask->componentAlpha && PICT_FORMAT_RGB(pMask->format)
    141             && i965_blend_op[op].src_alpha) {
    142         if (*dblend == BRW_BLENDFACTOR_SRC_ALPHA) {
    143 	    *dblend = BRW_BLENDFACTOR_SRC_COLOR;
    144         } else if (*dblend == BRW_BLENDFACTOR_INV_SRC_ALPHA) {
    145 	    *dblend = BRW_BLENDFACTOR_INV_SRC_COLOR;
    146         }
    147     }
    148 
    149 }
    150 
    151 static Bool i965_get_dest_format(PicturePtr pDstPicture, uint32_t *dst_format)
    152 {
    153     ScrnInfoPtr pScrn = xf86Screens[pDstPicture->pDrawable->pScreen->myNum];
    154 
    155     switch (pDstPicture->format) {
    156     case PICT_a8r8g8b8:
    157     case PICT_x8r8g8b8:
    158         *dst_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
    159         break;
    160     case PICT_r5g6b5:
    161         *dst_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
    162         break;
    163     case PICT_a1r5g5b5:
    164     	*dst_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
    165 	break;
    166     case PICT_x1r5g5b5:
    167         *dst_format = BRW_SURFACEFORMAT_B5G5R5X1_UNORM;
    168         break;
    169     case PICT_a8:
    170         *dst_format = BRW_SURFACEFORMAT_A8_UNORM;
    171         break;
    172     case PICT_a4r4g4b4:
    173     case PICT_x4r4g4b4:
    174 	*dst_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
    175 	break;
    176     default:
    177         I830FALLBACK("Unsupported dest format 0x%x\n",
    178 		     (int)pDstPicture->format);
    179     }
    180 
    181     return TRUE;
    182 }
    183 
    184 static Bool i965_check_composite_texture(ScrnInfoPtr pScrn, PicturePtr pPict, int unit)
    185 {
    186     if (pPict->repeatType > RepeatReflect)
    187 	I830FALLBACK("extended repeat (%d) not supported\n",
    188 		     pPict->repeatType);
    189 
    190     if (pPict->filter != PictFilterNearest &&
    191         pPict->filter != PictFilterBilinear)
    192     {
    193         I830FALLBACK("Unsupported filter 0x%x\n", pPict->filter);
    194     }
    195 
    196     if (pPict->pDrawable)
    197     {
    198 	int w, h, i;
    199 
    200 	w = pPict->pDrawable->width;
    201 	h = pPict->pDrawable->height;
    202 	if ((w > 8192) || (h > 8192))
    203 	    I830FALLBACK("Picture w/h too large (%dx%d)\n", w, h);
    204 
    205 	for (i = 0; i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]);
    206 	     i++)
    207 	{
    208 	    if (i965_tex_formats[i].fmt == pPict->format)
    209 		break;
    210 	}
    211 	if (i == sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]))
    212 	    I830FALLBACK("Unsupported picture format 0x%x\n",
    213 			 (int)pPict->format);
    214     }
    215 
    216     return TRUE;
    217 }
    218 
    219 Bool
    220 i965_check_composite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
    221 		     PicturePtr pDstPicture)
    222 {
    223     ScrnInfoPtr pScrn = xf86Screens[pDstPicture->pDrawable->pScreen->myNum];
    224     uint32_t tmp1;
    225 
    226     /* Check for unsupported compositing operations. */
    227     if (op >= sizeof(i965_blend_op) / sizeof(i965_blend_op[0]))
    228         I830FALLBACK("Unsupported Composite op 0x%x\n", op);
    229 
    230     if (pMaskPicture && pMaskPicture->componentAlpha &&
    231             PICT_FORMAT_RGB(pMaskPicture->format)) {
    232         /* Check if it's component alpha that relies on a source alpha and on
    233          * the source value.  We can only get one of those into the single
    234          * source value that we get to blend with.
    235          */
    236         if (i965_blend_op[op].src_alpha &&
    237             (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO))
    238 	{
    239 	    I830FALLBACK("Component alpha not supported with source "
    240 			 "alpha and source value blending.\n");
    241 	}
    242     }
    243 
    244     if (!i965_check_composite_texture(pScrn, pSrcPicture, 0))
    245         I830FALLBACK("Check Src picture texture\n");
    246     if (pMaskPicture != NULL && !i965_check_composite_texture(pScrn, pMaskPicture, 1))
    247         I830FALLBACK("Check Mask picture texture\n");
    248 
    249     if (!i965_get_dest_format(pDstPicture, &tmp1))
    250 	I830FALLBACK("Get Color buffer format\n");
    251 
    252     return TRUE;
    253 
    254 }
    255 
/* Convert a GRF register count into the value stored in a unit state's
 * grf_reg_count field: the number of 16-register blocks needed, rounded
 * up, minus one.  The argument is parenthesized so that expressions with
 * low-precedence operators (e.g. shifts) expand correctly.
 */
#define BRW_GRF_BLOCKS(nreg)    (((nreg) + 15) / 16 - 1)
    257 
/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 * The CS, GS and CLIP stages get no entries; only VS and SF are given
 * space.
 */
#define URB_CS_ENTRY_SIZE     0
#define URB_CS_ENTRIES	      0

#define URB_VS_ENTRY_SIZE     1	  // each 512-bit row
#define URB_VS_ENTRIES	      8	  // we need at least 8 entries

#define URB_GS_ENTRY_SIZE     0
#define URB_GS_ENTRIES	      0

#define URB_CLIP_ENTRY_SIZE   0
#define URB_CLIP_ENTRIES      0

#define URB_SF_ENTRY_SIZE     2
#define URB_SF_ENTRIES	      1
    275 
/* Pre-assembled SIP kernel, four dwords per instruction: a single wait
 * followed by nine nops, as shown by the disassembly comments.
 */
static const uint32_t sip_kernel_static[][4] = {
/*    wait (1) a0<1>UW a145<0,1,0>UW { align1 +  } */
    { 0x00000030, 0x20000108, 0x00001220, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
/*    nop (4) g0<1>UD { align1 +  } */
    { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
};
    298 
/*
 * SF kernels.  This program computes dA/dx and dA/dy for the texture
 * coordinates along with the base texture coordinate.  It was extracted
 * from the Mesa driver.
 *
 * SF_KERNEL_NUM_GRF and SF_MAX_THREADS are written into the SF unit state
 * by gen4_create_sf_state().  The instructions themselves come from
 * pre-assembled .g4b files; the _mask variant is presumably the one used
 * when a mask picture is present (confirm against the caller).
 */

#define SF_KERNEL_NUM_GRF  16
#define SF_MAX_THREADS	   2

static const uint32_t sf_kernel_static[][4] = {
#include "exa_sf.g4b"
};

static const uint32_t sf_kernel_mask_static[][4] = {
#include "exa_sf_mask.g4b"
};
    314 
/* ps kernels
 *
 * Each kernel below is assembled by concatenating pre-assembled .g4b
 * fragments, included in sequence inside one array initializer (four
 * dwords per instruction).  The variants differ in which source and mask
 * fragments they include: affine vs. projective coordinates, no mask vs.
 * component-alpha mask (ca), component-alpha with source alpha
 * (ca_srcalpha), or non-component-alpha mask (noca).
 */
#define PS_KERNEL_NUM_GRF   32
#define PS_MAX_THREADS	    48

static const uint32_t ps_kernel_nomask_affine_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_nomask_projective_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_maskca_affine_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_maskca_projective_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_maskca_srcalpha_affine_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_a.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca_srcalpha.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_maskca_srcalpha_projective_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_a.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca_srcalpha.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_masknoca_affine_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_a.g4b"
#include "exa_wm_noca.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_masknoca_projective_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_a.g4b"
#include "exa_wm_noca.g4b"
#include "exa_wm_write.g4b"
};
    392 
/* new programs for IGDNG
 *
 * These mirror the SF and PS kernels defined for the earlier hardware, one
 * array per variant with the same fragment sequence, but are built from
 * the .g4b.gen5 versions of each fragment.
 */
static const uint32_t sf_kernel_static_gen5[][4] = {
#include "exa_sf.g4b.gen5"
};

static const uint32_t sf_kernel_mask_static_gen5[][4] = {
#include "exa_sf_mask.g4b.gen5"
};

static const uint32_t ps_kernel_nomask_affine_static_gen5 [][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_nomask_projective_static_gen5 [][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_affine_static_gen5 [][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_projective_static_gen5 [][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen5 [][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_a.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca_srcalpha.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen5 [][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_a.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca_srcalpha.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_masknoca_affine_static_gen5 [][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_a.g4b.gen5"
#include "exa_wm_noca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_masknoca_projective_static_gen5 [][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_a.g4b.gen5"
#include "exa_wm_noca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};
    475 
/* Declares a four-dimensional array of brw_wm_unit_state named
 * wm_state_<kernel>, indexed [filter][extend][filter][extend].
 * NOTE(review): presumably the first pair is the source sampler and the
 * second the mask sampler -- confirm against the users of this macro.
 */
#define WM_STATE_DECL(kernel) \
    struct brw_wm_unit_state wm_state_ ## kernel[SAMPLER_STATE_FILTER_COUNT] \
						[SAMPLER_STATE_EXTEND_COUNT] \
						[SAMPLER_STATE_FILTER_COUNT] \
						[SAMPLER_STATE_EXTEND_COUNT]
    481 
/* Many of the fields in the state structure must be aligned to a
 * 64-byte boundary, (or a 32-byte boundary, but 64 is good enough for
 * those too).
 *
 * PAD64_MULTI declares a char array named <previous>_pad<idx> whose size
 * rounds (sizeof(struct previous) * factor) up to the next multiple of 64;
 * the size is 0 when that product is already a multiple of 64.
 * PAD64 is the single-element (factor 1) form.
 */
#define PAD64_MULTI(previous, idx, factor) char previous ## _pad ## idx [(64 - (sizeof(struct previous) * (factor)) % 64) % 64]
#define PAD64(previous, idx) PAD64_MULTI(previous, idx, 1)
    488 
/* Sampler filter selection; sampler_state_init() maps these to
 * BRW_MAPFILTER_NEAREST / BRW_MAPFILTER_LINEAR.  The trailing _COUNT value
 * is used as an array dimension.
 */
typedef enum {
    SAMPLER_STATE_FILTER_NEAREST,
    SAMPLER_STATE_FILTER_BILINEAR,
    SAMPLER_STATE_FILTER_COUNT
} sampler_state_filter_t;
    494 
/* Sampler extend (out-of-bounds) behavior; sampler_state_init() maps these
 * to BRW_TEXCOORDMODE_CLAMP_BORDER, _WRAP, _CLAMP and _MIRROR respectively.
 * The trailing _COUNT value is used as an array dimension.
 */
typedef enum {
    SAMPLER_STATE_EXTEND_NONE,
    SAMPLER_STATE_EXTEND_REPEAT,
    SAMPLER_STATE_EXTEND_PAD,
    SAMPLER_STATE_EXTEND_REFLECT,
    SAMPLER_STATE_EXTEND_COUNT
} sampler_state_extend_t;
    502 
/* Identifies one pixel-shader kernel variant.  The values index the
 * wm_kernels[] / wm_kernels_gen5[] tables and the per-kernel arrays in
 * struct gen4_render_state; WM_KERNEL_COUNT is the number of variants.
 */
typedef enum {
    WM_KERNEL_NOMASK_AFFINE,
    WM_KERNEL_NOMASK_PROJECTIVE,
    WM_KERNEL_MASKCA_AFFINE,
    WM_KERNEL_MASKCA_PROJECTIVE,
    WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
    WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
    WM_KERNEL_MASKNOCA_AFFINE,
    WM_KERNEL_MASKNOCA_PROJECTIVE,
    WM_KERNEL_COUNT
} wm_kernel_t;
    514 
/* Builds one designated-initializer entry of a wm_kernel_info table at
 * index kernel_enum: pointer to the kernel array, its size in bytes, and
 * whether the kernel samples a mask.  __UNCONST is defined outside this
 * file; presumably it strips the const qualifier from the kernel array.
 * The macro is #undef'd once both tables are defined.
 */
#define KERNEL(kernel_enum, kernel, masked) \
    [kernel_enum] = {__UNCONST(&kernel), sizeof(kernel), masked}
struct wm_kernel_info {
    void *data;			/* start of the kernel instructions */
    unsigned int size;		/* size of the kernel in bytes */
    Bool has_mask;		/* TRUE for kernels that use a mask */
};

/* Kernel table for the original kernels, indexed by wm_kernel_t. */
static struct wm_kernel_info wm_kernels[] = {
    KERNEL(WM_KERNEL_NOMASK_AFFINE,
	   ps_kernel_nomask_affine_static, FALSE),
    KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	   ps_kernel_nomask_projective_static, FALSE),
    KERNEL(WM_KERNEL_MASKCA_AFFINE,
	   ps_kernel_maskca_affine_static, TRUE),
    KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	   ps_kernel_maskca_projective_static, TRUE),
    KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	   ps_kernel_maskca_srcalpha_affine_static, TRUE),
    KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	   ps_kernel_maskca_srcalpha_projective_static, TRUE),
    KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	   ps_kernel_masknoca_affine_static, TRUE),
    KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	   ps_kernel_masknoca_projective_static, TRUE),
};

/* Kernel table for the gen5 (IGDNG) kernels, indexed by wm_kernel_t. */
static struct wm_kernel_info wm_kernels_gen5[] = {
    KERNEL(WM_KERNEL_NOMASK_AFFINE,
	   ps_kernel_nomask_affine_static_gen5, FALSE),
    KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	   ps_kernel_nomask_projective_static_gen5, FALSE),
    KERNEL(WM_KERNEL_MASKCA_AFFINE,
	   ps_kernel_maskca_affine_static_gen5, TRUE),
    KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	   ps_kernel_maskca_projective_static_gen5, TRUE),
    KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	   ps_kernel_maskca_srcalpha_affine_static_gen5, TRUE),
    KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	   ps_kernel_maskca_srcalpha_projective_static_gen5, TRUE),
    KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	   ps_kernel_masknoca_affine_static_gen5, TRUE),
    KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	   ps_kernel_masknoca_projective_static_gen5, TRUE),
};
#undef KERNEL
    561 
/* A brw_cc_unit_state padded out to 64 bytes so that consecutive array
 * elements start on 64-byte multiples.  The pad size assumes
 * sizeof(struct brw_cc_unit_state) is below 64.
 */
typedef struct _brw_cc_unit_state_padded {
    struct brw_cc_unit_state state;
    char pad[64 - sizeof (struct brw_cc_unit_state)];
} brw_cc_unit_state_padded;
    566 
/* A brw_surface_state padded out to 32 bytes.  The pad size assumes
 * sizeof(struct brw_surface_state) is below 32.
 */
typedef struct brw_surface_state_padded {
    struct brw_surface_state state;
    char pad[32 - sizeof (struct brw_surface_state)];
} brw_surface_state_padded;
    571 
/* Layout of the color-calculator state buffer: one padded CC unit state
 * for every (source, destination) blend factor pair below
 * BRW_BLENDFACTOR_COUNT.
 */
struct gen4_cc_unit_state {
    /* Index by [src_blend][dst_blend] */
    brw_cc_unit_state_padded cc_state[BRW_BLENDFACTOR_COUNT]
				     [BRW_BLENDFACTOR_COUNT];
};
    577 
/* Vertex buffer contents: VERTEX_BUFFER_SIZE floats. */
typedef float gen4_vertex_buffer[VERTEX_BUFFER_SIZE];
    579 
/* Parameters describing one composite operation: the Render operator, the
 * pictures and pixmaps involved, and the sampler/kernel selection derived
 * for it.  NOTE(review): the code that fills this in is outside this view;
 * member descriptions below follow the names and types only.
 */
typedef struct gen4_composite_op {
    int		op;			/* Render operator */
    PicturePtr	source_picture;
    PicturePtr	mask_picture;
    PicturePtr	dest_picture;
    PixmapPtr	source;
    PixmapPtr	mask;
    PixmapPtr	dest;
    drm_intel_bo *binding_table_bo;
    sampler_state_filter_t src_filter;
    sampler_state_filter_t mask_filter;
    sampler_state_extend_t src_extend;
    sampler_state_extend_t mask_extend;
    Bool is_affine;
    wm_kernel_t wm_kernel;		/* which pixel-shader variant to use */
} gen4_composite_op;
    596 
/**
 * Private data for gen4 render accel implementation.
 *
 * Holds the buffer objects for the fixed-function unit states and the
 * kernels, plus the current composite operation and vertex buffer
 * bookkeeping.
 */
struct gen4_render_state {
    drm_intel_bo *vs_state_bo;
    drm_intel_bo *sf_state_bo;
    drm_intel_bo *sf_mask_state_bo;
    drm_intel_bo *cc_state_bo;
    /* Indexed [kernel][filter][extend][filter][extend]. */
    drm_intel_bo *wm_state_bo[WM_KERNEL_COUNT]
			     [SAMPLER_STATE_FILTER_COUNT]
			     [SAMPLER_STATE_EXTEND_COUNT]
			     [SAMPLER_STATE_FILTER_COUNT]
			     [SAMPLER_STATE_EXTEND_COUNT];
    /* One kernel buffer per wm_kernel_t value. */
    drm_intel_bo *wm_kernel_bo[WM_KERNEL_COUNT];

    drm_intel_bo *sip_kernel_bo;
    dri_bo* vertex_buffer_bo;

    gen4_composite_op composite_op;

    int vb_offset;
    int vertex_size;

    Bool needs_state_emit;
};
    620 
    621 /**
    622  * Sets up the SF state pointing at an SF kernel.
    623  *
    624  * The SF kernel does coord interp: for each attribute,
    625  * calculate dA/dx and dA/dy.  Hand these interpolation coefficients
    626  * back to SF which then hands pixels off to WM.
    627  */
    628 static drm_intel_bo *
    629 gen4_create_sf_state(ScrnInfoPtr scrn, drm_intel_bo *kernel_bo)
    630 {
    631     I830Ptr pI830 = I830PTR(scrn);
    632     struct brw_sf_unit_state *sf_state;
    633     drm_intel_bo *sf_state_bo;
    634 
    635     sf_state_bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 SF state",
    636 				     sizeof(*sf_state), 4096);
    637     drm_intel_bo_map(sf_state_bo, TRUE);
    638     sf_state = sf_state_bo->virtual;
    639 
    640     memset(sf_state, 0, sizeof(*sf_state));
    641     sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    642     sf_state->thread0.kernel_start_pointer =
    643 	intel_emit_reloc(sf_state_bo,
    644 			 offsetof(struct brw_sf_unit_state, thread0),
    645 			 kernel_bo, sf_state->thread0.grf_reg_count << 1,
    646 			 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6;
    647     sf_state->sf1.single_program_flow = 1;
    648     sf_state->sf1.binding_table_entry_count = 0;
    649     sf_state->sf1.thread_priority = 0;
    650     sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    651     sf_state->sf1.illegal_op_exception_enable = 1;
    652     sf_state->sf1.mask_stack_exception_enable = 1;
    653     sf_state->sf1.sw_exception_enable = 1;
    654     sf_state->thread2.per_thread_scratch_space = 0;
    655     /* scratch space is not used in our kernel */
    656     sf_state->thread2.scratch_space_base_pointer = 0;
    657     sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    658     sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    659     sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    660     /* don't smash vertex header, read start from dw8 */
    661     sf_state->thread3.urb_entry_read_offset = 1;
    662     sf_state->thread3.dispatch_grf_start_reg = 3;
    663     sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    664     sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    665     sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    666     sf_state->thread4.stats_enable = 1;
    667     sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
    668     sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
    669     sf_state->sf6.scissor = 0;
    670     sf_state->sf7.trifan_pv = 2;
    671     sf_state->sf6.dest_org_vbias = 0x8;
    672     sf_state->sf6.dest_org_hbias = 0x8;
    673 
    674     drm_intel_bo_unmap(sf_state_bo);
    675 
    676     return sf_state_bo;
    677 }
    678 
    679 static drm_intel_bo *
    680 sampler_border_color_create(ScrnInfoPtr scrn)
    681 {
    682     struct brw_sampler_legacy_border_color sampler_border_color;
    683 
    684     /* Set up the sampler border color (always transparent black) */
    685     memset(&sampler_border_color, 0, sizeof(sampler_border_color));
    686     sampler_border_color.color[0] = 0; /* R */
    687     sampler_border_color.color[1] = 0; /* G */
    688     sampler_border_color.color[2] = 0; /* B */
    689     sampler_border_color.color[3] = 0; /* A */
    690 
    691     return intel_bo_alloc_for_data(scrn,
    692 				   &sampler_border_color,
    693 				   sizeof(sampler_border_color),
    694 				   "gen4 render sampler border color");
    695 }
    696 
    697 static void
    698 sampler_state_init (drm_intel_bo *sampler_state_bo,
    699 		    struct brw_sampler_state *sampler_state,
    700 		    sampler_state_filter_t filter,
    701 		    sampler_state_extend_t extend,
    702 		    drm_intel_bo *border_color_bo)
    703 {
    704     uint32_t sampler_state_offset;
    705 
    706     sampler_state_offset = (char *)sampler_state -
    707 	(char *)sampler_state_bo->virtual;
    708 
    709     /* PS kernel use this sampler */
    710     memset(sampler_state, 0, sizeof(*sampler_state));
    711 
    712     sampler_state->ss0.lod_preclamp = 1; /* GL mode */
    713 
    714     /* We use the legacy mode to get the semantics specified by
    715      * the Render extension. */
    716     sampler_state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY;
    717 
    718     switch(filter) {
    719     default:
    720     case SAMPLER_STATE_FILTER_NEAREST:
    721 	sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
    722 	sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
    723 	break;
    724     case SAMPLER_STATE_FILTER_BILINEAR:
    725 	sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
    726 	sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
    727 	break;
    728     }
    729 
    730     switch (extend) {
    731     default:
    732     case SAMPLER_STATE_EXTEND_NONE:
    733 	sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
    734 	sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
    735 	sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
    736 	break;
    737     case SAMPLER_STATE_EXTEND_REPEAT:
    738 	sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
    739 	sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
    740 	sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
    741 	break;
    742     case SAMPLER_STATE_EXTEND_PAD:
    743 	sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
    744 	sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
    745 	sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
    746 	break;
    747     case SAMPLER_STATE_EXTEND_REFLECT:
    748 	sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
    749 	sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
    750 	sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
    751 	break;
    752     }
    753 
    754     sampler_state->ss2.border_color_pointer =
    755 	intel_emit_reloc(sampler_state_bo, sampler_state_offset +
    756 			 offsetof(struct brw_sampler_state, ss2),
    757 			 border_color_bo, 0,
    758 			 I915_GEM_DOMAIN_SAMPLER, 0) >> 5;
    759 
    760     sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
    761 }
    762 
    763 static drm_intel_bo *
    764 gen4_create_sampler_state(ScrnInfoPtr scrn,
    765 			  sampler_state_filter_t src_filter,
    766 			  sampler_state_extend_t src_extend,
    767 			  sampler_state_filter_t mask_filter,
    768 			  sampler_state_extend_t mask_extend,
    769 			  drm_intel_bo *border_color_bo)
    770 {
    771     I830Ptr pI830 = I830PTR(scrn);
    772     drm_intel_bo *sampler_state_bo;
    773     struct brw_sampler_state *sampler_state;
    774 
    775     sampler_state_bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 sampler state",
    776 					  sizeof(struct brw_sampler_state) * 2,
    777 					  4096);
    778     drm_intel_bo_map(sampler_state_bo, TRUE);
    779     sampler_state = sampler_state_bo->virtual;
    780 
    781     sampler_state_init(sampler_state_bo,
    782 		       &sampler_state[0],
    783 		       src_filter, src_extend,
    784 		       border_color_bo);
    785     sampler_state_init(sampler_state_bo,
    786 		       &sampler_state[1],
    787 		       mask_filter, mask_extend,
    788 		       border_color_bo);
    789 
    790     drm_intel_bo_unmap(sampler_state_bo);
    791 
    792     return sampler_state_bo;
    793 }
    794 
    795 static void
    796 cc_state_init (drm_intel_bo *cc_state_bo,
    797 	       uint32_t cc_state_offset,
    798 	       int src_blend,
    799 	       int dst_blend,
    800 	       drm_intel_bo *cc_vp_bo)
    801 {
    802     struct brw_cc_unit_state *cc_state;
    803 
    804     cc_state = (struct brw_cc_unit_state *)((char *)cc_state_bo->virtual +
    805 					    cc_state_offset);
    806 
    807     memset(cc_state, 0, sizeof(*cc_state));
    808     cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    809     cc_state->cc2.depth_test = 0;       /* disable depth test */
    810     cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    811     cc_state->cc3.ia_blend_enable = 0;  /* blend alpha same as colors */
    812     cc_state->cc3.blend_enable = 1;     /* enable color blend */
    813     cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    814 
    815     cc_state->cc4.cc_viewport_state_offset =
    816 	intel_emit_reloc(cc_state_bo, cc_state_offset +
    817 			 offsetof(struct brw_cc_unit_state, cc4),
    818 			 cc_vp_bo, 0,
    819 			 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;
    820 
    821     cc_state->cc5.dither_enable = 0;    /* disable dither */
    822     cc_state->cc5.logicop_func = 0xc;   /* COPY */
    823     cc_state->cc5.statistics_enable = 1;
    824     cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
    825 
    826     /* Fill in alpha blend factors same as color, for the future. */
    827     cc_state->cc5.ia_src_blend_factor = src_blend;
    828     cc_state->cc5.ia_dest_blend_factor = dst_blend;
    829 
    830     cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
    831     cc_state->cc6.clamp_post_alpha_blend = 1;
    832     cc_state->cc6.clamp_pre_alpha_blend = 1;
    833     cc_state->cc6.clamp_range = 0;  /* clamp range [0,1] */
    834 
    835     cc_state->cc6.src_blend_factor = src_blend;
    836     cc_state->cc6.dest_blend_factor = dst_blend;
    837 }
    838 
    839 static drm_intel_bo *
    840 gen4_create_wm_state(ScrnInfoPtr scrn,
    841 		     Bool has_mask, drm_intel_bo *kernel_bo,
    842 		     drm_intel_bo *sampler_bo)
    843 {
    844     I830Ptr pI830 = I830PTR(scrn);
    845     struct brw_wm_unit_state *wm_state;
    846     drm_intel_bo *wm_state_bo;
    847 
    848     wm_state_bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 WM state",
    849 				     sizeof(*wm_state), 4096);
    850     drm_intel_bo_map(wm_state_bo, TRUE);
    851     wm_state = wm_state_bo->virtual;
    852 
    853     memset(wm_state, 0, sizeof (*wm_state));
    854     wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    855     wm_state->thread0.kernel_start_pointer =
    856 	intel_emit_reloc(wm_state_bo,
    857 			 offsetof(struct brw_wm_unit_state, thread0),
    858                          kernel_bo, wm_state->thread0.grf_reg_count << 1,
    859                          I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6;
    860 
    861     wm_state->thread1.single_program_flow = 0;
    862 
    863     /* scratch space is not used in our kernel */
    864     wm_state->thread2.scratch_space_base_pointer = 0;
    865     wm_state->thread2.per_thread_scratch_space = 0;
    866 
    867     wm_state->thread3.const_urb_entry_read_length = 0;
    868     wm_state->thread3.const_urb_entry_read_offset = 0;
    869 
    870     wm_state->thread3.urb_entry_read_offset = 0;
    871     /* wm kernel use urb from 3, see wm_program in compiler module */
    872     wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
    873 
    874     wm_state->wm4.stats_enable = 1;  /* statistic */
    875 
    876     if (IS_IGDNG(pI830))
    877         wm_state->wm4.sampler_count = 0; /* hardware requirement */
    878     else
    879         wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */
    880 
    881     wm_state->wm4.sampler_state_pointer =
    882 	intel_emit_reloc(wm_state_bo, offsetof(struct brw_wm_unit_state, wm4),
    883 			 sampler_bo,
    884 			 wm_state->wm4.stats_enable +
    885 			 (wm_state->wm4.sampler_count << 2),
    886 			 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;
    887     wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
    888     wm_state->wm5.transposed_urb_read = 0;
    889     wm_state->wm5.thread_dispatch_enable = 1;
    890     /* just use 16-pixel dispatch (4 subspans), don't need to change kernel
    891      * start point
    892      */
    893     wm_state->wm5.enable_16_pix = 1;
    894     wm_state->wm5.enable_8_pix = 0;
    895     wm_state->wm5.early_depth_test = 1;
    896 
    897     /* Each pair of attributes (src/mask coords) is two URB entries */
    898     if (has_mask) {
    899 	wm_state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */
    900 	wm_state->thread3.urb_entry_read_length = 4;
    901     } else {
    902 	wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
    903 	wm_state->thread3.urb_entry_read_length = 2;
    904     }
    905 
    906     /* binding table entry count is only used for prefetching, and it has to
    907      * be set 0 for IGDNG
    908      */
    909     if (IS_IGDNG(pI830))
    910         wm_state->thread1.binding_table_entry_count = 0;
    911 
    912     drm_intel_bo_unmap(wm_state_bo);
    913 
    914     return wm_state_bo;
    915 }
    916 
    917 static drm_intel_bo *
    918 gen4_create_cc_viewport(ScrnInfoPtr scrn)
    919 {
    920     I830Ptr pI830 = I830PTR(scrn);
    921     drm_intel_bo *bo;
    922     struct brw_cc_viewport cc_viewport;
    923 
    924     cc_viewport.min_depth = -1.e35;
    925     cc_viewport.max_depth = 1.e35;
    926 
    927     bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 render unit state",
    928 			    sizeof(cc_viewport), 4096);
    929     drm_intel_bo_subdata(bo, 0, sizeof(cc_viewport), &cc_viewport);
    930 
    931     return bo;
    932 }
    933 
    934 static drm_intel_bo *
    935 gen4_create_vs_unit_state(ScrnInfoPtr scrn)
    936 {
    937     I830Ptr pI830 = I830PTR(scrn);
    938     struct brw_vs_unit_state vs_state;
    939     memset(&vs_state, 0, sizeof(vs_state));
    940 
    941     /* Set up the vertex shader to be disabled (passthrough) */
    942     if (IS_IGDNG(pI830))
    943         vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; /* hardware requirement */
    944     else
    945         vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES;
    946     vs_state.thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    947     vs_state.vs6.vs_enable = 0;
    948     vs_state.vs6.vert_cache_disable = 1;
    949 
    950     return intel_bo_alloc_for_data(scrn, &vs_state, sizeof(vs_state),
    951 				   "gen4 render VS state");
    952 }
    953 
    954 /**
    955  * Set up all combinations of cc state: each blendfactor for source and
    956  * dest.
    957  */
    958 static drm_intel_bo *
    959 gen4_create_cc_unit_state(ScrnInfoPtr scrn)
    960 {
    961     I830Ptr pI830 = I830PTR(scrn);
    962     struct gen4_cc_unit_state *cc_state;
    963     drm_intel_bo *cc_state_bo, *cc_vp_bo;
    964     int i, j;
    965 
    966     cc_vp_bo = gen4_create_cc_viewport(scrn);
    967 
    968     cc_state_bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 CC state",
    969 				     sizeof(*cc_state), 4096);
    970     drm_intel_bo_map(cc_state_bo, TRUE);
    971     cc_state = cc_state_bo->virtual;
    972     for (i = 0; i < BRW_BLENDFACTOR_COUNT; i++) {
    973 	for (j = 0; j < BRW_BLENDFACTOR_COUNT; j++) {
    974 	    cc_state_init(cc_state_bo,
    975 			  offsetof(struct gen4_cc_unit_state,
    976 				   cc_state[i][j].state),
    977 			  i, j, cc_vp_bo);
    978 	}
    979     }
    980     drm_intel_bo_unmap(cc_state_bo);
    981 
    982     drm_intel_bo_unreference(cc_vp_bo);
    983 
    984     return cc_state_bo;
    985 }
    986 
    987 static uint32_t
    988 i965_get_card_format(PicturePtr pPict)
    989 {
    990     int i;
    991 
    992     for (i = 0; i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]);
    993 	 i++)
    994     {
    995 	if (i965_tex_formats[i].fmt == pPict->format)
    996 	    break;
    997     }
    998     assert(i != sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]));
    999 
   1000     return i965_tex_formats[i].card_fmt;
   1001 }
   1002 
   1003 static sampler_state_filter_t
   1004 sampler_state_filter_from_picture (int filter)
   1005 {
   1006     switch (filter) {
   1007     case PictFilterNearest:
   1008 	return SAMPLER_STATE_FILTER_NEAREST;
   1009     case PictFilterBilinear:
   1010 	return SAMPLER_STATE_FILTER_BILINEAR;
   1011     default:
   1012 	return -1;
   1013     }
   1014 }
   1015 
   1016 static sampler_state_extend_t
   1017 sampler_state_extend_from_picture (int repeat_type)
   1018 {
   1019     switch (repeat_type) {
   1020     case RepeatNone:
   1021 	return SAMPLER_STATE_EXTEND_NONE;
   1022     case RepeatNormal:
   1023 	return SAMPLER_STATE_EXTEND_REPEAT;
   1024     case RepeatPad:
   1025 	return SAMPLER_STATE_EXTEND_PAD;
   1026     case RepeatReflect:
   1027 	return SAMPLER_STATE_EXTEND_REFLECT;
   1028     default:
   1029 	return -1;
   1030     }
   1031 }
   1032 
   1033 /**
   1034  * Sets up the common fields for a surface state buffer for the given
   1035  * picture in the given surface state buffer.
   1036  */
   1037 static void
   1038 i965_set_picture_surface_state(dri_bo *ss_bo, int ss_index,
   1039 			       PicturePtr pPicture, PixmapPtr pPixmap,
   1040 			       Bool is_dst)
   1041 {
   1042     struct brw_surface_state_padded *ss;
   1043     struct brw_surface_state local_ss;
   1044     dri_bo *pixmap_bo = i830_get_pixmap_bo(pPixmap);
   1045 
   1046     ss = (struct brw_surface_state_padded *)ss_bo->virtual + ss_index;
   1047 
   1048     /* Since ss is a pointer to WC memory, do all of our bit operations
   1049      * into a local temporary first.
   1050      */
   1051     memset(&local_ss, 0, sizeof(local_ss));
   1052     local_ss.ss0.surface_type = BRW_SURFACE_2D;
   1053     if (is_dst) {
   1054 	uint32_t dst_format = 0;
   1055 	Bool ret = TRUE;
   1056 
   1057 	ret = i965_get_dest_format(pPicture, &dst_format);
   1058 	assert(ret == TRUE);
   1059 	local_ss.ss0.surface_format = dst_format;
   1060     } else {
   1061 	local_ss.ss0.surface_format = i965_get_card_format(pPicture);
   1062     }
   1063 
   1064     local_ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
   1065     local_ss.ss0.writedisable_alpha = 0;
   1066     local_ss.ss0.writedisable_red = 0;
   1067     local_ss.ss0.writedisable_green = 0;
   1068     local_ss.ss0.writedisable_blue = 0;
   1069     local_ss.ss0.color_blend = 1;
   1070     local_ss.ss0.vert_line_stride = 0;
   1071     local_ss.ss0.vert_line_stride_ofs = 0;
   1072     local_ss.ss0.mipmap_layout_mode = 0;
   1073     local_ss.ss0.render_cache_read_mode = 0;
   1074     if (pixmap_bo != NULL)
   1075 	local_ss.ss1.base_addr = pixmap_bo->offset;
   1076     else
   1077 	local_ss.ss1.base_addr = intel_get_pixmap_offset(pPixmap);
   1078 
   1079     local_ss.ss2.mip_count = 0;
   1080     local_ss.ss2.render_target_rotation = 0;
   1081     local_ss.ss2.height = pPixmap->drawable.height - 1;
   1082     local_ss.ss2.width = pPixmap->drawable.width - 1;
   1083     local_ss.ss3.pitch = intel_get_pixmap_pitch(pPixmap) - 1;
   1084     local_ss.ss3.tile_walk = 0; /* Tiled X */
   1085     local_ss.ss3.tiled_surface = i830_pixmap_tiled(pPixmap) ? 1 : 0;
   1086 
   1087     memcpy(ss, &local_ss, sizeof(local_ss));
   1088 
   1089     if (pixmap_bo != NULL) {
   1090 	uint32_t write_domain, read_domains;
   1091 
   1092 	if (is_dst) {
   1093 	    write_domain = I915_GEM_DOMAIN_RENDER;
   1094 	    read_domains = I915_GEM_DOMAIN_RENDER;
   1095 	} else {
   1096 	    write_domain = 0;
   1097 	    read_domains = I915_GEM_DOMAIN_SAMPLER;
   1098 	}
   1099 	dri_bo_emit_reloc(ss_bo, read_domains, write_domain,
   1100 			  0,
   1101 			  ss_index * sizeof(*ss) +
   1102 			  offsetof(struct brw_surface_state, ss1),
   1103 			  pixmap_bo);
   1104     }
   1105 }
   1106 
    /**
     * Emits the batch commands that program the 3D pipeline for the composite
     * operation currently stored in render_state->composite_op.
     *
     * In order, this emits: a flush, pipeline select, an (empty) CS URB state,
     * the state base addresses, the system instruction pointer, a pipe control,
     * the binding table pointers, the drawing rectangle, the pipelined state
     * pointers (VS, SF, WM, CC), the URB fences, the CS URB state, and finally
     * the vertex element layout.
     *
     * Side effects visible here: clears render_state->needs_state_emit, sets
     * pI830->last_3d to LAST_3D_RENDER, and stores the per-vertex size in
     * bytes in render_state->vertex_size.
     *
     * The dword count passed to each BEGIN_BATCH must match the number of
     * OUT_BATCH/OUT_RELOC calls before the following ADVANCE_BATCH.
     */
    1107 static void
    1108 i965_emit_composite_state(ScrnInfoPtr pScrn)
    1109 {
    1110     I830Ptr pI830 = I830PTR(pScrn);
    1111     struct gen4_render_state *render_state= pI830->gen4_render_state;
    1112     gen4_composite_op *composite_op = &render_state->composite_op;
    1113     int op = composite_op->op;
    1114     PicturePtr pMaskPicture = composite_op->mask_picture;
    1115     PicturePtr pDstPicture = composite_op->dest_picture;
    1116     PixmapPtr pMask = composite_op->mask;
    1117     PixmapPtr pDst = composite_op->dest;
    1118     sampler_state_filter_t src_filter = composite_op->src_filter;
    1119     sampler_state_filter_t mask_filter = composite_op->mask_filter;
    1120     sampler_state_extend_t src_extend = composite_op->src_extend;
    1121     sampler_state_extend_t mask_extend = composite_op->mask_extend;
    1122     Bool is_affine = composite_op->is_affine;
    1123     int urb_vs_start, urb_vs_size;
    1124     int urb_gs_start, urb_gs_size;
    1125     int urb_clip_start, urb_clip_size;
    1126     int urb_sf_start, urb_sf_size;
    1127     int urb_cs_start, urb_cs_size;
    1128     uint32_t src_blend, dst_blend;
    1129     dri_bo *binding_table_bo = composite_op->binding_table_bo;
    1130 
    1131     render_state->needs_state_emit = FALSE; /* state is emitted below */
    1132 
    1133     IntelEmitInvarientState(pScrn);
    1134     pI830->last_3d = LAST_3D_RENDER;
    1135 
    /* Lay the URB sections out back to back: VS, GS, CLIP, SF, CS. */
    1136     urb_vs_start = 0;
    1137     urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    1138     urb_gs_start = urb_vs_start + urb_vs_size;
    1139     urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    1140     urb_clip_start = urb_gs_start + urb_gs_size;
    1141     urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    1142     urb_sf_start = urb_clip_start + urb_clip_size;
    1143     urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    1144     urb_cs_start = urb_sf_start + urb_sf_size;
    1145     urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
    1146 
    /* src_blend/dst_blend index the cc_state table in the CC reloc below. */
    1147     i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format,
    1148 			&src_blend, &dst_blend);
    1149 
    1150     /* Begin the long sequence of commands needed to set up the 3D
    1151      * rendering pipe
    1152      */
    1153     {
    1154 	BEGIN_BATCH(2);
    1155 	OUT_BATCH(MI_FLUSH |
    1156 		  MI_STATE_INSTRUCTION_CACHE_FLUSH |
    1157 		  BRW_MI_GLOBAL_SNAPSHOT_RESET);
    1158 	OUT_BATCH(MI_NOOP);
    1159 	ADVANCE_BATCH();
    1160     }
    1161     {
    1162         if (IS_IGDNG(pI830))
    1163             BEGIN_BATCH(14); /* 2 more dwords: instruction base + max addr */
    1164         else
    1165             BEGIN_BATCH(12);
    1166 
    1167         /* Match Mesa driver setup */
    1168 	if (IS_G4X(pI830) || IS_IGDNG(pI830))
    1169 	    OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    1170 	else
    1171 	    OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    1172 
    1173 	OUT_BATCH(BRW_CS_URB_STATE | 0);
    1174 	OUT_BATCH((0 << 4) |  /* URB Entry Allocation Size */
    1175 		  (0 << 0));  /* Number of URB Entries */
    1176 
    1177 	/* Zero out the base address registers so all offsets are
    1178 	 * absolute.
    1179 	 */
    1180         if (IS_IGDNG(pI830)) {
    1181             OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6);
    1182             OUT_BATCH(0 | BASE_ADDRESS_MODIFY);  /* General state base address */
    1183             OUT_BATCH(0 | BASE_ADDRESS_MODIFY);  /* Surface state base address */
    1184             OUT_BATCH(0 | BASE_ADDRESS_MODIFY);  /* media base addr, don't care */
    1185             OUT_BATCH(0 | BASE_ADDRESS_MODIFY);  /* Instruction base address */
    1186             /* general state max addr, disabled */
    1187             OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY);
    1188             /* media object state max addr, disabled */
    1189             OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY);
    1190             /* Instruction max addr, disabled */
    1191             OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY);
    1192         } else {
    1193             OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4);
    1194             OUT_BATCH(0 | BASE_ADDRESS_MODIFY);  /* General state base address */
    1195             OUT_BATCH(0 | BASE_ADDRESS_MODIFY);  /* Surface state base address */
    1196             OUT_BATCH(0 | BASE_ADDRESS_MODIFY);  /* media base addr, don't care */
    1197             /* general state max addr, disabled */
    1198             OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY);
    1199             /* media object state max addr, disabled */
    1200             OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY);
    1201         }
    1202 	/* Set system instruction pointer */
    1203 	OUT_BATCH(BRW_STATE_SIP | 0);
    1204 	OUT_RELOC(render_state->sip_kernel_bo,
    1205 		  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    1206 	OUT_BATCH(MI_NOOP);
    1207 	ADVANCE_BATCH();
    1208     }
    1209     {
    1210 	int pipe_ctrl;
    1211 	BEGIN_BATCH(26);
    1212 	/* Pipe control */
    1213 
    1214 	if (IS_IGDNG(pI830))
    1215             pipe_ctrl = BRW_PIPE_CONTROL_NOWRITE;
    1216 	else
    1217             pipe_ctrl = BRW_PIPE_CONTROL_NOWRITE | BRW_PIPE_CONTROL_IS_FLUSH;
    1218 
    1219 	OUT_BATCH(BRW_PIPE_CONTROL | pipe_ctrl | 2);
    1220 	OUT_BATCH(0);			       /* Destination address */
    1221 	OUT_BATCH(0);			       /* Immediate data low DW */
    1222 	OUT_BATCH(0);			       /* Immediate data high DW */
    1223 
    1224 	/* Binding table pointers */
    1225 	OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
    1226 	OUT_BATCH(0); /* vs */
    1227 	OUT_BATCH(0); /* gs */
    1228 	OUT_BATCH(0); /* clip */
    1229 	OUT_BATCH(0); /* sf */
    1230 	/* Only the PS uses the binding table */
    1231 	OUT_RELOC(binding_table_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0);
    1232 
    1233 	/* The drawing rectangle clipping is always on.  Set it to values that
    1234 	 * shouldn't do any clipping.
    1235 	 */
    1236 	OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2);
    1237 	OUT_BATCH(0x00000000);	/* ymin, xmin */
    1238 	OUT_BATCH(DRAW_YMAX(pDst->drawable.height - 1) |
    1239 		  DRAW_XMAX(pDst->drawable.width - 1)); /* ymax, xmax */
    1240 	OUT_BATCH(0x00000000);	/* yorigin, xorigin */
    1241 
    1242 	/* skip the depth buffer */
    1243 	/* skip the polygon stipple */
    1244 	/* skip the polygon stipple offset */
    1245 	/* skip the line stipple */
    1246 
    1247 	/* Set the pointers to the 3d pipeline state */
    1248 	OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5);
    1249 	OUT_RELOC(render_state->vs_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    1250 	OUT_BATCH(BRW_GS_DISABLE);   /* disable GS, resulting in passthrough */
    1251 	OUT_BATCH(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */
    1252 	if (pMask) { /* SF state differs depending on whether a mask is used */
    1253 	    OUT_RELOC(render_state->sf_mask_state_bo,
    1254 		      I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    1255 	} else {
    1256 	    OUT_RELOC(render_state->sf_state_bo,
    1257 		      I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    1258 	}
    1259 
    	/* WM state is selected by kernel and by src/mask filter and extend */
    1260 	OUT_RELOC(render_state->wm_state_bo[composite_op->wm_kernel]
    1261 		  [src_filter][src_extend]
    1262 		  [mask_filter][mask_extend],
    1263 		  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    1264 
    	/* CC state: offset of the entry for this blend factor pair */
    1265 	OUT_RELOC(render_state->cc_state_bo,
    1266 		  I915_GEM_DOMAIN_INSTRUCTION, 0,
    1267 		  offsetof(struct gen4_cc_unit_state,
    1268 			   cc_state[src_blend][dst_blend]));
    1269 
    1270 	/* URB fence: each fence is the end (start + size) of its section */
    1271 	OUT_BATCH(BRW_URB_FENCE |
    1272 		  UF0_CS_REALLOC |
    1273 		  UF0_SF_REALLOC |
    1274 		  UF0_CLIP_REALLOC |
    1275 		  UF0_GS_REALLOC |
    1276 		  UF0_VS_REALLOC |
    1277 		  1);
    1278 	OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
    1279 		  ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
    1280 		  ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    1281 	OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
    1282 		  ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    1283 
    1284 	/* Constant buffer state */
    1285 	OUT_BATCH(BRW_CS_URB_STATE | 0);
    1286 	OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) |
    1287 		  (URB_CS_ENTRIES << 0));
    1288 	ADVANCE_BATCH();
    1289     }
    1290     {
    1291 	/*
    1292 	 * number of extra parameters per vertex (source, plus mask if any)
    1293 	 */
    1294         int nelem = pMask ? 2: 1;
    1295 	/*
    1296 	 * size of extra parameters, in floats:
    1297 	 *  3 for homogeneous (R32G32B32_FLOAT)
    1298 	 *  2 for cartesian (R32G32_FLOAT)
    1299 	 */
    1300 	int selem = is_affine ? 2 : 3;
    1301 	uint32_t    w_component;
    1302 	uint32_t    src_format;
    1303 
    	/* bytes per vertex: x,y plus nelem coordinate sets of selem floats */
    1304 	render_state->vertex_size = 4 * (2 + nelem * selem);
    1305 
    1306 	if (is_affine)
    1307 	{
    1308 	    src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
    1309 	    w_component = BRW_VFCOMPONENT_STORE_1_FLT; /* no w in the vb: store 1.0 */
    1310 	}
    1311 	else
    1312 	{
    1313 	    src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
    1314 	    w_component = BRW_VFCOMPONENT_STORE_SRC; /* w comes from the vb */
    1315 	}
    1316 
    1317         if (IS_IGDNG(pI830)) {
    1318             BEGIN_BATCH(pMask?9:7);
    1319 	    /*
    1320 	     * The reason to add this extra vertex element in the header is that
    1321 	     * IGDNG has a different vertex header definition and the original
    1322 	     * method of setting the destination element offset doesn't exist
    1323 	     * anymore, which means hardware requires a predefined vertex element layout.
    1324 	     *
    1325 	     * haihao proposed this approach to fill the first vertex element, so
    1326 	     * the original layout for Gen4 doesn't need to change, and the original
    1327 	     * shader programs' behavior is also kept.
    1328 	     *
    1329 	     * I think this is not bad. - zhenyu
    1330 	     */
    1331 
    1332 	    OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * (2 + nelem)) - 1));
    1333 	    OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
    1334                       VE0_VALID |
    1335                       (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
    1336                       (0 << VE0_OFFSET_SHIFT));
    1337 
    1338 	    OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
    1339                       (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
    1340                       (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
    1341                       (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
    1342         } else {
    1343             BEGIN_BATCH(pMask?7:5);
    1344             /* Set up our vertex elements, sourced from the single vertex buffer
    1345              * that will be set up later.
    1346              */
    1347             OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * (1 + nelem)) - 1));
    1348         }
    1349 
    1350 	/* x,y */
    1351 	OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
    1352 		  VE0_VALID |
    1353 		  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
    1354 		  (0                            << VE0_OFFSET_SHIFT));
    1355 
    1356         if (IS_IGDNG(pI830)) /* IGDNG: no destination element offset field */
    1357             OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC	<< VE1_VFCOMPONENT_0_SHIFT) |
    1358                       (BRW_VFCOMPONENT_STORE_SRC	<< VE1_VFCOMPONENT_1_SHIFT) |
    1359                       (BRW_VFCOMPONENT_STORE_1_FLT	<< VE1_VFCOMPONENT_2_SHIFT) |
    1360                       (BRW_VFCOMPONENT_STORE_1_FLT	<< VE1_VFCOMPONENT_3_SHIFT));
    1361         else
    1362             OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC	<< VE1_VFCOMPONENT_0_SHIFT) |
    1363                       (BRW_VFCOMPONENT_STORE_SRC	<< VE1_VFCOMPONENT_1_SHIFT) |
    1364                       (BRW_VFCOMPONENT_STORE_1_FLT	<< VE1_VFCOMPONENT_2_SHIFT) |
    1365                       (BRW_VFCOMPONENT_STORE_1_FLT	<< VE1_VFCOMPONENT_3_SHIFT) |
    1366                       (4				<< VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
    1367 	/* u0, v0, w0 */
    1368 	OUT_BATCH((0				<< VE0_VERTEX_BUFFER_INDEX_SHIFT) |
    1369 		  VE0_VALID					     |
    1370 		  (src_format			<< VE0_FORMAT_SHIFT) |
    1371 		  ((2 * 4)                      << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
    1372 
    1373         if (IS_IGDNG(pI830))
    1374             OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC	<< VE1_VFCOMPONENT_0_SHIFT) |
    1375                       (BRW_VFCOMPONENT_STORE_SRC	<< VE1_VFCOMPONENT_1_SHIFT) |
    1376                       (w_component			<< VE1_VFCOMPONENT_2_SHIFT) |
    1377                       (BRW_VFCOMPONENT_STORE_1_FLT	<< VE1_VFCOMPONENT_3_SHIFT));
    1378         else
    1379             OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC	<< VE1_VFCOMPONENT_0_SHIFT) |
    1380                       (BRW_VFCOMPONENT_STORE_SRC	<< VE1_VFCOMPONENT_1_SHIFT) |
    1381                       (w_component			<< VE1_VFCOMPONENT_2_SHIFT) |
    1382                       (BRW_VFCOMPONENT_STORE_1_FLT	<< VE1_VFCOMPONENT_3_SHIFT) |
    1383                       ((4 + 4)			<< VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
    1384 	/* u1, v1, w1 */
    1385    	if (pMask) {
    1386 	    OUT_BATCH((0			    << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
    1387 		      VE0_VALID							    |
    1388 		      (src_format		    << VE0_FORMAT_SHIFT) |
    1389 		      (((2 + selem) * 4)            << VE0_OFFSET_SHIFT));  /* vb offset in bytes */
    1390 
    1391             if (IS_IGDNG(pI830))
    1392                 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC    << VE1_VFCOMPONENT_0_SHIFT) |
    1393                           (BRW_VFCOMPONENT_STORE_SRC    << VE1_VFCOMPONENT_1_SHIFT) |
    1394                           (w_component		    << VE1_VFCOMPONENT_2_SHIFT) |
    1395                           (BRW_VFCOMPONENT_STORE_1_FLT  << VE1_VFCOMPONENT_3_SHIFT));
    1396             else
    1397                 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC    << VE1_VFCOMPONENT_0_SHIFT) |
    1398                           (BRW_VFCOMPONENT_STORE_SRC    << VE1_VFCOMPONENT_1_SHIFT) |
    1399                           (w_component		    << VE1_VFCOMPONENT_2_SHIFT) |
    1400                           (BRW_VFCOMPONENT_STORE_1_FLT  << VE1_VFCOMPONENT_3_SHIFT) |
    1401                           ((4 + 4 + 4)		    << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
    1402    	}
    1403 
    1404 	ADVANCE_BATCH();
    1405     }
    1406 }
   1407 
   1408 /**
   1409  * Returns whether the current set of composite state plus vertex buffer is
   1410  * expected to fit in the aperture.
   1411  */
   1412 static Bool
   1413 i965_composite_check_aperture(ScrnInfoPtr pScrn)
   1414 {
   1415     I830Ptr pI830 = I830PTR(pScrn);
   1416     struct gen4_render_state *render_state= pI830->gen4_render_state;
   1417     gen4_composite_op *composite_op = &render_state->composite_op;
   1418     drm_intel_bo *bo_table[] = {
   1419 	pI830->batch_bo,
   1420 	composite_op->binding_table_bo,
   1421 	render_state->vertex_buffer_bo,
   1422 	render_state->vs_state_bo,
   1423 	render_state->sf_state_bo,
   1424 	render_state->sf_mask_state_bo,
   1425 	render_state->wm_state_bo[composite_op->wm_kernel]
   1426 				 [composite_op->src_filter]
   1427 				 [composite_op->src_extend]
   1428 				 [composite_op->mask_filter]
   1429 				 [composite_op->mask_extend],
   1430 	render_state->cc_state_bo,
   1431 	render_state->sip_kernel_bo,
   1432     };
   1433 
   1434     return drm_intel_bufmgr_check_aperture_space(bo_table,
   1435 						 ARRAY_SIZE(bo_table)) == 0;
   1436 }
   1437 
   1438 Bool
   1439 i965_prepare_composite(int op, PicturePtr pSrcPicture,
   1440 		       PicturePtr pMaskPicture, PicturePtr pDstPicture,
   1441 		       PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
   1442 {
   1443     ScrnInfoPtr pScrn = xf86Screens[pDstPicture->pDrawable->pScreen->myNum];
   1444     I830Ptr pI830 = I830PTR(pScrn);
   1445     struct gen4_render_state *render_state= pI830->gen4_render_state;
   1446     gen4_composite_op *composite_op = &render_state->composite_op;
   1447     uint32_t *binding_table;
   1448     drm_intel_bo *binding_table_bo, *surface_state_bo;
   1449 
   1450     if (composite_op->src_filter < 0)
   1451 	I830FALLBACK("Bad src filter 0x%x\n", pSrcPicture->filter);
   1452     composite_op->src_extend =
   1453 	sampler_state_extend_from_picture(pSrcPicture->repeatType);
   1454     if (composite_op->src_extend < 0)
   1455 	I830FALLBACK("Bad src repeat 0x%x\n", pSrcPicture->repeatType);
   1456 
   1457     if (pMaskPicture) {
   1458 	composite_op->mask_filter =
   1459 	    sampler_state_filter_from_picture(pMaskPicture->filter);
   1460 	if (composite_op->mask_filter < 0)
   1461 	    I830FALLBACK("Bad mask filter 0x%x\n", pMaskPicture->filter);
   1462 	composite_op->mask_extend =
   1463 	    sampler_state_extend_from_picture(pMaskPicture->repeatType);
   1464 	if (composite_op->mask_extend < 0)
   1465 	    I830FALLBACK("Bad mask repeat 0x%x\n", pMaskPicture->repeatType);
   1466     } else {
   1467 	composite_op->mask_filter = SAMPLER_STATE_FILTER_NEAREST;
   1468 	composite_op->mask_extend = SAMPLER_STATE_EXTEND_NONE;
   1469     }
   1470 
   1471     /* Set up the surface states. */
   1472     surface_state_bo = dri_bo_alloc(pI830->bufmgr, "surface_state",
   1473 				    3 * sizeof (brw_surface_state_padded),
   1474 				    4096);
   1475     if (dri_bo_map(surface_state_bo, 1) != 0) {
   1476 	dri_bo_unreference(surface_state_bo);
   1477 	return FALSE;
   1478     }
   1479     /* Set up the state buffer for the destination surface */
   1480     i965_set_picture_surface_state(surface_state_bo, 0,
   1481 				   pDstPicture, pDst, TRUE);
   1482     /* Set up the source surface state buffer */
   1483     i965_set_picture_surface_state(surface_state_bo, 1,
   1484 				   pSrcPicture, pSrc, FALSE);
   1485     if (pMask) {
   1486 	/* Set up the mask surface state buffer */
   1487 	i965_set_picture_surface_state(surface_state_bo, 2,
   1488 				       pMaskPicture, pMask,
   1489 				       FALSE);
   1490     }
   1491     dri_bo_unmap(surface_state_bo);
   1492 
   1493     /* Set up the binding table of surface indices to surface state. */
   1494     binding_table_bo = dri_bo_alloc(pI830->bufmgr, "binding_table",
   1495 				    3 * sizeof(uint32_t), 4096);
   1496     if (dri_bo_map (binding_table_bo, 1) != 0) {
   1497 	dri_bo_unreference(binding_table_bo);
   1498 	dri_bo_unreference(surface_state_bo);
   1499 	return FALSE;
   1500     }
   1501 
   1502     binding_table = binding_table_bo->virtual;
   1503     binding_table[0] = intel_emit_reloc(binding_table_bo,
   1504 					0 * sizeof(uint32_t),
   1505 					surface_state_bo,
   1506 					0 * sizeof(brw_surface_state_padded),
   1507 					I915_GEM_DOMAIN_INSTRUCTION, 0);
   1508 
   1509     binding_table[1] = intel_emit_reloc(binding_table_bo,
   1510 					1 * sizeof(uint32_t),
   1511 					surface_state_bo,
   1512 					1 * sizeof(brw_surface_state_padded),
   1513 					I915_GEM_DOMAIN_INSTRUCTION, 0);
   1514 
   1515     if (pMask) {
   1516 	binding_table[2] = intel_emit_reloc(binding_table_bo,
   1517 					    2 * sizeof(uint32_t),
   1518 					    surface_state_bo,
   1519 					    2 * sizeof(brw_surface_state_padded),
   1520 					    I915_GEM_DOMAIN_INSTRUCTION, 0);
   1521     } else {
   1522 	binding_table[2] = 0;
   1523     }
   1524     dri_bo_unmap(binding_table_bo);
   1525     /* All refs to surface_state are now contained in binding_table_bo. */
   1526     drm_intel_bo_unreference(surface_state_bo);
   1527 
   1528     composite_op->op = op;
   1529     composite_op->source_picture = pSrcPicture;
   1530     composite_op->mask_picture = pMaskPicture;
   1531     composite_op->dest_picture = pDstPicture;
   1532     composite_op->source = pSrc;
   1533     composite_op->mask = pMask;
   1534     composite_op->dest = pDst;
   1535     drm_intel_bo_unreference(composite_op->binding_table_bo);
   1536     composite_op->binding_table_bo = binding_table_bo;
   1537     composite_op->src_filter =
   1538 	sampler_state_filter_from_picture(pSrcPicture->filter);
   1539 
   1540     pI830->scale_units[0][0] = pSrc->drawable.width;
   1541     pI830->scale_units[0][1] = pSrc->drawable.height;
   1542 
   1543     pI830->transform[0] = pSrcPicture->transform;
   1544     composite_op->is_affine =
   1545 	i830_transform_is_affine(pI830->transform[0]);
   1546 
   1547     if (!pMask) {
   1548 	pI830->transform[1] = NULL;
   1549 	pI830->scale_units[1][0] = -1;
   1550 	pI830->scale_units[1][1] = -1;
   1551     } else {
   1552 	pI830->transform[1] = pMaskPicture->transform;
   1553 	pI830->scale_units[1][0] = pMask->drawable.width;
   1554 	pI830->scale_units[1][1] = pMask->drawable.height;
   1555 	composite_op->is_affine |=
   1556 	    i830_transform_is_affine(pI830->transform[1]);
   1557     }
   1558 
   1559 
   1560     if (pMask) {
   1561 	if (pMaskPicture->componentAlpha &&
   1562 	    PICT_FORMAT_RGB(pMaskPicture->format))
   1563 	{
   1564 	    if (i965_blend_op[op].src_alpha) {
   1565 		if (composite_op->is_affine)
   1566 		    composite_op->wm_kernel = WM_KERNEL_MASKCA_SRCALPHA_AFFINE;
   1567 		else
   1568 		    composite_op->wm_kernel = WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE;
   1569 	    } else {
   1570 		if (composite_op->is_affine)
   1571 		    composite_op->wm_kernel = WM_KERNEL_MASKCA_AFFINE;
   1572 		else
   1573 		    composite_op->wm_kernel = WM_KERNEL_MASKCA_PROJECTIVE;
   1574 	    }
   1575 	} else {
   1576 	    if (composite_op->is_affine)
   1577 		composite_op->wm_kernel = WM_KERNEL_MASKNOCA_AFFINE;
   1578 	    else
   1579 		composite_op->wm_kernel = WM_KERNEL_MASKNOCA_PROJECTIVE;
   1580 	}
   1581     } else {
   1582 	if (composite_op->is_affine)
   1583 	    composite_op->wm_kernel = WM_KERNEL_NOMASK_AFFINE;
   1584 	else
   1585 	    composite_op->wm_kernel = WM_KERNEL_NOMASK_PROJECTIVE;
   1586     }
   1587 
   1588     if (!i965_composite_check_aperture(pScrn)) {
   1589 	intel_batch_flush(pScrn, FALSE);
   1590 	if (!i965_composite_check_aperture(pScrn))
   1591 	    I830FALLBACK("Couldn't fit render operation in aperture\n");
   1592     }
   1593 
   1594     render_state->needs_state_emit = TRUE;
   1595 
   1596     return TRUE;
   1597 }
   1598 
   1599 static drm_intel_bo *
   1600 i965_get_vb_space(ScrnInfoPtr pScrn)
   1601 {
   1602     I830Ptr pI830 = I830PTR(pScrn);
   1603     struct gen4_render_state *render_state = pI830->gen4_render_state;
   1604 
   1605     /* If the vertex buffer is too full, then we free the old and a new one
   1606      * gets made.
   1607      */
   1608     if (render_state->vb_offset + VERTEX_FLOATS_PER_COMPOSITE >
   1609 	VERTEX_BUFFER_SIZE) {
   1610 	drm_intel_bo_unreference(render_state->vertex_buffer_bo);
   1611 	render_state->vertex_buffer_bo = NULL;
   1612     }
   1613 
   1614     /* Alloc a new vertex buffer if necessary. */
   1615     if (render_state->vertex_buffer_bo == NULL) {
   1616 	render_state->vertex_buffer_bo = drm_intel_bo_alloc(pI830->bufmgr, "vb",
   1617 							    sizeof(gen4_vertex_buffer),
   1618 							    4096);
   1619 	render_state->vb_offset = 0;
   1620     }
   1621 
   1622     drm_intel_bo_reference(render_state->vertex_buffer_bo);
   1623     return render_state->vertex_buffer_bo;
   1624 }
   1625 
   1626 void
   1627 i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
   1628 	       int dstX, int dstY, int w, int h)
   1629 {
   1630     ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
   1631     I830Ptr pI830 = I830PTR(pScrn);
   1632     struct gen4_render_state *render_state = pI830->gen4_render_state;
   1633     Bool has_mask;
   1634     float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
   1635     int i;
   1636     drm_intel_bo *vb_bo;
   1637     float vb[18];
   1638     Bool is_affine = render_state->composite_op.is_affine;
   1639 
   1640     if (is_affine)
   1641     {
   1642 	if (!i830_get_transformed_coordinates(srcX, srcY,
   1643 					      pI830->transform[0],
   1644 					      &src_x[0], &src_y[0]))
   1645 	    return;
   1646 	if (!i830_get_transformed_coordinates(srcX, srcY + h,
   1647 					      pI830->transform[0],
   1648 					      &src_x[1], &src_y[1]))
   1649 	    return;
   1650 	if (!i830_get_transformed_coordinates(srcX + w, srcY + h,
   1651 					      pI830->transform[0],
   1652 					      &src_x[2], &src_y[2]))
   1653 	    return;
   1654     }
   1655     else
   1656     {
   1657 	if (!i830_get_transformed_coordinates_3d(srcX, srcY,
   1658 						 pI830->transform[0],
   1659 						 &src_x[0], &src_y[0],
   1660 						 &src_w[0]))
   1661 	    return;
   1662 	if (!i830_get_transformed_coordinates_3d(srcX, srcY + h,
   1663 						 pI830->transform[0],
   1664 						 &src_x[1], &src_y[1],
   1665 						 &src_w[1]))
   1666 	    return;
   1667 	if (!i830_get_transformed_coordinates_3d(srcX + w, srcY + h,
   1668 						 pI830->transform[0],
   1669 						 &src_x[2], &src_y[2],
   1670 						 &src_w[2]))
   1671 	    return;
   1672     }
   1673 
   1674     if (pI830->scale_units[1][0] == -1 || pI830->scale_units[1][1] == -1) {
   1675 	has_mask = FALSE;
   1676     } else {
   1677 	has_mask = TRUE;
   1678 	if (is_affine) {
   1679 	    if (!i830_get_transformed_coordinates(maskX, maskY,
   1680 						  pI830->transform[1],
   1681 						  &mask_x[0], &mask_y[0]))
   1682 		return;
   1683 	    if (!i830_get_transformed_coordinates(maskX, maskY + h,
   1684 						  pI830->transform[1],
   1685 						  &mask_x[1], &mask_y[1]))
   1686 		return;
   1687 	    if (!i830_get_transformed_coordinates(maskX + w, maskY + h,
   1688 						  pI830->transform[1],
   1689 						  &mask_x[2], &mask_y[2]))
   1690 		return;
   1691 	} else {
   1692 	    if (!i830_get_transformed_coordinates_3d(maskX, maskY,
   1693 						     pI830->transform[1],
   1694 						     &mask_x[0], &mask_y[0],
   1695 						     &mask_w[0]))
   1696 		return;
   1697 	    if (!i830_get_transformed_coordinates_3d(maskX, maskY + h,
   1698 						     pI830->transform[1],
   1699 						     &mask_x[1], &mask_y[1],
   1700 						     &mask_w[1]))
   1701 		return;
   1702 	    if (!i830_get_transformed_coordinates_3d(maskX + w, maskY + h,
   1703 						     pI830->transform[1],
   1704 						     &mask_x[2], &mask_y[2],
   1705 						     &mask_w[2]))
   1706 		return;
   1707 	}
   1708     }
   1709 
   1710     vb_bo = i965_get_vb_space(pScrn);
   1711     if (vb_bo == NULL)
   1712 	return;
   1713     i = 0;
   1714     /* rect (x2,y2) */
   1715     vb[i++] = (float)(dstX + w);
   1716     vb[i++] = (float)(dstY + h);
   1717     vb[i++] = src_x[2] / pI830->scale_units[0][0];
   1718     vb[i++] = src_y[2] / pI830->scale_units[0][1];
   1719     if (!is_affine)
   1720 	vb[i++] = src_w[2];
   1721     if (has_mask) {
   1722         vb[i++] = mask_x[2] / pI830->scale_units[1][0];
   1723         vb[i++] = mask_y[2] / pI830->scale_units[1][1];
   1724 	if (!is_affine)
   1725 	    vb[i++] = mask_w[2];
   1726     }
   1727 
   1728     /* rect (x1,y2) */
   1729     vb[i++] = (float)dstX;
   1730     vb[i++] = (float)(dstY + h);
   1731     vb[i++] = src_x[1] / pI830->scale_units[0][0];
   1732     vb[i++] = src_y[1] / pI830->scale_units[0][1];
   1733     if (!is_affine)
   1734 	vb[i++] = src_w[1];
   1735     if (has_mask) {
   1736         vb[i++] = mask_x[1] / pI830->scale_units[1][0];
   1737         vb[i++] = mask_y[1] / pI830->scale_units[1][1];
   1738 	if (!is_affine)
   1739 	    vb[i++] = mask_w[1];
   1740     }
   1741 
   1742     /* rect (x1,y1) */
   1743     vb[i++] = (float)dstX;
   1744     vb[i++] = (float)dstY;
   1745     vb[i++] = src_x[0] / pI830->scale_units[0][0];
   1746     vb[i++] = src_y[0] / pI830->scale_units[0][1];
   1747     if (!is_affine)
   1748 	vb[i++] = src_w[0];
   1749     if (has_mask) {
   1750         vb[i++] = mask_x[0] / pI830->scale_units[1][0];
   1751         vb[i++] = mask_y[0] / pI830->scale_units[1][1];
   1752 	if (!is_affine)
   1753 	    vb[i++] = mask_w[0];
   1754     }
   1755     assert (i <= VERTEX_BUFFER_SIZE);
   1756     drm_intel_bo_subdata(vb_bo, render_state->vb_offset * 4, i * 4, vb);
   1757 
   1758     if (!i965_composite_check_aperture(pScrn))
   1759 	intel_batch_flush(pScrn, FALSE);
   1760 
   1761     intel_batch_start_atomic(pScrn, 200);
   1762     if (render_state->needs_state_emit)
   1763 	i965_emit_composite_state(pScrn);
   1764 
   1765     BEGIN_BATCH(12);
   1766     OUT_BATCH(MI_FLUSH);
   1767     /* Set up the pointer to our (single) vertex buffer */
   1768     OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);
   1769     OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
   1770 	      VB0_VERTEXDATA |
   1771 	      (render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT));
   1772     OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, render_state->vb_offset * 4);
   1773 
   1774     if (IS_IGDNG(pI830))
   1775         OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, render_state->vb_offset * 4 + i * 4);
   1776     else
   1777         OUT_BATCH(3);
   1778 
   1779     OUT_BATCH(0); // ignore for VERTEXDATA, but still there
   1780 
   1781     OUT_BATCH(BRW_3DPRIMITIVE |
   1782 	      BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
   1783 	      (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
   1784 	      (0 << 9) |  /* CTG - indirect vertex count */
   1785 	      4);
   1786     OUT_BATCH(3);  /* vertex count per instance */
   1787     OUT_BATCH(0); /* start vertex offset */
   1788     OUT_BATCH(1); /* single instance */
   1789     OUT_BATCH(0); /* start instance location */
   1790     OUT_BATCH(0); /* index buffer offset, ignored */
   1791     ADVANCE_BATCH();
   1792 
   1793     render_state->vb_offset += i;
   1794     drm_intel_bo_unreference(vb_bo);
   1795 
   1796     intel_batch_end_atomic(pScrn);
   1797 
   1798     i830_debug_sync(pScrn);
   1799 }
   1800 
   1801 void
   1802 i965_batch_flush_notify(ScrnInfoPtr pScrn)
   1803 {
   1804     I830Ptr pI830 = I830PTR(pScrn);
   1805     struct gen4_render_state *render_state = pI830->gen4_render_state;
   1806 
   1807     /* Once a batch is emitted, we never want to map again any buffer
   1808      * object being referenced by that batch, (which would be very
   1809      * expensive). */
   1810     if (render_state->vertex_buffer_bo) {
   1811 	dri_bo_unreference (render_state->vertex_buffer_bo);
   1812 	render_state->vertex_buffer_bo = NULL;
   1813     }
   1814 
   1815     render_state->needs_state_emit = TRUE;
   1816 }
   1817 
   1818 /**
   1819  * Called at EnterVT so we can set up our offsets into the state buffer.
   1820  */
   1821 void
   1822 gen4_render_state_init(ScrnInfoPtr pScrn)
   1823 {
   1824     I830Ptr pI830 = I830PTR(pScrn);
   1825     struct gen4_render_state *render_state;
   1826     int i, j, k, l, m;
   1827     drm_intel_bo *sf_kernel_bo, *sf_kernel_mask_bo;
   1828     drm_intel_bo *border_color_bo;
   1829 
   1830     if (pI830->gen4_render_state == NULL)
   1831 	pI830->gen4_render_state = calloc(sizeof(*render_state), 1);
   1832 
   1833     render_state = pI830->gen4_render_state;
   1834     render_state->vb_offset = 0;
   1835 
   1836     render_state->vs_state_bo = gen4_create_vs_unit_state(pScrn);
   1837 
   1838     /* Set up the two SF states (one for blending with a mask, one without) */
   1839     if (IS_IGDNG(pI830)) {
   1840 	sf_kernel_bo = intel_bo_alloc_for_data(pScrn,
   1841 					       sf_kernel_static_gen5,
   1842 					       sizeof(sf_kernel_static_gen5),
   1843 					       "sf kernel gen5");
   1844 	sf_kernel_mask_bo = intel_bo_alloc_for_data(pScrn,
   1845 						    sf_kernel_mask_static_gen5,
   1846 						    sizeof(sf_kernel_mask_static_gen5),
   1847 						    "sf mask kernel");
   1848     } else {
   1849 	sf_kernel_bo = intel_bo_alloc_for_data(pScrn,
   1850 					       sf_kernel_static,
   1851 					       sizeof(sf_kernel_static),
   1852 					       "sf kernel");
   1853 	sf_kernel_mask_bo = intel_bo_alloc_for_data(pScrn,
   1854 						    sf_kernel_mask_static,
   1855 						    sizeof(sf_kernel_mask_static),
   1856 						    "sf mask kernel");
   1857     }
   1858     render_state->sf_state_bo = gen4_create_sf_state(pScrn, sf_kernel_bo);
   1859     render_state->sf_mask_state_bo = gen4_create_sf_state(pScrn,
   1860 							  sf_kernel_mask_bo);
   1861     drm_intel_bo_unreference(sf_kernel_bo);
   1862     drm_intel_bo_unreference(sf_kernel_mask_bo);
   1863 
   1864     for (m = 0; m < WM_KERNEL_COUNT; m++) {
   1865 	if (IS_IGDNG(pI830))
   1866 	    render_state->wm_kernel_bo[m] =
   1867 		intel_bo_alloc_for_data(pScrn,
   1868 				        wm_kernels_gen5[m].data, wm_kernels_gen5[m].size,
   1869 				        "WM kernel gen5");
   1870 	else
   1871 	    render_state->wm_kernel_bo[m] =
   1872 		intel_bo_alloc_for_data(pScrn,
   1873 				        wm_kernels[m].data, wm_kernels[m].size,
   1874 				        "WM kernel");
   1875     }
   1876 
   1877     /* Set up the WM states: each filter/extend type for source and mask, per
   1878      * kernel.
   1879      */
   1880     border_color_bo = sampler_border_color_create(pScrn);
   1881     for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++) {
   1882 	for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++) {
   1883 	    for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) {
   1884 		for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++) {
   1885 		    drm_intel_bo *sampler_state_bo;
   1886 
   1887 		    sampler_state_bo =
   1888 			gen4_create_sampler_state(pScrn,
   1889 						  i, j,
   1890 						  k, l,
   1891 						  border_color_bo);
   1892 
   1893 		    for (m = 0; m < WM_KERNEL_COUNT; m++) {
   1894 			if (IS_IGDNG(pI830))
   1895 			    render_state->wm_state_bo[m][i][j][k][l] =
   1896 				gen4_create_wm_state(pScrn,
   1897 						     wm_kernels_gen5[m].has_mask,
   1898 						     render_state->wm_kernel_bo[m],
   1899 						     sampler_state_bo);
   1900 			else
   1901 			    render_state->wm_state_bo[m][i][j][k][l] =
   1902 				gen4_create_wm_state(pScrn,
   1903 						     wm_kernels[m].has_mask,
   1904 						     render_state->wm_kernel_bo[m],
   1905 						     sampler_state_bo);
   1906 		    }
   1907 		    drm_intel_bo_unreference(sampler_state_bo);
   1908 		}
   1909 	    }
   1910 	}
   1911     }
   1912     drm_intel_bo_unreference(border_color_bo);
   1913 
   1914     render_state->cc_state_bo = gen4_create_cc_unit_state(pScrn);
   1915     render_state->sip_kernel_bo = intel_bo_alloc_for_data(pScrn,
   1916 							  sip_kernel_static,
   1917 							  sizeof(sip_kernel_static),
   1918 							  "sip kernel");
   1919 }
   1920 
   1921 /**
   1922  * Called at LeaveVT.
   1923  */
   1924 void
   1925 gen4_render_state_cleanup(ScrnInfoPtr pScrn)
   1926 {
   1927     I830Ptr pI830 = I830PTR(pScrn);
   1928     struct gen4_render_state *render_state= pI830->gen4_render_state;
   1929     int i, j, k, l, m;
   1930     gen4_composite_op *composite_op = &render_state->composite_op;
   1931 
   1932     drm_intel_bo_unreference(composite_op->binding_table_bo);
   1933     drm_intel_bo_unreference(render_state->vertex_buffer_bo);
   1934 
   1935     drm_intel_bo_unreference(render_state->vs_state_bo);
   1936     drm_intel_bo_unreference(render_state->sf_state_bo);
   1937     drm_intel_bo_unreference(render_state->sf_mask_state_bo);
   1938 
   1939     for (i = 0; i < WM_KERNEL_COUNT; i++)
   1940 	drm_intel_bo_unreference(render_state->wm_kernel_bo[i]);
   1941 
   1942     for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++)
   1943 	for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++)
   1944 	    for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++)
   1945 		for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++)
   1946 		    for (m = 0; m < WM_KERNEL_COUNT; m++)
   1947 			drm_intel_bo_unreference(render_state->wm_state_bo[m][i][j][k][l]);
   1948 
   1949     drm_intel_bo_unreference(render_state->cc_state_bo);
   1950     drm_intel_bo_unreference(render_state->sip_kernel_bo);
   1951 
   1952     free(pI830->gen4_render_state);
   1953     pI830->gen4_render_state = NULL;
   1954 }
   1955