/*
 * Copyright © 2006,2008 Intel Corporation
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
23fa225cbcSrjs * 24fa225cbcSrjs * Authors: 25fa225cbcSrjs * Wang Zhenyu <zhenyu.z.wang@intel.com> 26fa225cbcSrjs * Eric Anholt <eric@anholt.net> 27fa225cbcSrjs * Carl Worth <cworth@redhat.com> 28fa225cbcSrjs * Keith Packard <keithp@keithp.com> 29fa225cbcSrjs * 30fa225cbcSrjs */ 31fa225cbcSrjs 32fa225cbcSrjs#ifdef HAVE_CONFIG_H 33fa225cbcSrjs#include "config.h" 34fa225cbcSrjs#endif 35fa225cbcSrjs 36fa225cbcSrjs#include <assert.h> 37fa225cbcSrjs#include "xf86.h" 38fa225cbcSrjs#include "i830.h" 39fa225cbcSrjs#include "i915_reg.h" 40fa225cbcSrjs 41fa225cbcSrjs/* bring in brw structs */ 42fa225cbcSrjs#include "brw_defines.h" 43fa225cbcSrjs#include "brw_structs.h" 44fa225cbcSrjs 45fa225cbcSrjs/* 24 = 4 vertices/composite * 3 texcoords/vertex * 2 floats/texcoord 46fa225cbcSrjs * 47fa225cbcSrjs * This is an upper-bound based on the case of a non-affine 48fa225cbcSrjs * transformation and with a mask, but useful for sizing all cases for 49fa225cbcSrjs * simplicity. 50fa225cbcSrjs */ 51fa225cbcSrjs#define VERTEX_FLOATS_PER_COMPOSITE 24 52fa225cbcSrjs#define VERTEX_BUFFER_SIZE (256 * VERTEX_FLOATS_PER_COMPOSITE) 53fa225cbcSrjs 54fa225cbcSrjsstruct blendinfo { 55fa225cbcSrjs Bool dst_alpha; 56fa225cbcSrjs Bool src_alpha; 57fa225cbcSrjs uint32_t src_blend; 58fa225cbcSrjs uint32_t dst_blend; 59fa225cbcSrjs}; 60fa225cbcSrjs 61fa225cbcSrjsstruct formatinfo { 62fa225cbcSrjs int fmt; 63fa225cbcSrjs uint32_t card_fmt; 64fa225cbcSrjs}; 65fa225cbcSrjs 66fa225cbcSrjs// refer vol2, 3d rasterization 3.8.1 67fa225cbcSrjs 68fa225cbcSrjs/* defined in brw_defines.h */ 69fa225cbcSrjsstatic struct blendinfo i965_blend_op[] = { 70fa225cbcSrjs /* Clear */ 71fa225cbcSrjs {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ZERO}, 72fa225cbcSrjs /* Src */ 73fa225cbcSrjs {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO}, 74fa225cbcSrjs /* Dst */ 75fa225cbcSrjs {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ONE}, 76fa225cbcSrjs /* Over */ 77fa225cbcSrjs {0, 1, BRW_BLENDFACTOR_ONE, 
BRW_BLENDFACTOR_INV_SRC_ALPHA}, 78fa225cbcSrjs /* OverReverse */ 79fa225cbcSrjs {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE}, 80fa225cbcSrjs /* In */ 81fa225cbcSrjs {1, 0, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, 82fa225cbcSrjs /* InReverse */ 83fa225cbcSrjs {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_SRC_ALPHA}, 84fa225cbcSrjs /* Out */ 85fa225cbcSrjs {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, 86fa225cbcSrjs /* OutReverse */ 87fa225cbcSrjs {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_INV_SRC_ALPHA}, 88fa225cbcSrjs /* Atop */ 89fa225cbcSrjs {1, 1, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, 90fa225cbcSrjs /* AtopReverse */ 91fa225cbcSrjs {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA}, 92fa225cbcSrjs /* Xor */ 93fa225cbcSrjs {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, 94fa225cbcSrjs /* Add */ 95fa225cbcSrjs {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ONE}, 96fa225cbcSrjs}; 97fa225cbcSrjs/** 98fa225cbcSrjs * Highest-valued BLENDFACTOR used in i965_blend_op. 
99fa225cbcSrjs * 100fa225cbcSrjs * This leaves out BRW_BLENDFACTOR_INV_DST_COLOR, 101fa225cbcSrjs * BRW_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, 102fa225cbcSrjs * BRW_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} 103fa225cbcSrjs */ 104fa225cbcSrjs#define BRW_BLENDFACTOR_COUNT (BRW_BLENDFACTOR_INV_DST_ALPHA + 1) 105fa225cbcSrjs 106fa225cbcSrjs/* FIXME: surface format defined in brw_defines.h, shared Sampling engine 107fa225cbcSrjs * 1.7.2 108fa225cbcSrjs */ 109fa225cbcSrjsstatic struct formatinfo i965_tex_formats[] = { 110fa225cbcSrjs {PICT_a8r8g8b8, BRW_SURFACEFORMAT_B8G8R8A8_UNORM }, 111fa225cbcSrjs {PICT_x8r8g8b8, BRW_SURFACEFORMAT_B8G8R8X8_UNORM }, 112fa225cbcSrjs {PICT_a8b8g8r8, BRW_SURFACEFORMAT_R8G8B8A8_UNORM }, 113fa225cbcSrjs {PICT_x8b8g8r8, BRW_SURFACEFORMAT_R8G8B8X8_UNORM }, 114fa225cbcSrjs {PICT_r5g6b5, BRW_SURFACEFORMAT_B5G6R5_UNORM }, 115fa225cbcSrjs {PICT_a1r5g5b5, BRW_SURFACEFORMAT_B5G5R5A1_UNORM }, 116fa225cbcSrjs {PICT_a8, BRW_SURFACEFORMAT_A8_UNORM }, 117fa225cbcSrjs}; 118fa225cbcSrjs 119fa225cbcSrjsstatic void i965_get_blend_cntl(int op, PicturePtr pMask, uint32_t dst_format, 120fa225cbcSrjs uint32_t *sblend, uint32_t *dblend) 121fa225cbcSrjs{ 122fa225cbcSrjs 123fa225cbcSrjs *sblend = i965_blend_op[op].src_blend; 124fa225cbcSrjs *dblend = i965_blend_op[op].dst_blend; 125fa225cbcSrjs 126fa225cbcSrjs /* If there's no dst alpha channel, adjust the blend op so that we'll treat 127fa225cbcSrjs * it as always 1. 
128fa225cbcSrjs */ 129fa225cbcSrjs if (PICT_FORMAT_A(dst_format) == 0 && i965_blend_op[op].dst_alpha) { 130fa225cbcSrjs if (*sblend == BRW_BLENDFACTOR_DST_ALPHA) 131fa225cbcSrjs *sblend = BRW_BLENDFACTOR_ONE; 132fa225cbcSrjs else if (*sblend == BRW_BLENDFACTOR_INV_DST_ALPHA) 133fa225cbcSrjs *sblend = BRW_BLENDFACTOR_ZERO; 134fa225cbcSrjs } 135fa225cbcSrjs 136fa225cbcSrjs /* If the source alpha is being used, then we should only be in a case where 137fa225cbcSrjs * the source blend factor is 0, and the source blend value is the mask 138fa225cbcSrjs * channels multiplied by the source picture's alpha. 139fa225cbcSrjs */ 140fa225cbcSrjs if (pMask && pMask->componentAlpha && PICT_FORMAT_RGB(pMask->format) 141fa225cbcSrjs && i965_blend_op[op].src_alpha) { 142fa225cbcSrjs if (*dblend == BRW_BLENDFACTOR_SRC_ALPHA) { 143fa225cbcSrjs *dblend = BRW_BLENDFACTOR_SRC_COLOR; 144fa225cbcSrjs } else if (*dblend == BRW_BLENDFACTOR_INV_SRC_ALPHA) { 145fa225cbcSrjs *dblend = BRW_BLENDFACTOR_INV_SRC_COLOR; 146fa225cbcSrjs } 147fa225cbcSrjs } 148fa225cbcSrjs 149fa225cbcSrjs} 150fa225cbcSrjs 151fa225cbcSrjsstatic Bool i965_get_dest_format(PicturePtr pDstPicture, uint32_t *dst_format) 152fa225cbcSrjs{ 153fa225cbcSrjs ScrnInfoPtr pScrn = xf86Screens[pDstPicture->pDrawable->pScreen->myNum]; 154fa225cbcSrjs 155fa225cbcSrjs switch (pDstPicture->format) { 156fa225cbcSrjs case PICT_a8r8g8b8: 157fa225cbcSrjs case PICT_x8r8g8b8: 158fa225cbcSrjs *dst_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; 159fa225cbcSrjs break; 160fa225cbcSrjs case PICT_r5g6b5: 161fa225cbcSrjs *dst_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; 162fa225cbcSrjs break; 163fa225cbcSrjs case PICT_a1r5g5b5: 164fa225cbcSrjs *dst_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM; 165fa225cbcSrjs break; 166fa225cbcSrjs case PICT_x1r5g5b5: 167fa225cbcSrjs *dst_format = BRW_SURFACEFORMAT_B5G5R5X1_UNORM; 168fa225cbcSrjs break; 169fa225cbcSrjs case PICT_a8: 170fa225cbcSrjs *dst_format = BRW_SURFACEFORMAT_A8_UNORM; 171fa225cbcSrjs break; 
172fa225cbcSrjs case PICT_a4r4g4b4: 173fa225cbcSrjs case PICT_x4r4g4b4: 174fa225cbcSrjs *dst_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; 175fa225cbcSrjs break; 176fa225cbcSrjs default: 177fa225cbcSrjs I830FALLBACK("Unsupported dest format 0x%x\n", 178fa225cbcSrjs (int)pDstPicture->format); 179fa225cbcSrjs } 180fa225cbcSrjs 181fa225cbcSrjs return TRUE; 182fa225cbcSrjs} 183fa225cbcSrjs 184fa225cbcSrjsstatic Bool i965_check_composite_texture(ScrnInfoPtr pScrn, PicturePtr pPict, int unit) 185fa225cbcSrjs{ 186fa225cbcSrjs if (pPict->repeatType > RepeatReflect) 187fa225cbcSrjs I830FALLBACK("extended repeat (%d) not supported\n", 188fa225cbcSrjs pPict->repeatType); 189fa225cbcSrjs 190fa225cbcSrjs if (pPict->filter != PictFilterNearest && 191fa225cbcSrjs pPict->filter != PictFilterBilinear) 192fa225cbcSrjs { 193fa225cbcSrjs I830FALLBACK("Unsupported filter 0x%x\n", pPict->filter); 194fa225cbcSrjs } 195fa225cbcSrjs 196fa225cbcSrjs if (pPict->pDrawable) 197fa225cbcSrjs { 198fa225cbcSrjs int w, h, i; 199fa225cbcSrjs 200fa225cbcSrjs w = pPict->pDrawable->width; 201fa225cbcSrjs h = pPict->pDrawable->height; 202fa225cbcSrjs if ((w > 8192) || (h > 8192)) 203fa225cbcSrjs I830FALLBACK("Picture w/h too large (%dx%d)\n", w, h); 204fa225cbcSrjs 205fa225cbcSrjs for (i = 0; i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]); 206fa225cbcSrjs i++) 207fa225cbcSrjs { 208fa225cbcSrjs if (i965_tex_formats[i].fmt == pPict->format) 209fa225cbcSrjs break; 210fa225cbcSrjs } 211fa225cbcSrjs if (i == sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0])) 212fa225cbcSrjs I830FALLBACK("Unsupported picture format 0x%x\n", 213fa225cbcSrjs (int)pPict->format); 214fa225cbcSrjs } 215fa225cbcSrjs 216fa225cbcSrjs return TRUE; 217fa225cbcSrjs} 218fa225cbcSrjs 219fa225cbcSrjsBool 220fa225cbcSrjsi965_check_composite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, 221fa225cbcSrjs PicturePtr pDstPicture) 222fa225cbcSrjs{ 223fa225cbcSrjs ScrnInfoPtr pScrn = 
xf86Screens[pDstPicture->pDrawable->pScreen->myNum]; 224fa225cbcSrjs uint32_t tmp1; 225fa225cbcSrjs 226fa225cbcSrjs /* Check for unsupported compositing operations. */ 227fa225cbcSrjs if (op >= sizeof(i965_blend_op) / sizeof(i965_blend_op[0])) 228fa225cbcSrjs I830FALLBACK("Unsupported Composite op 0x%x\n", op); 229fa225cbcSrjs 230fa225cbcSrjs if (pMaskPicture && pMaskPicture->componentAlpha && 231fa225cbcSrjs PICT_FORMAT_RGB(pMaskPicture->format)) { 232fa225cbcSrjs /* Check if it's component alpha that relies on a source alpha and on 233fa225cbcSrjs * the source value. We can only get one of those into the single 234fa225cbcSrjs * source value that we get to blend with. 235fa225cbcSrjs */ 236fa225cbcSrjs if (i965_blend_op[op].src_alpha && 237fa225cbcSrjs (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) 238fa225cbcSrjs { 239fa225cbcSrjs I830FALLBACK("Component alpha not supported with source " 240fa225cbcSrjs "alpha and source value blending.\n"); 241fa225cbcSrjs } 242fa225cbcSrjs } 243fa225cbcSrjs 244fa225cbcSrjs if (!i965_check_composite_texture(pScrn, pSrcPicture, 0)) 245fa225cbcSrjs I830FALLBACK("Check Src picture texture\n"); 246fa225cbcSrjs if (pMaskPicture != NULL && !i965_check_composite_texture(pScrn, pMaskPicture, 1)) 247fa225cbcSrjs I830FALLBACK("Check Mask picture texture\n"); 248fa225cbcSrjs 249fa225cbcSrjs if (!i965_get_dest_format(pDstPicture, &tmp1)) 250fa225cbcSrjs I830FALLBACK("Get Color buffer format\n"); 251fa225cbcSrjs 252fa225cbcSrjs return TRUE; 253fa225cbcSrjs 254fa225cbcSrjs} 255fa225cbcSrjs 256fa225cbcSrjs#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) 257fa225cbcSrjs 258fa225cbcSrjs/* Set up a default static partitioning of the URB, which is supposed to 259fa225cbcSrjs * allow anything we would want to do, at potentially lower performance. 
260fa225cbcSrjs */ 261fa225cbcSrjs#define URB_CS_ENTRY_SIZE 0 262fa225cbcSrjs#define URB_CS_ENTRIES 0 263fa225cbcSrjs 264fa225cbcSrjs#define URB_VS_ENTRY_SIZE 1 // each 512-bit row 265fa225cbcSrjs#define URB_VS_ENTRIES 8 // we needs at least 8 entries 266fa225cbcSrjs 267fa225cbcSrjs#define URB_GS_ENTRY_SIZE 0 268fa225cbcSrjs#define URB_GS_ENTRIES 0 269fa225cbcSrjs 270fa225cbcSrjs#define URB_CLIP_ENTRY_SIZE 0 271fa225cbcSrjs#define URB_CLIP_ENTRIES 0 272fa225cbcSrjs 273fa225cbcSrjs#define URB_SF_ENTRY_SIZE 2 274fa225cbcSrjs#define URB_SF_ENTRIES 1 275fa225cbcSrjs 276fa225cbcSrjsstatic const uint32_t sip_kernel_static[][4] = { 277fa225cbcSrjs/* wait (1) a0<1>UW a145<0,1,0>UW { align1 + } */ 278fa225cbcSrjs { 0x00000030, 0x20000108, 0x00001220, 0x00000000 }, 279fa225cbcSrjs/* nop (4) g0<1>UD { align1 + } */ 280fa225cbcSrjs { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 281fa225cbcSrjs/* nop (4) g0<1>UD { align1 + } */ 282fa225cbcSrjs { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 283fa225cbcSrjs/* nop (4) g0<1>UD { align1 + } */ 284fa225cbcSrjs { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 285fa225cbcSrjs/* nop (4) g0<1>UD { align1 + } */ 286fa225cbcSrjs { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 287fa225cbcSrjs/* nop (4) g0<1>UD { align1 + } */ 288fa225cbcSrjs { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 289fa225cbcSrjs/* nop (4) g0<1>UD { align1 + } */ 290fa225cbcSrjs { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 291fa225cbcSrjs/* nop (4) g0<1>UD { align1 + } */ 292fa225cbcSrjs { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 293fa225cbcSrjs/* nop (4) g0<1>UD { align1 + } */ 294fa225cbcSrjs { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 295fa225cbcSrjs/* nop (4) g0<1>UD { align1 + } */ 296fa225cbcSrjs { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 297fa225cbcSrjs}; 298fa225cbcSrjs 299fa225cbcSrjs/* 300fa225cbcSrjs * this program computes dA/dx and dA/dy for the texture coordinates along 301fa225cbcSrjs * with the 
base texture coordinate. It was extracted from the Mesa driver 302fa225cbcSrjs */ 303fa225cbcSrjs 304fa225cbcSrjs#define SF_KERNEL_NUM_GRF 16 305fa225cbcSrjs#define SF_MAX_THREADS 2 306fa225cbcSrjs 307fa225cbcSrjsstatic const uint32_t sf_kernel_static[][4] = { 308fa225cbcSrjs#include "exa_sf.g4b" 309fa225cbcSrjs}; 310fa225cbcSrjs 311fa225cbcSrjsstatic const uint32_t sf_kernel_mask_static[][4] = { 312fa225cbcSrjs#include "exa_sf_mask.g4b" 313fa225cbcSrjs}; 314fa225cbcSrjs 315fa225cbcSrjs/* ps kernels */ 316fa225cbcSrjs#define PS_KERNEL_NUM_GRF 32 317fa225cbcSrjs#define PS_MAX_THREADS 48 318fa225cbcSrjs 319fa225cbcSrjsstatic const uint32_t ps_kernel_nomask_affine_static [][4] = { 320fa225cbcSrjs#include "exa_wm_xy.g4b" 321fa225cbcSrjs#include "exa_wm_src_affine.g4b" 322fa225cbcSrjs#include "exa_wm_src_sample_argb.g4b" 323fa225cbcSrjs#include "exa_wm_write.g4b" 324fa225cbcSrjs}; 325fa225cbcSrjs 326fa225cbcSrjsstatic const uint32_t ps_kernel_nomask_projective_static [][4] = { 327fa225cbcSrjs#include "exa_wm_xy.g4b" 328fa225cbcSrjs#include "exa_wm_src_projective.g4b" 329fa225cbcSrjs#include "exa_wm_src_sample_argb.g4b" 330fa225cbcSrjs#include "exa_wm_write.g4b" 331fa225cbcSrjs}; 332fa225cbcSrjs 333fa225cbcSrjsstatic const uint32_t ps_kernel_maskca_affine_static [][4] = { 334fa225cbcSrjs#include "exa_wm_xy.g4b" 335fa225cbcSrjs#include "exa_wm_src_affine.g4b" 336fa225cbcSrjs#include "exa_wm_src_sample_argb.g4b" 337fa225cbcSrjs#include "exa_wm_mask_affine.g4b" 338fa225cbcSrjs#include "exa_wm_mask_sample_argb.g4b" 339fa225cbcSrjs#include "exa_wm_ca.g4b" 340fa225cbcSrjs#include "exa_wm_write.g4b" 341fa225cbcSrjs}; 342fa225cbcSrjs 343fa225cbcSrjsstatic const uint32_t ps_kernel_maskca_projective_static [][4] = { 344fa225cbcSrjs#include "exa_wm_xy.g4b" 345fa225cbcSrjs#include "exa_wm_src_projective.g4b" 346fa225cbcSrjs#include "exa_wm_src_sample_argb.g4b" 347fa225cbcSrjs#include "exa_wm_mask_projective.g4b" 348fa225cbcSrjs#include "exa_wm_mask_sample_argb.g4b" 
349fa225cbcSrjs#include "exa_wm_ca.g4b" 350fa225cbcSrjs#include "exa_wm_write.g4b" 351fa225cbcSrjs}; 352fa225cbcSrjs 353fa225cbcSrjsstatic const uint32_t ps_kernel_maskca_srcalpha_affine_static [][4] = { 354fa225cbcSrjs#include "exa_wm_xy.g4b" 355fa225cbcSrjs#include "exa_wm_src_affine.g4b" 356fa225cbcSrjs#include "exa_wm_src_sample_a.g4b" 357fa225cbcSrjs#include "exa_wm_mask_affine.g4b" 358fa225cbcSrjs#include "exa_wm_mask_sample_argb.g4b" 359fa225cbcSrjs#include "exa_wm_ca_srcalpha.g4b" 360fa225cbcSrjs#include "exa_wm_write.g4b" 361fa225cbcSrjs}; 362fa225cbcSrjs 363fa225cbcSrjsstatic const uint32_t ps_kernel_maskca_srcalpha_projective_static [][4] = { 364fa225cbcSrjs#include "exa_wm_xy.g4b" 365fa225cbcSrjs#include "exa_wm_src_projective.g4b" 366fa225cbcSrjs#include "exa_wm_src_sample_a.g4b" 367fa225cbcSrjs#include "exa_wm_mask_projective.g4b" 368fa225cbcSrjs#include "exa_wm_mask_sample_argb.g4b" 369fa225cbcSrjs#include "exa_wm_ca_srcalpha.g4b" 370fa225cbcSrjs#include "exa_wm_write.g4b" 371fa225cbcSrjs}; 372fa225cbcSrjs 373fa225cbcSrjsstatic const uint32_t ps_kernel_masknoca_affine_static [][4] = { 374fa225cbcSrjs#include "exa_wm_xy.g4b" 375fa225cbcSrjs#include "exa_wm_src_affine.g4b" 376fa225cbcSrjs#include "exa_wm_src_sample_argb.g4b" 377fa225cbcSrjs#include "exa_wm_mask_affine.g4b" 378fa225cbcSrjs#include "exa_wm_mask_sample_a.g4b" 379fa225cbcSrjs#include "exa_wm_noca.g4b" 380fa225cbcSrjs#include "exa_wm_write.g4b" 381fa225cbcSrjs}; 382fa225cbcSrjs 383fa225cbcSrjsstatic const uint32_t ps_kernel_masknoca_projective_static [][4] = { 384fa225cbcSrjs#include "exa_wm_xy.g4b" 385fa225cbcSrjs#include "exa_wm_src_projective.g4b" 386fa225cbcSrjs#include "exa_wm_src_sample_argb.g4b" 387fa225cbcSrjs#include "exa_wm_mask_projective.g4b" 388fa225cbcSrjs#include "exa_wm_mask_sample_a.g4b" 389fa225cbcSrjs#include "exa_wm_noca.g4b" 390fa225cbcSrjs#include "exa_wm_write.g4b" 391fa225cbcSrjs}; 392fa225cbcSrjs 393fa225cbcSrjs/* new programs for IGDNG */ 394fa225cbcSrjsstatic 
const uint32_t sf_kernel_static_gen5[][4] = { 395fa225cbcSrjs#include "exa_sf.g4b.gen5" 396fa225cbcSrjs}; 397fa225cbcSrjs 398fa225cbcSrjsstatic const uint32_t sf_kernel_mask_static_gen5[][4] = { 399fa225cbcSrjs#include "exa_sf_mask.g4b.gen5" 400fa225cbcSrjs}; 401fa225cbcSrjs 402fa225cbcSrjsstatic const uint32_t ps_kernel_nomask_affine_static_gen5 [][4] = { 403fa225cbcSrjs#include "exa_wm_xy.g4b.gen5" 404fa225cbcSrjs#include "exa_wm_src_affine.g4b.gen5" 405fa225cbcSrjs#include "exa_wm_src_sample_argb.g4b.gen5" 406fa225cbcSrjs#include "exa_wm_write.g4b.gen5" 407fa225cbcSrjs}; 408fa225cbcSrjs 409fa225cbcSrjsstatic const uint32_t ps_kernel_nomask_projective_static_gen5 [][4] = { 410fa225cbcSrjs#include "exa_wm_xy.g4b.gen5" 411fa225cbcSrjs#include "exa_wm_src_projective.g4b.gen5" 412fa225cbcSrjs#include "exa_wm_src_sample_argb.g4b.gen5" 413fa225cbcSrjs#include "exa_wm_write.g4b.gen5" 414fa225cbcSrjs}; 415fa225cbcSrjs 416fa225cbcSrjsstatic const uint32_t ps_kernel_maskca_affine_static_gen5 [][4] = { 417fa225cbcSrjs#include "exa_wm_xy.g4b.gen5" 418fa225cbcSrjs#include "exa_wm_src_affine.g4b.gen5" 419fa225cbcSrjs#include "exa_wm_src_sample_argb.g4b.gen5" 420fa225cbcSrjs#include "exa_wm_mask_affine.g4b.gen5" 421fa225cbcSrjs#include "exa_wm_mask_sample_argb.g4b.gen5" 422fa225cbcSrjs#include "exa_wm_ca.g4b.gen5" 423fa225cbcSrjs#include "exa_wm_write.g4b.gen5" 424fa225cbcSrjs}; 425fa225cbcSrjs 426fa225cbcSrjsstatic const uint32_t ps_kernel_maskca_projective_static_gen5 [][4] = { 427fa225cbcSrjs#include "exa_wm_xy.g4b.gen5" 428fa225cbcSrjs#include "exa_wm_src_projective.g4b.gen5" 429fa225cbcSrjs#include "exa_wm_src_sample_argb.g4b.gen5" 430fa225cbcSrjs#include "exa_wm_mask_projective.g4b.gen5" 431fa225cbcSrjs#include "exa_wm_mask_sample_argb.g4b.gen5" 432fa225cbcSrjs#include "exa_wm_ca.g4b.gen5" 433fa225cbcSrjs#include "exa_wm_write.g4b.gen5" 434fa225cbcSrjs}; 435fa225cbcSrjs 436fa225cbcSrjsstatic const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen5 [][4] = { 
437fa225cbcSrjs#include "exa_wm_xy.g4b.gen5" 438fa225cbcSrjs#include "exa_wm_src_affine.g4b.gen5" 439fa225cbcSrjs#include "exa_wm_src_sample_a.g4b.gen5" 440fa225cbcSrjs#include "exa_wm_mask_affine.g4b.gen5" 441fa225cbcSrjs#include "exa_wm_mask_sample_argb.g4b.gen5" 442fa225cbcSrjs#include "exa_wm_ca_srcalpha.g4b.gen5" 443fa225cbcSrjs#include "exa_wm_write.g4b.gen5" 444fa225cbcSrjs}; 445fa225cbcSrjs 446fa225cbcSrjsstatic const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen5 [][4] = { 447fa225cbcSrjs#include "exa_wm_xy.g4b.gen5" 448fa225cbcSrjs#include "exa_wm_src_projective.g4b.gen5" 449fa225cbcSrjs#include "exa_wm_src_sample_a.g4b.gen5" 450fa225cbcSrjs#include "exa_wm_mask_projective.g4b.gen5" 451fa225cbcSrjs#include "exa_wm_mask_sample_argb.g4b.gen5" 452fa225cbcSrjs#include "exa_wm_ca_srcalpha.g4b.gen5" 453fa225cbcSrjs#include "exa_wm_write.g4b.gen5" 454fa225cbcSrjs}; 455fa225cbcSrjs 456fa225cbcSrjsstatic const uint32_t ps_kernel_masknoca_affine_static_gen5 [][4] = { 457fa225cbcSrjs#include "exa_wm_xy.g4b.gen5" 458fa225cbcSrjs#include "exa_wm_src_affine.g4b.gen5" 459fa225cbcSrjs#include "exa_wm_src_sample_argb.g4b.gen5" 460fa225cbcSrjs#include "exa_wm_mask_affine.g4b.gen5" 461fa225cbcSrjs#include "exa_wm_mask_sample_a.g4b.gen5" 462fa225cbcSrjs#include "exa_wm_noca.g4b.gen5" 463fa225cbcSrjs#include "exa_wm_write.g4b.gen5" 464fa225cbcSrjs}; 465fa225cbcSrjs 466fa225cbcSrjsstatic const uint32_t ps_kernel_masknoca_projective_static_gen5 [][4] = { 467fa225cbcSrjs#include "exa_wm_xy.g4b.gen5" 468fa225cbcSrjs#include "exa_wm_src_projective.g4b.gen5" 469fa225cbcSrjs#include "exa_wm_src_sample_argb.g4b.gen5" 470fa225cbcSrjs#include "exa_wm_mask_projective.g4b.gen5" 471fa225cbcSrjs#include "exa_wm_mask_sample_a.g4b.gen5" 472fa225cbcSrjs#include "exa_wm_noca.g4b.gen5" 473fa225cbcSrjs#include "exa_wm_write.g4b.gen5" 474fa225cbcSrjs}; 475fa225cbcSrjs 476fa225cbcSrjs#define WM_STATE_DECL(kernel) \ 477fa225cbcSrjs struct brw_wm_unit_state wm_state_ ## 
kernel[SAMPLER_STATE_FILTER_COUNT] \ 478fa225cbcSrjs [SAMPLER_STATE_EXTEND_COUNT] \ 479fa225cbcSrjs [SAMPLER_STATE_FILTER_COUNT] \ 480fa225cbcSrjs [SAMPLER_STATE_EXTEND_COUNT] 481fa225cbcSrjs 482fa225cbcSrjs/* Many of the fields in the state structure must be aligned to a 483fa225cbcSrjs * 64-byte boundary, (or a 32-byte boundary, but 64 is good enough for 484fa225cbcSrjs * those too). 485fa225cbcSrjs */ 486fa225cbcSrjs#define PAD64_MULTI(previous, idx, factor) char previous ## _pad ## idx [(64 - (sizeof(struct previous) * (factor)) % 64) % 64] 487fa225cbcSrjs#define PAD64(previous, idx) PAD64_MULTI(previous, idx, 1) 488fa225cbcSrjs 489fa225cbcSrjstypedef enum { 490fa225cbcSrjs SAMPLER_STATE_FILTER_NEAREST, 491fa225cbcSrjs SAMPLER_STATE_FILTER_BILINEAR, 492fa225cbcSrjs SAMPLER_STATE_FILTER_COUNT 493fa225cbcSrjs} sampler_state_filter_t; 494fa225cbcSrjs 495fa225cbcSrjstypedef enum { 496fa225cbcSrjs SAMPLER_STATE_EXTEND_NONE, 497fa225cbcSrjs SAMPLER_STATE_EXTEND_REPEAT, 498fa225cbcSrjs SAMPLER_STATE_EXTEND_PAD, 499fa225cbcSrjs SAMPLER_STATE_EXTEND_REFLECT, 500fa225cbcSrjs SAMPLER_STATE_EXTEND_COUNT 501fa225cbcSrjs} sampler_state_extend_t; 502fa225cbcSrjs 503fa225cbcSrjstypedef enum { 504fa225cbcSrjs WM_KERNEL_NOMASK_AFFINE, 505fa225cbcSrjs WM_KERNEL_NOMASK_PROJECTIVE, 506fa225cbcSrjs WM_KERNEL_MASKCA_AFFINE, 507fa225cbcSrjs WM_KERNEL_MASKCA_PROJECTIVE, 508fa225cbcSrjs WM_KERNEL_MASKCA_SRCALPHA_AFFINE, 509fa225cbcSrjs WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, 510fa225cbcSrjs WM_KERNEL_MASKNOCA_AFFINE, 511fa225cbcSrjs WM_KERNEL_MASKNOCA_PROJECTIVE, 512fa225cbcSrjs WM_KERNEL_COUNT 513fa225cbcSrjs} wm_kernel_t; 514fa225cbcSrjs 515fa225cbcSrjs#define KERNEL(kernel_enum, kernel, masked) \ 516dad3ac4cSchristos [kernel_enum] = {__UNCONST(&kernel), sizeof(kernel), masked} 517fa225cbcSrjsstruct wm_kernel_info { 518fa225cbcSrjs void *data; 519fa225cbcSrjs unsigned int size; 520fa225cbcSrjs Bool has_mask; 521fa225cbcSrjs}; 522fa225cbcSrjs 523fa225cbcSrjsstatic struct wm_kernel_info 
wm_kernels[] = { 524fa225cbcSrjs KERNEL(WM_KERNEL_NOMASK_AFFINE, 525fa225cbcSrjs ps_kernel_nomask_affine_static, FALSE), 526fa225cbcSrjs KERNEL(WM_KERNEL_NOMASK_PROJECTIVE, 527fa225cbcSrjs ps_kernel_nomask_projective_static, FALSE), 528fa225cbcSrjs KERNEL(WM_KERNEL_MASKCA_AFFINE, 529fa225cbcSrjs ps_kernel_maskca_affine_static, TRUE), 530fa225cbcSrjs KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, 531fa225cbcSrjs ps_kernel_maskca_projective_static, TRUE), 532fa225cbcSrjs KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE, 533fa225cbcSrjs ps_kernel_maskca_srcalpha_affine_static, TRUE), 534fa225cbcSrjs KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, 535fa225cbcSrjs ps_kernel_maskca_srcalpha_projective_static, TRUE), 536fa225cbcSrjs KERNEL(WM_KERNEL_MASKNOCA_AFFINE, 537fa225cbcSrjs ps_kernel_masknoca_affine_static, TRUE), 538fa225cbcSrjs KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE, 539fa225cbcSrjs ps_kernel_masknoca_projective_static, TRUE), 540fa225cbcSrjs}; 541fa225cbcSrjs 542fa225cbcSrjsstatic struct wm_kernel_info wm_kernels_gen5[] = { 543fa225cbcSrjs KERNEL(WM_KERNEL_NOMASK_AFFINE, 544fa225cbcSrjs ps_kernel_nomask_affine_static_gen5, FALSE), 545fa225cbcSrjs KERNEL(WM_KERNEL_NOMASK_PROJECTIVE, 546fa225cbcSrjs ps_kernel_nomask_projective_static_gen5, FALSE), 547fa225cbcSrjs KERNEL(WM_KERNEL_MASKCA_AFFINE, 548fa225cbcSrjs ps_kernel_maskca_affine_static_gen5, TRUE), 549fa225cbcSrjs KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, 550fa225cbcSrjs ps_kernel_maskca_projective_static_gen5, TRUE), 551fa225cbcSrjs KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE, 552fa225cbcSrjs ps_kernel_maskca_srcalpha_affine_static_gen5, TRUE), 553fa225cbcSrjs KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, 554fa225cbcSrjs ps_kernel_maskca_srcalpha_projective_static_gen5, TRUE), 555fa225cbcSrjs KERNEL(WM_KERNEL_MASKNOCA_AFFINE, 556fa225cbcSrjs ps_kernel_masknoca_affine_static_gen5, TRUE), 557fa225cbcSrjs KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE, 558fa225cbcSrjs ps_kernel_masknoca_projective_static_gen5, TRUE), 559fa225cbcSrjs}; 
560fa225cbcSrjs#undef KERNEL 561fa225cbcSrjs 562fa225cbcSrjstypedef struct _brw_cc_unit_state_padded { 563fa225cbcSrjs struct brw_cc_unit_state state; 564fa225cbcSrjs char pad[64 - sizeof (struct brw_cc_unit_state)]; 565fa225cbcSrjs} brw_cc_unit_state_padded; 566fa225cbcSrjs 567fa225cbcSrjstypedef struct brw_surface_state_padded { 568fa225cbcSrjs struct brw_surface_state state; 569fa225cbcSrjs char pad[32 - sizeof (struct brw_surface_state)]; 570fa225cbcSrjs} brw_surface_state_padded; 571fa225cbcSrjs 572fa225cbcSrjsstruct gen4_cc_unit_state { 573fa225cbcSrjs /* Index by [src_blend][dst_blend] */ 574fa225cbcSrjs brw_cc_unit_state_padded cc_state[BRW_BLENDFACTOR_COUNT] 575fa225cbcSrjs [BRW_BLENDFACTOR_COUNT]; 576fa225cbcSrjs}; 577fa225cbcSrjs 578fa225cbcSrjstypedef float gen4_vertex_buffer[VERTEX_BUFFER_SIZE]; 579fa225cbcSrjs 580fa225cbcSrjstypedef struct gen4_composite_op { 581fa225cbcSrjs int op; 582fa225cbcSrjs PicturePtr source_picture; 583fa225cbcSrjs PicturePtr mask_picture; 584fa225cbcSrjs PicturePtr dest_picture; 585fa225cbcSrjs PixmapPtr source; 586fa225cbcSrjs PixmapPtr mask; 587fa225cbcSrjs PixmapPtr dest; 588fa225cbcSrjs drm_intel_bo *binding_table_bo; 589fa225cbcSrjs sampler_state_filter_t src_filter; 590fa225cbcSrjs sampler_state_filter_t mask_filter; 591fa225cbcSrjs sampler_state_extend_t src_extend; 592fa225cbcSrjs sampler_state_extend_t mask_extend; 593fa225cbcSrjs Bool is_affine; 594fa225cbcSrjs wm_kernel_t wm_kernel; 595fa225cbcSrjs} gen4_composite_op; 596fa225cbcSrjs 597fa225cbcSrjs/** Private data for gen4 render accel implementation. 
*/ 598fa225cbcSrjsstruct gen4_render_state { 599fa225cbcSrjs drm_intel_bo *vs_state_bo; 600fa225cbcSrjs drm_intel_bo *sf_state_bo; 601fa225cbcSrjs drm_intel_bo *sf_mask_state_bo; 602fa225cbcSrjs drm_intel_bo *cc_state_bo; 603fa225cbcSrjs drm_intel_bo *wm_state_bo[WM_KERNEL_COUNT] 604fa225cbcSrjs [SAMPLER_STATE_FILTER_COUNT] 605fa225cbcSrjs [SAMPLER_STATE_EXTEND_COUNT] 606fa225cbcSrjs [SAMPLER_STATE_FILTER_COUNT] 607fa225cbcSrjs [SAMPLER_STATE_EXTEND_COUNT]; 608fa225cbcSrjs drm_intel_bo *wm_kernel_bo[WM_KERNEL_COUNT]; 609fa225cbcSrjs 610fa225cbcSrjs drm_intel_bo *sip_kernel_bo; 611fa225cbcSrjs dri_bo* vertex_buffer_bo; 612fa225cbcSrjs 613fa225cbcSrjs gen4_composite_op composite_op; 614fa225cbcSrjs 615fa225cbcSrjs int vb_offset; 616fa225cbcSrjs int vertex_size; 617fa225cbcSrjs 618fa225cbcSrjs Bool needs_state_emit; 619fa225cbcSrjs}; 620fa225cbcSrjs 621fa225cbcSrjs/** 622fa225cbcSrjs * Sets up the SF state pointing at an SF kernel. 623fa225cbcSrjs * 624fa225cbcSrjs * The SF kernel does coord interp: for each attribute, 625fa225cbcSrjs * calculate dA/dx and dA/dy. Hand these interpolation coefficients 626fa225cbcSrjs * back to SF which then hands pixels off to WM. 
627fa225cbcSrjs */ 628fa225cbcSrjsstatic drm_intel_bo * 629fa225cbcSrjsgen4_create_sf_state(ScrnInfoPtr scrn, drm_intel_bo *kernel_bo) 630fa225cbcSrjs{ 631fa225cbcSrjs I830Ptr pI830 = I830PTR(scrn); 632fa225cbcSrjs struct brw_sf_unit_state *sf_state; 633fa225cbcSrjs drm_intel_bo *sf_state_bo; 634fa225cbcSrjs 635fa225cbcSrjs sf_state_bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 SF state", 636fa225cbcSrjs sizeof(*sf_state), 4096); 637fa225cbcSrjs drm_intel_bo_map(sf_state_bo, TRUE); 638fa225cbcSrjs sf_state = sf_state_bo->virtual; 639fa225cbcSrjs 640fa225cbcSrjs memset(sf_state, 0, sizeof(*sf_state)); 641fa225cbcSrjs sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF); 642fa225cbcSrjs sf_state->thread0.kernel_start_pointer = 643fa225cbcSrjs intel_emit_reloc(sf_state_bo, 644fa225cbcSrjs offsetof(struct brw_sf_unit_state, thread0), 645fa225cbcSrjs kernel_bo, sf_state->thread0.grf_reg_count << 1, 646fa225cbcSrjs I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; 647fa225cbcSrjs sf_state->sf1.single_program_flow = 1; 648fa225cbcSrjs sf_state->sf1.binding_table_entry_count = 0; 649fa225cbcSrjs sf_state->sf1.thread_priority = 0; 650fa225cbcSrjs sf_state->sf1.floating_point_mode = 0; /* Mesa does this */ 651fa225cbcSrjs sf_state->sf1.illegal_op_exception_enable = 1; 652fa225cbcSrjs sf_state->sf1.mask_stack_exception_enable = 1; 653fa225cbcSrjs sf_state->sf1.sw_exception_enable = 1; 654fa225cbcSrjs sf_state->thread2.per_thread_scratch_space = 0; 655fa225cbcSrjs /* scratch space is not used in our kernel */ 656fa225cbcSrjs sf_state->thread2.scratch_space_base_pointer = 0; 657fa225cbcSrjs sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */ 658fa225cbcSrjs sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ 659fa225cbcSrjs sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ 660fa225cbcSrjs /* don't smash vertex header, read start from dw8 */ 661fa225cbcSrjs sf_state->thread3.urb_entry_read_offset = 1; 662fa225cbcSrjs 
sf_state->thread3.dispatch_grf_start_reg = 3; 663fa225cbcSrjs sf_state->thread4.max_threads = SF_MAX_THREADS - 1; 664fa225cbcSrjs sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; 665fa225cbcSrjs sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES; 666fa225cbcSrjs sf_state->thread4.stats_enable = 1; 667fa225cbcSrjs sf_state->sf5.viewport_transform = FALSE; /* skip viewport */ 668fa225cbcSrjs sf_state->sf6.cull_mode = BRW_CULLMODE_NONE; 669fa225cbcSrjs sf_state->sf6.scissor = 0; 670fa225cbcSrjs sf_state->sf7.trifan_pv = 2; 671fa225cbcSrjs sf_state->sf6.dest_org_vbias = 0x8; 672fa225cbcSrjs sf_state->sf6.dest_org_hbias = 0x8; 673fa225cbcSrjs 674fa225cbcSrjs drm_intel_bo_unmap(sf_state_bo); 675fa225cbcSrjs 676fa225cbcSrjs return sf_state_bo; 677fa225cbcSrjs} 678fa225cbcSrjs 679fa225cbcSrjsstatic drm_intel_bo * 680fa225cbcSrjssampler_border_color_create(ScrnInfoPtr scrn) 681fa225cbcSrjs{ 682fa225cbcSrjs struct brw_sampler_legacy_border_color sampler_border_color; 683fa225cbcSrjs 684fa225cbcSrjs /* Set up the sampler border color (always transparent black) */ 685fa225cbcSrjs memset(&sampler_border_color, 0, sizeof(sampler_border_color)); 686fa225cbcSrjs sampler_border_color.color[0] = 0; /* R */ 687fa225cbcSrjs sampler_border_color.color[1] = 0; /* G */ 688fa225cbcSrjs sampler_border_color.color[2] = 0; /* B */ 689fa225cbcSrjs sampler_border_color.color[3] = 0; /* A */ 690fa225cbcSrjs 691fa225cbcSrjs return intel_bo_alloc_for_data(scrn, 692fa225cbcSrjs &sampler_border_color, 693fa225cbcSrjs sizeof(sampler_border_color), 694fa225cbcSrjs "gen4 render sampler border color"); 695fa225cbcSrjs} 696fa225cbcSrjs 697fa225cbcSrjsstatic void 698fa225cbcSrjssampler_state_init (drm_intel_bo *sampler_state_bo, 699fa225cbcSrjs struct brw_sampler_state *sampler_state, 700fa225cbcSrjs sampler_state_filter_t filter, 701fa225cbcSrjs sampler_state_extend_t extend, 702fa225cbcSrjs drm_intel_bo *border_color_bo) 703fa225cbcSrjs{ 704fa225cbcSrjs uint32_t sampler_state_offset; 
705fa225cbcSrjs 706fa225cbcSrjs sampler_state_offset = (char *)sampler_state - 707fa225cbcSrjs (char *)sampler_state_bo->virtual; 708fa225cbcSrjs 709fa225cbcSrjs /* PS kernel use this sampler */ 710fa225cbcSrjs memset(sampler_state, 0, sizeof(*sampler_state)); 711fa225cbcSrjs 712fa225cbcSrjs sampler_state->ss0.lod_preclamp = 1; /* GL mode */ 713fa225cbcSrjs 714fa225cbcSrjs /* We use the legacy mode to get the semantics specified by 715fa225cbcSrjs * the Render extension. */ 716fa225cbcSrjs sampler_state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY; 717fa225cbcSrjs 718fa225cbcSrjs switch(filter) { 719fa225cbcSrjs default: 720fa225cbcSrjs case SAMPLER_STATE_FILTER_NEAREST: 721fa225cbcSrjs sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST; 722fa225cbcSrjs sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST; 723fa225cbcSrjs break; 724fa225cbcSrjs case SAMPLER_STATE_FILTER_BILINEAR: 725fa225cbcSrjs sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; 726fa225cbcSrjs sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; 727fa225cbcSrjs break; 728fa225cbcSrjs } 729fa225cbcSrjs 730fa225cbcSrjs switch (extend) { 731fa225cbcSrjs default: 732fa225cbcSrjs case SAMPLER_STATE_EXTEND_NONE: 733fa225cbcSrjs sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; 734fa225cbcSrjs sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; 735fa225cbcSrjs sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; 736fa225cbcSrjs break; 737fa225cbcSrjs case SAMPLER_STATE_EXTEND_REPEAT: 738fa225cbcSrjs sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; 739fa225cbcSrjs sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; 740fa225cbcSrjs sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; 741fa225cbcSrjs break; 742fa225cbcSrjs case SAMPLER_STATE_EXTEND_PAD: 743fa225cbcSrjs sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 744fa225cbcSrjs sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 745fa225cbcSrjs 
sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; 746fa225cbcSrjs break; 747fa225cbcSrjs case SAMPLER_STATE_EXTEND_REFLECT: 748fa225cbcSrjs sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 749fa225cbcSrjs sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 750fa225cbcSrjs sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR; 751fa225cbcSrjs break; 752fa225cbcSrjs } 753fa225cbcSrjs 754fa225cbcSrjs sampler_state->ss2.border_color_pointer = 755fa225cbcSrjs intel_emit_reloc(sampler_state_bo, sampler_state_offset + 756fa225cbcSrjs offsetof(struct brw_sampler_state, ss2), 757fa225cbcSrjs border_color_bo, 0, 758fa225cbcSrjs I915_GEM_DOMAIN_SAMPLER, 0) >> 5; 759fa225cbcSrjs 760fa225cbcSrjs sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ 761fa225cbcSrjs} 762fa225cbcSrjs 763fa225cbcSrjsstatic drm_intel_bo * 764fa225cbcSrjsgen4_create_sampler_state(ScrnInfoPtr scrn, 765fa225cbcSrjs sampler_state_filter_t src_filter, 766fa225cbcSrjs sampler_state_extend_t src_extend, 767fa225cbcSrjs sampler_state_filter_t mask_filter, 768fa225cbcSrjs sampler_state_extend_t mask_extend, 769fa225cbcSrjs drm_intel_bo *border_color_bo) 770fa225cbcSrjs{ 771fa225cbcSrjs I830Ptr pI830 = I830PTR(scrn); 772fa225cbcSrjs drm_intel_bo *sampler_state_bo; 773fa225cbcSrjs struct brw_sampler_state *sampler_state; 774fa225cbcSrjs 775fa225cbcSrjs sampler_state_bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 sampler state", 776fa225cbcSrjs sizeof(struct brw_sampler_state) * 2, 777fa225cbcSrjs 4096); 778fa225cbcSrjs drm_intel_bo_map(sampler_state_bo, TRUE); 779fa225cbcSrjs sampler_state = sampler_state_bo->virtual; 780fa225cbcSrjs 781fa225cbcSrjs sampler_state_init(sampler_state_bo, 782fa225cbcSrjs &sampler_state[0], 783fa225cbcSrjs src_filter, src_extend, 784fa225cbcSrjs border_color_bo); 785fa225cbcSrjs sampler_state_init(sampler_state_bo, 786fa225cbcSrjs &sampler_state[1], 787fa225cbcSrjs mask_filter, mask_extend, 788fa225cbcSrjs border_color_bo); 789fa225cbcSrjs 
790fa225cbcSrjs drm_intel_bo_unmap(sampler_state_bo); 791fa225cbcSrjs 792fa225cbcSrjs return sampler_state_bo; 793fa225cbcSrjs} 794fa225cbcSrjs 795fa225cbcSrjsstatic void 796fa225cbcSrjscc_state_init (drm_intel_bo *cc_state_bo, 797fa225cbcSrjs uint32_t cc_state_offset, 798fa225cbcSrjs int src_blend, 799fa225cbcSrjs int dst_blend, 800fa225cbcSrjs drm_intel_bo *cc_vp_bo) 801fa225cbcSrjs{ 802fa225cbcSrjs struct brw_cc_unit_state *cc_state; 803fa225cbcSrjs 804fa225cbcSrjs cc_state = (struct brw_cc_unit_state *)((char *)cc_state_bo->virtual + 805fa225cbcSrjs cc_state_offset); 806fa225cbcSrjs 807fa225cbcSrjs memset(cc_state, 0, sizeof(*cc_state)); 808fa225cbcSrjs cc_state->cc0.stencil_enable = 0; /* disable stencil */ 809fa225cbcSrjs cc_state->cc2.depth_test = 0; /* disable depth test */ 810fa225cbcSrjs cc_state->cc2.logicop_enable = 0; /* disable logic op */ 811fa225cbcSrjs cc_state->cc3.ia_blend_enable = 0; /* blend alpha same as colors */ 812fa225cbcSrjs cc_state->cc3.blend_enable = 1; /* enable color blend */ 813fa225cbcSrjs cc_state->cc3.alpha_test = 0; /* disable alpha test */ 814fa225cbcSrjs 815fa225cbcSrjs cc_state->cc4.cc_viewport_state_offset = 816fa225cbcSrjs intel_emit_reloc(cc_state_bo, cc_state_offset + 817fa225cbcSrjs offsetof(struct brw_cc_unit_state, cc4), 818fa225cbcSrjs cc_vp_bo, 0, 819fa225cbcSrjs I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; 820fa225cbcSrjs 821fa225cbcSrjs cc_state->cc5.dither_enable = 0; /* disable dither */ 822fa225cbcSrjs cc_state->cc5.logicop_func = 0xc; /* COPY */ 823fa225cbcSrjs cc_state->cc5.statistics_enable = 1; 824fa225cbcSrjs cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD; 825fa225cbcSrjs 826fa225cbcSrjs /* Fill in alpha blend factors same as color, for the future. 
*/ 827fa225cbcSrjs cc_state->cc5.ia_src_blend_factor = src_blend; 828fa225cbcSrjs cc_state->cc5.ia_dest_blend_factor = dst_blend; 829fa225cbcSrjs 830fa225cbcSrjs cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD; 831fa225cbcSrjs cc_state->cc6.clamp_post_alpha_blend = 1; 832fa225cbcSrjs cc_state->cc6.clamp_pre_alpha_blend = 1; 833fa225cbcSrjs cc_state->cc6.clamp_range = 0; /* clamp range [0,1] */ 834fa225cbcSrjs 835fa225cbcSrjs cc_state->cc6.src_blend_factor = src_blend; 836fa225cbcSrjs cc_state->cc6.dest_blend_factor = dst_blend; 837fa225cbcSrjs} 838fa225cbcSrjs 839fa225cbcSrjsstatic drm_intel_bo * 840fa225cbcSrjsgen4_create_wm_state(ScrnInfoPtr scrn, 841fa225cbcSrjs Bool has_mask, drm_intel_bo *kernel_bo, 842fa225cbcSrjs drm_intel_bo *sampler_bo) 843fa225cbcSrjs{ 844fa225cbcSrjs I830Ptr pI830 = I830PTR(scrn); 845fa225cbcSrjs struct brw_wm_unit_state *wm_state; 846fa225cbcSrjs drm_intel_bo *wm_state_bo; 847fa225cbcSrjs 848fa225cbcSrjs wm_state_bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 WM state", 849fa225cbcSrjs sizeof(*wm_state), 4096); 850fa225cbcSrjs drm_intel_bo_map(wm_state_bo, TRUE); 851fa225cbcSrjs wm_state = wm_state_bo->virtual; 852fa225cbcSrjs 853fa225cbcSrjs memset(wm_state, 0, sizeof (*wm_state)); 854fa225cbcSrjs wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); 855fa225cbcSrjs wm_state->thread0.kernel_start_pointer = 856fa225cbcSrjs intel_emit_reloc(wm_state_bo, 857fa225cbcSrjs offsetof(struct brw_wm_unit_state, thread0), 858fa225cbcSrjs kernel_bo, wm_state->thread0.grf_reg_count << 1, 859fa225cbcSrjs I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; 860fa225cbcSrjs 861fa225cbcSrjs wm_state->thread1.single_program_flow = 0; 862fa225cbcSrjs 863fa225cbcSrjs /* scratch space is not used in our kernel */ 864fa225cbcSrjs wm_state->thread2.scratch_space_base_pointer = 0; 865fa225cbcSrjs wm_state->thread2.per_thread_scratch_space = 0; 866fa225cbcSrjs 867fa225cbcSrjs wm_state->thread3.const_urb_entry_read_length = 0; 868fa225cbcSrjs 
wm_state->thread3.const_urb_entry_read_offset = 0; 869fa225cbcSrjs 870fa225cbcSrjs wm_state->thread3.urb_entry_read_offset = 0; 871fa225cbcSrjs /* wm kernel use urb from 3, see wm_program in compiler module */ 872fa225cbcSrjs wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */ 873fa225cbcSrjs 874fa225cbcSrjs wm_state->wm4.stats_enable = 1; /* statistic */ 875fa225cbcSrjs 876fa225cbcSrjs if (IS_IGDNG(pI830)) 877fa225cbcSrjs wm_state->wm4.sampler_count = 0; /* hardware requirement */ 878fa225cbcSrjs else 879fa225cbcSrjs wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */ 880fa225cbcSrjs 881fa225cbcSrjs wm_state->wm4.sampler_state_pointer = 882fa225cbcSrjs intel_emit_reloc(wm_state_bo, offsetof(struct brw_wm_unit_state, wm4), 883fa225cbcSrjs sampler_bo, 884fa225cbcSrjs wm_state->wm4.stats_enable + 885fa225cbcSrjs (wm_state->wm4.sampler_count << 2), 886fa225cbcSrjs I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; 887fa225cbcSrjs wm_state->wm5.max_threads = PS_MAX_THREADS - 1; 888fa225cbcSrjs wm_state->wm5.transposed_urb_read = 0; 889fa225cbcSrjs wm_state->wm5.thread_dispatch_enable = 1; 890fa225cbcSrjs /* just use 16-pixel dispatch (4 subspans), don't need to change kernel 891fa225cbcSrjs * start point 892fa225cbcSrjs */ 893fa225cbcSrjs wm_state->wm5.enable_16_pix = 1; 894fa225cbcSrjs wm_state->wm5.enable_8_pix = 0; 895fa225cbcSrjs wm_state->wm5.early_depth_test = 1; 896fa225cbcSrjs 897fa225cbcSrjs /* Each pair of attributes (src/mask coords) is two URB entries */ 898fa225cbcSrjs if (has_mask) { 899fa225cbcSrjs wm_state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */ 900fa225cbcSrjs wm_state->thread3.urb_entry_read_length = 4; 901fa225cbcSrjs } else { 902fa225cbcSrjs wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ 903fa225cbcSrjs wm_state->thread3.urb_entry_read_length = 2; 904fa225cbcSrjs } 905fa225cbcSrjs 906fa225cbcSrjs /* binding table entry count is only used for prefetching, and it has to 907fa225cbcSrjs * be set 0 for 
IGDNG 908fa225cbcSrjs */ 909fa225cbcSrjs if (IS_IGDNG(pI830)) 910fa225cbcSrjs wm_state->thread1.binding_table_entry_count = 0; 911fa225cbcSrjs 912fa225cbcSrjs drm_intel_bo_unmap(wm_state_bo); 913fa225cbcSrjs 914fa225cbcSrjs return wm_state_bo; 915fa225cbcSrjs} 916fa225cbcSrjs 917fa225cbcSrjsstatic drm_intel_bo * 918fa225cbcSrjsgen4_create_cc_viewport(ScrnInfoPtr scrn) 919fa225cbcSrjs{ 920fa225cbcSrjs I830Ptr pI830 = I830PTR(scrn); 921fa225cbcSrjs drm_intel_bo *bo; 922fa225cbcSrjs struct brw_cc_viewport cc_viewport; 923fa225cbcSrjs 924fa225cbcSrjs cc_viewport.min_depth = -1.e35; 925fa225cbcSrjs cc_viewport.max_depth = 1.e35; 926fa225cbcSrjs 927fa225cbcSrjs bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 render unit state", 928fa225cbcSrjs sizeof(cc_viewport), 4096); 929fa225cbcSrjs drm_intel_bo_subdata(bo, 0, sizeof(cc_viewport), &cc_viewport); 930fa225cbcSrjs 931fa225cbcSrjs return bo; 932fa225cbcSrjs} 933fa225cbcSrjs 934fa225cbcSrjsstatic drm_intel_bo * 935fa225cbcSrjsgen4_create_vs_unit_state(ScrnInfoPtr scrn) 936fa225cbcSrjs{ 937fa225cbcSrjs I830Ptr pI830 = I830PTR(scrn); 938fa225cbcSrjs struct brw_vs_unit_state vs_state; 939fa225cbcSrjs memset(&vs_state, 0, sizeof(vs_state)); 940fa225cbcSrjs 941fa225cbcSrjs /* Set up the vertex shader to be disabled (passthrough) */ 942fa225cbcSrjs if (IS_IGDNG(pI830)) 943fa225cbcSrjs vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; /* hardware requirement */ 944fa225cbcSrjs else 945fa225cbcSrjs vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES; 946fa225cbcSrjs vs_state.thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; 947fa225cbcSrjs vs_state.vs6.vs_enable = 0; 948fa225cbcSrjs vs_state.vs6.vert_cache_disable = 1; 949fa225cbcSrjs 950fa225cbcSrjs return intel_bo_alloc_for_data(scrn, &vs_state, sizeof(vs_state), 951fa225cbcSrjs "gen4 render VS state"); 952fa225cbcSrjs} 953fa225cbcSrjs 954fa225cbcSrjs/** 955fa225cbcSrjs * Set up all combinations of cc state: each blendfactor for source and 956fa225cbcSrjs * dest. 
957fa225cbcSrjs */ 958fa225cbcSrjsstatic drm_intel_bo * 959fa225cbcSrjsgen4_create_cc_unit_state(ScrnInfoPtr scrn) 960fa225cbcSrjs{ 961fa225cbcSrjs I830Ptr pI830 = I830PTR(scrn); 962fa225cbcSrjs struct gen4_cc_unit_state *cc_state; 963fa225cbcSrjs drm_intel_bo *cc_state_bo, *cc_vp_bo; 964fa225cbcSrjs int i, j; 965fa225cbcSrjs 966fa225cbcSrjs cc_vp_bo = gen4_create_cc_viewport(scrn); 967fa225cbcSrjs 968fa225cbcSrjs cc_state_bo = drm_intel_bo_alloc(pI830->bufmgr, "gen4 CC state", 969fa225cbcSrjs sizeof(*cc_state), 4096); 970fa225cbcSrjs drm_intel_bo_map(cc_state_bo, TRUE); 971fa225cbcSrjs cc_state = cc_state_bo->virtual; 972fa225cbcSrjs for (i = 0; i < BRW_BLENDFACTOR_COUNT; i++) { 973fa225cbcSrjs for (j = 0; j < BRW_BLENDFACTOR_COUNT; j++) { 974fa225cbcSrjs cc_state_init(cc_state_bo, 975fa225cbcSrjs offsetof(struct gen4_cc_unit_state, 976fa225cbcSrjs cc_state[i][j].state), 977fa225cbcSrjs i, j, cc_vp_bo); 978fa225cbcSrjs } 979fa225cbcSrjs } 980fa225cbcSrjs drm_intel_bo_unmap(cc_state_bo); 981fa225cbcSrjs 982fa225cbcSrjs drm_intel_bo_unreference(cc_vp_bo); 983fa225cbcSrjs 984fa225cbcSrjs return cc_state_bo; 985fa225cbcSrjs} 986fa225cbcSrjs 987fa225cbcSrjsstatic uint32_t 988fa225cbcSrjsi965_get_card_format(PicturePtr pPict) 989fa225cbcSrjs{ 990fa225cbcSrjs int i; 991fa225cbcSrjs 992fa225cbcSrjs for (i = 0; i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]); 993fa225cbcSrjs i++) 994fa225cbcSrjs { 995fa225cbcSrjs if (i965_tex_formats[i].fmt == pPict->format) 996fa225cbcSrjs break; 997fa225cbcSrjs } 998fa225cbcSrjs assert(i != sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0])); 999fa225cbcSrjs 1000fa225cbcSrjs return i965_tex_formats[i].card_fmt; 1001fa225cbcSrjs} 1002fa225cbcSrjs 1003fa225cbcSrjsstatic sampler_state_filter_t 1004fa225cbcSrjssampler_state_filter_from_picture (int filter) 1005fa225cbcSrjs{ 1006fa225cbcSrjs switch (filter) { 1007fa225cbcSrjs case PictFilterNearest: 1008fa225cbcSrjs return SAMPLER_STATE_FILTER_NEAREST; 1009fa225cbcSrjs case 
PictFilterBilinear: 1010fa225cbcSrjs return SAMPLER_STATE_FILTER_BILINEAR; 1011fa225cbcSrjs default: 1012fa225cbcSrjs return -1; 1013fa225cbcSrjs } 1014fa225cbcSrjs} 1015fa225cbcSrjs 1016fa225cbcSrjsstatic sampler_state_extend_t 1017fa225cbcSrjssampler_state_extend_from_picture (int repeat_type) 1018fa225cbcSrjs{ 1019fa225cbcSrjs switch (repeat_type) { 1020fa225cbcSrjs case RepeatNone: 1021fa225cbcSrjs return SAMPLER_STATE_EXTEND_NONE; 1022fa225cbcSrjs case RepeatNormal: 1023fa225cbcSrjs return SAMPLER_STATE_EXTEND_REPEAT; 1024fa225cbcSrjs case RepeatPad: 1025fa225cbcSrjs return SAMPLER_STATE_EXTEND_PAD; 1026fa225cbcSrjs case RepeatReflect: 1027fa225cbcSrjs return SAMPLER_STATE_EXTEND_REFLECT; 1028fa225cbcSrjs default: 1029fa225cbcSrjs return -1; 1030fa225cbcSrjs } 1031fa225cbcSrjs} 1032fa225cbcSrjs 1033fa225cbcSrjs/** 1034fa225cbcSrjs * Sets up the common fields for a surface state buffer for the given 1035fa225cbcSrjs * picture in the given surface state buffer. 1036fa225cbcSrjs */ 1037fa225cbcSrjsstatic void 1038fa225cbcSrjsi965_set_picture_surface_state(dri_bo *ss_bo, int ss_index, 1039fa225cbcSrjs PicturePtr pPicture, PixmapPtr pPixmap, 1040fa225cbcSrjs Bool is_dst) 1041fa225cbcSrjs{ 1042fa225cbcSrjs struct brw_surface_state_padded *ss; 1043fa225cbcSrjs struct brw_surface_state local_ss; 1044fa225cbcSrjs dri_bo *pixmap_bo = i830_get_pixmap_bo(pPixmap); 1045fa225cbcSrjs 1046fa225cbcSrjs ss = (struct brw_surface_state_padded *)ss_bo->virtual + ss_index; 1047fa225cbcSrjs 1048fa225cbcSrjs /* Since ss is a pointer to WC memory, do all of our bit operations 1049fa225cbcSrjs * into a local temporary first. 
1050fa225cbcSrjs */ 1051fa225cbcSrjs memset(&local_ss, 0, sizeof(local_ss)); 1052fa225cbcSrjs local_ss.ss0.surface_type = BRW_SURFACE_2D; 1053fa225cbcSrjs if (is_dst) { 1054fa225cbcSrjs uint32_t dst_format = 0; 1055fa225cbcSrjs Bool ret = TRUE; 1056fa225cbcSrjs 1057fa225cbcSrjs ret = i965_get_dest_format(pPicture, &dst_format); 1058fa225cbcSrjs assert(ret == TRUE); 1059fa225cbcSrjs local_ss.ss0.surface_format = dst_format; 1060fa225cbcSrjs } else { 1061fa225cbcSrjs local_ss.ss0.surface_format = i965_get_card_format(pPicture); 1062fa225cbcSrjs } 1063fa225cbcSrjs 1064fa225cbcSrjs local_ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; 1065fa225cbcSrjs local_ss.ss0.writedisable_alpha = 0; 1066fa225cbcSrjs local_ss.ss0.writedisable_red = 0; 1067fa225cbcSrjs local_ss.ss0.writedisable_green = 0; 1068fa225cbcSrjs local_ss.ss0.writedisable_blue = 0; 1069fa225cbcSrjs local_ss.ss0.color_blend = 1; 1070fa225cbcSrjs local_ss.ss0.vert_line_stride = 0; 1071fa225cbcSrjs local_ss.ss0.vert_line_stride_ofs = 0; 1072fa225cbcSrjs local_ss.ss0.mipmap_layout_mode = 0; 1073fa225cbcSrjs local_ss.ss0.render_cache_read_mode = 0; 1074fa225cbcSrjs if (pixmap_bo != NULL) 1075fa225cbcSrjs local_ss.ss1.base_addr = pixmap_bo->offset; 1076fa225cbcSrjs else 1077fa225cbcSrjs local_ss.ss1.base_addr = intel_get_pixmap_offset(pPixmap); 1078fa225cbcSrjs 1079fa225cbcSrjs local_ss.ss2.mip_count = 0; 1080fa225cbcSrjs local_ss.ss2.render_target_rotation = 0; 1081fa225cbcSrjs local_ss.ss2.height = pPixmap->drawable.height - 1; 1082fa225cbcSrjs local_ss.ss2.width = pPixmap->drawable.width - 1; 1083fa225cbcSrjs local_ss.ss3.pitch = intel_get_pixmap_pitch(pPixmap) - 1; 1084fa225cbcSrjs local_ss.ss3.tile_walk = 0; /* Tiled X */ 1085fa225cbcSrjs local_ss.ss3.tiled_surface = i830_pixmap_tiled(pPixmap) ? 
1 : 0; 1086fa225cbcSrjs 1087fa225cbcSrjs memcpy(ss, &local_ss, sizeof(local_ss)); 1088fa225cbcSrjs 1089fa225cbcSrjs if (pixmap_bo != NULL) { 1090fa225cbcSrjs uint32_t write_domain, read_domains; 1091fa225cbcSrjs 1092fa225cbcSrjs if (is_dst) { 1093fa225cbcSrjs write_domain = I915_GEM_DOMAIN_RENDER; 1094fa225cbcSrjs read_domains = I915_GEM_DOMAIN_RENDER; 1095fa225cbcSrjs } else { 1096fa225cbcSrjs write_domain = 0; 1097fa225cbcSrjs read_domains = I915_GEM_DOMAIN_SAMPLER; 1098fa225cbcSrjs } 1099fa225cbcSrjs dri_bo_emit_reloc(ss_bo, read_domains, write_domain, 1100fa225cbcSrjs 0, 1101fa225cbcSrjs ss_index * sizeof(*ss) + 1102fa225cbcSrjs offsetof(struct brw_surface_state, ss1), 1103fa225cbcSrjs pixmap_bo); 1104fa225cbcSrjs } 1105fa225cbcSrjs} 1106fa225cbcSrjs 1107fa225cbcSrjsstatic void 1108fa225cbcSrjsi965_emit_composite_state(ScrnInfoPtr pScrn) 1109fa225cbcSrjs{ 1110fa225cbcSrjs I830Ptr pI830 = I830PTR(pScrn); 1111fa225cbcSrjs struct gen4_render_state *render_state= pI830->gen4_render_state; 1112fa225cbcSrjs gen4_composite_op *composite_op = &render_state->composite_op; 1113fa225cbcSrjs int op = composite_op->op; 1114fa225cbcSrjs PicturePtr pMaskPicture = composite_op->mask_picture; 1115fa225cbcSrjs PicturePtr pDstPicture = composite_op->dest_picture; 1116fa225cbcSrjs PixmapPtr pMask = composite_op->mask; 1117fa225cbcSrjs PixmapPtr pDst = composite_op->dest; 1118fa225cbcSrjs sampler_state_filter_t src_filter = composite_op->src_filter; 1119fa225cbcSrjs sampler_state_filter_t mask_filter = composite_op->mask_filter; 1120fa225cbcSrjs sampler_state_extend_t src_extend = composite_op->src_extend; 1121fa225cbcSrjs sampler_state_extend_t mask_extend = composite_op->mask_extend; 1122fa225cbcSrjs Bool is_affine = composite_op->is_affine; 1123fa225cbcSrjs int urb_vs_start, urb_vs_size; 1124fa225cbcSrjs int urb_gs_start, urb_gs_size; 1125fa225cbcSrjs int urb_clip_start, urb_clip_size; 1126fa225cbcSrjs int urb_sf_start, urb_sf_size; 1127fa225cbcSrjs int urb_cs_start, 
urb_cs_size; 1128fa225cbcSrjs uint32_t src_blend, dst_blend; 1129fa225cbcSrjs dri_bo *binding_table_bo = composite_op->binding_table_bo; 1130fa225cbcSrjs 1131fa225cbcSrjs render_state->needs_state_emit = FALSE; 1132fa225cbcSrjs 1133fa225cbcSrjs IntelEmitInvarientState(pScrn); 1134fa225cbcSrjs pI830->last_3d = LAST_3D_RENDER; 1135fa225cbcSrjs 1136fa225cbcSrjs urb_vs_start = 0; 1137fa225cbcSrjs urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; 1138fa225cbcSrjs urb_gs_start = urb_vs_start + urb_vs_size; 1139fa225cbcSrjs urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; 1140fa225cbcSrjs urb_clip_start = urb_gs_start + urb_gs_size; 1141fa225cbcSrjs urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; 1142fa225cbcSrjs urb_sf_start = urb_clip_start + urb_clip_size; 1143fa225cbcSrjs urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; 1144fa225cbcSrjs urb_cs_start = urb_sf_start + urb_sf_size; 1145fa225cbcSrjs urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; 1146fa225cbcSrjs 1147fa225cbcSrjs i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format, 1148fa225cbcSrjs &src_blend, &dst_blend); 1149fa225cbcSrjs 1150fa225cbcSrjs /* Begin the long sequence of commands needed to set up the 3D 1151fa225cbcSrjs * rendering pipe 1152fa225cbcSrjs */ 1153fa225cbcSrjs { 1154fa225cbcSrjs BEGIN_BATCH(2); 1155fa225cbcSrjs OUT_BATCH(MI_FLUSH | 1156fa225cbcSrjs MI_STATE_INSTRUCTION_CACHE_FLUSH | 1157fa225cbcSrjs BRW_MI_GLOBAL_SNAPSHOT_RESET); 1158fa225cbcSrjs OUT_BATCH(MI_NOOP); 1159fa225cbcSrjs ADVANCE_BATCH(); 1160fa225cbcSrjs } 1161fa225cbcSrjs { 1162fa225cbcSrjs if (IS_IGDNG(pI830)) 1163fa225cbcSrjs BEGIN_BATCH(14); 1164fa225cbcSrjs else 1165fa225cbcSrjs BEGIN_BATCH(12); 1166fa225cbcSrjs 1167fa225cbcSrjs /* Match Mesa driver setup */ 1168fa225cbcSrjs if (IS_G4X(pI830) || IS_IGDNG(pI830)) 1169fa225cbcSrjs OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); 1170fa225cbcSrjs else 1171fa225cbcSrjs OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D); 1172fa225cbcSrjs 1173fa225cbcSrjs 
OUT_BATCH(BRW_CS_URB_STATE | 0); 1174fa225cbcSrjs OUT_BATCH((0 << 4) | /* URB Entry Allocation Size */ 1175fa225cbcSrjs (0 << 0)); /* Number of URB Entries */ 1176fa225cbcSrjs 1177fa225cbcSrjs /* Zero out the two base address registers so all offsets are 1178fa225cbcSrjs * absolute. 1179fa225cbcSrjs */ 1180fa225cbcSrjs if (IS_IGDNG(pI830)) { 1181fa225cbcSrjs OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6); 1182fa225cbcSrjs OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ 1183fa225cbcSrjs OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */ 1184fa225cbcSrjs OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ 1185fa225cbcSrjs OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Instruction base address */ 1186fa225cbcSrjs /* general state max addr, disabled */ 1187fa225cbcSrjs OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY); 1188fa225cbcSrjs /* media object state max addr, disabled */ 1189fa225cbcSrjs OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY); 1190fa225cbcSrjs /* Instruction max addr, disabled */ 1191fa225cbcSrjs OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY); 1192fa225cbcSrjs } else { 1193fa225cbcSrjs OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4); 1194fa225cbcSrjs OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ 1195fa225cbcSrjs OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */ 1196fa225cbcSrjs OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ 1197fa225cbcSrjs /* general state max addr, disabled */ 1198fa225cbcSrjs OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY); 1199fa225cbcSrjs /* media object state max addr, disabled */ 1200fa225cbcSrjs OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY); 1201fa225cbcSrjs } 1202fa225cbcSrjs /* Set system instruction pointer */ 1203fa225cbcSrjs OUT_BATCH(BRW_STATE_SIP | 0); 1204fa225cbcSrjs OUT_RELOC(render_state->sip_kernel_bo, 1205fa225cbcSrjs I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 1206fa225cbcSrjs OUT_BATCH(MI_NOOP); 1207fa225cbcSrjs ADVANCE_BATCH(); 
1208fa225cbcSrjs } 1209fa225cbcSrjs { 1210fa225cbcSrjs int pipe_ctrl; 1211fa225cbcSrjs BEGIN_BATCH(26); 1212fa225cbcSrjs /* Pipe control */ 1213fa225cbcSrjs 1214fa225cbcSrjs if (IS_IGDNG(pI830)) 1215fa225cbcSrjs pipe_ctrl = BRW_PIPE_CONTROL_NOWRITE; 1216fa225cbcSrjs else 1217fa225cbcSrjs pipe_ctrl = BRW_PIPE_CONTROL_NOWRITE | BRW_PIPE_CONTROL_IS_FLUSH; 1218fa225cbcSrjs 1219fa225cbcSrjs OUT_BATCH(BRW_PIPE_CONTROL | pipe_ctrl | 2); 1220fa225cbcSrjs OUT_BATCH(0); /* Destination address */ 1221fa225cbcSrjs OUT_BATCH(0); /* Immediate data low DW */ 1222fa225cbcSrjs OUT_BATCH(0); /* Immediate data high DW */ 1223fa225cbcSrjs 1224fa225cbcSrjs /* Binding table pointers */ 1225fa225cbcSrjs OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4); 1226fa225cbcSrjs OUT_BATCH(0); /* vs */ 1227fa225cbcSrjs OUT_BATCH(0); /* gs */ 1228fa225cbcSrjs OUT_BATCH(0); /* clip */ 1229fa225cbcSrjs OUT_BATCH(0); /* sf */ 1230fa225cbcSrjs /* Only the PS uses the binding table */ 1231fa225cbcSrjs OUT_RELOC(binding_table_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); 1232fa225cbcSrjs 1233fa225cbcSrjs /* The drawing rectangle clipping is always on. Set it to values that 1234fa225cbcSrjs * shouldn't do any clipping. 
1235fa225cbcSrjs */ 1236fa225cbcSrjs OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); 1237fa225cbcSrjs OUT_BATCH(0x00000000); /* ymin, xmin */ 1238fa225cbcSrjs OUT_BATCH(DRAW_YMAX(pDst->drawable.height - 1) | 1239fa225cbcSrjs DRAW_XMAX(pDst->drawable.width - 1)); /* ymax, xmax */ 1240fa225cbcSrjs OUT_BATCH(0x00000000); /* yorigin, xorigin */ 1241fa225cbcSrjs 1242fa225cbcSrjs /* skip the depth buffer */ 1243fa225cbcSrjs /* skip the polygon stipple */ 1244fa225cbcSrjs /* skip the polygon stipple offset */ 1245fa225cbcSrjs /* skip the line stipple */ 1246fa225cbcSrjs 1247fa225cbcSrjs /* Set the pointers to the 3d pipeline state */ 1248fa225cbcSrjs OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5); 1249fa225cbcSrjs OUT_RELOC(render_state->vs_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 1250fa225cbcSrjs OUT_BATCH(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */ 1251fa225cbcSrjs OUT_BATCH(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */ 1252fa225cbcSrjs if (pMask) { 1253fa225cbcSrjs OUT_RELOC(render_state->sf_mask_state_bo, 1254fa225cbcSrjs I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 1255fa225cbcSrjs } else { 1256fa225cbcSrjs OUT_RELOC(render_state->sf_state_bo, 1257fa225cbcSrjs I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 1258fa225cbcSrjs } 1259fa225cbcSrjs 1260fa225cbcSrjs OUT_RELOC(render_state->wm_state_bo[composite_op->wm_kernel] 1261fa225cbcSrjs [src_filter][src_extend] 1262fa225cbcSrjs [mask_filter][mask_extend], 1263fa225cbcSrjs I915_GEM_DOMAIN_INSTRUCTION, 0, 0); 1264fa225cbcSrjs 1265fa225cbcSrjs OUT_RELOC(render_state->cc_state_bo, 1266fa225cbcSrjs I915_GEM_DOMAIN_INSTRUCTION, 0, 1267fa225cbcSrjs offsetof(struct gen4_cc_unit_state, 1268fa225cbcSrjs cc_state[src_blend][dst_blend])); 1269fa225cbcSrjs 1270fa225cbcSrjs /* URB fence */ 1271fa225cbcSrjs OUT_BATCH(BRW_URB_FENCE | 1272fa225cbcSrjs UF0_CS_REALLOC | 1273fa225cbcSrjs UF0_SF_REALLOC | 1274fa225cbcSrjs UF0_CLIP_REALLOC | 1275fa225cbcSrjs UF0_GS_REALLOC | 1276fa225cbcSrjs UF0_VS_REALLOC | 1277fa225cbcSrjs 
1); 1278fa225cbcSrjs OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | 1279fa225cbcSrjs ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | 1280fa225cbcSrjs ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); 1281fa225cbcSrjs OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | 1282fa225cbcSrjs ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); 1283fa225cbcSrjs 1284fa225cbcSrjs /* Constant buffer state */ 1285fa225cbcSrjs OUT_BATCH(BRW_CS_URB_STATE | 0); 1286fa225cbcSrjs OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) | 1287fa225cbcSrjs (URB_CS_ENTRIES << 0)); 1288fa225cbcSrjs ADVANCE_BATCH(); 1289fa225cbcSrjs } 1290fa225cbcSrjs { 1291fa225cbcSrjs /* 1292fa225cbcSrjs * number of extra parameters per vertex 1293fa225cbcSrjs */ 1294fa225cbcSrjs int nelem = pMask ? 2: 1; 1295fa225cbcSrjs /* 1296fa225cbcSrjs * size of extra parameters: 1297fa225cbcSrjs * 3 for homogenous (xyzw) 1298fa225cbcSrjs * 2 for cartesian (xy) 1299fa225cbcSrjs */ 1300fa225cbcSrjs int selem = is_affine ? 
2 : 3; 1301fa225cbcSrjs uint32_t w_component; 1302fa225cbcSrjs uint32_t src_format; 1303fa225cbcSrjs 1304fa225cbcSrjs render_state->vertex_size = 4 * (2 + nelem * selem); 1305fa225cbcSrjs 1306fa225cbcSrjs if (is_affine) 1307fa225cbcSrjs { 1308fa225cbcSrjs src_format = BRW_SURFACEFORMAT_R32G32_FLOAT; 1309fa225cbcSrjs w_component = BRW_VFCOMPONENT_STORE_1_FLT; 1310fa225cbcSrjs } 1311fa225cbcSrjs else 1312fa225cbcSrjs { 1313fa225cbcSrjs src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; 1314fa225cbcSrjs w_component = BRW_VFCOMPONENT_STORE_SRC; 1315fa225cbcSrjs } 1316fa225cbcSrjs 1317fa225cbcSrjs if (IS_IGDNG(pI830)) { 1318fa225cbcSrjs BEGIN_BATCH(pMask?9:7); 1319fa225cbcSrjs /* 1320fa225cbcSrjs * The reason to add this extra vertex element in the header is that 1321fa225cbcSrjs * IGDNG has different vertex header definition and origin method to 1322fa225cbcSrjs * set destination element offset doesn't exist anymore, which means 1323fa225cbcSrjs * hardware requires a predefined vertex element layout. 1324fa225cbcSrjs * 1325fa225cbcSrjs * haihao proposed this approach to fill the first vertex element, so 1326fa225cbcSrjs * origin layout for Gen4 doesn't need to change, and origin shader 1327fa225cbcSrjs * programs behavior is also kept. 1328fa225cbcSrjs * 1329fa225cbcSrjs * I think this is not bad. 
- zhenyu 1330fa225cbcSrjs */ 1331fa225cbcSrjs 1332fa225cbcSrjs OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * (2 + nelem)) - 1)); 1333fa225cbcSrjs OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1334fa225cbcSrjs VE0_VALID | 1335fa225cbcSrjs (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1336fa225cbcSrjs (0 << VE0_OFFSET_SHIFT)); 1337fa225cbcSrjs 1338fa225cbcSrjs OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | 1339fa225cbcSrjs (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | 1340fa225cbcSrjs (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | 1341fa225cbcSrjs (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); 1342fa225cbcSrjs } else { 1343fa225cbcSrjs BEGIN_BATCH(pMask?7:5); 1344fa225cbcSrjs /* Set up our vertex elements, sourced from the single vertex buffer. 1345fa225cbcSrjs * that will be set up later. 1346fa225cbcSrjs */ 1347fa225cbcSrjs OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * (1 + nelem)) - 1)); 1348fa225cbcSrjs } 1349fa225cbcSrjs 1350fa225cbcSrjs /* x,y */ 1351fa225cbcSrjs OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1352fa225cbcSrjs VE0_VALID | 1353fa225cbcSrjs (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | 1354fa225cbcSrjs (0 << VE0_OFFSET_SHIFT)); 1355fa225cbcSrjs 1356fa225cbcSrjs if (IS_IGDNG(pI830)) 1357fa225cbcSrjs OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1358fa225cbcSrjs (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1359fa225cbcSrjs (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 1360fa225cbcSrjs (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 1361fa225cbcSrjs else 1362fa225cbcSrjs OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1363fa225cbcSrjs (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1364fa225cbcSrjs (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | 1365fa225cbcSrjs (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | 1366fa225cbcSrjs (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); 
1367fa225cbcSrjs /* u0, v0, w0 */ 1368fa225cbcSrjs OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1369fa225cbcSrjs VE0_VALID | 1370fa225cbcSrjs (src_format << VE0_FORMAT_SHIFT) | 1371fa225cbcSrjs ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */ 1372fa225cbcSrjs 1373fa225cbcSrjs if (IS_IGDNG(pI830)) 1374fa225cbcSrjs OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1375fa225cbcSrjs (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1376fa225cbcSrjs (w_component << VE1_VFCOMPONENT_2_SHIFT) | 1377fa225cbcSrjs (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 1378fa225cbcSrjs else 1379fa225cbcSrjs OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1380fa225cbcSrjs (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1381fa225cbcSrjs (w_component << VE1_VFCOMPONENT_2_SHIFT) | 1382fa225cbcSrjs (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | 1383fa225cbcSrjs ((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ 1384fa225cbcSrjs /* u1, v1, w1 */ 1385fa225cbcSrjs if (pMask) { 1386fa225cbcSrjs OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | 1387fa225cbcSrjs VE0_VALID | 1388fa225cbcSrjs (src_format << VE0_FORMAT_SHIFT) | 1389fa225cbcSrjs (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */ 1390fa225cbcSrjs 1391fa225cbcSrjs if (IS_IGDNG(pI830)) 1392fa225cbcSrjs OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1393fa225cbcSrjs (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1394fa225cbcSrjs (w_component << VE1_VFCOMPONENT_2_SHIFT) | 1395fa225cbcSrjs (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); 1396fa225cbcSrjs else 1397fa225cbcSrjs OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 1398fa225cbcSrjs (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | 1399fa225cbcSrjs (w_component << VE1_VFCOMPONENT_2_SHIFT) | 1400fa225cbcSrjs (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | 1401fa225cbcSrjs ((4 + 
4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ 1402fa225cbcSrjs } 1403fa225cbcSrjs 1404fa225cbcSrjs ADVANCE_BATCH(); 1405fa225cbcSrjs } 1406fa225cbcSrjs} 1407fa225cbcSrjs 1408fa225cbcSrjs/** 1409fa225cbcSrjs * Returns whether the current set of composite state plus vertex buffer is 1410fa225cbcSrjs * expected to fit in the aperture. 1411fa225cbcSrjs */ 1412fa225cbcSrjsstatic Bool 1413fa225cbcSrjsi965_composite_check_aperture(ScrnInfoPtr pScrn) 1414fa225cbcSrjs{ 1415fa225cbcSrjs I830Ptr pI830 = I830PTR(pScrn); 1416fa225cbcSrjs struct gen4_render_state *render_state= pI830->gen4_render_state; 1417fa225cbcSrjs gen4_composite_op *composite_op = &render_state->composite_op; 1418fa225cbcSrjs drm_intel_bo *bo_table[] = { 1419fa225cbcSrjs pI830->batch_bo, 1420fa225cbcSrjs composite_op->binding_table_bo, 1421fa225cbcSrjs render_state->vertex_buffer_bo, 1422fa225cbcSrjs render_state->vs_state_bo, 1423fa225cbcSrjs render_state->sf_state_bo, 1424fa225cbcSrjs render_state->sf_mask_state_bo, 1425fa225cbcSrjs render_state->wm_state_bo[composite_op->wm_kernel] 1426fa225cbcSrjs [composite_op->src_filter] 1427fa225cbcSrjs [composite_op->src_extend] 1428fa225cbcSrjs [composite_op->mask_filter] 1429fa225cbcSrjs [composite_op->mask_extend], 1430fa225cbcSrjs render_state->cc_state_bo, 1431fa225cbcSrjs render_state->sip_kernel_bo, 1432fa225cbcSrjs }; 1433fa225cbcSrjs 1434fa225cbcSrjs return drm_intel_bufmgr_check_aperture_space(bo_table, 1435fa225cbcSrjs ARRAY_SIZE(bo_table)) == 0; 1436fa225cbcSrjs} 1437fa225cbcSrjs 1438fa225cbcSrjsBool 1439fa225cbcSrjsi965_prepare_composite(int op, PicturePtr pSrcPicture, 1440fa225cbcSrjs PicturePtr pMaskPicture, PicturePtr pDstPicture, 1441fa225cbcSrjs PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) 1442fa225cbcSrjs{ 1443fa225cbcSrjs ScrnInfoPtr pScrn = xf86Screens[pDstPicture->pDrawable->pScreen->myNum]; 1444fa225cbcSrjs I830Ptr pI830 = I830PTR(pScrn); 1445fa225cbcSrjs struct gen4_render_state *render_state= 
pI830->gen4_render_state; 1446fa225cbcSrjs gen4_composite_op *composite_op = &render_state->composite_op; 1447fa225cbcSrjs uint32_t *binding_table; 1448fa225cbcSrjs drm_intel_bo *binding_table_bo, *surface_state_bo; 1449fa225cbcSrjs 1450fa225cbcSrjs if (composite_op->src_filter < 0) 1451fa225cbcSrjs I830FALLBACK("Bad src filter 0x%x\n", pSrcPicture->filter); 1452fa225cbcSrjs composite_op->src_extend = 1453fa225cbcSrjs sampler_state_extend_from_picture(pSrcPicture->repeatType); 1454fa225cbcSrjs if (composite_op->src_extend < 0) 1455fa225cbcSrjs I830FALLBACK("Bad src repeat 0x%x\n", pSrcPicture->repeatType); 1456fa225cbcSrjs 1457fa225cbcSrjs if (pMaskPicture) { 1458fa225cbcSrjs composite_op->mask_filter = 1459fa225cbcSrjs sampler_state_filter_from_picture(pMaskPicture->filter); 1460fa225cbcSrjs if (composite_op->mask_filter < 0) 1461fa225cbcSrjs I830FALLBACK("Bad mask filter 0x%x\n", pMaskPicture->filter); 1462fa225cbcSrjs composite_op->mask_extend = 1463fa225cbcSrjs sampler_state_extend_from_picture(pMaskPicture->repeatType); 1464fa225cbcSrjs if (composite_op->mask_extend < 0) 1465fa225cbcSrjs I830FALLBACK("Bad mask repeat 0x%x\n", pMaskPicture->repeatType); 1466fa225cbcSrjs } else { 1467fa225cbcSrjs composite_op->mask_filter = SAMPLER_STATE_FILTER_NEAREST; 1468fa225cbcSrjs composite_op->mask_extend = SAMPLER_STATE_EXTEND_NONE; 1469fa225cbcSrjs } 1470fa225cbcSrjs 1471fa225cbcSrjs /* Set up the surface states. 
*/ 1472fa225cbcSrjs surface_state_bo = dri_bo_alloc(pI830->bufmgr, "surface_state", 1473fa225cbcSrjs 3 * sizeof (brw_surface_state_padded), 1474fa225cbcSrjs 4096); 1475fa225cbcSrjs if (dri_bo_map(surface_state_bo, 1) != 0) { 1476fa225cbcSrjs dri_bo_unreference(surface_state_bo); 1477fa225cbcSrjs return FALSE; 1478fa225cbcSrjs } 1479fa225cbcSrjs /* Set up the state buffer for the destination surface */ 1480fa225cbcSrjs i965_set_picture_surface_state(surface_state_bo, 0, 1481fa225cbcSrjs pDstPicture, pDst, TRUE); 1482fa225cbcSrjs /* Set up the source surface state buffer */ 1483fa225cbcSrjs i965_set_picture_surface_state(surface_state_bo, 1, 1484fa225cbcSrjs pSrcPicture, pSrc, FALSE); 1485fa225cbcSrjs if (pMask) { 1486fa225cbcSrjs /* Set up the mask surface state buffer */ 1487fa225cbcSrjs i965_set_picture_surface_state(surface_state_bo, 2, 1488fa225cbcSrjs pMaskPicture, pMask, 1489fa225cbcSrjs FALSE); 1490fa225cbcSrjs } 1491fa225cbcSrjs dri_bo_unmap(surface_state_bo); 1492fa225cbcSrjs 1493fa225cbcSrjs /* Set up the binding table of surface indices to surface state. 
*/ 1494fa225cbcSrjs binding_table_bo = dri_bo_alloc(pI830->bufmgr, "binding_table", 1495fa225cbcSrjs 3 * sizeof(uint32_t), 4096); 1496fa225cbcSrjs if (dri_bo_map (binding_table_bo, 1) != 0) { 1497fa225cbcSrjs dri_bo_unreference(binding_table_bo); 1498fa225cbcSrjs dri_bo_unreference(surface_state_bo); 1499fa225cbcSrjs return FALSE; 1500fa225cbcSrjs } 1501fa225cbcSrjs 1502fa225cbcSrjs binding_table = binding_table_bo->virtual; 1503fa225cbcSrjs binding_table[0] = intel_emit_reloc(binding_table_bo, 1504fa225cbcSrjs 0 * sizeof(uint32_t), 1505fa225cbcSrjs surface_state_bo, 1506fa225cbcSrjs 0 * sizeof(brw_surface_state_padded), 1507fa225cbcSrjs I915_GEM_DOMAIN_INSTRUCTION, 0); 1508fa225cbcSrjs 1509fa225cbcSrjs binding_table[1] = intel_emit_reloc(binding_table_bo, 1510fa225cbcSrjs 1 * sizeof(uint32_t), 1511fa225cbcSrjs surface_state_bo, 1512fa225cbcSrjs 1 * sizeof(brw_surface_state_padded), 1513fa225cbcSrjs I915_GEM_DOMAIN_INSTRUCTION, 0); 1514fa225cbcSrjs 1515fa225cbcSrjs if (pMask) { 1516fa225cbcSrjs binding_table[2] = intel_emit_reloc(binding_table_bo, 1517fa225cbcSrjs 2 * sizeof(uint32_t), 1518fa225cbcSrjs surface_state_bo, 1519fa225cbcSrjs 2 * sizeof(brw_surface_state_padded), 1520fa225cbcSrjs I915_GEM_DOMAIN_INSTRUCTION, 0); 1521fa225cbcSrjs } else { 1522fa225cbcSrjs binding_table[2] = 0; 1523fa225cbcSrjs } 1524fa225cbcSrjs dri_bo_unmap(binding_table_bo); 1525fa225cbcSrjs /* All refs to surface_state are now contained in binding_table_bo. 
*/ 1526fa225cbcSrjs drm_intel_bo_unreference(surface_state_bo); 1527fa225cbcSrjs 1528fa225cbcSrjs composite_op->op = op; 1529fa225cbcSrjs composite_op->source_picture = pSrcPicture; 1530fa225cbcSrjs composite_op->mask_picture = pMaskPicture; 1531fa225cbcSrjs composite_op->dest_picture = pDstPicture; 1532fa225cbcSrjs composite_op->source = pSrc; 1533fa225cbcSrjs composite_op->mask = pMask; 1534fa225cbcSrjs composite_op->dest = pDst; 1535fa225cbcSrjs drm_intel_bo_unreference(composite_op->binding_table_bo); 1536fa225cbcSrjs composite_op->binding_table_bo = binding_table_bo; 1537fa225cbcSrjs composite_op->src_filter = 1538fa225cbcSrjs sampler_state_filter_from_picture(pSrcPicture->filter); 1539fa225cbcSrjs 1540fa225cbcSrjs pI830->scale_units[0][0] = pSrc->drawable.width; 1541fa225cbcSrjs pI830->scale_units[0][1] = pSrc->drawable.height; 1542fa225cbcSrjs 1543fa225cbcSrjs pI830->transform[0] = pSrcPicture->transform; 1544fa225cbcSrjs composite_op->is_affine = 1545fa225cbcSrjs i830_transform_is_affine(pI830->transform[0]); 1546fa225cbcSrjs 1547fa225cbcSrjs if (!pMask) { 1548fa225cbcSrjs pI830->transform[1] = NULL; 1549fa225cbcSrjs pI830->scale_units[1][0] = -1; 1550fa225cbcSrjs pI830->scale_units[1][1] = -1; 1551fa225cbcSrjs } else { 1552fa225cbcSrjs pI830->transform[1] = pMaskPicture->transform; 1553fa225cbcSrjs pI830->scale_units[1][0] = pMask->drawable.width; 1554fa225cbcSrjs pI830->scale_units[1][1] = pMask->drawable.height; 1555fa225cbcSrjs composite_op->is_affine |= 1556fa225cbcSrjs i830_transform_is_affine(pI830->transform[1]); 1557fa225cbcSrjs } 1558fa225cbcSrjs 1559fa225cbcSrjs 1560fa225cbcSrjs if (pMask) { 1561fa225cbcSrjs if (pMaskPicture->componentAlpha && 1562fa225cbcSrjs PICT_FORMAT_RGB(pMaskPicture->format)) 1563fa225cbcSrjs { 1564fa225cbcSrjs if (i965_blend_op[op].src_alpha) { 1565fa225cbcSrjs if (composite_op->is_affine) 1566fa225cbcSrjs composite_op->wm_kernel = WM_KERNEL_MASKCA_SRCALPHA_AFFINE; 1567fa225cbcSrjs else 1568fa225cbcSrjs 
composite_op->wm_kernel = WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE; 1569fa225cbcSrjs } else { 1570fa225cbcSrjs if (composite_op->is_affine) 1571fa225cbcSrjs composite_op->wm_kernel = WM_KERNEL_MASKCA_AFFINE; 1572fa225cbcSrjs else 1573fa225cbcSrjs composite_op->wm_kernel = WM_KERNEL_MASKCA_PROJECTIVE; 1574fa225cbcSrjs } 1575fa225cbcSrjs } else { 1576fa225cbcSrjs if (composite_op->is_affine) 1577fa225cbcSrjs composite_op->wm_kernel = WM_KERNEL_MASKNOCA_AFFINE; 1578fa225cbcSrjs else 1579fa225cbcSrjs composite_op->wm_kernel = WM_KERNEL_MASKNOCA_PROJECTIVE; 1580fa225cbcSrjs } 1581fa225cbcSrjs } else { 1582fa225cbcSrjs if (composite_op->is_affine) 1583fa225cbcSrjs composite_op->wm_kernel = WM_KERNEL_NOMASK_AFFINE; 1584fa225cbcSrjs else 1585fa225cbcSrjs composite_op->wm_kernel = WM_KERNEL_NOMASK_PROJECTIVE; 1586fa225cbcSrjs } 1587fa225cbcSrjs 1588fa225cbcSrjs if (!i965_composite_check_aperture(pScrn)) { 1589fa225cbcSrjs intel_batch_flush(pScrn, FALSE); 1590fa225cbcSrjs if (!i965_composite_check_aperture(pScrn)) 1591fa225cbcSrjs I830FALLBACK("Couldn't fit render operation in aperture\n"); 1592fa225cbcSrjs } 1593fa225cbcSrjs 1594fa225cbcSrjs render_state->needs_state_emit = TRUE; 1595fa225cbcSrjs 1596fa225cbcSrjs return TRUE; 1597fa225cbcSrjs} 1598fa225cbcSrjs 1599fa225cbcSrjsstatic drm_intel_bo * 1600fa225cbcSrjsi965_get_vb_space(ScrnInfoPtr pScrn) 1601fa225cbcSrjs{ 1602fa225cbcSrjs I830Ptr pI830 = I830PTR(pScrn); 1603fa225cbcSrjs struct gen4_render_state *render_state = pI830->gen4_render_state; 1604fa225cbcSrjs 1605fa225cbcSrjs /* If the vertex buffer is too full, then we free the old and a new one 1606fa225cbcSrjs * gets made. 
1607fa225cbcSrjs */ 1608fa225cbcSrjs if (render_state->vb_offset + VERTEX_FLOATS_PER_COMPOSITE > 1609fa225cbcSrjs VERTEX_BUFFER_SIZE) { 1610fa225cbcSrjs drm_intel_bo_unreference(render_state->vertex_buffer_bo); 1611fa225cbcSrjs render_state->vertex_buffer_bo = NULL; 1612fa225cbcSrjs } 1613fa225cbcSrjs 1614fa225cbcSrjs /* Alloc a new vertex buffer if necessary. */ 1615fa225cbcSrjs if (render_state->vertex_buffer_bo == NULL) { 1616fa225cbcSrjs render_state->vertex_buffer_bo = drm_intel_bo_alloc(pI830->bufmgr, "vb", 1617fa225cbcSrjs sizeof(gen4_vertex_buffer), 1618fa225cbcSrjs 4096); 1619fa225cbcSrjs render_state->vb_offset = 0; 1620fa225cbcSrjs } 1621fa225cbcSrjs 1622fa225cbcSrjs drm_intel_bo_reference(render_state->vertex_buffer_bo); 1623fa225cbcSrjs return render_state->vertex_buffer_bo; 1624fa225cbcSrjs} 1625fa225cbcSrjs 1626fa225cbcSrjsvoid 1627fa225cbcSrjsi965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, 1628fa225cbcSrjs int dstX, int dstY, int w, int h) 1629fa225cbcSrjs{ 1630fa225cbcSrjs ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; 1631fa225cbcSrjs I830Ptr pI830 = I830PTR(pScrn); 1632fa225cbcSrjs struct gen4_render_state *render_state = pI830->gen4_render_state; 1633fa225cbcSrjs Bool has_mask; 1634fa225cbcSrjs float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; 1635fa225cbcSrjs int i; 1636fa225cbcSrjs drm_intel_bo *vb_bo; 1637fa225cbcSrjs float vb[18]; 1638fa225cbcSrjs Bool is_affine = render_state->composite_op.is_affine; 1639fa225cbcSrjs 1640fa225cbcSrjs if (is_affine) 1641fa225cbcSrjs { 1642fa225cbcSrjs if (!i830_get_transformed_coordinates(srcX, srcY, 1643fa225cbcSrjs pI830->transform[0], 1644fa225cbcSrjs &src_x[0], &src_y[0])) 1645fa225cbcSrjs return; 1646fa225cbcSrjs if (!i830_get_transformed_coordinates(srcX, srcY + h, 1647fa225cbcSrjs pI830->transform[0], 1648fa225cbcSrjs &src_x[1], &src_y[1])) 1649fa225cbcSrjs return; 1650fa225cbcSrjs if (!i830_get_transformed_coordinates(srcX + w, srcY + h, 
1651fa225cbcSrjs pI830->transform[0], 1652fa225cbcSrjs &src_x[2], &src_y[2])) 1653fa225cbcSrjs return; 1654fa225cbcSrjs } 1655fa225cbcSrjs else 1656fa225cbcSrjs { 1657fa225cbcSrjs if (!i830_get_transformed_coordinates_3d(srcX, srcY, 1658fa225cbcSrjs pI830->transform[0], 1659fa225cbcSrjs &src_x[0], &src_y[0], 1660fa225cbcSrjs &src_w[0])) 1661fa225cbcSrjs return; 1662fa225cbcSrjs if (!i830_get_transformed_coordinates_3d(srcX, srcY + h, 1663fa225cbcSrjs pI830->transform[0], 1664fa225cbcSrjs &src_x[1], &src_y[1], 1665fa225cbcSrjs &src_w[1])) 1666fa225cbcSrjs return; 1667fa225cbcSrjs if (!i830_get_transformed_coordinates_3d(srcX + w, srcY + h, 1668fa225cbcSrjs pI830->transform[0], 1669fa225cbcSrjs &src_x[2], &src_y[2], 1670fa225cbcSrjs &src_w[2])) 1671fa225cbcSrjs return; 1672fa225cbcSrjs } 1673fa225cbcSrjs 1674fa225cbcSrjs if (pI830->scale_units[1][0] == -1 || pI830->scale_units[1][1] == -1) { 1675fa225cbcSrjs has_mask = FALSE; 1676fa225cbcSrjs } else { 1677fa225cbcSrjs has_mask = TRUE; 1678fa225cbcSrjs if (is_affine) { 1679fa225cbcSrjs if (!i830_get_transformed_coordinates(maskX, maskY, 1680fa225cbcSrjs pI830->transform[1], 1681fa225cbcSrjs &mask_x[0], &mask_y[0])) 1682fa225cbcSrjs return; 1683fa225cbcSrjs if (!i830_get_transformed_coordinates(maskX, maskY + h, 1684fa225cbcSrjs pI830->transform[1], 1685fa225cbcSrjs &mask_x[1], &mask_y[1])) 1686fa225cbcSrjs return; 1687fa225cbcSrjs if (!i830_get_transformed_coordinates(maskX + w, maskY + h, 1688fa225cbcSrjs pI830->transform[1], 1689fa225cbcSrjs &mask_x[2], &mask_y[2])) 1690fa225cbcSrjs return; 1691fa225cbcSrjs } else { 1692fa225cbcSrjs if (!i830_get_transformed_coordinates_3d(maskX, maskY, 1693fa225cbcSrjs pI830->transform[1], 1694fa225cbcSrjs &mask_x[0], &mask_y[0], 1695fa225cbcSrjs &mask_w[0])) 1696fa225cbcSrjs return; 1697fa225cbcSrjs if (!i830_get_transformed_coordinates_3d(maskX, maskY + h, 1698fa225cbcSrjs pI830->transform[1], 1699fa225cbcSrjs &mask_x[1], &mask_y[1], 1700fa225cbcSrjs &mask_w[1])) 1701fa225cbcSrjs 
return; 1702fa225cbcSrjs if (!i830_get_transformed_coordinates_3d(maskX + w, maskY + h, 1703fa225cbcSrjs pI830->transform[1], 1704fa225cbcSrjs &mask_x[2], &mask_y[2], 1705fa225cbcSrjs &mask_w[2])) 1706fa225cbcSrjs return; 1707fa225cbcSrjs } 1708fa225cbcSrjs } 1709fa225cbcSrjs 1710fa225cbcSrjs vb_bo = i965_get_vb_space(pScrn); 1711fa225cbcSrjs if (vb_bo == NULL) 1712fa225cbcSrjs return; 1713fa225cbcSrjs i = 0; 1714fa225cbcSrjs /* rect (x2,y2) */ 1715fa225cbcSrjs vb[i++] = (float)(dstX + w); 1716fa225cbcSrjs vb[i++] = (float)(dstY + h); 1717fa225cbcSrjs vb[i++] = src_x[2] / pI830->scale_units[0][0]; 1718fa225cbcSrjs vb[i++] = src_y[2] / pI830->scale_units[0][1]; 1719fa225cbcSrjs if (!is_affine) 1720fa225cbcSrjs vb[i++] = src_w[2]; 1721fa225cbcSrjs if (has_mask) { 1722fa225cbcSrjs vb[i++] = mask_x[2] / pI830->scale_units[1][0]; 1723fa225cbcSrjs vb[i++] = mask_y[2] / pI830->scale_units[1][1]; 1724fa225cbcSrjs if (!is_affine) 1725fa225cbcSrjs vb[i++] = mask_w[2]; 1726fa225cbcSrjs } 1727fa225cbcSrjs 1728fa225cbcSrjs /* rect (x1,y2) */ 1729fa225cbcSrjs vb[i++] = (float)dstX; 1730fa225cbcSrjs vb[i++] = (float)(dstY + h); 1731fa225cbcSrjs vb[i++] = src_x[1] / pI830->scale_units[0][0]; 1732fa225cbcSrjs vb[i++] = src_y[1] / pI830->scale_units[0][1]; 1733fa225cbcSrjs if (!is_affine) 1734fa225cbcSrjs vb[i++] = src_w[1]; 1735fa225cbcSrjs if (has_mask) { 1736fa225cbcSrjs vb[i++] = mask_x[1] / pI830->scale_units[1][0]; 1737fa225cbcSrjs vb[i++] = mask_y[1] / pI830->scale_units[1][1]; 1738fa225cbcSrjs if (!is_affine) 1739fa225cbcSrjs vb[i++] = mask_w[1]; 1740fa225cbcSrjs } 1741fa225cbcSrjs 1742fa225cbcSrjs /* rect (x1,y1) */ 1743fa225cbcSrjs vb[i++] = (float)dstX; 1744fa225cbcSrjs vb[i++] = (float)dstY; 1745fa225cbcSrjs vb[i++] = src_x[0] / pI830->scale_units[0][0]; 1746fa225cbcSrjs vb[i++] = src_y[0] / pI830->scale_units[0][1]; 1747fa225cbcSrjs if (!is_affine) 1748fa225cbcSrjs vb[i++] = src_w[0]; 1749fa225cbcSrjs if (has_mask) { 1750fa225cbcSrjs vb[i++] = mask_x[0] / 
pI830->scale_units[1][0]; 1751fa225cbcSrjs vb[i++] = mask_y[0] / pI830->scale_units[1][1]; 1752fa225cbcSrjs if (!is_affine) 1753fa225cbcSrjs vb[i++] = mask_w[0]; 1754fa225cbcSrjs } 1755fa225cbcSrjs assert (i <= VERTEX_BUFFER_SIZE); 1756fa225cbcSrjs drm_intel_bo_subdata(vb_bo, render_state->vb_offset * 4, i * 4, vb); 1757fa225cbcSrjs 1758fa225cbcSrjs if (!i965_composite_check_aperture(pScrn)) 1759fa225cbcSrjs intel_batch_flush(pScrn, FALSE); 1760fa225cbcSrjs 1761fa225cbcSrjs intel_batch_start_atomic(pScrn, 200); 1762fa225cbcSrjs if (render_state->needs_state_emit) 1763fa225cbcSrjs i965_emit_composite_state(pScrn); 1764fa225cbcSrjs 1765fa225cbcSrjs BEGIN_BATCH(12); 1766fa225cbcSrjs OUT_BATCH(MI_FLUSH); 1767fa225cbcSrjs /* Set up the pointer to our (single) vertex buffer */ 1768fa225cbcSrjs OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3); 1769fa225cbcSrjs OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) | 1770fa225cbcSrjs VB0_VERTEXDATA | 1771fa225cbcSrjs (render_state->vertex_size << VB0_BUFFER_PITCH_SHIFT)); 1772fa225cbcSrjs OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, render_state->vb_offset * 4); 1773fa225cbcSrjs 1774fa225cbcSrjs if (IS_IGDNG(pI830)) 1775fa225cbcSrjs OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, render_state->vb_offset * 4 + i * 4); 1776fa225cbcSrjs else 1777fa225cbcSrjs OUT_BATCH(3); 1778fa225cbcSrjs 1779fa225cbcSrjs OUT_BATCH(0); // ignore for VERTEXDATA, but still there 1780fa225cbcSrjs 1781fa225cbcSrjs OUT_BATCH(BRW_3DPRIMITIVE | 1782fa225cbcSrjs BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | 1783fa225cbcSrjs (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | 1784fa225cbcSrjs (0 << 9) | /* CTG - indirect vertex count */ 1785fa225cbcSrjs 4); 1786fa225cbcSrjs OUT_BATCH(3); /* vertex count per instance */ 1787fa225cbcSrjs OUT_BATCH(0); /* start vertex offset */ 1788fa225cbcSrjs OUT_BATCH(1); /* single instance */ 1789fa225cbcSrjs OUT_BATCH(0); /* start instance location */ 1790fa225cbcSrjs OUT_BATCH(0); /* index buffer offset, ignored */ 1791fa225cbcSrjs ADVANCE_BATCH(); 
1792fa225cbcSrjs 1793fa225cbcSrjs render_state->vb_offset += i; 1794fa225cbcSrjs drm_intel_bo_unreference(vb_bo); 1795fa225cbcSrjs 1796fa225cbcSrjs intel_batch_end_atomic(pScrn); 1797fa225cbcSrjs 1798fa225cbcSrjs i830_debug_sync(pScrn); 1799fa225cbcSrjs} 1800fa225cbcSrjs 1801fa225cbcSrjsvoid 1802fa225cbcSrjsi965_batch_flush_notify(ScrnInfoPtr pScrn) 1803fa225cbcSrjs{ 1804fa225cbcSrjs I830Ptr pI830 = I830PTR(pScrn); 1805fa225cbcSrjs struct gen4_render_state *render_state = pI830->gen4_render_state; 1806fa225cbcSrjs 1807fa225cbcSrjs /* Once a batch is emitted, we never want to map again any buffer 1808fa225cbcSrjs * object being referenced by that batch, (which would be very 1809fa225cbcSrjs * expensive). */ 1810fa225cbcSrjs if (render_state->vertex_buffer_bo) { 1811fa225cbcSrjs dri_bo_unreference (render_state->vertex_buffer_bo); 1812fa225cbcSrjs render_state->vertex_buffer_bo = NULL; 1813fa225cbcSrjs } 1814fa225cbcSrjs 1815fa225cbcSrjs render_state->needs_state_emit = TRUE; 1816fa225cbcSrjs} 1817fa225cbcSrjs 1818fa225cbcSrjs/** 1819fa225cbcSrjs * Called at EnterVT so we can set up our offsets into the state buffer. 
1820fa225cbcSrjs */ 1821fa225cbcSrjsvoid 1822fa225cbcSrjsgen4_render_state_init(ScrnInfoPtr pScrn) 1823fa225cbcSrjs{ 1824fa225cbcSrjs I830Ptr pI830 = I830PTR(pScrn); 1825fa225cbcSrjs struct gen4_render_state *render_state; 1826fa225cbcSrjs int i, j, k, l, m; 1827fa225cbcSrjs drm_intel_bo *sf_kernel_bo, *sf_kernel_mask_bo; 1828fa225cbcSrjs drm_intel_bo *border_color_bo; 1829fa225cbcSrjs 1830fa225cbcSrjs if (pI830->gen4_render_state == NULL) 1831fa225cbcSrjs pI830->gen4_render_state = calloc(sizeof(*render_state), 1); 1832fa225cbcSrjs 1833fa225cbcSrjs render_state = pI830->gen4_render_state; 1834fa225cbcSrjs render_state->vb_offset = 0; 1835fa225cbcSrjs 1836fa225cbcSrjs render_state->vs_state_bo = gen4_create_vs_unit_state(pScrn); 1837fa225cbcSrjs 1838fa225cbcSrjs /* Set up the two SF states (one for blending with a mask, one without) */ 1839fa225cbcSrjs if (IS_IGDNG(pI830)) { 1840fa225cbcSrjs sf_kernel_bo = intel_bo_alloc_for_data(pScrn, 1841fa225cbcSrjs sf_kernel_static_gen5, 1842fa225cbcSrjs sizeof(sf_kernel_static_gen5), 1843fa225cbcSrjs "sf kernel gen5"); 1844fa225cbcSrjs sf_kernel_mask_bo = intel_bo_alloc_for_data(pScrn, 1845fa225cbcSrjs sf_kernel_mask_static_gen5, 1846fa225cbcSrjs sizeof(sf_kernel_mask_static_gen5), 1847fa225cbcSrjs "sf mask kernel"); 1848fa225cbcSrjs } else { 1849fa225cbcSrjs sf_kernel_bo = intel_bo_alloc_for_data(pScrn, 1850fa225cbcSrjs sf_kernel_static, 1851fa225cbcSrjs sizeof(sf_kernel_static), 1852fa225cbcSrjs "sf kernel"); 1853fa225cbcSrjs sf_kernel_mask_bo = intel_bo_alloc_for_data(pScrn, 1854fa225cbcSrjs sf_kernel_mask_static, 1855fa225cbcSrjs sizeof(sf_kernel_mask_static), 1856fa225cbcSrjs "sf mask kernel"); 1857fa225cbcSrjs } 1858fa225cbcSrjs render_state->sf_state_bo = gen4_create_sf_state(pScrn, sf_kernel_bo); 1859fa225cbcSrjs render_state->sf_mask_state_bo = gen4_create_sf_state(pScrn, 1860fa225cbcSrjs sf_kernel_mask_bo); 1861fa225cbcSrjs drm_intel_bo_unreference(sf_kernel_bo); 1862fa225cbcSrjs 
drm_intel_bo_unreference(sf_kernel_mask_bo); 1863fa225cbcSrjs 1864fa225cbcSrjs for (m = 0; m < WM_KERNEL_COUNT; m++) { 1865fa225cbcSrjs if (IS_IGDNG(pI830)) 1866fa225cbcSrjs render_state->wm_kernel_bo[m] = 1867fa225cbcSrjs intel_bo_alloc_for_data(pScrn, 1868fa225cbcSrjs wm_kernels_gen5[m].data, wm_kernels_gen5[m].size, 1869fa225cbcSrjs "WM kernel gen5"); 1870fa225cbcSrjs else 1871fa225cbcSrjs render_state->wm_kernel_bo[m] = 1872fa225cbcSrjs intel_bo_alloc_for_data(pScrn, 1873fa225cbcSrjs wm_kernels[m].data, wm_kernels[m].size, 1874fa225cbcSrjs "WM kernel"); 1875fa225cbcSrjs } 1876fa225cbcSrjs 1877fa225cbcSrjs /* Set up the WM states: each filter/extend type for source and mask, per 1878fa225cbcSrjs * kernel. 1879fa225cbcSrjs */ 1880fa225cbcSrjs border_color_bo = sampler_border_color_create(pScrn); 1881fa225cbcSrjs for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++) { 1882fa225cbcSrjs for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++) { 1883fa225cbcSrjs for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) { 1884fa225cbcSrjs for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++) { 1885fa225cbcSrjs drm_intel_bo *sampler_state_bo; 1886fa225cbcSrjs 1887fa225cbcSrjs sampler_state_bo = 1888fa225cbcSrjs gen4_create_sampler_state(pScrn, 1889fa225cbcSrjs i, j, 1890fa225cbcSrjs k, l, 1891fa225cbcSrjs border_color_bo); 1892fa225cbcSrjs 1893fa225cbcSrjs for (m = 0; m < WM_KERNEL_COUNT; m++) { 1894fa225cbcSrjs if (IS_IGDNG(pI830)) 1895fa225cbcSrjs render_state->wm_state_bo[m][i][j][k][l] = 1896fa225cbcSrjs gen4_create_wm_state(pScrn, 1897fa225cbcSrjs wm_kernels_gen5[m].has_mask, 1898fa225cbcSrjs render_state->wm_kernel_bo[m], 1899fa225cbcSrjs sampler_state_bo); 1900fa225cbcSrjs else 1901fa225cbcSrjs render_state->wm_state_bo[m][i][j][k][l] = 1902fa225cbcSrjs gen4_create_wm_state(pScrn, 1903fa225cbcSrjs wm_kernels[m].has_mask, 1904fa225cbcSrjs render_state->wm_kernel_bo[m], 1905fa225cbcSrjs sampler_state_bo); 1906fa225cbcSrjs } 1907fa225cbcSrjs drm_intel_bo_unreference(sampler_state_bo); 
1908fa225cbcSrjs } 1909fa225cbcSrjs } 1910fa225cbcSrjs } 1911fa225cbcSrjs } 1912fa225cbcSrjs drm_intel_bo_unreference(border_color_bo); 1913fa225cbcSrjs 1914fa225cbcSrjs render_state->cc_state_bo = gen4_create_cc_unit_state(pScrn); 1915fa225cbcSrjs render_state->sip_kernel_bo = intel_bo_alloc_for_data(pScrn, 1916fa225cbcSrjs sip_kernel_static, 1917fa225cbcSrjs sizeof(sip_kernel_static), 1918fa225cbcSrjs "sip kernel"); 1919fa225cbcSrjs} 1920fa225cbcSrjs 1921fa225cbcSrjs/** 1922fa225cbcSrjs * Called at LeaveVT. 1923fa225cbcSrjs */ 1924fa225cbcSrjsvoid 1925fa225cbcSrjsgen4_render_state_cleanup(ScrnInfoPtr pScrn) 1926fa225cbcSrjs{ 1927fa225cbcSrjs I830Ptr pI830 = I830PTR(pScrn); 1928fa225cbcSrjs struct gen4_render_state *render_state= pI830->gen4_render_state; 1929fa225cbcSrjs int i, j, k, l, m; 1930fa225cbcSrjs gen4_composite_op *composite_op = &render_state->composite_op; 1931fa225cbcSrjs 1932fa225cbcSrjs drm_intel_bo_unreference(composite_op->binding_table_bo); 1933fa225cbcSrjs drm_intel_bo_unreference(render_state->vertex_buffer_bo); 1934fa225cbcSrjs 1935fa225cbcSrjs drm_intel_bo_unreference(render_state->vs_state_bo); 1936fa225cbcSrjs drm_intel_bo_unreference(render_state->sf_state_bo); 1937fa225cbcSrjs drm_intel_bo_unreference(render_state->sf_mask_state_bo); 1938fa225cbcSrjs 1939fa225cbcSrjs for (i = 0; i < WM_KERNEL_COUNT; i++) 1940fa225cbcSrjs drm_intel_bo_unreference(render_state->wm_kernel_bo[i]); 1941fa225cbcSrjs 1942fa225cbcSrjs for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++) 1943fa225cbcSrjs for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++) 1944fa225cbcSrjs for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) 1945fa225cbcSrjs for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++) 1946fa225cbcSrjs for (m = 0; m < WM_KERNEL_COUNT; m++) 1947fa225cbcSrjs drm_intel_bo_unreference(render_state->wm_state_bo[m][i][j][k][l]); 1948fa225cbcSrjs 1949fa225cbcSrjs drm_intel_bo_unreference(render_state->cc_state_bo); 1950fa225cbcSrjs 
drm_intel_bo_unreference(render_state->sip_kernel_bo); 1951fa225cbcSrjs 1952fa225cbcSrjs free(pI830->gen4_render_state); 1953fa225cbcSrjs pI830->gen4_render_state = NULL; 1954fa225cbcSrjs} 1955