1428d7b3dSmrg/* 2428d7b3dSmrg * Copyright © 2006,2011 Intel Corporation 3428d7b3dSmrg * 4428d7b3dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 5428d7b3dSmrg * copy of this software and associated documentation files (the "Software"), 6428d7b3dSmrg * to deal in the Software without restriction, including without limitation 7428d7b3dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8428d7b3dSmrg * and/or sell copies of the Software, and to permit persons to whom the 9428d7b3dSmrg * Software is furnished to do so, subject to the following conditions: 10428d7b3dSmrg * 11428d7b3dSmrg * The above copyright notice and this permission notice (including the next 12428d7b3dSmrg * paragraph) shall be included in all copies or substantial portions of the 13428d7b3dSmrg * Software. 14428d7b3dSmrg * 15428d7b3dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16428d7b3dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17428d7b3dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18428d7b3dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19428d7b3dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20428d7b3dSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21428d7b3dSmrg * SOFTWARE. 
22428d7b3dSmrg * 23428d7b3dSmrg * Authors: 24428d7b3dSmrg * Wang Zhenyu <zhenyu.z.wang@intel.com> 25428d7b3dSmrg * Eric Anholt <eric@anholt.net> 26428d7b3dSmrg * Chris Wilson <chris@chris-wilson.co.uk> 27428d7b3dSmrg * 28428d7b3dSmrg */ 29428d7b3dSmrg 30428d7b3dSmrg#ifdef HAVE_CONFIG_H 31428d7b3dSmrg#include "config.h" 32428d7b3dSmrg#endif 33428d7b3dSmrg 34428d7b3dSmrg#include "sna.h" 35428d7b3dSmrg#include "sna_reg.h" 36428d7b3dSmrg#include "sna_render.h" 37428d7b3dSmrg#include "sna_render_inline.h" 38428d7b3dSmrg 39428d7b3dSmrg#include "gen2_render.h" 40428d7b3dSmrg 41428d7b3dSmrg#define NO_COMPOSITE 0 42428d7b3dSmrg#define NO_COMPOSITE_SPANS 0 43428d7b3dSmrg#define NO_COPY 0 44428d7b3dSmrg#define NO_COPY_BOXES 0 45428d7b3dSmrg#define NO_FILL 0 46428d7b3dSmrg#define NO_FILL_ONE 0 47428d7b3dSmrg#define NO_FILL_BOXES 0 48428d7b3dSmrg 49428d7b3dSmrg#define MAX_3D_SIZE 2048 50428d7b3dSmrg#define MAX_3D_PITCH 8192 51428d7b3dSmrg 52428d7b3dSmrg#define BATCH(v) batch_emit(sna, v) 53428d7b3dSmrg#define BATCH_F(v) batch_emit_float(sna, v) 54428d7b3dSmrg#define VERTEX(v) batch_emit_float(sna, v) 55428d7b3dSmrg 56428d7b3dSmrgstatic const struct blendinfo { 57428d7b3dSmrg bool dst_alpha; 58428d7b3dSmrg bool src_alpha; 59428d7b3dSmrg uint32_t src_blend; 60428d7b3dSmrg uint32_t dst_blend; 61428d7b3dSmrg} gen2_blend_op[] = { 62428d7b3dSmrg /* Clear */ 63428d7b3dSmrg {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO}, 64428d7b3dSmrg /* Src */ 65428d7b3dSmrg {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO}, 66428d7b3dSmrg /* Dst */ 67428d7b3dSmrg {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE}, 68428d7b3dSmrg /* Over */ 69428d7b3dSmrg {0, 1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA}, 70428d7b3dSmrg /* OverReverse */ 71428d7b3dSmrg {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE}, 72428d7b3dSmrg /* In */ 73428d7b3dSmrg {1, 0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO}, 74428d7b3dSmrg /* InReverse */ 75428d7b3dSmrg {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA}, 76428d7b3dSmrg /* Out */ 
77428d7b3dSmrg {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO}, 78428d7b3dSmrg /* OutReverse */ 79428d7b3dSmrg {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA}, 80428d7b3dSmrg /* Atop */ 81428d7b3dSmrg {1, 1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, 82428d7b3dSmrg /* AtopReverse */ 83428d7b3dSmrg {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA}, 84428d7b3dSmrg /* Xor */ 85428d7b3dSmrg {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, 86428d7b3dSmrg /* Add */ 87428d7b3dSmrg {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ONE}, 88428d7b3dSmrg}; 89428d7b3dSmrg 90428d7b3dSmrgstatic const struct formatinfo { 91428d7b3dSmrg unsigned int fmt; 92428d7b3dSmrg uint32_t card_fmt; 93428d7b3dSmrg} i8xx_tex_formats[] = { 94428d7b3dSmrg {PICT_a8, MAPSURF_8BIT | MT_8BIT_A8}, 95428d7b3dSmrg {PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888}, 96428d7b3dSmrg {PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888}, 97428d7b3dSmrg {PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565}, 98428d7b3dSmrg {PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555}, 99428d7b3dSmrg {PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444}, 100428d7b3dSmrg}, i85x_tex_formats[] = { 101428d7b3dSmrg {PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888}, 102428d7b3dSmrg {PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888}, 103428d7b3dSmrg}; 104428d7b3dSmrg 105428d7b3dSmrgstatic inline bool 106428d7b3dSmrgtoo_large(int width, int height) 107428d7b3dSmrg{ 108428d7b3dSmrg return width > MAX_3D_SIZE || height > MAX_3D_SIZE; 109428d7b3dSmrg} 110428d7b3dSmrg 111428d7b3dSmrgstatic inline uint32_t 112428d7b3dSmrggen2_buf_tiling(uint32_t tiling) 113428d7b3dSmrg{ 114428d7b3dSmrg uint32_t v = 0; 115428d7b3dSmrg switch (tiling) { 116428d7b3dSmrg default: assert(0); 117428d7b3dSmrg case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y; 118428d7b3dSmrg case I915_TILING_X: v |= BUF_3D_TILED_SURFACE; 119428d7b3dSmrg case I915_TILING_NONE: break; 120428d7b3dSmrg } 121428d7b3dSmrg return v; 122428d7b3dSmrg} 123428d7b3dSmrg 
124428d7b3dSmrgstatic uint32_t 125428d7b3dSmrggen2_get_dst_format(uint32_t format) 126428d7b3dSmrg{ 127428d7b3dSmrg#define BIAS DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8) 128428d7b3dSmrg switch (format) { 129428d7b3dSmrg default: 130428d7b3dSmrg assert(0); 131428d7b3dSmrg case PICT_a8r8g8b8: 132428d7b3dSmrg case PICT_x8r8g8b8: 133428d7b3dSmrg return COLR_BUF_ARGB8888 | BIAS; 134428d7b3dSmrg case PICT_r5g6b5: 135428d7b3dSmrg return COLR_BUF_RGB565 | BIAS; 136428d7b3dSmrg case PICT_a1r5g5b5: 137428d7b3dSmrg case PICT_x1r5g5b5: 138428d7b3dSmrg return COLR_BUF_ARGB1555 | BIAS; 139428d7b3dSmrg case PICT_a8: 140428d7b3dSmrg return COLR_BUF_8BIT | BIAS; 141428d7b3dSmrg case PICT_a4r4g4b4: 142428d7b3dSmrg case PICT_x4r4g4b4: 143428d7b3dSmrg return COLR_BUF_ARGB4444 | BIAS; 144428d7b3dSmrg } 145428d7b3dSmrg#undef BIAS 146428d7b3dSmrg} 147428d7b3dSmrg 148428d7b3dSmrgstatic bool 149428d7b3dSmrggen2_check_dst_format(uint32_t format) 150428d7b3dSmrg{ 151428d7b3dSmrg switch (format) { 152428d7b3dSmrg case PICT_a8r8g8b8: 153428d7b3dSmrg case PICT_x8r8g8b8: 154428d7b3dSmrg case PICT_r5g6b5: 155428d7b3dSmrg case PICT_a1r5g5b5: 156428d7b3dSmrg case PICT_x1r5g5b5: 157428d7b3dSmrg case PICT_a8: 158428d7b3dSmrg case PICT_a4r4g4b4: 159428d7b3dSmrg case PICT_x4r4g4b4: 160428d7b3dSmrg return true; 161428d7b3dSmrg default: 162428d7b3dSmrg return false; 163428d7b3dSmrg } 164428d7b3dSmrg} 165428d7b3dSmrg 166428d7b3dSmrgstatic uint32_t 167428d7b3dSmrggen2_get_card_format(struct sna *sna, uint32_t format) 168428d7b3dSmrg{ 169428d7b3dSmrg unsigned int i; 170428d7b3dSmrg 171428d7b3dSmrg for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) 172428d7b3dSmrg if (i8xx_tex_formats[i].fmt == format) 173428d7b3dSmrg return i8xx_tex_formats[i].card_fmt; 174428d7b3dSmrg 175428d7b3dSmrg if (sna->kgem.gen < 021) { 176428d7b3dSmrg /* Whilst these are not directly supported on 830/845, 177428d7b3dSmrg * we only enable them when we can implicitly convert 178428d7b3dSmrg * them to a supported variant through 
the texture 179428d7b3dSmrg * combiners. 180428d7b3dSmrg */ 181428d7b3dSmrg for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) 182428d7b3dSmrg if (i85x_tex_formats[i].fmt == format) 183428d7b3dSmrg return i8xx_tex_formats[1+i].card_fmt; 184428d7b3dSmrg } else { 185428d7b3dSmrg for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) 186428d7b3dSmrg if (i85x_tex_formats[i].fmt == format) 187428d7b3dSmrg return i85x_tex_formats[i].card_fmt; 188428d7b3dSmrg } 189428d7b3dSmrg 190428d7b3dSmrg assert(0); 191428d7b3dSmrg return 0; 192428d7b3dSmrg} 193428d7b3dSmrg 194428d7b3dSmrgstatic uint32_t 195428d7b3dSmrggen2_check_format(struct sna *sna, PicturePtr p) 196428d7b3dSmrg{ 197428d7b3dSmrg unsigned int i; 198428d7b3dSmrg 199428d7b3dSmrg for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) 200428d7b3dSmrg if (i8xx_tex_formats[i].fmt == p->format) 201428d7b3dSmrg return true; 202428d7b3dSmrg 203428d7b3dSmrg if (sna->kgem.gen > 021) { 204428d7b3dSmrg for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) 205428d7b3dSmrg if (i85x_tex_formats[i].fmt == p->format) 206428d7b3dSmrg return true; 207428d7b3dSmrg } 208428d7b3dSmrg 209428d7b3dSmrg return false; 210428d7b3dSmrg} 211428d7b3dSmrg 212428d7b3dSmrgstatic uint32_t 213428d7b3dSmrggen2_sampler_tiling_bits(uint32_t tiling) 214428d7b3dSmrg{ 215428d7b3dSmrg uint32_t bits = 0; 216428d7b3dSmrg switch (tiling) { 217428d7b3dSmrg default: 218428d7b3dSmrg assert(0); 219428d7b3dSmrg case I915_TILING_Y: 220428d7b3dSmrg bits |= TM0S1_TILE_WALK; 221428d7b3dSmrg case I915_TILING_X: 222428d7b3dSmrg bits |= TM0S1_TILED_SURFACE; 223428d7b3dSmrg case I915_TILING_NONE: 224428d7b3dSmrg break; 225428d7b3dSmrg } 226428d7b3dSmrg return bits; 227428d7b3dSmrg} 228428d7b3dSmrg 229428d7b3dSmrgstatic bool 230428d7b3dSmrggen2_check_filter(PicturePtr picture) 231428d7b3dSmrg{ 232428d7b3dSmrg switch (picture->filter) { 233428d7b3dSmrg case PictFilterNearest: 234428d7b3dSmrg case PictFilterBilinear: 235428d7b3dSmrg return true; 236428d7b3dSmrg default: 237428d7b3dSmrg 
return false; 238428d7b3dSmrg } 239428d7b3dSmrg} 240428d7b3dSmrg 241428d7b3dSmrgstatic bool 242428d7b3dSmrggen2_check_repeat(PicturePtr picture) 243428d7b3dSmrg{ 244428d7b3dSmrg if (!picture->repeat) 245428d7b3dSmrg return true; 246428d7b3dSmrg 247428d7b3dSmrg switch (picture->repeatType) { 248428d7b3dSmrg case RepeatNone: 249428d7b3dSmrg case RepeatNormal: 250428d7b3dSmrg case RepeatPad: 251428d7b3dSmrg case RepeatReflect: 252428d7b3dSmrg return true; 253428d7b3dSmrg default: 254428d7b3dSmrg return false; 255428d7b3dSmrg } 256428d7b3dSmrg} 257428d7b3dSmrg 258428d7b3dSmrgstatic void 259428d7b3dSmrggen2_emit_texture(struct sna *sna, 260428d7b3dSmrg const struct sna_composite_channel *channel, 261428d7b3dSmrg int unit) 262428d7b3dSmrg{ 263428d7b3dSmrg uint32_t wrap_mode_u, wrap_mode_v; 264428d7b3dSmrg uint32_t texcoordtype; 265428d7b3dSmrg uint32_t filter; 266428d7b3dSmrg 267428d7b3dSmrg assert(channel->bo); 268428d7b3dSmrg 269428d7b3dSmrg if (channel->is_affine) 270428d7b3dSmrg texcoordtype = TEXCOORDTYPE_CARTESIAN; 271428d7b3dSmrg else 272428d7b3dSmrg texcoordtype = TEXCOORDTYPE_HOMOGENEOUS; 273428d7b3dSmrg 274428d7b3dSmrg switch (channel->repeat) { 275428d7b3dSmrg default: 276428d7b3dSmrg assert(0); 277428d7b3dSmrg case RepeatNone: 278428d7b3dSmrg wrap_mode_u = TEXCOORDMODE_CLAMP_BORDER; 279428d7b3dSmrg break; 280428d7b3dSmrg case RepeatNormal: 281428d7b3dSmrg wrap_mode_u = TEXCOORDMODE_WRAP; 282428d7b3dSmrg break; 283428d7b3dSmrg case RepeatPad: 284428d7b3dSmrg wrap_mode_u = TEXCOORDMODE_CLAMP; 285428d7b3dSmrg break; 286428d7b3dSmrg case RepeatReflect: 287428d7b3dSmrg wrap_mode_u = TEXCOORDMODE_MIRROR; 288428d7b3dSmrg break; 289428d7b3dSmrg } 290428d7b3dSmrg if (channel->is_linear) 291428d7b3dSmrg wrap_mode_v = TEXCOORDMODE_WRAP; 292428d7b3dSmrg else 293428d7b3dSmrg wrap_mode_v = wrap_mode_u; 294428d7b3dSmrg 295428d7b3dSmrg switch (channel->filter) { 296428d7b3dSmrg default: 297428d7b3dSmrg assert(0); 298428d7b3dSmrg case PictFilterNearest: 299428d7b3dSmrg filter 
= (FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT | 300428d7b3dSmrg FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT | 301428d7b3dSmrg MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT); 302428d7b3dSmrg break; 303428d7b3dSmrg case PictFilterBilinear: 304428d7b3dSmrg filter = (FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT | 305428d7b3dSmrg FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT | 306428d7b3dSmrg MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT); 307428d7b3dSmrg break; 308428d7b3dSmrg } 309428d7b3dSmrg 310428d7b3dSmrg BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | LOAD_TEXTURE_MAP(unit) | 4); 311428d7b3dSmrg BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, 312428d7b3dSmrg channel->bo, 313428d7b3dSmrg I915_GEM_DOMAIN_SAMPLER << 16, 314428d7b3dSmrg 0)); 315428d7b3dSmrg BATCH(((channel->height - 1) << TM0S1_HEIGHT_SHIFT) | 316428d7b3dSmrg ((channel->width - 1) << TM0S1_WIDTH_SHIFT) | 317428d7b3dSmrg gen2_get_card_format(sna, channel->pict_format) | 318428d7b3dSmrg gen2_sampler_tiling_bits(channel->bo->tiling)); 319428d7b3dSmrg BATCH((channel->bo->pitch / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D); 320428d7b3dSmrg BATCH(filter); 321428d7b3dSmrg BATCH(0); /* default color */ 322428d7b3dSmrg 323428d7b3dSmrg BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(unit) | 324428d7b3dSmrg ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | texcoordtype | 325428d7b3dSmrg ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(wrap_mode_v) | 326428d7b3dSmrg ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode_u)); 327428d7b3dSmrg} 328428d7b3dSmrg 329428d7b3dSmrgstatic void 330428d7b3dSmrggen2_get_blend_factors(const struct sna_composite_op *op, 331428d7b3dSmrg int blend, 332428d7b3dSmrg uint32_t *c_out, 333428d7b3dSmrg uint32_t *a_out) 334428d7b3dSmrg{ 335428d7b3dSmrg uint32_t cblend, ablend; 336428d7b3dSmrg 337428d7b3dSmrg /* If component alpha is active in the mask and the blend operation 338428d7b3dSmrg * uses the source alpha, then we know we don't need the source 339428d7b3dSmrg * value (otherwise we would have hit a fallback earlier), so 
we 340428d7b3dSmrg * provide the source alpha (src.A * mask.X) as output color. 341428d7b3dSmrg * Conversely, if CA is set and we don't need the source alpha, then 342428d7b3dSmrg * we produce the source value (src.X * mask.X) and the source alpha 343428d7b3dSmrg * is unused.. Otherwise, we provide the non-CA source value 344428d7b3dSmrg * (src.X * mask.A). 345428d7b3dSmrg * 346428d7b3dSmrg * The PICT_FORMAT_RGB(pict) == 0 fixups are not needed on 855+'s a8 347428d7b3dSmrg * pictures, but we need to implement it for 830/845 and there's no 348428d7b3dSmrg * harm done in leaving it in. 349428d7b3dSmrg */ 350428d7b3dSmrg cblend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OUTPUT_WRITE_CURRENT; 351428d7b3dSmrg ablend = TB0A_RESULT_SCALE_1X | TB0A_OUTPUT_WRITE_CURRENT; 352428d7b3dSmrg 353428d7b3dSmrg /* Get the source picture's channels into TBx_ARG1 */ 354428d7b3dSmrg if ((op->has_component_alpha && gen2_blend_op[blend].src_alpha) || 355428d7b3dSmrg op->dst.format == PICT_a8) { 356428d7b3dSmrg /* Producing source alpha value, so the first set of channels 357428d7b3dSmrg * is src.A instead of src.X. We also do this if the destination 358428d7b3dSmrg * is a8, in which case src.G is what's written, and the other 359428d7b3dSmrg * channels are ignored. 
360428d7b3dSmrg */ 361428d7b3dSmrg if (op->src.is_opaque) { 362428d7b3dSmrg ablend |= TB0C_ARG1_SEL_ONE; 363428d7b3dSmrg cblend |= TB0C_ARG1_SEL_ONE; 364428d7b3dSmrg } else if (op->src.is_solid) { 365428d7b3dSmrg ablend |= TB0C_ARG1_SEL_DIFFUSE; 366428d7b3dSmrg cblend |= TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA; 367428d7b3dSmrg } else { 368428d7b3dSmrg ablend |= TB0C_ARG1_SEL_TEXEL0; 369428d7b3dSmrg cblend |= TB0C_ARG1_SEL_TEXEL0 | TB0C_ARG1_REPLICATE_ALPHA; 370428d7b3dSmrg } 371428d7b3dSmrg } else { 372428d7b3dSmrg if (op->src.is_solid) 373428d7b3dSmrg cblend |= TB0C_ARG1_SEL_DIFFUSE; 374428d7b3dSmrg else if (PICT_FORMAT_RGB(op->src.pict_format) != 0) 375428d7b3dSmrg cblend |= TB0C_ARG1_SEL_TEXEL0; 376428d7b3dSmrg else 377428d7b3dSmrg cblend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT; /* 0.0 */ 378428d7b3dSmrg 379428d7b3dSmrg if (op->src.is_opaque) 380428d7b3dSmrg ablend |= TB0A_ARG1_SEL_ONE; 381428d7b3dSmrg else if (op->src.is_solid) 382428d7b3dSmrg ablend |= TB0A_ARG1_SEL_DIFFUSE; 383428d7b3dSmrg else 384428d7b3dSmrg ablend |= TB0A_ARG1_SEL_TEXEL0; 385428d7b3dSmrg } 386428d7b3dSmrg 387428d7b3dSmrg if (op->mask.bo) { 388428d7b3dSmrg if (op->src.is_solid) { 389428d7b3dSmrg cblend |= TB0C_ARG2_SEL_TEXEL0; 390428d7b3dSmrg ablend |= TB0A_ARG2_SEL_TEXEL0; 391428d7b3dSmrg } else { 392428d7b3dSmrg cblend |= TB0C_ARG2_SEL_TEXEL1; 393428d7b3dSmrg ablend |= TB0A_ARG2_SEL_TEXEL1; 394428d7b3dSmrg } 395428d7b3dSmrg 396428d7b3dSmrg if (op->dst.format == PICT_a8 || !op->has_component_alpha) 397428d7b3dSmrg cblend |= TB0C_ARG2_REPLICATE_ALPHA; 398428d7b3dSmrg 399428d7b3dSmrg cblend |= TB0C_OP_MODULATE; 400428d7b3dSmrg ablend |= TB0A_OP_MODULATE; 401428d7b3dSmrg } else if (op->mask.is_solid) { 402428d7b3dSmrg cblend |= TB0C_ARG2_SEL_DIFFUSE; 403428d7b3dSmrg ablend |= TB0A_ARG2_SEL_DIFFUSE; 404428d7b3dSmrg 405428d7b3dSmrg if (op->dst.format == PICT_a8 || !op->has_component_alpha) 406428d7b3dSmrg cblend |= TB0C_ARG2_REPLICATE_ALPHA; 407428d7b3dSmrg 408428d7b3dSmrg cblend |= 
TB0C_OP_MODULATE; 409428d7b3dSmrg ablend |= TB0A_OP_MODULATE; 410428d7b3dSmrg } else { 411428d7b3dSmrg cblend |= TB0C_OP_ARG1; 412428d7b3dSmrg ablend |= TB0A_OP_ARG1; 413428d7b3dSmrg } 414428d7b3dSmrg 415428d7b3dSmrg *c_out = cblend; 416428d7b3dSmrg *a_out = ablend; 417428d7b3dSmrg} 418428d7b3dSmrg 419428d7b3dSmrgstatic uint32_t gen2_get_blend_cntl(int op, 420428d7b3dSmrg bool has_component_alpha, 421428d7b3dSmrg uint32_t dst_format) 422428d7b3dSmrg{ 423428d7b3dSmrg uint32_t sblend, dblend; 424428d7b3dSmrg 425428d7b3dSmrg if (op <= PictOpSrc) 426428d7b3dSmrg return S8_ENABLE_COLOR_BUFFER_WRITE; 427428d7b3dSmrg 428428d7b3dSmrg sblend = gen2_blend_op[op].src_blend; 429428d7b3dSmrg dblend = gen2_blend_op[op].dst_blend; 430428d7b3dSmrg 431428d7b3dSmrg if (gen2_blend_op[op].dst_alpha) { 432428d7b3dSmrg /* If there's no dst alpha channel, adjust the blend op so that 433428d7b3dSmrg * we'll treat it as always 1. 434428d7b3dSmrg */ 435428d7b3dSmrg if (PICT_FORMAT_A(dst_format) == 0) { 436428d7b3dSmrg if (sblend == BLENDFACTOR_DST_ALPHA) 437428d7b3dSmrg sblend = BLENDFACTOR_ONE; 438428d7b3dSmrg else if (sblend == BLENDFACTOR_INV_DST_ALPHA) 439428d7b3dSmrg sblend = BLENDFACTOR_ZERO; 440428d7b3dSmrg } 441428d7b3dSmrg 442428d7b3dSmrg /* gen2 engine reads 8bit color buffer into green channel 443428d7b3dSmrg * in cases like color buffer blending etc., and also writes 444428d7b3dSmrg * back green channel. So with dst_alpha blend we should use 445428d7b3dSmrg * color factor. 
446428d7b3dSmrg */ 447428d7b3dSmrg if (dst_format == PICT_a8) { 448428d7b3dSmrg if (sblend == BLENDFACTOR_DST_ALPHA) 449428d7b3dSmrg sblend = BLENDFACTOR_DST_COLR; 450428d7b3dSmrg else if (sblend == BLENDFACTOR_INV_DST_ALPHA) 451428d7b3dSmrg sblend = BLENDFACTOR_INV_DST_COLR; 452428d7b3dSmrg } 453428d7b3dSmrg } 454428d7b3dSmrg 455428d7b3dSmrg /* If the source alpha is being used, then we should only be in a case 456428d7b3dSmrg * where the source blend factor is 0, and the source blend value is 457428d7b3dSmrg * the mask channels multiplied by the source picture's alpha. 458428d7b3dSmrg */ 459428d7b3dSmrg if (has_component_alpha && gen2_blend_op[op].src_alpha) { 460428d7b3dSmrg if (dblend == BLENDFACTOR_SRC_ALPHA) 461428d7b3dSmrg dblend = BLENDFACTOR_SRC_COLR; 462428d7b3dSmrg else if (dblend == BLENDFACTOR_INV_SRC_ALPHA) 463428d7b3dSmrg dblend = BLENDFACTOR_INV_SRC_COLR; 464428d7b3dSmrg } 465428d7b3dSmrg 466428d7b3dSmrg return (sblend << S8_SRC_BLEND_FACTOR_SHIFT | 467428d7b3dSmrg dblend << S8_DST_BLEND_FACTOR_SHIFT | 468428d7b3dSmrg S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD | 469428d7b3dSmrg S8_ENABLE_COLOR_BUFFER_WRITE); 470428d7b3dSmrg} 471428d7b3dSmrg 472428d7b3dSmrgstatic void gen2_emit_invariant(struct sna *sna) 473428d7b3dSmrg{ 474428d7b3dSmrg int i; 475428d7b3dSmrg 476428d7b3dSmrg for (i = 0; i < 4; i++) { 477428d7b3dSmrg BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(i)); 478428d7b3dSmrg BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | MAP_UNIT(i) | 479428d7b3dSmrg DISABLE_TEX_STREAM_BUMP | 480428d7b3dSmrg ENABLE_TEX_STREAM_COORD_SET | TEX_STREAM_COORD_SET(i) | 481428d7b3dSmrg ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(i)); 482428d7b3dSmrg BATCH(_3DSTATE_MAP_COORD_TRANSFORM); 483428d7b3dSmrg BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(i)); 484428d7b3dSmrg } 485428d7b3dSmrg 486428d7b3dSmrg BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD); 487428d7b3dSmrg BATCH(TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) | 488428d7b3dSmrg TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) | 489428d7b3dSmrg 
TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) | 490428d7b3dSmrg TEXBIND_SET0(TEXCOORDSRC_VTXSET_0)); 491428d7b3dSmrg 492428d7b3dSmrg BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); 493428d7b3dSmrg 494428d7b3dSmrg BATCH(_3DSTATE_VERTEX_TRANSFORM); 495428d7b3dSmrg BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE); 496428d7b3dSmrg 497428d7b3dSmrg BATCH(_3DSTATE_W_STATE_CMD); 498428d7b3dSmrg BATCH(MAGIC_W_STATE_DWORD1); 499428d7b3dSmrg BATCH_F(1.0); 500428d7b3dSmrg 501428d7b3dSmrg BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD | 502428d7b3dSmrg DISABLE_INDPT_ALPHA_BLEND | 503428d7b3dSmrg ENABLE_ALPHA_BLENDFUNC | ABLENDFUNC_ADD); 504428d7b3dSmrg 505428d7b3dSmrg BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD); 506428d7b3dSmrg BATCH(0); 507428d7b3dSmrg 508428d7b3dSmrg BATCH(_3DSTATE_MODES_1_CMD | 509428d7b3dSmrg ENABLE_COLR_BLND_FUNC | BLENDFUNC_ADD | 510428d7b3dSmrg ENABLE_SRC_BLND_FACTOR | SRC_BLND_FACT(BLENDFACTOR_ONE) | 511428d7b3dSmrg ENABLE_DST_BLND_FACTOR | DST_BLND_FACT(BLENDFACTOR_ZERO)); 512428d7b3dSmrg 513428d7b3dSmrg BATCH(_3DSTATE_ENABLES_1_CMD | 514428d7b3dSmrg DISABLE_LOGIC_OP | 515428d7b3dSmrg DISABLE_STENCIL_TEST | 516428d7b3dSmrg DISABLE_DEPTH_BIAS | 517428d7b3dSmrg DISABLE_SPEC_ADD | 518428d7b3dSmrg DISABLE_FOG | 519428d7b3dSmrg DISABLE_ALPHA_TEST | 520428d7b3dSmrg DISABLE_DEPTH_TEST | 521428d7b3dSmrg ENABLE_COLOR_BLEND); 522428d7b3dSmrg 523428d7b3dSmrg BATCH(_3DSTATE_ENABLES_2_CMD | 524428d7b3dSmrg DISABLE_STENCIL_WRITE | 525428d7b3dSmrg DISABLE_DITHER | 526428d7b3dSmrg DISABLE_DEPTH_WRITE | 527428d7b3dSmrg ENABLE_COLOR_MASK | 528428d7b3dSmrg ENABLE_COLOR_WRITE | 529428d7b3dSmrg ENABLE_TEX_CACHE); 530428d7b3dSmrg 531428d7b3dSmrg BATCH(_3DSTATE_STIPPLE); 532428d7b3dSmrg BATCH(0); 533428d7b3dSmrg 534428d7b3dSmrg BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) | 535428d7b3dSmrg TEXPIPE_COLOR | 536428d7b3dSmrg ENABLE_TEXOUTPUT_WRT_SEL | 537428d7b3dSmrg TEXOP_OUTPUT_CURRENT | 538428d7b3dSmrg DISABLE_TEX_CNTRL_STAGE | 539428d7b3dSmrg TEXOP_SCALE_1X | 540428d7b3dSmrg 
TEXOP_MODIFY_PARMS | TEXOP_LAST_STAGE | 541428d7b3dSmrg TEXBLENDOP_ARG1); 542428d7b3dSmrg BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) | 543428d7b3dSmrg TEXPIPE_ALPHA | 544428d7b3dSmrg ENABLE_TEXOUTPUT_WRT_SEL | 545428d7b3dSmrg TEXOP_OUTPUT_CURRENT | 546428d7b3dSmrg TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | 547428d7b3dSmrg TEXBLENDOP_ARG1); 548428d7b3dSmrg BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) | 549428d7b3dSmrg TEXPIPE_COLOR | 550428d7b3dSmrg TEXBLEND_ARG1 | 551428d7b3dSmrg TEXBLENDARG_MODIFY_PARMS | 552428d7b3dSmrg TEXBLENDARG_DIFFUSE); 553428d7b3dSmrg BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) | 554428d7b3dSmrg TEXPIPE_ALPHA | 555428d7b3dSmrg TEXBLEND_ARG1 | 556428d7b3dSmrg TEXBLENDARG_MODIFY_PARMS | 557428d7b3dSmrg TEXBLENDARG_DIFFUSE); 558428d7b3dSmrg 559428d7b3dSmrg#define INVARIANT_SIZE 35 560428d7b3dSmrg 561428d7b3dSmrg sna->render_state.gen2.need_invariant = false; 562428d7b3dSmrg} 563428d7b3dSmrg 564428d7b3dSmrgstatic void 565428d7b3dSmrggen2_get_batch(struct sna *sna, const struct sna_composite_op *op) 566428d7b3dSmrg{ 567428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); 568428d7b3dSmrg 569428d7b3dSmrg if (!kgem_check_batch(&sna->kgem, INVARIANT_SIZE+40)) { 570428d7b3dSmrg DBG(("%s: flushing batch: size %d > %d\n", 571428d7b3dSmrg __FUNCTION__, INVARIANT_SIZE+40, 572428d7b3dSmrg sna->kgem.surface-sna->kgem.nbatch)); 573428d7b3dSmrg kgem_submit(&sna->kgem); 574428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 575428d7b3dSmrg } 576428d7b3dSmrg 577428d7b3dSmrg if (!kgem_check_reloc(&sna->kgem, 3)) { 578428d7b3dSmrg DBG(("%s: flushing batch: reloc %d >= %d\n", 579428d7b3dSmrg __FUNCTION__, 580428d7b3dSmrg sna->kgem.nreloc + 3, 581428d7b3dSmrg (int)KGEM_RELOC_SIZE(&sna->kgem))); 582428d7b3dSmrg kgem_submit(&sna->kgem); 583428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 584428d7b3dSmrg } 585428d7b3dSmrg 586428d7b3dSmrg if (!kgem_check_exec(&sna->kgem, 3)) { 587428d7b3dSmrg DBG(("%s: flushing batch: exec %d >= %d\n", 588428d7b3dSmrg __FUNCTION__, 
589428d7b3dSmrg sna->kgem.nexec + 1, 590428d7b3dSmrg (int)KGEM_EXEC_SIZE(&sna->kgem))); 591428d7b3dSmrg kgem_submit(&sna->kgem); 592428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 593428d7b3dSmrg } 594428d7b3dSmrg 595428d7b3dSmrg if (sna->render_state.gen2.need_invariant) 596428d7b3dSmrg gen2_emit_invariant(sna); 597428d7b3dSmrg} 598428d7b3dSmrg 599428d7b3dSmrgstatic void gen2_emit_target(struct sna *sna, const struct sna_composite_op *op) 600428d7b3dSmrg{ 601428d7b3dSmrg assert(!too_large(op->dst.width, op->dst.height)); 602428d7b3dSmrg assert(op->dst.bo->pitch >= 8 && op->dst.bo->pitch <= MAX_3D_PITCH); 603428d7b3dSmrg assert(sna->render.vertex_offset == 0); 604428d7b3dSmrg 605428d7b3dSmrg assert(op->dst.bo->unique_id); 606428d7b3dSmrg if (sna->render_state.gen2.target == op->dst.bo->unique_id) { 607428d7b3dSmrg kgem_bo_mark_dirty(op->dst.bo); 608428d7b3dSmrg return; 609428d7b3dSmrg } 610428d7b3dSmrg 611428d7b3dSmrg BATCH(_3DSTATE_BUF_INFO_CMD); 612428d7b3dSmrg BATCH(BUF_3D_ID_COLOR_BACK | 613428d7b3dSmrg gen2_buf_tiling(op->dst.bo->tiling) | 614428d7b3dSmrg BUF_3D_PITCH(op->dst.bo->pitch)); 615428d7b3dSmrg BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, 616428d7b3dSmrg op->dst.bo, 617428d7b3dSmrg I915_GEM_DOMAIN_RENDER << 16 | 618428d7b3dSmrg I915_GEM_DOMAIN_RENDER, 619428d7b3dSmrg 0)); 620428d7b3dSmrg 621428d7b3dSmrg BATCH(_3DSTATE_DST_BUF_VARS_CMD); 622428d7b3dSmrg BATCH(gen2_get_dst_format(op->dst.format)); 623428d7b3dSmrg 624428d7b3dSmrg BATCH(_3DSTATE_DRAW_RECT_CMD); 625428d7b3dSmrg BATCH(0); 626428d7b3dSmrg BATCH(0); /* ymin, xmin */ 627428d7b3dSmrg BATCH(DRAW_YMAX(op->dst.height - 1) | 628428d7b3dSmrg DRAW_XMAX(op->dst.width - 1)); 629428d7b3dSmrg BATCH(0); /* yorig, xorig */ 630428d7b3dSmrg 631428d7b3dSmrg sna->render_state.gen2.target = op->dst.bo->unique_id; 632428d7b3dSmrg} 633428d7b3dSmrg 634428d7b3dSmrgstatic void gen2_disable_logic_op(struct sna *sna) 635428d7b3dSmrg{ 636428d7b3dSmrg if (!sna->render_state.gen2.logic_op_enabled) 
637428d7b3dSmrg return; 638428d7b3dSmrg 639428d7b3dSmrg DBG(("%s\n", __FUNCTION__)); 640428d7b3dSmrg 641428d7b3dSmrg BATCH(_3DSTATE_ENABLES_1_CMD | 642428d7b3dSmrg DISABLE_LOGIC_OP | ENABLE_COLOR_BLEND); 643428d7b3dSmrg 644428d7b3dSmrg sna->render_state.gen2.logic_op_enabled = 0; 645428d7b3dSmrg} 646428d7b3dSmrg 647428d7b3dSmrgstatic void gen2_enable_logic_op(struct sna *sna, int op) 648428d7b3dSmrg{ 649428d7b3dSmrg static const uint8_t logic_op[] = { 650428d7b3dSmrg LOGICOP_CLEAR, /* GXclear */ 651428d7b3dSmrg LOGICOP_AND, /* GXand */ 652428d7b3dSmrg LOGICOP_AND_RVRSE, /* GXandReverse */ 653428d7b3dSmrg LOGICOP_COPY, /* GXcopy */ 654428d7b3dSmrg LOGICOP_AND_INV, /* GXandInverted */ 655428d7b3dSmrg LOGICOP_NOOP, /* GXnoop */ 656428d7b3dSmrg LOGICOP_XOR, /* GXxor */ 657428d7b3dSmrg LOGICOP_OR, /* GXor */ 658428d7b3dSmrg LOGICOP_NOR, /* GXnor */ 659428d7b3dSmrg LOGICOP_EQUIV, /* GXequiv */ 660428d7b3dSmrg LOGICOP_INV, /* GXinvert */ 661428d7b3dSmrg LOGICOP_OR_RVRSE, /* GXorReverse */ 662428d7b3dSmrg LOGICOP_COPY_INV, /* GXcopyInverted */ 663428d7b3dSmrg LOGICOP_OR_INV, /* GXorInverted */ 664428d7b3dSmrg LOGICOP_NAND, /* GXnand */ 665428d7b3dSmrg LOGICOP_SET /* GXset */ 666428d7b3dSmrg }; 667428d7b3dSmrg 668428d7b3dSmrg if (sna->render_state.gen2.logic_op_enabled != op+1) { 669428d7b3dSmrg if (!sna->render_state.gen2.logic_op_enabled) { 670428d7b3dSmrg if (op == GXclear || op == GXcopy) 671428d7b3dSmrg return; 672428d7b3dSmrg 673428d7b3dSmrg DBG(("%s\n", __FUNCTION__)); 674428d7b3dSmrg 675428d7b3dSmrg BATCH(_3DSTATE_ENABLES_1_CMD | 676428d7b3dSmrg ENABLE_LOGIC_OP | DISABLE_COLOR_BLEND); 677428d7b3dSmrg } 678428d7b3dSmrg 679428d7b3dSmrg BATCH(_3DSTATE_MODES_4_CMD | 680428d7b3dSmrg ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(logic_op[op])); 681428d7b3dSmrg sna->render_state.gen2.logic_op_enabled = op+1; 682428d7b3dSmrg } 683428d7b3dSmrg} 684428d7b3dSmrg 685428d7b3dSmrgstatic void gen2_emit_composite_state(struct sna *sna, 686428d7b3dSmrg const struct sna_composite_op *op) 
687428d7b3dSmrg{ 688428d7b3dSmrg uint32_t texcoordfmt, v, unwind; 689428d7b3dSmrg uint32_t cblend, ablend; 690428d7b3dSmrg int tex; 691428d7b3dSmrg 692428d7b3dSmrg gen2_get_batch(sna, op); 693428d7b3dSmrg 694428d7b3dSmrg if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { 695428d7b3dSmrg if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo) 696428d7b3dSmrg BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE); 697428d7b3dSmrg else 698428d7b3dSmrg BATCH(_3DSTATE_MODES_5_CMD | 699428d7b3dSmrg PIPELINE_FLUSH_RENDER_CACHE | 700428d7b3dSmrg PIPELINE_FLUSH_TEXTURE_CACHE); 701428d7b3dSmrg kgem_clear_dirty(&sna->kgem); 702428d7b3dSmrg } 703428d7b3dSmrg 704428d7b3dSmrg gen2_emit_target(sna, op); 705428d7b3dSmrg 706428d7b3dSmrg unwind = sna->kgem.nbatch; 707428d7b3dSmrg BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 708428d7b3dSmrg I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); 709428d7b3dSmrg BATCH((!op->src.is_solid + (op->mask.bo != NULL)) << 12); 710428d7b3dSmrg BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); 711428d7b3dSmrg BATCH(gen2_get_blend_cntl(op->op, 712428d7b3dSmrg op->has_component_alpha, 713428d7b3dSmrg op->dst.format)); 714428d7b3dSmrg if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 715428d7b3dSmrg sna->kgem.batch + unwind + 1, 716428d7b3dSmrg 3 * sizeof(uint32_t)) == 0) 717428d7b3dSmrg sna->kgem.nbatch = unwind; 718428d7b3dSmrg else 719428d7b3dSmrg sna->render_state.gen2.ls1 = unwind; 720428d7b3dSmrg 721428d7b3dSmrg gen2_disable_logic_op(sna); 722428d7b3dSmrg 723428d7b3dSmrg gen2_get_blend_factors(op, op->op, &cblend, &ablend); 724428d7b3dSmrg unwind = sna->kgem.nbatch; 725428d7b3dSmrg BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 726428d7b3dSmrg LOAD_TEXTURE_BLEND_STAGE(0) | 1); 727428d7b3dSmrg BATCH(cblend); 728428d7b3dSmrg BATCH(ablend); 729428d7b3dSmrg if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1, 730428d7b3dSmrg sna->kgem.batch + unwind + 1, 731428d7b3dSmrg 2 * sizeof(uint32_t)) == 0) 732428d7b3dSmrg sna->kgem.nbatch = unwind; 
733428d7b3dSmrg else 734428d7b3dSmrg sna->render_state.gen2.ls2 = unwind; 735428d7b3dSmrg 736428d7b3dSmrg tex = texcoordfmt = 0; 737428d7b3dSmrg if (!op->src.is_solid) { 738428d7b3dSmrg if (op->src.is_affine) 739428d7b3dSmrg texcoordfmt |= TEXCOORDFMT_2D << (2*tex); 740428d7b3dSmrg else 741428d7b3dSmrg texcoordfmt |= TEXCOORDFMT_3D << (2*tex); 742428d7b3dSmrg gen2_emit_texture(sna, &op->src, tex++); 743428d7b3dSmrg } else { 744428d7b3dSmrg if (op->src.u.gen2.pixel != sna->render_state.gen2.diffuse) { 745428d7b3dSmrg BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); 746428d7b3dSmrg BATCH(op->src.u.gen2.pixel); 747428d7b3dSmrg sna->render_state.gen2.diffuse = op->src.u.gen2.pixel; 748428d7b3dSmrg } 749428d7b3dSmrg } 750428d7b3dSmrg if (op->mask.bo) { 751428d7b3dSmrg if (op->mask.is_affine) 752428d7b3dSmrg texcoordfmt |= TEXCOORDFMT_2D << (2*tex); 753428d7b3dSmrg else 754428d7b3dSmrg texcoordfmt |= TEXCOORDFMT_3D << (2*tex); 755428d7b3dSmrg gen2_emit_texture(sna, &op->mask, tex++); 756428d7b3dSmrg } else if (op->mask.is_solid) { 757428d7b3dSmrg if (op->mask.u.gen2.pixel != sna->render_state.gen2.diffuse) { 758428d7b3dSmrg BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); 759428d7b3dSmrg BATCH(op->mask.u.gen2.pixel); 760428d7b3dSmrg sna->render_state.gen2.diffuse = op->mask.u.gen2.pixel; 761428d7b3dSmrg } 762428d7b3dSmrg } 763428d7b3dSmrg 764428d7b3dSmrg v = _3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt; 765428d7b3dSmrg if (sna->render_state.gen2.vft != v) { 766428d7b3dSmrg BATCH(v); 767428d7b3dSmrg sna->render_state.gen2.vft = v; 768428d7b3dSmrg } 769428d7b3dSmrg} 770428d7b3dSmrg 771428d7b3dSmrgstatic inline void 772428d7b3dSmrggen2_emit_composite_dstcoord(struct sna *sna, int dstX, int dstY) 773428d7b3dSmrg{ 774428d7b3dSmrg VERTEX(dstX); 775428d7b3dSmrg VERTEX(dstY); 776428d7b3dSmrg} 777428d7b3dSmrg 778428d7b3dSmrginline static void 779428d7b3dSmrggen2_emit_composite_linear(struct sna *sna, 780428d7b3dSmrg const struct sna_composite_channel *channel, 781428d7b3dSmrg int16_t x, int16_t y) 
782428d7b3dSmrg{ 783428d7b3dSmrg float v; 784428d7b3dSmrg 785428d7b3dSmrg v = (x * channel->u.linear.dx + 786428d7b3dSmrg y * channel->u.linear.dy + 787428d7b3dSmrg channel->u.linear.offset); 788428d7b3dSmrg DBG(("%s: (%d, %d) -> %f\n", __FUNCTION__, x, y, v)); 789428d7b3dSmrg VERTEX(v); 790428d7b3dSmrg VERTEX(v); 791428d7b3dSmrg} 792428d7b3dSmrg 793428d7b3dSmrgstatic void 794428d7b3dSmrggen2_emit_composite_texcoord(struct sna *sna, 795428d7b3dSmrg const struct sna_composite_channel *channel, 796428d7b3dSmrg int16_t x, int16_t y) 797428d7b3dSmrg{ 798428d7b3dSmrg float s = 0, t = 0, w = 1; 799428d7b3dSmrg 800428d7b3dSmrg x += channel->offset[0]; 801428d7b3dSmrg y += channel->offset[1]; 802428d7b3dSmrg 803428d7b3dSmrg if (channel->is_affine) { 804428d7b3dSmrg sna_get_transformed_coordinates(x, y, 805428d7b3dSmrg channel->transform, 806428d7b3dSmrg &s, &t); 807428d7b3dSmrg VERTEX(s * channel->scale[0]); 808428d7b3dSmrg VERTEX(t * channel->scale[1]); 809428d7b3dSmrg } else { 810428d7b3dSmrg sna_get_transformed_coordinates_3d(x, y, 811428d7b3dSmrg channel->transform, 812428d7b3dSmrg &s, &t, &w); 813428d7b3dSmrg VERTEX(s * channel->scale[0]); 814428d7b3dSmrg VERTEX(t * channel->scale[1]); 815428d7b3dSmrg VERTEX(w); 816428d7b3dSmrg } 817428d7b3dSmrg} 818428d7b3dSmrg 819428d7b3dSmrgstatic void 820428d7b3dSmrggen2_emit_composite_vertex(struct sna *sna, 821428d7b3dSmrg const struct sna_composite_op *op, 822428d7b3dSmrg int16_t srcX, int16_t srcY, 823428d7b3dSmrg int16_t mskX, int16_t mskY, 824428d7b3dSmrg int16_t dstX, int16_t dstY) 825428d7b3dSmrg{ 826428d7b3dSmrg gen2_emit_composite_dstcoord(sna, dstX, dstY); 827428d7b3dSmrg if (op->src.is_linear) 828428d7b3dSmrg gen2_emit_composite_linear(sna, &op->src, srcX, srcY); 829428d7b3dSmrg else if (!op->src.is_solid) 830428d7b3dSmrg gen2_emit_composite_texcoord(sna, &op->src, srcX, srcY); 831428d7b3dSmrg 832428d7b3dSmrg if (op->mask.is_linear) 833428d7b3dSmrg gen2_emit_composite_linear(sna, &op->mask, mskX, mskY); 834428d7b3dSmrg 
else if (op->mask.bo) 835428d7b3dSmrg gen2_emit_composite_texcoord(sna, &op->mask, mskX, mskY); 836428d7b3dSmrg} 837428d7b3dSmrg 838428d7b3dSmrgfastcall static void 839428d7b3dSmrggen2_emit_composite_primitive(struct sna *sna, 840428d7b3dSmrg const struct sna_composite_op *op, 841428d7b3dSmrg const struct sna_composite_rectangles *r) 842428d7b3dSmrg{ 843428d7b3dSmrg gen2_emit_composite_vertex(sna, op, 844428d7b3dSmrg r->src.x + r->width, 845428d7b3dSmrg r->src.y + r->height, 846428d7b3dSmrg r->mask.x + r->width, 847428d7b3dSmrg r->mask.y + r->height, 848428d7b3dSmrg op->dst.x + r->dst.x + r->width, 849428d7b3dSmrg op->dst.y + r->dst.y + r->height); 850428d7b3dSmrg gen2_emit_composite_vertex(sna, op, 851428d7b3dSmrg r->src.x, 852428d7b3dSmrg r->src.y + r->height, 853428d7b3dSmrg r->mask.x, 854428d7b3dSmrg r->mask.y + r->height, 855428d7b3dSmrg op->dst.x + r->dst.x, 856428d7b3dSmrg op->dst.y + r->dst.y + r->height); 857428d7b3dSmrg gen2_emit_composite_vertex(sna, op, 858428d7b3dSmrg r->src.x, 859428d7b3dSmrg r->src.y, 860428d7b3dSmrg r->mask.x, 861428d7b3dSmrg r->mask.y, 862428d7b3dSmrg op->dst.x + r->dst.x, 863428d7b3dSmrg op->dst.y + r->dst.y); 864428d7b3dSmrg} 865428d7b3dSmrg 866428d7b3dSmrgfastcall static void 867428d7b3dSmrggen2_emit_composite_primitive_constant(struct sna *sna, 868428d7b3dSmrg const struct sna_composite_op *op, 869428d7b3dSmrg const struct sna_composite_rectangles *r) 870428d7b3dSmrg{ 871428d7b3dSmrg int16_t dst_x = r->dst.x + op->dst.x; 872428d7b3dSmrg int16_t dst_y = r->dst.y + op->dst.y; 873428d7b3dSmrg 874428d7b3dSmrg gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); 875428d7b3dSmrg gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); 876428d7b3dSmrg gen2_emit_composite_dstcoord(sna, dst_x, dst_y); 877428d7b3dSmrg} 878428d7b3dSmrg 879428d7b3dSmrgfastcall static void 880428d7b3dSmrggen2_emit_composite_primitive_linear(struct sna *sna, 881428d7b3dSmrg const struct sna_composite_op *op, 882428d7b3dSmrg const 
struct sna_composite_rectangles *r) 883428d7b3dSmrg{ 884428d7b3dSmrg int16_t dst_x = r->dst.x + op->dst.x; 885428d7b3dSmrg int16_t dst_y = r->dst.y + op->dst.y; 886428d7b3dSmrg 887428d7b3dSmrg gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); 888428d7b3dSmrg gen2_emit_composite_linear(sna, &op->src, 889428d7b3dSmrg r->src.x + r->width, r->src.y + r->height); 890428d7b3dSmrg 891428d7b3dSmrg gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); 892428d7b3dSmrg gen2_emit_composite_linear(sna, &op->src, 893428d7b3dSmrg r->src.x, r->src.y + r->height); 894428d7b3dSmrg 895428d7b3dSmrg gen2_emit_composite_dstcoord(sna, dst_x, dst_y); 896428d7b3dSmrg gen2_emit_composite_linear(sna, &op->src, 897428d7b3dSmrg r->src.x, r->src.y); 898428d7b3dSmrg} 899428d7b3dSmrg 900428d7b3dSmrgfastcall static void 901428d7b3dSmrggen2_emit_composite_primitive_identity(struct sna *sna, 902428d7b3dSmrg const struct sna_composite_op *op, 903428d7b3dSmrg const struct sna_composite_rectangles *r) 904428d7b3dSmrg{ 905428d7b3dSmrg float w = r->width; 906428d7b3dSmrg float h = r->height; 907428d7b3dSmrg float *v; 908428d7b3dSmrg 909428d7b3dSmrg v = (float *)sna->kgem.batch + sna->kgem.nbatch; 910428d7b3dSmrg sna->kgem.nbatch += 12; 911428d7b3dSmrg 912428d7b3dSmrg v[8] = v[4] = r->dst.x + op->dst.x; 913428d7b3dSmrg v[0] = v[4] + w; 914428d7b3dSmrg 915428d7b3dSmrg v[9] = r->dst.y + op->dst.y; 916428d7b3dSmrg v[5] = v[1] = v[9] + h; 917428d7b3dSmrg 918428d7b3dSmrg v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; 919428d7b3dSmrg v[2] = v[6] + w * op->src.scale[0]; 920428d7b3dSmrg 921428d7b3dSmrg v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; 922428d7b3dSmrg v[7] = v[3] = v[11] + h * op->src.scale[1]; 923428d7b3dSmrg} 924428d7b3dSmrg 925428d7b3dSmrgfastcall static void 926428d7b3dSmrggen2_emit_composite_primitive_affine(struct sna *sna, 927428d7b3dSmrg const struct sna_composite_op *op, 928428d7b3dSmrg const struct sna_composite_rectangles *r) 
929428d7b3dSmrg{ 930428d7b3dSmrg PictTransform *transform = op->src.transform; 931428d7b3dSmrg int src_x = r->src.x + (int)op->src.offset[0]; 932428d7b3dSmrg int src_y = r->src.y + (int)op->src.offset[1]; 933428d7b3dSmrg float *v; 934428d7b3dSmrg 935428d7b3dSmrg v = (float *)sna->kgem.batch + sna->kgem.nbatch; 936428d7b3dSmrg sna->kgem.nbatch += 12; 937428d7b3dSmrg 938428d7b3dSmrg v[8] = v[4] = r->dst.x + op->dst.x; 939428d7b3dSmrg v[0] = v[4] + r->width; 940428d7b3dSmrg 941428d7b3dSmrg v[9] = r->dst.y + op->dst.y; 942428d7b3dSmrg v[5] = v[1] = v[9] + r->height; 943428d7b3dSmrg 944428d7b3dSmrg _sna_get_transformed_scaled(src_x + r->width, src_y + r->height, 945428d7b3dSmrg transform, op->src.scale, 946428d7b3dSmrg &v[2], &v[3]); 947428d7b3dSmrg 948428d7b3dSmrg _sna_get_transformed_scaled(src_x, src_y + r->height, 949428d7b3dSmrg transform, op->src.scale, 950428d7b3dSmrg &v[6], &v[7]); 951428d7b3dSmrg 952428d7b3dSmrg _sna_get_transformed_scaled(src_x, src_y, 953428d7b3dSmrg transform, op->src.scale, 954428d7b3dSmrg &v[10], &v[11]); 955428d7b3dSmrg} 956428d7b3dSmrg 957428d7b3dSmrgfastcall static void 958428d7b3dSmrggen2_emit_composite_primitive_constant_identity_mask(struct sna *sna, 959428d7b3dSmrg const struct sna_composite_op *op, 960428d7b3dSmrg const struct sna_composite_rectangles *r) 961428d7b3dSmrg{ 962428d7b3dSmrg float w = r->width; 963428d7b3dSmrg float h = r->height; 964428d7b3dSmrg float *v; 965428d7b3dSmrg 966428d7b3dSmrg v = (float *)sna->kgem.batch + sna->kgem.nbatch; 967428d7b3dSmrg sna->kgem.nbatch += 12; 968428d7b3dSmrg 969428d7b3dSmrg v[8] = v[4] = r->dst.x + op->dst.x; 970428d7b3dSmrg v[0] = v[4] + w; 971428d7b3dSmrg 972428d7b3dSmrg v[9] = r->dst.y + op->dst.y; 973428d7b3dSmrg v[5] = v[1] = v[9] + h; 974428d7b3dSmrg 975428d7b3dSmrg v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0]; 976428d7b3dSmrg v[2] = v[6] + w * op->mask.scale[0]; 977428d7b3dSmrg 978428d7b3dSmrg v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1]; 
979428d7b3dSmrg v[7] = v[3] = v[11] + h * op->mask.scale[1]; 980428d7b3dSmrg} 981428d7b3dSmrg 982428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__) 983428d7b3dSmrgsse2 fastcall static void 984428d7b3dSmrggen2_emit_composite_primitive_constant__sse2(struct sna *sna, 985428d7b3dSmrg const struct sna_composite_op *op, 986428d7b3dSmrg const struct sna_composite_rectangles *r) 987428d7b3dSmrg{ 988428d7b3dSmrg int16_t dst_x = r->dst.x + op->dst.x; 989428d7b3dSmrg int16_t dst_y = r->dst.y + op->dst.y; 990428d7b3dSmrg 991428d7b3dSmrg gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); 992428d7b3dSmrg gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); 993428d7b3dSmrg gen2_emit_composite_dstcoord(sna, dst_x, dst_y); 994428d7b3dSmrg} 995428d7b3dSmrg 996428d7b3dSmrgsse2 fastcall static void 997428d7b3dSmrggen2_emit_composite_primitive_linear__sse2(struct sna *sna, 998428d7b3dSmrg const struct sna_composite_op *op, 999428d7b3dSmrg const struct sna_composite_rectangles *r) 1000428d7b3dSmrg{ 1001428d7b3dSmrg int16_t dst_x = r->dst.x + op->dst.x; 1002428d7b3dSmrg int16_t dst_y = r->dst.y + op->dst.y; 1003428d7b3dSmrg 1004428d7b3dSmrg gen2_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); 1005428d7b3dSmrg gen2_emit_composite_linear(sna, &op->src, 1006428d7b3dSmrg r->src.x + r->width, r->src.y + r->height); 1007428d7b3dSmrg 1008428d7b3dSmrg gen2_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); 1009428d7b3dSmrg gen2_emit_composite_linear(sna, &op->src, 1010428d7b3dSmrg r->src.x, r->src.y + r->height); 1011428d7b3dSmrg 1012428d7b3dSmrg gen2_emit_composite_dstcoord(sna, dst_x, dst_y); 1013428d7b3dSmrg gen2_emit_composite_linear(sna, &op->src, 1014428d7b3dSmrg r->src.x, r->src.y); 1015428d7b3dSmrg} 1016428d7b3dSmrg 1017428d7b3dSmrgsse2 fastcall static void 1018428d7b3dSmrggen2_emit_composite_primitive_identity__sse2(struct sna *sna, 1019428d7b3dSmrg const struct sna_composite_op *op, 1020428d7b3dSmrg const struct 
sna_composite_rectangles *r) 1021428d7b3dSmrg{ 1022428d7b3dSmrg float w = r->width; 1023428d7b3dSmrg float h = r->height; 1024428d7b3dSmrg float *v; 1025428d7b3dSmrg 1026428d7b3dSmrg v = (float *)sna->kgem.batch + sna->kgem.nbatch; 1027428d7b3dSmrg sna->kgem.nbatch += 12; 1028428d7b3dSmrg 1029428d7b3dSmrg v[8] = v[4] = r->dst.x + op->dst.x; 1030428d7b3dSmrg v[0] = v[4] + w; 1031428d7b3dSmrg 1032428d7b3dSmrg v[9] = r->dst.y + op->dst.y; 1033428d7b3dSmrg v[5] = v[1] = v[9] + h; 1034428d7b3dSmrg 1035428d7b3dSmrg v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; 1036428d7b3dSmrg v[2] = v[6] + w * op->src.scale[0]; 1037428d7b3dSmrg 1038428d7b3dSmrg v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; 1039428d7b3dSmrg v[7] = v[3] = v[11] + h * op->src.scale[1]; 1040428d7b3dSmrg} 1041428d7b3dSmrg 1042428d7b3dSmrgsse2 fastcall static void 1043428d7b3dSmrggen2_emit_composite_primitive_affine__sse2(struct sna *sna, 1044428d7b3dSmrg const struct sna_composite_op *op, 1045428d7b3dSmrg const struct sna_composite_rectangles *r) 1046428d7b3dSmrg{ 1047428d7b3dSmrg PictTransform *transform = op->src.transform; 1048428d7b3dSmrg int src_x = r->src.x + (int)op->src.offset[0]; 1049428d7b3dSmrg int src_y = r->src.y + (int)op->src.offset[1]; 1050428d7b3dSmrg float *v; 1051428d7b3dSmrg 1052428d7b3dSmrg v = (float *)sna->kgem.batch + sna->kgem.nbatch; 1053428d7b3dSmrg sna->kgem.nbatch += 12; 1054428d7b3dSmrg 1055428d7b3dSmrg v[8] = v[4] = r->dst.x + op->dst.x; 1056428d7b3dSmrg v[0] = v[4] + r->width; 1057428d7b3dSmrg 1058428d7b3dSmrg v[9] = r->dst.y + op->dst.y; 1059428d7b3dSmrg v[5] = v[1] = v[9] + r->height; 1060428d7b3dSmrg 1061428d7b3dSmrg _sna_get_transformed_scaled(src_x + r->width, src_y + r->height, 1062428d7b3dSmrg transform, op->src.scale, 1063428d7b3dSmrg &v[2], &v[3]); 1064428d7b3dSmrg 1065428d7b3dSmrg _sna_get_transformed_scaled(src_x, src_y + r->height, 1066428d7b3dSmrg transform, op->src.scale, 1067428d7b3dSmrg &v[6], &v[7]); 1068428d7b3dSmrg 
1069428d7b3dSmrg _sna_get_transformed_scaled(src_x, src_y, 1070428d7b3dSmrg transform, op->src.scale, 1071428d7b3dSmrg &v[10], &v[11]); 1072428d7b3dSmrg} 1073428d7b3dSmrg 1074428d7b3dSmrgsse2 fastcall static void 1075428d7b3dSmrggen2_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna, 1076428d7b3dSmrg const struct sna_composite_op *op, 1077428d7b3dSmrg const struct sna_composite_rectangles *r) 1078428d7b3dSmrg{ 1079428d7b3dSmrg float w = r->width; 1080428d7b3dSmrg float h = r->height; 1081428d7b3dSmrg float *v; 1082428d7b3dSmrg 1083428d7b3dSmrg v = (float *)sna->kgem.batch + sna->kgem.nbatch; 1084428d7b3dSmrg sna->kgem.nbatch += 12; 1085428d7b3dSmrg 1086428d7b3dSmrg v[8] = v[4] = r->dst.x + op->dst.x; 1087428d7b3dSmrg v[0] = v[4] + w; 1088428d7b3dSmrg 1089428d7b3dSmrg v[9] = r->dst.y + op->dst.y; 1090428d7b3dSmrg v[5] = v[1] = v[9] + h; 1091428d7b3dSmrg 1092428d7b3dSmrg v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0]; 1093428d7b3dSmrg v[2] = v[6] + w * op->mask.scale[0]; 1094428d7b3dSmrg 1095428d7b3dSmrg v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1]; 1096428d7b3dSmrg v[7] = v[3] = v[11] + h * op->mask.scale[1]; 1097428d7b3dSmrg} 1098428d7b3dSmrg#endif 1099428d7b3dSmrg 1100428d7b3dSmrgstatic void gen2_magic_ca_pass(struct sna *sna, 1101428d7b3dSmrg const struct sna_composite_op *op) 1102428d7b3dSmrg{ 1103428d7b3dSmrg uint32_t ablend, cblend, *src, *dst; 1104428d7b3dSmrg int n; 1105428d7b3dSmrg 1106428d7b3dSmrg if (!op->need_magic_ca_pass) 1107428d7b3dSmrg return; 1108428d7b3dSmrg 1109428d7b3dSmrg DBG(("%s: batch=%x, vertex=%x\n", __FUNCTION__, 1110428d7b3dSmrg sna->kgem.nbatch, sna->render.vertex_offset)); 1111428d7b3dSmrg 1112428d7b3dSmrg assert(op->mask.bo); 1113428d7b3dSmrg assert(op->has_component_alpha); 1114428d7b3dSmrg 1115428d7b3dSmrg BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(8) | 0); 1116428d7b3dSmrg BATCH(BLENDFACTOR_ONE << S8_SRC_BLEND_FACTOR_SHIFT | 1117428d7b3dSmrg BLENDFACTOR_ONE << 
S8_DST_BLEND_FACTOR_SHIFT | 1118428d7b3dSmrg S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD | 1119428d7b3dSmrg S8_ENABLE_COLOR_BUFFER_WRITE); 1120428d7b3dSmrg sna->render_state.gen2.ls1 = 0; 1121428d7b3dSmrg 1122428d7b3dSmrg gen2_get_blend_factors(op, PictOpAdd, &cblend, &ablend); 1123428d7b3dSmrg BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 1124428d7b3dSmrg LOAD_TEXTURE_BLEND_STAGE(0) | 1); 1125428d7b3dSmrg BATCH(cblend); 1126428d7b3dSmrg BATCH(ablend); 1127428d7b3dSmrg sna->render_state.gen2.ls2 = 0; 1128428d7b3dSmrg 1129428d7b3dSmrg src = sna->kgem.batch + sna->render.vertex_offset; 1130428d7b3dSmrg dst = sna->kgem.batch + sna->kgem.nbatch; 1131428d7b3dSmrg n = 1 + sna->render.vertex_index; 1132428d7b3dSmrg sna->kgem.nbatch += n; 1133428d7b3dSmrg assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem)); 1134428d7b3dSmrg while (n--) 1135428d7b3dSmrg *dst++ = *src++; 1136428d7b3dSmrg} 1137428d7b3dSmrg 1138428d7b3dSmrgstatic void gen2_vertex_flush(struct sna *sna, 1139428d7b3dSmrg const struct sna_composite_op *op) 1140428d7b3dSmrg{ 1141428d7b3dSmrg if (sna->render.vertex_index == 0) 1142428d7b3dSmrg return; 1143428d7b3dSmrg 1144428d7b3dSmrg sna->kgem.batch[sna->render.vertex_offset] |= 1145428d7b3dSmrg sna->render.vertex_index - 1; 1146428d7b3dSmrg 1147428d7b3dSmrg gen2_magic_ca_pass(sna, op); 1148428d7b3dSmrg 1149428d7b3dSmrg sna->render.vertex_offset = 0; 1150428d7b3dSmrg sna->render.vertex_index = 0; 1151428d7b3dSmrg} 1152428d7b3dSmrg 1153428d7b3dSmrginline static int gen2_get_rectangles(struct sna *sna, 1154428d7b3dSmrg const struct sna_composite_op *op, 1155428d7b3dSmrg int want) 1156428d7b3dSmrg{ 1157428d7b3dSmrg int rem = batch_space(sna), size, need; 1158428d7b3dSmrg 1159428d7b3dSmrg DBG(("%s: want=%d, floats_per_vertex=%d, rem=%d\n", 1160428d7b3dSmrg __FUNCTION__, want, op->floats_per_vertex, rem)); 1161428d7b3dSmrg 1162428d7b3dSmrg assert(op->floats_per_vertex); 1163428d7b3dSmrg assert(op->floats_per_rect == 3 * op->floats_per_vertex); 1164428d7b3dSmrg 
1165428d7b3dSmrg need = 1; 1166428d7b3dSmrg size = op->floats_per_rect; 1167428d7b3dSmrg if (op->need_magic_ca_pass) 1168428d7b3dSmrg need += 6 + size*sna->render.vertex_index, size *= 2; 1169428d7b3dSmrg 1170428d7b3dSmrg DBG(("%s: want=%d, need=%d,size=%d, rem=%d\n", 1171428d7b3dSmrg __FUNCTION__, want, need, size, rem)); 1172428d7b3dSmrg if (rem < need + size) { 1173428d7b3dSmrg gen2_vertex_flush(sna, op); 1174428d7b3dSmrg kgem_submit(&sna->kgem); 1175428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1176428d7b3dSmrg return 0; 1177428d7b3dSmrg } 1178428d7b3dSmrg 1179428d7b3dSmrg rem -= need; 1180428d7b3dSmrg if (sna->render.vertex_offset == 0) { 1181428d7b3dSmrg if ((sna->kgem.batch[sna->kgem.nbatch-1] & ~0xffff) == 1182428d7b3dSmrg (PRIM3D_INLINE | PRIM3D_RECTLIST)) { 1183428d7b3dSmrg uint32_t *b = &sna->kgem.batch[sna->kgem.nbatch-1]; 1184428d7b3dSmrg assert(*b & 0xffff); 1185428d7b3dSmrg sna->render.vertex_index = 1 + (*b & 0xffff); 1186428d7b3dSmrg *b = PRIM3D_INLINE | PRIM3D_RECTLIST; 1187428d7b3dSmrg sna->render.vertex_offset = sna->kgem.nbatch - 1; 1188428d7b3dSmrg assert(!op->need_magic_ca_pass); 1189428d7b3dSmrg } else { 1190428d7b3dSmrg sna->render.vertex_offset = sna->kgem.nbatch; 1191428d7b3dSmrg BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST); 1192428d7b3dSmrg } 1193428d7b3dSmrg } 1194428d7b3dSmrg 1195428d7b3dSmrg if (want > 1 && want * size > rem) 1196428d7b3dSmrg want = rem / size; 1197428d7b3dSmrg 1198428d7b3dSmrg assert(want); 1199428d7b3dSmrg sna->render.vertex_index += want*op->floats_per_rect; 1200428d7b3dSmrg return want; 1201428d7b3dSmrg} 1202428d7b3dSmrg 1203428d7b3dSmrgfastcall static void 1204428d7b3dSmrggen2_render_composite_blt(struct sna *sna, 1205428d7b3dSmrg const struct sna_composite_op *op, 1206428d7b3dSmrg const struct sna_composite_rectangles *r) 1207428d7b3dSmrg{ 1208428d7b3dSmrg if (!gen2_get_rectangles(sna, op, 1)) { 1209428d7b3dSmrg gen2_emit_composite_state(sna, op); 1210428d7b3dSmrg gen2_get_rectangles(sna, op, 1); 
1211428d7b3dSmrg } 1212428d7b3dSmrg 1213428d7b3dSmrg op->prim_emit(sna, op, r); 1214428d7b3dSmrg} 1215428d7b3dSmrg 1216428d7b3dSmrgfastcall static void 1217428d7b3dSmrggen2_render_composite_box(struct sna *sna, 1218428d7b3dSmrg const struct sna_composite_op *op, 1219428d7b3dSmrg const BoxRec *box) 1220428d7b3dSmrg{ 1221428d7b3dSmrg struct sna_composite_rectangles r; 1222428d7b3dSmrg 1223428d7b3dSmrg if (!gen2_get_rectangles(sna, op, 1)) { 1224428d7b3dSmrg gen2_emit_composite_state(sna, op); 1225428d7b3dSmrg gen2_get_rectangles(sna, op, 1); 1226428d7b3dSmrg } 1227428d7b3dSmrg 1228428d7b3dSmrg DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 1229428d7b3dSmrg box->x1, box->y1, 1230428d7b3dSmrg box->x2 - box->x1, 1231428d7b3dSmrg box->y2 - box->y1)); 1232428d7b3dSmrg 1233428d7b3dSmrg r.dst.x = box->x1; r.dst.y = box->y1; 1234428d7b3dSmrg r.width = box->x2 - box->x1; 1235428d7b3dSmrg r.height = box->y2 - box->y1; 1236428d7b3dSmrg r.src = r.mask = r.dst; 1237428d7b3dSmrg 1238428d7b3dSmrg op->prim_emit(sna, op, &r); 1239428d7b3dSmrg} 1240428d7b3dSmrg 1241428d7b3dSmrgstatic void 1242428d7b3dSmrggen2_render_composite_boxes(struct sna *sna, 1243428d7b3dSmrg const struct sna_composite_op *op, 1244428d7b3dSmrg const BoxRec *box, int nbox) 1245428d7b3dSmrg{ 1246428d7b3dSmrg do { 1247428d7b3dSmrg int nbox_this_time; 1248428d7b3dSmrg 1249428d7b3dSmrg nbox_this_time = gen2_get_rectangles(sna, op, nbox); 1250428d7b3dSmrg if (nbox_this_time == 0) { 1251428d7b3dSmrg gen2_emit_composite_state(sna, op); 1252428d7b3dSmrg nbox_this_time = gen2_get_rectangles(sna, op, nbox); 1253428d7b3dSmrg } 1254428d7b3dSmrg nbox -= nbox_this_time; 1255428d7b3dSmrg 1256428d7b3dSmrg do { 1257428d7b3dSmrg struct sna_composite_rectangles r; 1258428d7b3dSmrg 1259428d7b3dSmrg DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 1260428d7b3dSmrg box->x1, box->y1, 1261428d7b3dSmrg box->x2 - box->x1, 1262428d7b3dSmrg box->y2 - box->y1)); 1263428d7b3dSmrg 1264428d7b3dSmrg r.dst.x = box->x1; r.dst.y = box->y1; 
1265428d7b3dSmrg r.width = box->x2 - box->x1; 1266428d7b3dSmrg r.height = box->y2 - box->y1; 1267428d7b3dSmrg r.src = r.mask = r.dst; 1268428d7b3dSmrg 1269428d7b3dSmrg op->prim_emit(sna, op, &r); 1270428d7b3dSmrg box++; 1271428d7b3dSmrg } while (--nbox_this_time); 1272428d7b3dSmrg } while (nbox); 1273428d7b3dSmrg} 1274428d7b3dSmrg 1275428d7b3dSmrgstatic void gen2_render_composite_done(struct sna *sna, 1276428d7b3dSmrg const struct sna_composite_op *op) 1277428d7b3dSmrg{ 1278428d7b3dSmrg gen2_vertex_flush(sna, op); 1279428d7b3dSmrg 1280428d7b3dSmrg if (op->mask.bo) 1281428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->mask.bo); 1282428d7b3dSmrg if (op->src.bo) 1283428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->src.bo); 1284428d7b3dSmrg sna_render_composite_redirect_done(sna, op); 1285428d7b3dSmrg} 1286428d7b3dSmrg 1287428d7b3dSmrgstatic bool 1288428d7b3dSmrggen2_composite_solid_init(struct sna *sna, 1289428d7b3dSmrg struct sna_composite_channel *channel, 1290428d7b3dSmrg uint32_t color) 1291428d7b3dSmrg{ 1292428d7b3dSmrg channel->filter = PictFilterNearest; 1293428d7b3dSmrg channel->repeat = RepeatNormal; 1294428d7b3dSmrg channel->is_solid = true; 1295428d7b3dSmrg channel->is_affine = true; 1296428d7b3dSmrg channel->width = 1; 1297428d7b3dSmrg channel->height = 1; 1298428d7b3dSmrg channel->pict_format = PICT_a8r8g8b8; 1299428d7b3dSmrg 1300428d7b3dSmrg channel->bo = NULL; 1301428d7b3dSmrg channel->u.gen2.pixel = color; 1302428d7b3dSmrg 1303428d7b3dSmrg channel->scale[0] = channel->scale[1] = 1; 1304428d7b3dSmrg channel->offset[0] = channel->offset[1] = 0; 1305428d7b3dSmrg return true; 1306428d7b3dSmrg} 1307428d7b3dSmrg 1308428d7b3dSmrg#define xFixedToDouble(f) pixman_fixed_to_double(f) 1309428d7b3dSmrg 1310428d7b3dSmrgstatic bool 1311428d7b3dSmrggen2_composite_linear_init(struct sna *sna, 1312428d7b3dSmrg PicturePtr picture, 1313428d7b3dSmrg struct sna_composite_channel *channel, 1314428d7b3dSmrg int x, int y, 1315428d7b3dSmrg int w, int h, 1316428d7b3dSmrg int dst_x, int 
dst_y) 1317428d7b3dSmrg{ 1318428d7b3dSmrg PictLinearGradient *linear = 1319428d7b3dSmrg (PictLinearGradient *)picture->pSourcePict; 1320428d7b3dSmrg pixman_fixed_t tx, ty; 1321428d7b3dSmrg float x0, y0, sf; 1322428d7b3dSmrg float dx, dy; 1323428d7b3dSmrg 1324428d7b3dSmrg DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n", 1325428d7b3dSmrg __FUNCTION__, 1326428d7b3dSmrg xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y), 1327428d7b3dSmrg xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y))); 1328428d7b3dSmrg 1329428d7b3dSmrg if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y) 1330428d7b3dSmrg return 0; 1331428d7b3dSmrg 1332428d7b3dSmrg if (!sna_transform_is_affine(picture->transform)) { 1333428d7b3dSmrg DBG(("%s: fallback due to projective transform\n", 1334428d7b3dSmrg __FUNCTION__)); 1335428d7b3dSmrg return sna_render_picture_fixup(sna, picture, channel, 1336428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1337428d7b3dSmrg } 1338428d7b3dSmrg 1339428d7b3dSmrg channel->bo = sna_render_get_gradient(sna, (PictGradient *)linear); 1340428d7b3dSmrg if (!channel->bo) 1341428d7b3dSmrg return 0; 1342428d7b3dSmrg 1343428d7b3dSmrg channel->filter = PictFilterNearest; 1344428d7b3dSmrg channel->repeat = picture->repeat ? 
picture->repeatType : RepeatNone; 1345428d7b3dSmrg channel->is_linear = true; 1346428d7b3dSmrg channel->width = channel->bo->pitch / 4; 1347428d7b3dSmrg channel->height = 1; 1348428d7b3dSmrg channel->pict_format = PICT_a8r8g8b8; 1349428d7b3dSmrg 1350428d7b3dSmrg channel->scale[0] = channel->scale[1] = 1; 1351428d7b3dSmrg channel->offset[0] = channel->offset[1] = 0; 1352428d7b3dSmrg 1353428d7b3dSmrg if (sna_transform_is_translation(picture->transform, &tx, &ty)) { 1354428d7b3dSmrg dx = xFixedToDouble(linear->p2.x - linear->p1.x); 1355428d7b3dSmrg dy = xFixedToDouble(linear->p2.y - linear->p1.y); 1356428d7b3dSmrg 1357428d7b3dSmrg x0 = xFixedToDouble(linear->p1.x); 1358428d7b3dSmrg y0 = xFixedToDouble(linear->p1.y); 1359428d7b3dSmrg 1360428d7b3dSmrg if (tx | ty) { 1361428d7b3dSmrg x0 -= pixman_fixed_to_double(tx); 1362428d7b3dSmrg y0 -= pixman_fixed_to_double(ty); 1363428d7b3dSmrg } 1364428d7b3dSmrg } else { 1365428d7b3dSmrg struct pixman_f_vector p1, p2; 1366428d7b3dSmrg struct pixman_f_transform m, inv; 1367428d7b3dSmrg 1368428d7b3dSmrg pixman_f_transform_from_pixman_transform(&m, picture->transform); 1369428d7b3dSmrg DBG(("%s: transform = [%f %f %f, %f %f %f, %f %f %f]\n", 1370428d7b3dSmrg __FUNCTION__, 1371428d7b3dSmrg m.m[0][0], m.m[0][1], m.m[0][2], 1372428d7b3dSmrg m.m[1][0], m.m[1][1], m.m[1][2], 1373428d7b3dSmrg m.m[2][0], m.m[2][1], m.m[2][2])); 1374428d7b3dSmrg if (!pixman_f_transform_invert(&inv, &m)) 1375428d7b3dSmrg return 0; 1376428d7b3dSmrg 1377428d7b3dSmrg p1.v[0] = pixman_fixed_to_double(linear->p1.x); 1378428d7b3dSmrg p1.v[1] = pixman_fixed_to_double(linear->p1.y); 1379428d7b3dSmrg p1.v[2] = 1.; 1380428d7b3dSmrg pixman_f_transform_point(&inv, &p1); 1381428d7b3dSmrg 1382428d7b3dSmrg p2.v[0] = pixman_fixed_to_double(linear->p2.x); 1383428d7b3dSmrg p2.v[1] = pixman_fixed_to_double(linear->p2.y); 1384428d7b3dSmrg p2.v[2] = 1.; 1385428d7b3dSmrg pixman_f_transform_point(&inv, &p2); 1386428d7b3dSmrg 1387428d7b3dSmrg DBG(("%s: untransformed: p1=(%f, %f, 
%f), p2=(%f, %f, %f)\n", 1388428d7b3dSmrg __FUNCTION__, 1389428d7b3dSmrg p1.v[0], p1.v[1], p1.v[2], 1390428d7b3dSmrg p2.v[0], p2.v[1], p2.v[2])); 1391428d7b3dSmrg 1392428d7b3dSmrg dx = p2.v[0] - p1.v[0]; 1393428d7b3dSmrg dy = p2.v[1] - p1.v[1]; 1394428d7b3dSmrg 1395428d7b3dSmrg x0 = p1.v[0]; 1396428d7b3dSmrg y0 = p1.v[1]; 1397428d7b3dSmrg } 1398428d7b3dSmrg 1399428d7b3dSmrg sf = dx*dx + dy*dy; 1400428d7b3dSmrg dx /= sf; 1401428d7b3dSmrg dy /= sf; 1402428d7b3dSmrg 1403428d7b3dSmrg channel->u.linear.dx = dx; 1404428d7b3dSmrg channel->u.linear.dy = dy; 1405428d7b3dSmrg channel->u.linear.offset = -dx*(x0+dst_x-x) + -dy*(y0+dst_y-y); 1406428d7b3dSmrg 1407428d7b3dSmrg DBG(("%s: dx=%f, dy=%f, offset=%f\n", 1408428d7b3dSmrg __FUNCTION__, dx, dy, channel->u.linear.offset)); 1409428d7b3dSmrg 1410428d7b3dSmrg return channel->bo != NULL; 1411428d7b3dSmrg} 1412428d7b3dSmrg 1413428d7b3dSmrgstatic bool source_is_covered(PicturePtr picture, 1414428d7b3dSmrg int x, int y, 1415428d7b3dSmrg int width, int height) 1416428d7b3dSmrg{ 1417428d7b3dSmrg int x1, y1, x2, y2; 1418428d7b3dSmrg 1419428d7b3dSmrg if (picture->repeat && picture->repeatType != RepeatNone) 1420428d7b3dSmrg return true; 1421428d7b3dSmrg 1422428d7b3dSmrg if (picture->pDrawable == NULL) 1423428d7b3dSmrg return false; 1424428d7b3dSmrg 1425428d7b3dSmrg if (picture->transform) { 1426428d7b3dSmrg pixman_box16_t sample; 1427428d7b3dSmrg 1428428d7b3dSmrg sample.x1 = x; 1429428d7b3dSmrg sample.y1 = y; 1430428d7b3dSmrg sample.x2 = x + width; 1431428d7b3dSmrg sample.y2 = y + height; 1432428d7b3dSmrg 1433428d7b3dSmrg pixman_transform_bounds(picture->transform, &sample); 1434428d7b3dSmrg 1435428d7b3dSmrg x1 = sample.x1; 1436428d7b3dSmrg x2 = sample.x2; 1437428d7b3dSmrg y1 = sample.y1; 1438428d7b3dSmrg y2 = sample.y2; 1439428d7b3dSmrg } else { 1440428d7b3dSmrg x1 = x; 1441428d7b3dSmrg y1 = y; 1442428d7b3dSmrg x2 = x + width; 1443428d7b3dSmrg y2 = y + height; 1444428d7b3dSmrg } 1445428d7b3dSmrg 1446428d7b3dSmrg return 
1447428d7b3dSmrg x1 >= 0 && y1 >= 0 && 1448428d7b3dSmrg x2 <= picture->pDrawable->width && 1449428d7b3dSmrg y2 <= picture->pDrawable->height; 1450428d7b3dSmrg} 1451428d7b3dSmrg 1452428d7b3dSmrgstatic bool 1453428d7b3dSmrggen2_check_card_format(struct sna *sna, 1454428d7b3dSmrg PicturePtr picture, 1455428d7b3dSmrg struct sna_composite_channel *channel, 1456428d7b3dSmrg int x, int y, int w, int h, 1457428d7b3dSmrg bool *fixup_alpha) 1458428d7b3dSmrg{ 1459428d7b3dSmrg uint32_t format = picture->format; 1460428d7b3dSmrg unsigned int i; 1461428d7b3dSmrg 1462428d7b3dSmrg for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) { 1463428d7b3dSmrg if (i8xx_tex_formats[i].fmt == format) 1464428d7b3dSmrg return true; 1465428d7b3dSmrg } 1466428d7b3dSmrg 1467428d7b3dSmrg for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) { 1468428d7b3dSmrg if (i85x_tex_formats[i].fmt == format) { 1469428d7b3dSmrg if (sna->kgem.gen >= 021) 1470428d7b3dSmrg return true; 1471428d7b3dSmrg 1472428d7b3dSmrg if (source_is_covered(picture, x, y, w,h)) { 1473428d7b3dSmrg channel->is_opaque = true; 1474428d7b3dSmrg return true; 1475428d7b3dSmrg } 1476428d7b3dSmrg 1477428d7b3dSmrg *fixup_alpha = true; 1478428d7b3dSmrg return false; 1479428d7b3dSmrg } 1480428d7b3dSmrg } 1481428d7b3dSmrg 1482428d7b3dSmrg *fixup_alpha = false; 1483428d7b3dSmrg return false; 1484428d7b3dSmrg} 1485428d7b3dSmrg 1486428d7b3dSmrgstatic int 1487428d7b3dSmrggen2_composite_picture(struct sna *sna, 1488428d7b3dSmrg PicturePtr picture, 1489428d7b3dSmrg struct sna_composite_channel *channel, 1490428d7b3dSmrg int x, int y, 1491428d7b3dSmrg int w, int h, 1492428d7b3dSmrg int dst_x, int dst_y, 1493428d7b3dSmrg bool precise) 1494428d7b3dSmrg{ 1495428d7b3dSmrg PixmapPtr pixmap; 1496428d7b3dSmrg uint32_t color; 1497428d7b3dSmrg int16_t dx, dy; 1498428d7b3dSmrg bool fixup_alpha; 1499428d7b3dSmrg 1500428d7b3dSmrg DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", 1501428d7b3dSmrg __FUNCTION__, x, y, w, h, dst_x, dst_y)); 1502428d7b3dSmrg 
1503428d7b3dSmrg channel->is_solid = false; 1504428d7b3dSmrg channel->is_linear = false; 1505428d7b3dSmrg channel->is_opaque = false; 1506428d7b3dSmrg channel->is_affine = true; 1507428d7b3dSmrg channel->transform = NULL; 1508428d7b3dSmrg channel->card_format = -1; 1509428d7b3dSmrg 1510428d7b3dSmrg if (sna_picture_is_solid(picture, &color)) 1511428d7b3dSmrg return gen2_composite_solid_init(sna, channel, color); 1512428d7b3dSmrg 1513428d7b3dSmrg if (!gen2_check_repeat(picture)) { 1514428d7b3dSmrg DBG(("%s -- fallback, unhandled repeat %d\n", 1515428d7b3dSmrg __FUNCTION__, picture->repeat)); 1516428d7b3dSmrg return sna_render_picture_fixup(sna, picture, channel, 1517428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1518428d7b3dSmrg } 1519428d7b3dSmrg 1520428d7b3dSmrg if (!gen2_check_filter(picture)) { 1521428d7b3dSmrg DBG(("%s -- fallback, unhandled filter %d\n", 1522428d7b3dSmrg __FUNCTION__, picture->filter)); 1523428d7b3dSmrg return sna_render_picture_fixup(sna, picture, channel, 1524428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1525428d7b3dSmrg } 1526428d7b3dSmrg 1527428d7b3dSmrg if (picture->pDrawable == NULL) { 1528428d7b3dSmrg int ret; 1529428d7b3dSmrg 1530428d7b3dSmrg if (picture->pSourcePict->type == SourcePictTypeLinear) 1531428d7b3dSmrg return gen2_composite_linear_init(sna, picture, channel, 1532428d7b3dSmrg x, y, 1533428d7b3dSmrg w, h, 1534428d7b3dSmrg dst_x, dst_y); 1535428d7b3dSmrg 1536428d7b3dSmrg DBG(("%s -- fallback, unhandled source %d\n", 1537428d7b3dSmrg __FUNCTION__, picture->pSourcePict->type)); 1538428d7b3dSmrg ret = -1; 1539428d7b3dSmrg if (!precise) 1540428d7b3dSmrg ret = sna_render_picture_approximate_gradient(sna, picture, channel, 1541428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1542428d7b3dSmrg if (ret == -1) 1543428d7b3dSmrg ret = sna_render_picture_fixup(sna, picture, channel, 1544428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1545428d7b3dSmrg return ret; 1546428d7b3dSmrg } 1547428d7b3dSmrg 1548428d7b3dSmrg if (picture->alphaMap) { 1549428d7b3dSmrg DBG(("%s -- 
fallback, alphamap\n", __FUNCTION__)); 1550428d7b3dSmrg return sna_render_picture_fixup(sna, picture, channel, 1551428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1552428d7b3dSmrg } 1553428d7b3dSmrg 1554428d7b3dSmrg channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; 1555428d7b3dSmrg channel->filter = picture->filter; 1556428d7b3dSmrg 1557428d7b3dSmrg pixmap = get_drawable_pixmap(picture->pDrawable); 1558428d7b3dSmrg get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); 1559428d7b3dSmrg 1560428d7b3dSmrg x += dx + picture->pDrawable->x; 1561428d7b3dSmrg y += dy + picture->pDrawable->y; 1562428d7b3dSmrg 1563428d7b3dSmrg channel->is_affine = sna_transform_is_affine(picture->transform); 1564428d7b3dSmrg if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) { 1565428d7b3dSmrg DBG(("%s: integer translation (%d, %d), removing\n", 1566428d7b3dSmrg __FUNCTION__, dx, dy)); 1567428d7b3dSmrg x += dx; 1568428d7b3dSmrg y += dy; 1569428d7b3dSmrg channel->transform = NULL; 1570428d7b3dSmrg channel->filter = PictFilterNearest; 1571428d7b3dSmrg 1572428d7b3dSmrg if (channel->repeat && 1573428d7b3dSmrg (x >= 0 && 1574428d7b3dSmrg y >= 0 && 1575428d7b3dSmrg x + w < pixmap->drawable.width && 1576428d7b3dSmrg y + h < pixmap->drawable.height)) { 1577428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(pixmap); 1578428d7b3dSmrg if (priv && priv->clear) { 1579428d7b3dSmrg DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); 1580428d7b3dSmrg return gen2_composite_solid_init(sna, channel, priv->clear_color); 1581428d7b3dSmrg } 1582428d7b3dSmrg } 1583428d7b3dSmrg } else 1584428d7b3dSmrg channel->transform = picture->transform; 1585428d7b3dSmrg 1586428d7b3dSmrg if (!gen2_check_card_format(sna, picture, channel, x, y, w ,h, &fixup_alpha)) 1587428d7b3dSmrg return sna_render_picture_convert(sna, picture, channel, pixmap, 1588428d7b3dSmrg x, y, w, h, dst_x, dst_y, fixup_alpha); 
1589428d7b3dSmrg 1590428d7b3dSmrg channel->pict_format = picture->format; 1591428d7b3dSmrg if (too_large(pixmap->drawable.width, pixmap->drawable.height)) 1592428d7b3dSmrg return sna_render_picture_extract(sna, picture, channel, 1593428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1594428d7b3dSmrg 1595428d7b3dSmrg return sna_render_pixmap_bo(sna, channel, pixmap, 1596428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1597428d7b3dSmrg} 1598428d7b3dSmrg 1599428d7b3dSmrgstatic bool 1600428d7b3dSmrggen2_composite_set_target(struct sna *sna, 1601428d7b3dSmrg struct sna_composite_op *op, 1602428d7b3dSmrg PicturePtr dst, 1603428d7b3dSmrg int x, int y, int w, int h, 1604428d7b3dSmrg bool partial) 1605428d7b3dSmrg{ 1606428d7b3dSmrg BoxRec box; 1607428d7b3dSmrg unsigned hint; 1608428d7b3dSmrg 1609428d7b3dSmrg op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); 1610428d7b3dSmrg op->dst.format = dst->format; 1611428d7b3dSmrg op->dst.width = op->dst.pixmap->drawable.width; 1612428d7b3dSmrg op->dst.height = op->dst.pixmap->drawable.height; 1613428d7b3dSmrg 1614428d7b3dSmrg if (w && h) { 1615428d7b3dSmrg box.x1 = x; 1616428d7b3dSmrg box.y1 = y; 1617428d7b3dSmrg box.x2 = x + w; 1618428d7b3dSmrg box.y2 = y + h; 1619428d7b3dSmrg } else 1620428d7b3dSmrg sna_render_picture_extents(dst, &box); 1621428d7b3dSmrg 1622428d7b3dSmrg hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; 1623428d7b3dSmrg if (!partial) { 1624428d7b3dSmrg hint |= IGNORE_DAMAGE; 1625428d7b3dSmrg if (w == op->dst.width && h == op->dst.height) 1626428d7b3dSmrg hint |= REPLACES; 1627428d7b3dSmrg } 1628428d7b3dSmrg 1629428d7b3dSmrg op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 1630428d7b3dSmrg if (op->dst.bo == NULL) 1631428d7b3dSmrg return false; 1632428d7b3dSmrg 1633428d7b3dSmrg if (hint & REPLACES) { 1634428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 1635428d7b3dSmrg kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 1636428d7b3dSmrg } 1637428d7b3dSmrg 1638428d7b3dSmrg 
assert((op->dst.bo->pitch & 7) == 0); 1639428d7b3dSmrg 1640428d7b3dSmrg get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 1641428d7b3dSmrg &op->dst.x, &op->dst.y); 1642428d7b3dSmrg 1643428d7b3dSmrg DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 1644428d7b3dSmrg __FUNCTION__, 1645428d7b3dSmrg op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 1646428d7b3dSmrg op->dst.width, op->dst.height, 1647428d7b3dSmrg op->dst.bo->pitch, 1648428d7b3dSmrg op->dst.x, op->dst.y, 1649428d7b3dSmrg op->damage ? *op->damage : (void *)-1)); 1650428d7b3dSmrg 1651428d7b3dSmrg assert(op->dst.bo->proxy == NULL); 1652428d7b3dSmrg 1653428d7b3dSmrg if (((too_large(op->dst.width, op->dst.height) || 1654428d7b3dSmrg op->dst.bo->pitch > MAX_3D_PITCH)) && 1655428d7b3dSmrg !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 1656428d7b3dSmrg return false; 1657428d7b3dSmrg 1658428d7b3dSmrg return true; 1659428d7b3dSmrg} 1660428d7b3dSmrg 1661428d7b3dSmrgstatic bool 1662428d7b3dSmrgis_unhandled_gradient(PicturePtr picture, bool precise) 1663428d7b3dSmrg{ 1664428d7b3dSmrg if (picture->pDrawable) 1665428d7b3dSmrg return false; 1666428d7b3dSmrg 1667428d7b3dSmrg switch (picture->pSourcePict->type) { 1668428d7b3dSmrg case SourcePictTypeSolidFill: 1669428d7b3dSmrg case SourcePictTypeLinear: 1670428d7b3dSmrg return false; 1671428d7b3dSmrg default: 1672428d7b3dSmrg return precise; 1673428d7b3dSmrg } 1674428d7b3dSmrg} 1675428d7b3dSmrg 1676428d7b3dSmrgstatic bool 1677428d7b3dSmrghas_alphamap(PicturePtr p) 1678428d7b3dSmrg{ 1679428d7b3dSmrg return p->alphaMap != NULL; 1680428d7b3dSmrg} 1681428d7b3dSmrg 1682428d7b3dSmrgstatic bool 1683428d7b3dSmrgneed_upload(PicturePtr p) 1684428d7b3dSmrg{ 1685428d7b3dSmrg return p->pDrawable && unattached(p->pDrawable) && untransformed(p); 1686428d7b3dSmrg} 1687428d7b3dSmrg 1688428d7b3dSmrgstatic bool 1689428d7b3dSmrgsource_is_busy(PixmapPtr pixmap) 1690428d7b3dSmrg{ 1691428d7b3dSmrg struct sna_pixmap *priv = 
sna_pixmap(pixmap); 1692428d7b3dSmrg if (priv == NULL) 1693428d7b3dSmrg return false; 1694428d7b3dSmrg 1695428d7b3dSmrg if (priv->clear) 1696428d7b3dSmrg return false; 1697428d7b3dSmrg 1698428d7b3dSmrg if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 1699428d7b3dSmrg return true; 1700428d7b3dSmrg 1701428d7b3dSmrg if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 1702428d7b3dSmrg return true; 1703428d7b3dSmrg 1704428d7b3dSmrg return priv->gpu_damage && !priv->cpu_damage; 1705428d7b3dSmrg} 1706428d7b3dSmrg 1707428d7b3dSmrgstatic bool 1708428d7b3dSmrgsource_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) 1709428d7b3dSmrg{ 1710428d7b3dSmrg if (sna_picture_is_solid(p, NULL)) 1711428d7b3dSmrg return false; 1712428d7b3dSmrg 1713428d7b3dSmrg if (is_unhandled_gradient(p, precise) || !gen2_check_repeat(p)) 1714428d7b3dSmrg return true; 1715428d7b3dSmrg 1716428d7b3dSmrg if (pixmap && source_is_busy(pixmap)) 1717428d7b3dSmrg return false; 1718428d7b3dSmrg 1719428d7b3dSmrg return has_alphamap(p) || !gen2_check_filter(p) || need_upload(p); 1720428d7b3dSmrg} 1721428d7b3dSmrg 1722428d7b3dSmrgstatic bool 1723428d7b3dSmrggen2_composite_fallback(struct sna *sna, 1724428d7b3dSmrg PicturePtr src, 1725428d7b3dSmrg PicturePtr mask, 1726428d7b3dSmrg PicturePtr dst) 1727428d7b3dSmrg{ 1728428d7b3dSmrg PixmapPtr src_pixmap; 1729428d7b3dSmrg PixmapPtr mask_pixmap; 1730428d7b3dSmrg PixmapPtr dst_pixmap; 1731428d7b3dSmrg bool src_fallback, mask_fallback; 1732428d7b3dSmrg 1733428d7b3dSmrg if (!gen2_check_dst_format(dst->format)) { 1734428d7b3dSmrg DBG(("%s: unknown destination format: %d\n", 1735428d7b3dSmrg __FUNCTION__, dst->format)); 1736428d7b3dSmrg return true; 1737428d7b3dSmrg } 1738428d7b3dSmrg 1739428d7b3dSmrg dst_pixmap = get_drawable_pixmap(dst->pDrawable); 1740428d7b3dSmrg 1741428d7b3dSmrg src_pixmap = src->pDrawable ? 
get_drawable_pixmap(src->pDrawable) : NULL; 1742428d7b3dSmrg src_fallback = source_fallback(src, src_pixmap, 1743428d7b3dSmrg dst->polyMode == PolyModePrecise); 1744428d7b3dSmrg 1745428d7b3dSmrg if (mask) { 1746428d7b3dSmrg mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; 1747428d7b3dSmrg mask_fallback = source_fallback(mask, mask_pixmap, 1748428d7b3dSmrg dst->polyMode == PolyModePrecise); 1749428d7b3dSmrg } else { 1750428d7b3dSmrg mask_pixmap = NULL; 1751428d7b3dSmrg mask_fallback = false; /* bool flag (not a pointer): without a mask there is nothing to fall back for */ 1752428d7b3dSmrg } 1753428d7b3dSmrg 1754428d7b3dSmrg /* If we are using the destination as a source and need to 1755428d7b3dSmrg * readback in order to upload the source, do it all 1756428d7b3dSmrg * on the cpu. 1757428d7b3dSmrg */ 1758428d7b3dSmrg if (src_pixmap == dst_pixmap && src_fallback) { 1759428d7b3dSmrg DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); 1760428d7b3dSmrg return true; 1761428d7b3dSmrg } 1762428d7b3dSmrg if (mask_pixmap == dst_pixmap && mask_fallback) { 1763428d7b3dSmrg DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); 1764428d7b3dSmrg return true; 1765428d7b3dSmrg } 1766428d7b3dSmrg 1767428d7b3dSmrg /* If anything is on the GPU, push everything out to the GPU */ 1768428d7b3dSmrg if (dst_use_gpu(dst_pixmap)) { 1769428d7b3dSmrg DBG(("%s: dst is already on the GPU, try to use GPU\n", 1770428d7b3dSmrg __FUNCTION__)); 1771428d7b3dSmrg return false; 1772428d7b3dSmrg } 1773428d7b3dSmrg 1774428d7b3dSmrg if (src_pixmap && !src_fallback) { 1775428d7b3dSmrg DBG(("%s: src is already on the GPU, try to use GPU\n", 1776428d7b3dSmrg __FUNCTION__)); 1777428d7b3dSmrg return false; 1778428d7b3dSmrg } 1779428d7b3dSmrg if (mask_pixmap && !mask_fallback) { 1780428d7b3dSmrg DBG(("%s: mask is already on the GPU, try to use GPU\n", 1781428d7b3dSmrg __FUNCTION__)); 1782428d7b3dSmrg return false; 1783428d7b3dSmrg } 1784428d7b3dSmrg 1785428d7b3dSmrg /* However if the dst is not on the GPU and we need to 1786428d7b3dSmrg * render one of the 
sources using the CPU, we may 1787428d7b3dSmrg * as well do the entire operation in place on the CPU. 1788428d7b3dSmrg */ 1789428d7b3dSmrg if (src_fallback) { 1790428d7b3dSmrg DBG(("%s: dst is on the CPU and src will fallback\n", 1791428d7b3dSmrg __FUNCTION__)); 1792428d7b3dSmrg return true; 1793428d7b3dSmrg } 1794428d7b3dSmrg 1795428d7b3dSmrg if (mask && mask_fallback) { 1796428d7b3dSmrg DBG(("%s: dst is on the CPU and mask will fallback\n", 1797428d7b3dSmrg __FUNCTION__)); 1798428d7b3dSmrg return true; 1799428d7b3dSmrg } 1800428d7b3dSmrg 1801428d7b3dSmrg if (too_large(dst_pixmap->drawable.width, 1802428d7b3dSmrg dst_pixmap->drawable.height) && 1803428d7b3dSmrg dst_is_cpu(dst_pixmap)) { 1804428d7b3dSmrg DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 1805428d7b3dSmrg return true; 1806428d7b3dSmrg } 1807428d7b3dSmrg 1808428d7b3dSmrg DBG(("%s: dst is not on the GPU and the operation should not fallback\n", 1809428d7b3dSmrg __FUNCTION__)); 1810428d7b3dSmrg /* No earlier rule decided: the result is whatever dst_use_cpu() reports for the destination */ return dst_use_cpu(dst_pixmap); 1811428d7b3dSmrg} 1812428d7b3dSmrg 1813428d7b3dSmrgstatic int 1814428d7b3dSmrgreuse_source(struct sna *sna, 1815428d7b3dSmrg PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 1816428d7b3dSmrg PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 1817428d7b3dSmrg{ 1818428d7b3dSmrg uint32_t color; 1819428d7b3dSmrg 1820428d7b3dSmrg if (src_x != msk_x || src_y != msk_y) 1821428d7b3dSmrg return false; 1822428d7b3dSmrg 1823428d7b3dSmrg if (sna_picture_is_solid(mask, &color)) 1824428d7b3dSmrg return gen2_composite_solid_init(sna, mc, color); 1825428d7b3dSmrg 1826428d7b3dSmrg if (sc->is_solid) 1827428d7b3dSmrg return false; 1828428d7b3dSmrg 1829428d7b3dSmrg if (src == mask) { 1830428d7b3dSmrg DBG(("%s: mask is source\n", __FUNCTION__)); 1831428d7b3dSmrg *mc = *sc; 1832428d7b3dSmrg mc->bo = kgem_bo_reference(mc->bo); 1833428d7b3dSmrg return true; 1834428d7b3dSmrg } 1835428d7b3dSmrg 1836428d7b3dSmrg if (src->pDrawable == NULL || 
mask->pDrawable != src->pDrawable) 1837428d7b3dSmrg return false; 1838428d7b3dSmrg 1839428d7b3dSmrg DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 1840428d7b3dSmrg 1841428d7b3dSmrg if (!sna_transform_equal(src->transform, mask->transform)) 1842428d7b3dSmrg return false; 1843428d7b3dSmrg 1844428d7b3dSmrg if (!sna_picture_alphamap_equal(src, mask)) 1845428d7b3dSmrg return false; 1846428d7b3dSmrg 1847428d7b3dSmrg if (!gen2_check_repeat(mask)) 1848428d7b3dSmrg return false; 1849428d7b3dSmrg 1850428d7b3dSmrg if (!gen2_check_filter(mask)) 1851428d7b3dSmrg return false; 1852428d7b3dSmrg 1853428d7b3dSmrg if (!gen2_check_format(sna, mask)) 1854428d7b3dSmrg return false; 1855428d7b3dSmrg 1856428d7b3dSmrg DBG(("%s: reusing source channel for mask with a twist\n", 1857428d7b3dSmrg __FUNCTION__)); 1858428d7b3dSmrg 1859428d7b3dSmrg *mc = *sc; 1860428d7b3dSmrg mc->repeat = mask->repeat ? mask->repeatType : RepeatNone; 1861428d7b3dSmrg mc->filter = mask->filter; 1862428d7b3dSmrg mc->pict_format = mask->format; 1863428d7b3dSmrg mc->bo = kgem_bo_reference(mc->bo); 1864428d7b3dSmrg return true; 1865428d7b3dSmrg} 1866428d7b3dSmrg 1867428d7b3dSmrgstatic bool 1868428d7b3dSmrggen2_render_composite(struct sna *sna, 1869428d7b3dSmrg uint8_t op, 1870428d7b3dSmrg PicturePtr src, 1871428d7b3dSmrg PicturePtr mask, 1872428d7b3dSmrg PicturePtr dst, 1873428d7b3dSmrg int16_t src_x, int16_t src_y, 1874428d7b3dSmrg int16_t mask_x, int16_t mask_y, 1875428d7b3dSmrg int16_t dst_x, int16_t dst_y, 1876428d7b3dSmrg int16_t width, int16_t height, 1877428d7b3dSmrg unsigned flags, 1878428d7b3dSmrg struct sna_composite_op *tmp) 1879428d7b3dSmrg{ 1880428d7b3dSmrg DBG(("%s()\n", __FUNCTION__)); 1881428d7b3dSmrg 1882428d7b3dSmrg if (op >= ARRAY_SIZE(gen2_blend_op)) { 1883428d7b3dSmrg DBG(("%s: fallback due to unhandled blend op: %d\n", 1884428d7b3dSmrg __FUNCTION__, op)); 1885428d7b3dSmrg return false; 1886428d7b3dSmrg } 1887428d7b3dSmrg 1888428d7b3dSmrg if (mask == NULL && 1889428d7b3dSmrg 
sna_blt_composite(sna, op, src, dst, 1890428d7b3dSmrg src_x, src_y, 1891428d7b3dSmrg dst_x, dst_y, 1892428d7b3dSmrg width, height, 1893428d7b3dSmrg flags, tmp)) 1894428d7b3dSmrg return true; 1895428d7b3dSmrg 1896428d7b3dSmrg if (gen2_composite_fallback(sna, src, mask, dst)) 1897428d7b3dSmrg goto fallback; 1898428d7b3dSmrg 1899428d7b3dSmrg if (need_tiling(sna, width, height)) 1900428d7b3dSmrg return sna_tiling_composite(op, src, mask, dst, 1901428d7b3dSmrg src_x, src_y, 1902428d7b3dSmrg mask_x, mask_y, 1903428d7b3dSmrg dst_x, dst_y, 1904428d7b3dSmrg width, height, 1905428d7b3dSmrg tmp); 1906428d7b3dSmrg 1907428d7b3dSmrg tmp->op = op; 1908428d7b3dSmrg sna_render_composite_redirect_init(tmp); 1909428d7b3dSmrg 1910428d7b3dSmrg if (!gen2_composite_set_target(sna, tmp, dst, 1911428d7b3dSmrg dst_x, dst_y, width, height, 1912428d7b3dSmrg flags & COMPOSITE_PARTIAL || op > PictOpSrc)) { 1913428d7b3dSmrg DBG(("%s: unable to set render target\n", 1914428d7b3dSmrg __FUNCTION__)); 1915428d7b3dSmrg goto fallback; 1916428d7b3dSmrg } 1917428d7b3dSmrg 1918428d7b3dSmrg switch (gen2_composite_picture(sna, src, &tmp->src, 1919428d7b3dSmrg src_x, src_y, 1920428d7b3dSmrg width, height, 1921428d7b3dSmrg dst_x, dst_y, 1922428d7b3dSmrg dst->polyMode == PolyModePrecise)) { 1923428d7b3dSmrg case -1: 1924428d7b3dSmrg DBG(("%s: fallback -- unable to prepare source\n", 1925428d7b3dSmrg __FUNCTION__)); 1926428d7b3dSmrg goto cleanup_dst; 1927428d7b3dSmrg case 0: 1928428d7b3dSmrg gen2_composite_solid_init(sna, &tmp->src, 0); 1929428d7b3dSmrg break; 1930428d7b3dSmrg case 1: 1931428d7b3dSmrg if (mask == NULL && tmp->src.bo && 1932428d7b3dSmrg sna_blt_composite__convert(sna, 1933428d7b3dSmrg dst_x, dst_y, width, height, 1934428d7b3dSmrg tmp)) 1935428d7b3dSmrg return true; 1936428d7b3dSmrg break; 1937428d7b3dSmrg } 1938428d7b3dSmrg 1939428d7b3dSmrg if (mask) { 1940428d7b3dSmrg if (!reuse_source(sna, 1941428d7b3dSmrg src, &tmp->src, src_x, src_y, 1942428d7b3dSmrg mask, &tmp->mask, mask_x, mask_y)) { 
1943428d7b3dSmrg switch (gen2_composite_picture(sna, mask, &tmp->mask, 1944428d7b3dSmrg mask_x, mask_y, 1945428d7b3dSmrg width, height, 1946428d7b3dSmrg dst_x, dst_y, 1947428d7b3dSmrg dst->polyMode == PolyModePrecise)) { 1948428d7b3dSmrg case -1: 1949428d7b3dSmrg DBG(("%s: fallback -- unable to prepare mask\n", 1950428d7b3dSmrg __FUNCTION__)); 1951428d7b3dSmrg goto cleanup_src; 1952428d7b3dSmrg case 0: 1953428d7b3dSmrg /* 0: set the mask channel up as a solid with pixel value 0 instead */ gen2_composite_solid_init(sna, &tmp->mask, 0); /* fall through */ 1954428d7b3dSmrg case 1: 1955428d7b3dSmrg break; 1956428d7b3dSmrg } 1957428d7b3dSmrg } 1958428d7b3dSmrg 1959428d7b3dSmrg if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { 1960428d7b3dSmrg /* Check if it's component alpha that relies on a source alpha 1961428d7b3dSmrg * and on the source value. We can only get one of those 1962428d7b3dSmrg * into the single source value that we get to blend with. 1963428d7b3dSmrg */ 1964428d7b3dSmrg tmp->has_component_alpha = true; 1965428d7b3dSmrg if (gen2_blend_op[op].src_alpha && 1966428d7b3dSmrg (gen2_blend_op[op].src_blend != BLENDFACTOR_ZERO)) { 1967428d7b3dSmrg if (op != PictOpOver) { 1968428d7b3dSmrg DBG(("%s: fallback -- unsupported CA blend (src_blend=%d)\n", 1969428d7b3dSmrg __FUNCTION__, 1970428d7b3dSmrg gen2_blend_op[op].src_blend)); 1971428d7b3dSmrg goto cleanup_src; 1972428d7b3dSmrg } 1973428d7b3dSmrg 1974428d7b3dSmrg /* Only PictOpOver reaches this point: flag the extra component-alpha pass and run this pass as PictOpOutReverse */ tmp->need_magic_ca_pass = true; 1975428d7b3dSmrg tmp->op = PictOpOutReverse; 1976428d7b3dSmrg } 1977428d7b3dSmrg } 1978428d7b3dSmrg 1979428d7b3dSmrg /* convert solid to a texture (pure convenience) */ 1980428d7b3dSmrg if (tmp->mask.is_solid && tmp->src.is_solid) { 1981428d7b3dSmrg assert(tmp->mask.is_affine); 1982428d7b3dSmrg tmp->mask.bo = sna_render_get_solid(sna, tmp->mask.u.gen2.pixel); 1983428d7b3dSmrg if (!tmp->mask.bo) 1984428d7b3dSmrg goto cleanup_src; 1985428d7b3dSmrg } 1986428d7b3dSmrg } 1987428d7b3dSmrg 1988428d7b3dSmrg /* every vertex starts with 2 floats; the source adds its coordinates below unless it is solid */ tmp->floats_per_vertex = 2; 1989428d7b3dSmrg if (!tmp->src.is_solid) 1990428d7b3dSmrg 
tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 3; 1991428d7b3dSmrg if (tmp->mask.bo) 1992428d7b3dSmrg tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 3; 1993428d7b3dSmrg tmp->floats_per_rect = 3*tmp->floats_per_vertex; 1994428d7b3dSmrg 1995428d7b3dSmrg tmp->prim_emit = gen2_emit_composite_primitive; 1996428d7b3dSmrg if (tmp->mask.bo) { 1997428d7b3dSmrg if (tmp->mask.transform == NULL) { 1998428d7b3dSmrg if (tmp->src.is_solid) { 1999428d7b3dSmrg assert(tmp->floats_per_rect == 12); 2000428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__) 2001428d7b3dSmrg if (sna->cpu_features & SSE2) { 2002428d7b3dSmrg tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask__sse2; 2003428d7b3dSmrg } else 2004428d7b3dSmrg#endif 2005428d7b3dSmrg { 2006428d7b3dSmrg tmp->prim_emit = gen2_emit_composite_primitive_constant_identity_mask; 2007428d7b3dSmrg } 2008428d7b3dSmrg } 2009428d7b3dSmrg } 2010428d7b3dSmrg } else { 2011428d7b3dSmrg if (tmp->src.is_solid) { 2012428d7b3dSmrg assert(tmp->floats_per_rect == 6); 2013428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__) 2014428d7b3dSmrg if (sna->cpu_features & SSE2) { 2015428d7b3dSmrg tmp->prim_emit = gen2_emit_composite_primitive_constant__sse2; 2016428d7b3dSmrg } else 2017428d7b3dSmrg#endif 2018428d7b3dSmrg { 2019428d7b3dSmrg tmp->prim_emit = gen2_emit_composite_primitive_constant; 2020428d7b3dSmrg } 2021428d7b3dSmrg } else if (tmp->src.is_linear) { 2022428d7b3dSmrg assert(tmp->floats_per_rect == 12); 2023428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__) 2024428d7b3dSmrg if (sna->cpu_features & SSE2) { 2025428d7b3dSmrg tmp->prim_emit = gen2_emit_composite_primitive_linear__sse2; 2026428d7b3dSmrg } else 2027428d7b3dSmrg#endif 2028428d7b3dSmrg { 2029428d7b3dSmrg tmp->prim_emit = gen2_emit_composite_primitive_linear; 2030428d7b3dSmrg } 2031428d7b3dSmrg } else if (tmp->src.transform == NULL) { 2032428d7b3dSmrg assert(tmp->floats_per_rect == 12); 2033428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__) 
2034428d7b3dSmrg if (sna->cpu_features & SSE2) { 2035428d7b3dSmrg tmp->prim_emit = gen2_emit_composite_primitive_identity__sse2; 2036428d7b3dSmrg } else 2037428d7b3dSmrg#endif 2038428d7b3dSmrg { 2039428d7b3dSmrg tmp->prim_emit = gen2_emit_composite_primitive_identity; 2040428d7b3dSmrg } 2041428d7b3dSmrg } else if (tmp->src.is_affine) { 2042428d7b3dSmrg assert(tmp->floats_per_rect == 12); 2043428d7b3dSmrg tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; 2044428d7b3dSmrg tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; 2045428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__) 2046428d7b3dSmrg if (sna->cpu_features & SSE2) { 2047428d7b3dSmrg tmp->prim_emit = gen2_emit_composite_primitive_affine__sse2; 2048428d7b3dSmrg } else 2049428d7b3dSmrg#endif 2050428d7b3dSmrg { 2051428d7b3dSmrg tmp->prim_emit = gen2_emit_composite_primitive_affine; 2052428d7b3dSmrg } 2053428d7b3dSmrg } 2054428d7b3dSmrg } 2055428d7b3dSmrg 2056428d7b3dSmrg tmp->blt = gen2_render_composite_blt; 2057428d7b3dSmrg tmp->box = gen2_render_composite_box; 2058428d7b3dSmrg tmp->boxes = gen2_render_composite_boxes; 2059428d7b3dSmrg tmp->done = gen2_render_composite_done; 2060428d7b3dSmrg 2061428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2062428d7b3dSmrg tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2063428d7b3dSmrg NULL)) { 2064428d7b3dSmrg kgem_submit(&sna->kgem); 2065428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2066428d7b3dSmrg tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2067428d7b3dSmrg NULL)) { 2068428d7b3dSmrg DBG(("%s: fallback, operation does not fit into GTT\n", 2069428d7b3dSmrg __FUNCTION__)); 2070428d7b3dSmrg goto cleanup_mask; 2071428d7b3dSmrg } 2072428d7b3dSmrg } 2073428d7b3dSmrg 2074428d7b3dSmrg gen2_emit_composite_state(sna, tmp); 2075428d7b3dSmrg return true; 2076428d7b3dSmrg 2077428d7b3dSmrgcleanup_mask: 2078428d7b3dSmrg if (tmp->mask.bo) { 2079428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 2080428d7b3dSmrg tmp->mask.bo = NULL; 2081428d7b3dSmrg } 2082428d7b3dSmrgcleanup_src: 
2083428d7b3dSmrg if (tmp->src.bo) { 2084428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->src.bo); 2085428d7b3dSmrg tmp->src.bo = NULL; 2086428d7b3dSmrg } 2087428d7b3dSmrgcleanup_dst: 2088428d7b3dSmrg if (tmp->redirect.real_bo) { 2089428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 2090428d7b3dSmrg tmp->redirect.real_bo = NULL; 2091428d7b3dSmrg } 2092428d7b3dSmrgfallback: 2093428d7b3dSmrg return (mask == NULL && 2094428d7b3dSmrg sna_blt_composite(sna, op, src, dst, 2095428d7b3dSmrg src_x, src_y, 2096428d7b3dSmrg dst_x, dst_y, 2097428d7b3dSmrg width, height, 2098428d7b3dSmrg flags | COMPOSITE_FALLBACK, tmp)); 2099428d7b3dSmrg} 2100428d7b3dSmrg 2101428d7b3dSmrgfastcall static void 2102428d7b3dSmrggen2_emit_composite_spans_primitive_constant(struct sna *sna, 2103428d7b3dSmrg const struct sna_composite_spans_op *op, 2104428d7b3dSmrg const BoxRec *box, 2105428d7b3dSmrg float opacity) 2106428d7b3dSmrg{ /* Writes three (x, y, alpha) vertices -- 9 dwords -- directly into the batch, in the order (x2,y2), (x1,y2), (x1,y1), and advances nbatch by 9. */ 2107428d7b3dSmrg float *v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2108428d7b3dSmrg uint32_t alpha = (uint32_t)(uint8_t)(255 * opacity) << 24; /* widen before shifting: a promoted signed int would overflow for alpha >= 128 */ 2109428d7b3dSmrg sna->kgem.nbatch += 9; 2110428d7b3dSmrg 2111428d7b3dSmrg v[0] = op->base.dst.x + box->x2; 2112428d7b3dSmrg v[1] = op->base.dst.y + box->y2; 2113428d7b3dSmrg *((uint32_t *)v + 2) = alpha; 2114428d7b3dSmrg 2115428d7b3dSmrg v[3] = op->base.dst.x + box->x1; 2116428d7b3dSmrg v[4] = v[1]; 2117428d7b3dSmrg *((uint32_t *)v + 5) = alpha; 2118428d7b3dSmrg 2119428d7b3dSmrg v[6] = v[3]; 2120428d7b3dSmrg v[7] = op->base.dst.y + box->y1; 2121428d7b3dSmrg *((uint32_t *)v + 8) = alpha; 2122428d7b3dSmrg} 2123428d7b3dSmrg 2124428d7b3dSmrgfastcall static void 2125428d7b3dSmrggen2_emit_composite_spans_primitive_linear(struct sna *sna, 2126428d7b3dSmrg const struct sna_composite_spans_op *op, 2127428d7b3dSmrg const BoxRec *box, 2128428d7b3dSmrg float opacity) 2129428d7b3dSmrg{ /* Per vertex: destination coordinate, the alpha dword (handed to VERTEX through the float member of the union), then the linear-gradient coordinate. */ 2130428d7b3dSmrg union { 2131428d7b3dSmrg float f; 2132428d7b3dSmrg uint32_t u; 2133428d7b3dSmrg } alpha; 2134428d7b3dSmrg 2135428d7b3dSmrg alpha.u = (uint32_t)(uint8_t)(255 
* opacity) << 24; /* shifted as uint32_t, not as a promoted int */ 2136428d7b3dSmrg 2137428d7b3dSmrg gen2_emit_composite_dstcoord(sna, 2138428d7b3dSmrg op->base.dst.x + box->x2, 2139428d7b3dSmrg op->base.dst.y + box->y2); 2140428d7b3dSmrg VERTEX(alpha.f); 2141428d7b3dSmrg gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2); 2142428d7b3dSmrg 2143428d7b3dSmrg gen2_emit_composite_dstcoord(sna, 2144428d7b3dSmrg op->base.dst.x + box->x1, 2145428d7b3dSmrg op->base.dst.y + box->y2); 2146428d7b3dSmrg VERTEX(alpha.f); 2147428d7b3dSmrg gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2); 2148428d7b3dSmrg 2149428d7b3dSmrg gen2_emit_composite_dstcoord(sna, 2150428d7b3dSmrg op->base.dst.x + box->x1, 2151428d7b3dSmrg op->base.dst.y + box->y1); 2152428d7b3dSmrg VERTEX(alpha.f); 2153428d7b3dSmrg gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1); 2154428d7b3dSmrg} 2155428d7b3dSmrg 2156428d7b3dSmrgfastcall static void 2157428d7b3dSmrggen2_emit_composite_spans_primitive_identity_source(struct sna *sna, 2158428d7b3dSmrg const struct sna_composite_spans_op *op, 2159428d7b3dSmrg const BoxRec *box, 2160428d7b3dSmrg float opacity) 2161428d7b3dSmrg{ /* Writes three (x, y, alpha, s, t) vertices -- 15 dwords -- into the batch; s and t are (source offset + box coordinate) * source scale. */ 2162428d7b3dSmrg float *v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2163428d7b3dSmrg uint32_t alpha = (uint32_t)(uint8_t)(255 * opacity) << 24; /* widen before shifting: a promoted signed int would overflow for alpha >= 128 */ 2164428d7b3dSmrg sna->kgem.nbatch += 15; 2165428d7b3dSmrg 2166428d7b3dSmrg v[0] = op->base.dst.x + box->x2; 2167428d7b3dSmrg v[1] = op->base.dst.y + box->y2; 2168428d7b3dSmrg *((uint32_t *)v + 2) = alpha; 2169428d7b3dSmrg v[3] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0]; 2170428d7b3dSmrg v[4] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1]; 2171428d7b3dSmrg 2172428d7b3dSmrg v[5] = op->base.dst.x + box->x1; 2173428d7b3dSmrg v[6] = v[1]; 2174428d7b3dSmrg *((uint32_t *)v + 7) = alpha; 2175428d7b3dSmrg v[8] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0]; 2176428d7b3dSmrg v[9] = v[4]; 2177428d7b3dSmrg 2178428d7b3dSmrg v[10] = v[5]; 2179428d7b3dSmrg v[11] = op->base.dst.y + 
box->y1; 2180428d7b3dSmrg *((uint32_t *)v + 12) = alpha; 2181428d7b3dSmrg v[13] = v[8]; 2182428d7b3dSmrg v[14] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1]; 2183428d7b3dSmrg} 2184428d7b3dSmrg 2185428d7b3dSmrgfastcall static void 2186428d7b3dSmrggen2_emit_composite_spans_primitive_affine_source(struct sna *sna, 2187428d7b3dSmrg const struct sna_composite_spans_op *op, 2188428d7b3dSmrg const BoxRec *box, 2189428d7b3dSmrg float opacity) 2190428d7b3dSmrg{ /* Writes three (x, y, alpha, s, t) vertices -- 15 dwords -- into the batch; s and t are produced by _sna_get_transformed_scaled() from the source transform and scale. */ 2191428d7b3dSmrg PictTransform *transform = op->base.src.transform; 2192428d7b3dSmrg uint32_t alpha = (uint32_t)(uint8_t)(255 * opacity) << 24; /* widen before shifting: a promoted signed int would overflow for alpha >= 128 */ 2193428d7b3dSmrg float *v; 2194428d7b3dSmrg 2195428d7b3dSmrg v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2196428d7b3dSmrg sna->kgem.nbatch += 15; 2197428d7b3dSmrg 2198428d7b3dSmrg v[0] = op->base.dst.x + box->x2; 2199428d7b3dSmrg v[6] = v[1] = op->base.dst.y + box->y2; 2200428d7b3dSmrg v[10] = v[5] = op->base.dst.x + box->x1; 2201428d7b3dSmrg v[11] = op->base.dst.y + box->y1; 2202428d7b3dSmrg *((uint32_t *)v + 2) = alpha; 2203428d7b3dSmrg *((uint32_t *)v + 7) = alpha; 2204428d7b3dSmrg *((uint32_t *)v + 12) = alpha; 2205428d7b3dSmrg 2206428d7b3dSmrg _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2, 2207428d7b3dSmrg (int)op->base.src.offset[1] + box->y2, 2208428d7b3dSmrg transform, op->base.src.scale, 2209428d7b3dSmrg &v[3], &v[4]); 2210428d7b3dSmrg 2211428d7b3dSmrg _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, 2212428d7b3dSmrg (int)op->base.src.offset[1] + box->y2, 2213428d7b3dSmrg transform, op->base.src.scale, 2214428d7b3dSmrg &v[8], &v[9]); 2215428d7b3dSmrg 2216428d7b3dSmrg _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, 2217428d7b3dSmrg (int)op->base.src.offset[1] + box->y1, 2218428d7b3dSmrg transform, op->base.src.scale, 2219428d7b3dSmrg &v[13], &v[14]); 2220428d7b3dSmrg} 2221428d7b3dSmrg 2222428d7b3dSmrg#if defined(sse2) && !defined(__x86_64__) 2223428d7b3dSmrgsse2 fastcall static void 
2224428d7b3dSmrggen2_emit_composite_spans_primitive_constant__sse2(struct sna *sna, 2225428d7b3dSmrg const struct sna_composite_spans_op *op, 2226428d7b3dSmrg const BoxRec *box, 2227428d7b3dSmrg float opacity) 2228428d7b3dSmrg{ /* sse2-qualified twin of the constant emitter: three (x, y, alpha) vertices, 9 dwords. */ 2229428d7b3dSmrg float *v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2230428d7b3dSmrg uint32_t alpha = (uint32_t)(uint8_t)(255 * opacity) << 24; /* widen before shifting: a promoted signed int would overflow for alpha >= 128 */ 2231428d7b3dSmrg sna->kgem.nbatch += 9; 2232428d7b3dSmrg 2233428d7b3dSmrg v[0] = op->base.dst.x + box->x2; 2234428d7b3dSmrg v[1] = op->base.dst.y + box->y2; 2235428d7b3dSmrg *((uint32_t *)v + 2) = alpha; 2236428d7b3dSmrg 2237428d7b3dSmrg v[3] = op->base.dst.x + box->x1; 2238428d7b3dSmrg v[4] = v[1]; 2239428d7b3dSmrg *((uint32_t *)v + 5) = alpha; 2240428d7b3dSmrg 2241428d7b3dSmrg v[6] = v[3]; 2242428d7b3dSmrg v[7] = op->base.dst.y + box->y1; 2243428d7b3dSmrg *((uint32_t *)v + 8) = alpha; 2244428d7b3dSmrg} 2245428d7b3dSmrg 2246428d7b3dSmrgsse2 fastcall static void 2247428d7b3dSmrggen2_emit_composite_spans_primitive_linear__sse2(struct sna *sna, 2248428d7b3dSmrg const struct sna_composite_spans_op *op, 2249428d7b3dSmrg const BoxRec *box, 2250428d7b3dSmrg float opacity) 2251428d7b3dSmrg{ /* sse2-qualified twin of the linear emitter: destination coordinate, alpha dword, linear-gradient coordinate per vertex. */ 2252428d7b3dSmrg union { 2253428d7b3dSmrg float f; 2254428d7b3dSmrg uint32_t u; 2255428d7b3dSmrg } alpha; 2256428d7b3dSmrg 2257428d7b3dSmrg alpha.u = (uint32_t)(uint8_t)(255 * opacity) << 24; /* widen before shifting: a promoted signed int would overflow for alpha >= 128 */ 2258428d7b3dSmrg 2259428d7b3dSmrg gen2_emit_composite_dstcoord(sna, 2260428d7b3dSmrg op->base.dst.x + box->x2, 2261428d7b3dSmrg op->base.dst.y + box->y2); 2262428d7b3dSmrg VERTEX(alpha.f); 2263428d7b3dSmrg gen2_emit_composite_linear(sna, &op->base.src, box->x2, box->y2); 2264428d7b3dSmrg 2265428d7b3dSmrg gen2_emit_composite_dstcoord(sna, 2266428d7b3dSmrg op->base.dst.x + box->x1, 2267428d7b3dSmrg op->base.dst.y + box->y2); 2268428d7b3dSmrg VERTEX(alpha.f); 2269428d7b3dSmrg gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y2); 2270428d7b3dSmrg 2271428d7b3dSmrg gen2_emit_composite_dstcoord(sna, 2272428d7b3dSmrg op->base.dst.x + box->x1, 
2273428d7b3dSmrg op->base.dst.y + box->y1); 2274428d7b3dSmrg VERTEX(alpha.f); 2275428d7b3dSmrg gen2_emit_composite_linear(sna, &op->base.src, box->x1, box->y1); 2276428d7b3dSmrg} 2277428d7b3dSmrg 2278428d7b3dSmrgsse2 fastcall static void 2279428d7b3dSmrggen2_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna, 2280428d7b3dSmrg const struct sna_composite_spans_op *op, 2281428d7b3dSmrg const BoxRec *box, 2282428d7b3dSmrg float opacity) 2283428d7b3dSmrg{ /* sse2-qualified twin of the identity-source emitter: three (x, y, alpha, s, t) vertices, 15 dwords. */ 2284428d7b3dSmrg float *v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2285428d7b3dSmrg uint32_t alpha = (uint32_t)(uint8_t)(255 * opacity) << 24; /* widen before shifting: a promoted signed int would overflow for alpha >= 128 */ 2286428d7b3dSmrg sna->kgem.nbatch += 15; 2287428d7b3dSmrg 2288428d7b3dSmrg v[0] = op->base.dst.x + box->x2; 2289428d7b3dSmrg v[1] = op->base.dst.y + box->y2; 2290428d7b3dSmrg *((uint32_t *)v + 2) = alpha; 2291428d7b3dSmrg v[3] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0]; 2292428d7b3dSmrg v[4] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1]; 2293428d7b3dSmrg 2294428d7b3dSmrg v[5] = op->base.dst.x + box->x1; 2295428d7b3dSmrg v[6] = v[1]; 2296428d7b3dSmrg *((uint32_t *)v + 7) = alpha; 2297428d7b3dSmrg v[8] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0]; 2298428d7b3dSmrg v[9] = v[4]; 2299428d7b3dSmrg 2300428d7b3dSmrg v[10] = v[5]; 2301428d7b3dSmrg v[11] = op->base.dst.y + box->y1; 2302428d7b3dSmrg *((uint32_t *)v + 12) = alpha; 2303428d7b3dSmrg v[13] = v[8]; 2304428d7b3dSmrg v[14] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1]; 2305428d7b3dSmrg} 2306428d7b3dSmrg 2307428d7b3dSmrgsse2 fastcall static void 2308428d7b3dSmrggen2_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna, 2309428d7b3dSmrg const struct sna_composite_spans_op *op, 2310428d7b3dSmrg const BoxRec *box, 2311428d7b3dSmrg float opacity) 2312428d7b3dSmrg{ /* sse2-qualified twin of the affine-source emitter: three (x, y, alpha, s, t) vertices, 15 dwords. */ 2313428d7b3dSmrg PictTransform *transform = op->base.src.transform; 2314428d7b3dSmrg uint32_t alpha = (uint32_t)(uint8_t)(255 * opacity) << 24; /* widen before shifting: a promoted signed int would overflow for alpha >= 128 */ 2315428d7b3dSmrg float *v; 2316428d7b3dSmrg 
2317428d7b3dSmrg v = (float *)sna->kgem.batch + sna->kgem.nbatch; 2318428d7b3dSmrg sna->kgem.nbatch += 15; 2319428d7b3dSmrg 2320428d7b3dSmrg v[0] = op->base.dst.x + box->x2; 2321428d7b3dSmrg v[6] = v[1] = op->base.dst.y + box->y2; 2322428d7b3dSmrg v[10] = v[5] = op->base.dst.x + box->x1; 2323428d7b3dSmrg v[11] = op->base.dst.y + box->y1; 2324428d7b3dSmrg *((uint32_t *)v + 2) = alpha; 2325428d7b3dSmrg *((uint32_t *)v + 7) = alpha; 2326428d7b3dSmrg *((uint32_t *)v + 12) = alpha; 2327428d7b3dSmrg 2328428d7b3dSmrg _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2, 2329428d7b3dSmrg (int)op->base.src.offset[1] + box->y2, 2330428d7b3dSmrg transform, op->base.src.scale, 2331428d7b3dSmrg &v[3], &v[4]); 2332428d7b3dSmrg 2333428d7b3dSmrg _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, 2334428d7b3dSmrg (int)op->base.src.offset[1] + box->y2, 2335428d7b3dSmrg transform, op->base.src.scale, 2336428d7b3dSmrg &v[8], &v[9]); 2337428d7b3dSmrg 2338428d7b3dSmrg _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, 2339428d7b3dSmrg (int)op->base.src.offset[1] + box->y1, 2340428d7b3dSmrg transform, op->base.src.scale, 2341428d7b3dSmrg &v[13], &v[14]); 2342428d7b3dSmrg} 2343428d7b3dSmrg#endif 2344428d7b3dSmrg 2345428d7b3dSmrgstatic void 2346428d7b3dSmrggen2_emit_composite_spans_vertex(struct sna *sna, 2347428d7b3dSmrg const struct sna_composite_spans_op *op, 2348428d7b3dSmrg int16_t x, int16_t y, 2349428d7b3dSmrg float opacity) 2350428d7b3dSmrg{ /* Emits one vertex: destination coordinate, alpha dword, then the source's linear-gradient or texture coordinate. */ 2351428d7b3dSmrg gen2_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y); 2352428d7b3dSmrg BATCH((uint32_t)(uint8_t)(opacity * 255) << 24); /* widen before shifting: a promoted signed int would overflow for alpha >= 128 */ 2353428d7b3dSmrg assert(!op->base.src.is_solid); 2354428d7b3dSmrg if (op->base.src.is_linear) 2355428d7b3dSmrg gen2_emit_composite_linear(sna, &op->base.src, x, y); 2356428d7b3dSmrg else 2357428d7b3dSmrg gen2_emit_composite_texcoord(sna, &op->base.src, x, y); 2358428d7b3dSmrg} 2359428d7b3dSmrg 2360428d7b3dSmrgfastcall static void 
2361428d7b3dSmrggen2_emit_composite_spans_primitive(struct sna *sna, 2362428d7b3dSmrg const struct sna_composite_spans_op *op, 2363428d7b3dSmrg const BoxRec *box, 2364428d7b3dSmrg float opacity) 2365428d7b3dSmrg{ 2366428d7b3dSmrg gen2_emit_composite_spans_vertex(sna, op, box->x2, box->y2, opacity); 2367428d7b3dSmrg gen2_emit_composite_spans_vertex(sna, op, box->x1, box->y2, opacity); 2368428d7b3dSmrg gen2_emit_composite_spans_vertex(sna, op, box->x1, box->y1, opacity); 2369428d7b3dSmrg} 2370428d7b3dSmrg 2371428d7b3dSmrgstatic void 2372428d7b3dSmrggen2_emit_spans_pipeline(struct sna *sna, 2373428d7b3dSmrg const struct sna_composite_spans_op *op) 2374428d7b3dSmrg{ 2375428d7b3dSmrg uint32_t cblend, ablend; 2376428d7b3dSmrg uint32_t unwind; 2377428d7b3dSmrg 2378428d7b3dSmrg cblend = 2379428d7b3dSmrg TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_MODULATE | 2380428d7b3dSmrg TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA | 2381428d7b3dSmrg TB0C_OUTPUT_WRITE_CURRENT; 2382428d7b3dSmrg ablend = 2383428d7b3dSmrg TB0A_RESULT_SCALE_1X | TB0A_OP_MODULATE | 2384428d7b3dSmrg TB0A_ARG1_SEL_DIFFUSE | 2385428d7b3dSmrg TB0A_OUTPUT_WRITE_CURRENT; 2386428d7b3dSmrg 2387428d7b3dSmrg if (op->base.src.is_solid) { 2388428d7b3dSmrg ablend |= TB0A_ARG2_SEL_SPECULAR; 2389428d7b3dSmrg cblend |= TB0C_ARG2_SEL_SPECULAR; 2390428d7b3dSmrg if (op->base.dst.format == PICT_a8) 2391428d7b3dSmrg cblend |= TB0C_ARG2_REPLICATE_ALPHA; 2392428d7b3dSmrg } else if (op->base.dst.format == PICT_a8) { 2393428d7b3dSmrg ablend |= TB0A_ARG2_SEL_TEXEL0; 2394428d7b3dSmrg cblend |= TB0C_ARG2_SEL_TEXEL0 | TB0C_ARG2_REPLICATE_ALPHA; 2395428d7b3dSmrg } else { 2396428d7b3dSmrg if (PICT_FORMAT_RGB(op->base.src.pict_format) != 0) 2397428d7b3dSmrg cblend |= TB0C_ARG2_SEL_TEXEL0; 2398428d7b3dSmrg else 2399428d7b3dSmrg cblend |= TB0C_ARG2_SEL_ONE | TB0C_ARG2_INVERT; 2400428d7b3dSmrg 2401428d7b3dSmrg if (op->base.src.is_opaque) 2402428d7b3dSmrg ablend |= TB0A_ARG2_SEL_ONE; 2403428d7b3dSmrg else 2404428d7b3dSmrg ablend |= 
TB0A_ARG2_SEL_TEXEL0; 2405428d7b3dSmrg } 2406428d7b3dSmrg 2407428d7b3dSmrg unwind = sna->kgem.nbatch; 2408428d7b3dSmrg BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 2409428d7b3dSmrg LOAD_TEXTURE_BLEND_STAGE(0) | 1); 2410428d7b3dSmrg BATCH(cblend); 2411428d7b3dSmrg BATCH(ablend); 2412428d7b3dSmrg if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1, 2413428d7b3dSmrg sna->kgem.batch + unwind + 1, 2414428d7b3dSmrg 2 * sizeof(uint32_t)) == 0) 2415428d7b3dSmrg sna->kgem.nbatch = unwind; 2416428d7b3dSmrg else 2417428d7b3dSmrg sna->render_state.gen2.ls2 = unwind; 2418428d7b3dSmrg} 2419428d7b3dSmrg 2420428d7b3dSmrgstatic void gen2_emit_composite_spans_state(struct sna *sna, 2421428d7b3dSmrg const struct sna_composite_spans_op *op) 2422428d7b3dSmrg{ 2423428d7b3dSmrg uint32_t unwind; 2424428d7b3dSmrg 2425428d7b3dSmrg gen2_get_batch(sna, &op->base); 2426428d7b3dSmrg gen2_emit_target(sna, &op->base); 2427428d7b3dSmrg 2428428d7b3dSmrg unwind = sna->kgem.nbatch; 2429428d7b3dSmrg BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 2430428d7b3dSmrg I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); 2431428d7b3dSmrg BATCH(!op->base.src.is_solid << 12); 2432428d7b3dSmrg BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY | S3_DIFFUSE_PRESENT); 2433428d7b3dSmrg BATCH(gen2_get_blend_cntl(op->base.op, false, op->base.dst.format)); 2434428d7b3dSmrg if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 2435428d7b3dSmrg sna->kgem.batch + unwind + 1, 2436428d7b3dSmrg 3 * sizeof(uint32_t)) == 0) 2437428d7b3dSmrg sna->kgem.nbatch = unwind; 2438428d7b3dSmrg else 2439428d7b3dSmrg sna->render_state.gen2.ls1 = unwind; 2440428d7b3dSmrg 2441428d7b3dSmrg gen2_disable_logic_op(sna); 2442428d7b3dSmrg gen2_emit_spans_pipeline(sna, op); 2443428d7b3dSmrg 2444428d7b3dSmrg if (op->base.src.is_solid) { 2445428d7b3dSmrg if (op->base.src.u.gen2.pixel != sna->render_state.gen2.specular) { 2446428d7b3dSmrg BATCH(_3DSTATE_DFLT_SPECULAR_CMD); 2447428d7b3dSmrg BATCH(op->base.src.u.gen2.pixel); 2448428d7b3dSmrg 
sna->render_state.gen2.specular = op->base.src.u.gen2.pixel; 2449428d7b3dSmrg } 2450428d7b3dSmrg } else { 2451428d7b3dSmrg uint32_t v =_3DSTATE_VERTEX_FORMAT_2_CMD | 2452428d7b3dSmrg (op->base.src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_3D); 2453428d7b3dSmrg if (sna->render_state.gen2.vft != v) { 2454428d7b3dSmrg BATCH(v); 2455428d7b3dSmrg sna->render_state.gen2.vft = v; 2456428d7b3dSmrg } 2457428d7b3dSmrg gen2_emit_texture(sna, &op->base.src, 0); 2458428d7b3dSmrg } 2459428d7b3dSmrg} 2460428d7b3dSmrg 2461428d7b3dSmrgfastcall static void 2462428d7b3dSmrggen2_render_composite_spans_box(struct sna *sna, 2463428d7b3dSmrg const struct sna_composite_spans_op *op, 2464428d7b3dSmrg const BoxRec *box, float opacity) 2465428d7b3dSmrg{ 2466428d7b3dSmrg DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2467428d7b3dSmrg __FUNCTION__, 2468428d7b3dSmrg op->base.src.offset[0], op->base.src.offset[1], 2469428d7b3dSmrg opacity, 2470428d7b3dSmrg op->base.dst.x, op->base.dst.y, 2471428d7b3dSmrg box->x1, box->y1, 2472428d7b3dSmrg box->x2 - box->x1, 2473428d7b3dSmrg box->y2 - box->y1)); 2474428d7b3dSmrg 2475428d7b3dSmrg if (gen2_get_rectangles(sna, &op->base, 1) == 0) { 2476428d7b3dSmrg gen2_emit_composite_spans_state(sna, op); 2477428d7b3dSmrg gen2_get_rectangles(sna, &op->base, 1); 2478428d7b3dSmrg } 2479428d7b3dSmrg 2480428d7b3dSmrg op->prim_emit(sna, op, box, opacity); 2481428d7b3dSmrg} 2482428d7b3dSmrg 2483428d7b3dSmrgstatic void 2484428d7b3dSmrggen2_render_composite_spans_boxes(struct sna *sna, 2485428d7b3dSmrg const struct sna_composite_spans_op *op, 2486428d7b3dSmrg const BoxRec *box, int nbox, 2487428d7b3dSmrg float opacity) 2488428d7b3dSmrg{ 2489428d7b3dSmrg DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 2490428d7b3dSmrg __FUNCTION__, nbox, 2491428d7b3dSmrg op->base.src.offset[0], op->base.src.offset[1], 2492428d7b3dSmrg opacity, 2493428d7b3dSmrg op->base.dst.x, op->base.dst.y)); 2494428d7b3dSmrg 2495428d7b3dSmrg do { 
2496428d7b3dSmrg int nbox_this_time; 2497428d7b3dSmrg 2498428d7b3dSmrg nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); 2499428d7b3dSmrg if (nbox_this_time == 0) { 2500428d7b3dSmrg gen2_emit_composite_spans_state(sna, op); 2501428d7b3dSmrg nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); 2502428d7b3dSmrg } 2503428d7b3dSmrg nbox -= nbox_this_time; 2504428d7b3dSmrg 2505428d7b3dSmrg do { 2506428d7b3dSmrg DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2507428d7b3dSmrg box->x1, box->y1, 2508428d7b3dSmrg box->x2 - box->x1, 2509428d7b3dSmrg box->y2 - box->y1)); 2510428d7b3dSmrg 2511428d7b3dSmrg op->prim_emit(sna, op, box++, opacity); 2512428d7b3dSmrg } while (--nbox_this_time); 2513428d7b3dSmrg } while (nbox); 2514428d7b3dSmrg} 2515428d7b3dSmrg 2516428d7b3dSmrgfastcall static void 2517428d7b3dSmrggen2_render_composite_spans_done(struct sna *sna, 2518428d7b3dSmrg const struct sna_composite_spans_op *op) 2519428d7b3dSmrg{ 2520428d7b3dSmrg DBG(("%s()\n", __FUNCTION__)); 2521428d7b3dSmrg 2522428d7b3dSmrg gen2_vertex_flush(sna, &op->base); 2523428d7b3dSmrg 2524428d7b3dSmrg if (op->base.src.bo) 2525428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2526428d7b3dSmrg 2527428d7b3dSmrg sna_render_composite_redirect_done(sna, &op->base); 2528428d7b3dSmrg} 2529428d7b3dSmrg 2530428d7b3dSmrgstatic bool 2531428d7b3dSmrggen2_check_composite_spans(struct sna *sna, 2532428d7b3dSmrg uint8_t op, PicturePtr src, PicturePtr dst, 2533428d7b3dSmrg int16_t width, int16_t height, unsigned flags) 2534428d7b3dSmrg{ 2535428d7b3dSmrg if (op >= ARRAY_SIZE(gen2_blend_op)) 2536428d7b3dSmrg return false; 2537428d7b3dSmrg 2538428d7b3dSmrg if (gen2_composite_fallback(sna, src, NULL, dst)) 2539428d7b3dSmrg return false; 2540428d7b3dSmrg 2541428d7b3dSmrg if (need_tiling(sna, width, height)) { 2542428d7b3dSmrg if (!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 2543428d7b3dSmrg DBG(("%s: fallback, tiled operation not on GPU\n", 2544428d7b3dSmrg __FUNCTION__)); 2545428d7b3dSmrg 
/*
 * Prepare a span compositing operation in *tmp.
 *
 * Sets up the render target and the source channel, selects the primitive
 * emitter matching the source kind (solid, linear gradient, or texture with
 * identity / affine / other transform), fills in the box/boxes/done callbacks
 * and emits the initial state.
 *
 * Operations for which need_tiling() is true are delegated to
 * sna_tiling_composite_spans().
 *
 * Returns true on success; false if the target or source could not be set up
 * or the buffers do not fit even after submitting the current batch.
 */
static bool
gen2_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x, int16_t src_y,
			    int16_t dst_x, int16_t dst_y,
			    int16_t width, int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
	     src_x, src_y, dst_x, dst_y, width, height));

	/* Callers are expected to have passed the check already. */
	assert(gen2_check_composite_spans(sna, op, src, dst, width, height, flags));
	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	sna_render_composite_redirect_init(&tmp->base);
	if (!gen2_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height,
				       true)) {
		DBG(("%s: unable to set render target\n",
		     __FUNCTION__));
		return false;
	}

	/*
	 * -1 aborts (undoing the target setup), 0 substitutes a solid source
	 * initialised with the value 0, 1 continues with the channel as
	 * prepared by gen2_composite_picture().
	 */
	switch (gen2_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		gen2_composite_solid_init(sna, &tmp->base.src, 0);
		/* fallthrough */
	case 1:
		break;
	}
	assert(tmp->base.src.bo || tmp->base.src.is_solid);

	/*
	 * Start with the generic emitter and the base vertex size, then
	 * specialise per source kind.  The __sse2 variants are only compiled
	 * in when `sse2` is defined on non-x86_64 builds, and are only chosen
	 * when the CPU reports SSE2.
	 */
	tmp->prim_emit = gen2_emit_composite_spans_primitive;
	tmp->base.floats_per_vertex = 3;
	if (tmp->base.src.is_solid) {
#if defined(sse2) && !defined(__x86_64__)
		if (sna->cpu_features & SSE2) {
			tmp->prim_emit = gen2_emit_composite_spans_primitive_constant__sse2;
		} else
#endif
		{
			tmp->prim_emit = gen2_emit_composite_spans_primitive_constant;
		}
	} else if (tmp->base.src.is_linear) {
		/* Linear gradients add two floats per vertex. */
		tmp->base.floats_per_vertex += 2;
#if defined(sse2) && !defined(__x86_64__)
		if (sna->cpu_features & SSE2) {
			tmp->prim_emit = gen2_emit_composite_spans_primitive_linear__sse2;
		} else
#endif
		{
			tmp->prim_emit = gen2_emit_composite_spans_primitive_linear;
		}
	} else {
		assert(tmp->base.src.bo);
		/* Texture coordinates: two floats if affine, three otherwise. */
		tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
		if (tmp->base.src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_spans_primitive_identity_source;
			}
		} else if (tmp->base.src.is_affine) {
			/* Fold the transform's [2][2] element into the scale. */
			tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
			tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen2_emit_composite_spans_primitive_affine_source;
			}
		}
		/* A non-affine transform keeps the generic emitter set above. */
	}
	tmp->base.mask.bo = NULL;
	/* Each rectangle is emitted as three vertices. */
	tmp->base.floats_per_rect = 3*tmp->base.floats_per_vertex;

	tmp->box = gen2_render_composite_spans_box;
	tmp->boxes = gen2_render_composite_spans_boxes;
	tmp->done = gen2_render_composite_spans_done;

	/* If the bos do not fit, submit the current batch and check once more. */
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
	}

	gen2_emit_composite_spans_state(sna, tmp);
	return true;

	/* Error unwinding: release the source bo, then a redirected target. */
cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
BATCH(gen2_get_blend_cntl(op->op, false, op->dst.format)); 2717428d7b3dSmrg if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 2718428d7b3dSmrg sna->kgem.batch + ls1 + 1, 2719428d7b3dSmrg 3 * sizeof(uint32_t)) == 0) 2720428d7b3dSmrg sna->kgem.nbatch = ls1; 2721428d7b3dSmrg else 2722428d7b3dSmrg sna->render_state.gen2.ls1 = ls1; 2723428d7b3dSmrg 2724428d7b3dSmrg gen2_emit_fill_pipeline(sna, op); 2725428d7b3dSmrg 2726428d7b3dSmrg if (pixel != sna->render_state.gen2.diffuse) { 2727428d7b3dSmrg BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); 2728428d7b3dSmrg BATCH(pixel); 2729428d7b3dSmrg sna->render_state.gen2.diffuse = pixel; 2730428d7b3dSmrg } 2731428d7b3dSmrg} 2732428d7b3dSmrg 2733428d7b3dSmrgstatic bool 2734428d7b3dSmrggen2_render_fill_boxes_try_blt(struct sna *sna, 2735428d7b3dSmrg CARD8 op, PictFormat format, 2736428d7b3dSmrg const xRenderColor *color, 2737428d7b3dSmrg const DrawableRec *dst, struct kgem_bo *dst_bo, 2738428d7b3dSmrg const BoxRec *box, int n) 2739428d7b3dSmrg{ 2740428d7b3dSmrg uint8_t alu; 2741428d7b3dSmrg uint32_t pixel; 2742428d7b3dSmrg 2743428d7b3dSmrg if (op > PictOpSrc) 2744428d7b3dSmrg return false; 2745428d7b3dSmrg 2746428d7b3dSmrg if (op == PictOpClear) { 2747428d7b3dSmrg alu = GXclear; 2748428d7b3dSmrg pixel = 0; 2749428d7b3dSmrg } else if (!sna_get_pixel_from_rgba(&pixel, 2750428d7b3dSmrg color->red, 2751428d7b3dSmrg color->green, 2752428d7b3dSmrg color->blue, 2753428d7b3dSmrg color->alpha, 2754428d7b3dSmrg format)) 2755428d7b3dSmrg return false; 2756428d7b3dSmrg else 2757428d7b3dSmrg alu = GXcopy; 2758428d7b3dSmrg 2759428d7b3dSmrg return sna_blt_fill_boxes(sna, alu, 2760428d7b3dSmrg dst_bo, dst->bitsPerPixel, 2761428d7b3dSmrg pixel, box, n); 2762428d7b3dSmrg} 2763428d7b3dSmrg 2764428d7b3dSmrgstatic bool 2765428d7b3dSmrggen2_render_fill_boxes(struct sna *sna, 2766428d7b3dSmrg CARD8 op, 2767428d7b3dSmrg PictFormat format, 2768428d7b3dSmrg const xRenderColor *color, 2769428d7b3dSmrg const DrawableRec *dst, struct kgem_bo *dst_bo, 
2770428d7b3dSmrg const BoxRec *box, int n) 2771428d7b3dSmrg{ 2772428d7b3dSmrg struct sna_composite_op tmp; 2773428d7b3dSmrg uint32_t pixel; 2774428d7b3dSmrg 2775428d7b3dSmrg if (op >= ARRAY_SIZE(gen2_blend_op)) { 2776428d7b3dSmrg DBG(("%s: fallback due to unhandled blend op: %d\n", 2777428d7b3dSmrg __FUNCTION__, op)); 2778428d7b3dSmrg return false; 2779428d7b3dSmrg } 2780428d7b3dSmrg 2781428d7b3dSmrg#if NO_FILL_BOXES 2782428d7b3dSmrg return gen2_render_fill_boxes_try_blt(sna, op, format, color, 2783428d7b3dSmrg dst, dst_bo, 2784428d7b3dSmrg box, n); 2785428d7b3dSmrg#endif 2786428d7b3dSmrg if (gen2_render_fill_boxes_try_blt(sna, op, format, color, 2787428d7b3dSmrg dst, dst_bo, 2788428d7b3dSmrg box, n)) 2789428d7b3dSmrg return true; 2790428d7b3dSmrg 2791428d7b3dSmrg 2792428d7b3dSmrg DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n", 2793428d7b3dSmrg __FUNCTION__, op, (int)format, 2794428d7b3dSmrg color->red, color->green, color->blue, color->alpha)); 2795428d7b3dSmrg 2796428d7b3dSmrg if (too_large(dst->width, dst->height) || 2797428d7b3dSmrg dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH || 2798428d7b3dSmrg !gen2_check_dst_format(format)) { 2799428d7b3dSmrg DBG(("%s: try blt, too large or incompatible destination\n", 2800428d7b3dSmrg __FUNCTION__)); 2801428d7b3dSmrg if (!gen2_check_dst_format(format)) 2802428d7b3dSmrg return false; 2803428d7b3dSmrg 2804428d7b3dSmrg assert(dst_bo->pitch >= 8); 2805428d7b3dSmrg return sna_tiling_fill_boxes(sna, op, format, color, 2806428d7b3dSmrg dst, dst_bo, box, n); 2807428d7b3dSmrg } 2808428d7b3dSmrg 2809428d7b3dSmrg if (op == PictOpClear) 2810428d7b3dSmrg pixel = 0; 2811428d7b3dSmrg else if (!sna_get_pixel_from_rgba(&pixel, 2812428d7b3dSmrg color->red, 2813428d7b3dSmrg color->green, 2814428d7b3dSmrg color->blue, 2815428d7b3dSmrg color->alpha, 2816428d7b3dSmrg PICT_a8r8g8b8)) 2817428d7b3dSmrg return false; 2818428d7b3dSmrg 2819428d7b3dSmrg DBG(("%s: using shader for op=%d, format=%x, pixel=%x\n", 2820428d7b3dSmrg 
__FUNCTION__, op, (int)format, pixel)); 2821428d7b3dSmrg 2822428d7b3dSmrg memset(&tmp, 0, sizeof(tmp)); 2823428d7b3dSmrg tmp.op = op; 2824428d7b3dSmrg tmp.dst.pixmap = (PixmapPtr)dst; 2825428d7b3dSmrg tmp.dst.width = dst->width; 2826428d7b3dSmrg tmp.dst.height = dst->height; 2827428d7b3dSmrg tmp.dst.format = format; 2828428d7b3dSmrg tmp.dst.bo = dst_bo; 2829428d7b3dSmrg tmp.floats_per_vertex = 2; 2830428d7b3dSmrg tmp.floats_per_rect = 6; 2831428d7b3dSmrg 2832428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 2833428d7b3dSmrg kgem_submit(&sna->kgem); 2834428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) 2835428d7b3dSmrg return false; 2836428d7b3dSmrg } 2837428d7b3dSmrg 2838428d7b3dSmrg gen2_emit_fill_composite_state(sna, &tmp, pixel); 2839428d7b3dSmrg 2840428d7b3dSmrg do { 2841428d7b3dSmrg int n_this_time = gen2_get_rectangles(sna, &tmp, n); 2842428d7b3dSmrg if (n_this_time == 0) { 2843428d7b3dSmrg gen2_emit_fill_composite_state(sna, &tmp, pixel); 2844428d7b3dSmrg n_this_time = gen2_get_rectangles(sna, &tmp, n); 2845428d7b3dSmrg } 2846428d7b3dSmrg n -= n_this_time; 2847428d7b3dSmrg 2848428d7b3dSmrg do { 2849428d7b3dSmrg DBG((" (%d, %d), (%d, %d): %x\n", 2850428d7b3dSmrg box->x1, box->y1, box->x2, box->y2, pixel)); 2851428d7b3dSmrg VERTEX(box->x2); 2852428d7b3dSmrg VERTEX(box->y2); 2853428d7b3dSmrg VERTEX(box->x1); 2854428d7b3dSmrg VERTEX(box->y2); 2855428d7b3dSmrg VERTEX(box->x1); 2856428d7b3dSmrg VERTEX(box->y1); 2857428d7b3dSmrg box++; 2858428d7b3dSmrg } while (--n_this_time); 2859428d7b3dSmrg } while (n); 2860428d7b3dSmrg 2861428d7b3dSmrg gen2_vertex_flush(sna, &tmp); 2862428d7b3dSmrg return true; 2863428d7b3dSmrg} 2864428d7b3dSmrg 2865428d7b3dSmrgstatic void gen2_emit_fill_state(struct sna *sna, 2866428d7b3dSmrg const struct sna_composite_op *op) 2867428d7b3dSmrg{ 2868428d7b3dSmrg uint32_t ls1; 2869428d7b3dSmrg 2870428d7b3dSmrg gen2_get_batch(sna, op); 2871428d7b3dSmrg gen2_emit_target(sna, op); 2872428d7b3dSmrg 2873428d7b3dSmrg ls1 = 
sna->kgem.nbatch; 2874428d7b3dSmrg BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 2875428d7b3dSmrg I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); 2876428d7b3dSmrg BATCH(0); 2877428d7b3dSmrg BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); 2878428d7b3dSmrg BATCH(S8_ENABLE_COLOR_BUFFER_WRITE); 2879428d7b3dSmrg if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 2880428d7b3dSmrg sna->kgem.batch + ls1 + 1, 2881428d7b3dSmrg 3 * sizeof(uint32_t)) == 0) 2882428d7b3dSmrg sna->kgem.nbatch = ls1; 2883428d7b3dSmrg else 2884428d7b3dSmrg sna->render_state.gen2.ls1 = ls1; 2885428d7b3dSmrg 2886428d7b3dSmrg gen2_enable_logic_op(sna, op->op); 2887428d7b3dSmrg gen2_emit_fill_pipeline(sna, op); 2888428d7b3dSmrg 2889428d7b3dSmrg if (op->src.u.gen2.pixel != sna->render_state.gen2.diffuse) { 2890428d7b3dSmrg BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); 2891428d7b3dSmrg BATCH(op->src.u.gen2.pixel); 2892428d7b3dSmrg sna->render_state.gen2.diffuse = op->src.u.gen2.pixel; 2893428d7b3dSmrg } 2894428d7b3dSmrg} 2895428d7b3dSmrg 2896428d7b3dSmrgstatic void 2897428d7b3dSmrggen2_render_fill_op_blt(struct sna *sna, 2898428d7b3dSmrg const struct sna_fill_op *op, 2899428d7b3dSmrg int16_t x, int16_t y, int16_t w, int16_t h) 2900428d7b3dSmrg{ 2901428d7b3dSmrg if (!gen2_get_rectangles(sna, &op->base, 1)) { 2902428d7b3dSmrg gen2_emit_fill_state(sna, &op->base); 2903428d7b3dSmrg gen2_get_rectangles(sna, &op->base, 1); 2904428d7b3dSmrg } 2905428d7b3dSmrg 2906428d7b3dSmrg VERTEX(x+w); 2907428d7b3dSmrg VERTEX(y+h); 2908428d7b3dSmrg VERTEX(x); 2909428d7b3dSmrg VERTEX(y+h); 2910428d7b3dSmrg VERTEX(x); 2911428d7b3dSmrg VERTEX(y); 2912428d7b3dSmrg} 2913428d7b3dSmrg 2914428d7b3dSmrgfastcall static void 2915428d7b3dSmrggen2_render_fill_op_box(struct sna *sna, 2916428d7b3dSmrg const struct sna_fill_op *op, 2917428d7b3dSmrg const BoxRec *box) 2918428d7b3dSmrg{ 2919428d7b3dSmrg if (!gen2_get_rectangles(sna, &op->base, 1)) { 2920428d7b3dSmrg gen2_emit_fill_state(sna, &op->base); 2921428d7b3dSmrg gen2_get_rectangles(sna, 
&op->base, 1); 2922428d7b3dSmrg } 2923428d7b3dSmrg 2924428d7b3dSmrg VERTEX(box->x2); 2925428d7b3dSmrg VERTEX(box->y2); 2926428d7b3dSmrg VERTEX(box->x1); 2927428d7b3dSmrg VERTEX(box->y2); 2928428d7b3dSmrg VERTEX(box->x1); 2929428d7b3dSmrg VERTEX(box->y1); 2930428d7b3dSmrg} 2931428d7b3dSmrg 2932428d7b3dSmrgfastcall static void 2933428d7b3dSmrggen2_render_fill_op_boxes(struct sna *sna, 2934428d7b3dSmrg const struct sna_fill_op *op, 2935428d7b3dSmrg const BoxRec *box, 2936428d7b3dSmrg int nbox) 2937428d7b3dSmrg{ 2938428d7b3dSmrg DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__, 2939428d7b3dSmrg box->x1, box->y1, box->x2, box->y2, nbox)); 2940428d7b3dSmrg 2941428d7b3dSmrg do { 2942428d7b3dSmrg int nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); 2943428d7b3dSmrg if (nbox_this_time == 0) { 2944428d7b3dSmrg gen2_emit_fill_state(sna, &op->base); 2945428d7b3dSmrg nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox); 2946428d7b3dSmrg } 2947428d7b3dSmrg nbox -= nbox_this_time; 2948428d7b3dSmrg 2949428d7b3dSmrg do { 2950428d7b3dSmrg VERTEX(box->x2); 2951428d7b3dSmrg VERTEX(box->y2); 2952428d7b3dSmrg VERTEX(box->x1); 2953428d7b3dSmrg VERTEX(box->y2); 2954428d7b3dSmrg VERTEX(box->x1); 2955428d7b3dSmrg VERTEX(box->y1); 2956428d7b3dSmrg box++; 2957428d7b3dSmrg } while (--nbox_this_time); 2958428d7b3dSmrg } while (nbox); 2959428d7b3dSmrg} 2960428d7b3dSmrg 2961428d7b3dSmrgstatic void 2962428d7b3dSmrggen2_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) 2963428d7b3dSmrg{ 2964428d7b3dSmrg gen2_vertex_flush(sna, &op->base); 2965428d7b3dSmrg} 2966428d7b3dSmrg 2967428d7b3dSmrgstatic bool 2968428d7b3dSmrggen2_render_fill(struct sna *sna, uint8_t alu, 2969428d7b3dSmrg PixmapPtr dst, struct kgem_bo *dst_bo, 2970428d7b3dSmrg uint32_t color, unsigned flags, 2971428d7b3dSmrg struct sna_fill_op *tmp) 2972428d7b3dSmrg{ 2973428d7b3dSmrg#if NO_FILL 2974428d7b3dSmrg return sna_blt_fill(sna, alu, 2975428d7b3dSmrg dst_bo, dst->drawable.bitsPerPixel, 
2976428d7b3dSmrg color, 2977428d7b3dSmrg tmp); 2978428d7b3dSmrg#endif 2979428d7b3dSmrg 2980428d7b3dSmrg /* Prefer to use the BLT if already engaged */ 2981428d7b3dSmrg if (sna_blt_fill(sna, alu, 2982428d7b3dSmrg dst_bo, dst->drawable.bitsPerPixel, 2983428d7b3dSmrg color, 2984428d7b3dSmrg tmp)) 2985428d7b3dSmrg return true; 2986428d7b3dSmrg 2987428d7b3dSmrg /* Must use the BLT if we can't RENDER... */ 2988428d7b3dSmrg if (too_large(dst->drawable.width, dst->drawable.height) || 2989428d7b3dSmrg dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH) 2990428d7b3dSmrg return false; 2991428d7b3dSmrg 2992428d7b3dSmrg tmp->base.op = alu; 2993428d7b3dSmrg tmp->base.dst.pixmap = dst; 2994428d7b3dSmrg tmp->base.dst.width = dst->drawable.width; 2995428d7b3dSmrg tmp->base.dst.height = dst->drawable.height; 2996428d7b3dSmrg tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth); 2997428d7b3dSmrg tmp->base.dst.bo = dst_bo; 2998428d7b3dSmrg tmp->base.dst.x = tmp->base.dst.y = 0; 2999428d7b3dSmrg tmp->base.floats_per_vertex = 2; 3000428d7b3dSmrg tmp->base.floats_per_rect = 6; 3001428d7b3dSmrg 3002428d7b3dSmrg tmp->base.src.u.gen2.pixel = 3003428d7b3dSmrg sna_rgba_for_color(color, dst->drawable.depth); 3004428d7b3dSmrg 3005428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3006428d7b3dSmrg kgem_submit(&sna->kgem); 3007428d7b3dSmrg return sna_blt_fill(sna, alu, 3008428d7b3dSmrg dst_bo, dst->drawable.bitsPerPixel, 3009428d7b3dSmrg color, 3010428d7b3dSmrg tmp); 3011428d7b3dSmrg } 3012428d7b3dSmrg 3013428d7b3dSmrg tmp->blt = gen2_render_fill_op_blt; 3014428d7b3dSmrg tmp->box = gen2_render_fill_op_box; 3015428d7b3dSmrg tmp->boxes = gen2_render_fill_op_boxes; 3016428d7b3dSmrg tmp->points = NULL; 3017428d7b3dSmrg tmp->done = gen2_render_fill_op_done; 3018428d7b3dSmrg 3019428d7b3dSmrg gen2_emit_fill_state(sna, &tmp->base); 3020428d7b3dSmrg return true; 3021428d7b3dSmrg} 3022428d7b3dSmrg 3023428d7b3dSmrgstatic bool 3024428d7b3dSmrggen2_render_fill_one_try_blt(struct sna *sna, 
/*
 * Fill a single rectangle (x1, y1)-(x2, y2) of a pixmap with a colour.
 *
 * The BLT path is tried first.  The 3D path is used only if the pixmap size
 * and bo pitch are within the limits checked below; if the bo does not fit
 * into the current batch, the batch is submitted and the BLT is tried once
 * more before falling back to the 3D pipeline.
 *
 * Returns true if the fill was emitted, false otherwise.
 */
static bool
gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op fill;

#if NO_FILL_ONE
	return gen2_render_fill_one_try_blt(sna, dst, bo, color,
					    x1, y1, x2, y2, alu);
#endif

	/* Prefer to use the BLT if already engaged */
	if (gen2_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height))
		return false;
	if (bo->pitch < 8 || bo->pitch > MAX_3D_PITCH)
		return false;

	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		/* Make room, then give the BLT another chance first. */
		kgem_submit(&sna->kgem);

		if (gen2_render_fill_one_try_blt(sna, dst, bo, color,
						 x1, y1, x2, y2, alu))
			return true;

		if (!kgem_check_bo(&sna->kgem, bo, NULL))
			return false;
	}

	/* Describe the operation: the destination first ... */
	fill.dst.pixmap = dst;
	fill.dst.bo = bo;
	fill.dst.width = dst->drawable.width;
	fill.dst.height = dst->drawable.height;
	fill.dst.format = sna_format_for_depth(dst->drawable.depth);

	/* ... then the op, vertex layout and fill colour. */
	fill.op = alu;
	fill.floats_per_vertex = 2;
	fill.floats_per_rect = 6;
	fill.need_magic_ca_pass = false;
	fill.src.u.gen2.pixel =
		sna_rgba_for_color(color, dst->drawable.depth);

	gen2_emit_fill_state(sna, &fill);
	gen2_get_rectangles(sna, &fill, 1);
	DBG(("%s: (%d, %d), (%d, %d): %x\n", __FUNCTION__,
	     x1, y1, x2, y2, fill.src.u.gen2.pixel));

	/* Three corners: bottom-right, bottom-left, top-left. */
	VERTEX(x2);
	VERTEX(y2);

	VERTEX(x1);
	VERTEX(y2);

	VERTEX(x1);
	VERTEX(y1);

	gen2_vertex_flush(sna, &fill);

	return true;
}
= PictFilterNearest; 3112428d7b3dSmrg channel->repeat = RepeatNone; 3113428d7b3dSmrg channel->width = draw->width; 3114428d7b3dSmrg channel->height = draw->height; 3115428d7b3dSmrg channel->scale[0] = 1.f/draw->width; 3116428d7b3dSmrg channel->scale[1] = 1.f/draw->height; 3117428d7b3dSmrg channel->offset[0] = 0; 3118428d7b3dSmrg channel->offset[1] = 0; 3119428d7b3dSmrg channel->pict_format = sna_format_for_depth(draw->depth); 3120428d7b3dSmrg channel->bo = bo; 3121428d7b3dSmrg channel->is_affine = 1; 3122428d7b3dSmrg 3123428d7b3dSmrg DBG(("%s: source=%d, (%dx%d), format=%08x\n", 3124428d7b3dSmrg __FUNCTION__, bo->handle, 3125428d7b3dSmrg channel->width, channel->height, 3126428d7b3dSmrg channel->pict_format)); 3127428d7b3dSmrg} 3128428d7b3dSmrg 3129428d7b3dSmrgstatic void 3130428d7b3dSmrggen2_emit_copy_pipeline(struct sna *sna, const struct sna_composite_op *op) 3131428d7b3dSmrg{ 3132428d7b3dSmrg uint32_t blend, unwind; 3133428d7b3dSmrg 3134428d7b3dSmrg unwind = sna->kgem.nbatch; 3135428d7b3dSmrg BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 3136428d7b3dSmrg LOAD_TEXTURE_BLEND_STAGE(0) | 1); 3137428d7b3dSmrg 3138428d7b3dSmrg blend = TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_ARG1 | 3139428d7b3dSmrg TB0C_OUTPUT_WRITE_CURRENT; 3140428d7b3dSmrg if (op->dst.format == PICT_a8) 3141428d7b3dSmrg blend |= TB0C_ARG1_REPLICATE_ALPHA | TB0C_ARG1_SEL_TEXEL0; 3142428d7b3dSmrg else if (PICT_FORMAT_RGB(op->src.pict_format) != 0) 3143428d7b3dSmrg blend |= TB0C_ARG1_SEL_TEXEL0; 3144428d7b3dSmrg else 3145428d7b3dSmrg blend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT; /* 0.0 */ 3146428d7b3dSmrg BATCH(blend); 3147428d7b3dSmrg 3148428d7b3dSmrg blend = TB0A_RESULT_SCALE_1X | TB0A_OP_ARG1 | 3149428d7b3dSmrg TB0A_OUTPUT_WRITE_CURRENT; 3150428d7b3dSmrg if (PICT_FORMAT_A(op->src.pict_format) == 0) 3151428d7b3dSmrg blend |= TB0A_ARG1_SEL_ONE; 3152428d7b3dSmrg else 3153428d7b3dSmrg blend |= TB0A_ARG1_SEL_TEXEL0; 3154428d7b3dSmrg BATCH(blend); 3155428d7b3dSmrg 3156428d7b3dSmrg if 
(memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1, 3157428d7b3dSmrg sna->kgem.batch + unwind + 1, 3158428d7b3dSmrg 2 * sizeof(uint32_t)) == 0) 3159428d7b3dSmrg sna->kgem.nbatch = unwind; 3160428d7b3dSmrg else 3161428d7b3dSmrg sna->render_state.gen2.ls2 = unwind; 3162428d7b3dSmrg} 3163428d7b3dSmrg 3164428d7b3dSmrgstatic void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op *op) 3165428d7b3dSmrg{ 3166428d7b3dSmrg uint32_t ls1, v; 3167428d7b3dSmrg 3168428d7b3dSmrg gen2_get_batch(sna, op); 3169428d7b3dSmrg 3170428d7b3dSmrg if (kgem_bo_is_dirty(op->src.bo)) { 3171428d7b3dSmrg if (op->src.bo == op->dst.bo) 3172428d7b3dSmrg BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE); 3173428d7b3dSmrg else 3174428d7b3dSmrg BATCH(_3DSTATE_MODES_5_CMD | 3175428d7b3dSmrg PIPELINE_FLUSH_RENDER_CACHE | 3176428d7b3dSmrg PIPELINE_FLUSH_TEXTURE_CACHE); 3177428d7b3dSmrg kgem_clear_dirty(&sna->kgem); 3178428d7b3dSmrg } 3179428d7b3dSmrg gen2_emit_target(sna, op); 3180428d7b3dSmrg 3181428d7b3dSmrg ls1 = sna->kgem.nbatch; 3182428d7b3dSmrg BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 3183428d7b3dSmrg I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); 3184428d7b3dSmrg BATCH(1<<12); 3185428d7b3dSmrg BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); 3186428d7b3dSmrg BATCH(S8_ENABLE_COLOR_BUFFER_WRITE); 3187428d7b3dSmrg if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1, 3188428d7b3dSmrg sna->kgem.batch + ls1 + 1, 3189428d7b3dSmrg 3 * sizeof(uint32_t)) == 0) 3190428d7b3dSmrg sna->kgem.nbatch = ls1; 3191428d7b3dSmrg else 3192428d7b3dSmrg sna->render_state.gen2.ls1 = ls1; 3193428d7b3dSmrg 3194428d7b3dSmrg gen2_enable_logic_op(sna, op->op); 3195428d7b3dSmrg gen2_emit_copy_pipeline(sna, op); 3196428d7b3dSmrg 3197428d7b3dSmrg v = _3DSTATE_VERTEX_FORMAT_2_CMD | TEXCOORDFMT_2D; 3198428d7b3dSmrg if (sna->render_state.gen2.vft != v) { 3199428d7b3dSmrg BATCH(v); 3200428d7b3dSmrg sna->render_state.gen2.vft = v; 3201428d7b3dSmrg } 3202428d7b3dSmrg 3203428d7b3dSmrg gen2_emit_texture(sna, &op->src, 
0); 3204428d7b3dSmrg} 3205428d7b3dSmrg 3206428d7b3dSmrgstatic bool 3207428d7b3dSmrggen2_render_copy_boxes(struct sna *sna, uint8_t alu, 3208428d7b3dSmrg const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 3209428d7b3dSmrg const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 3210428d7b3dSmrg const BoxRec *box, int n, unsigned flags) 3211428d7b3dSmrg{ 3212428d7b3dSmrg struct sna_composite_op tmp; 3213428d7b3dSmrg 3214428d7b3dSmrg#if NO_COPY_BOXES 3215428d7b3dSmrg if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3216428d7b3dSmrg return false; 3217428d7b3dSmrg 3218428d7b3dSmrg return sna_blt_copy_boxes(sna, alu, 3219428d7b3dSmrg src_bo, src_dx, src_dy, 3220428d7b3dSmrg dst_bo, dst_dx, dst_dy, 3221428d7b3dSmrg dst->drawable.bitsPerPixel, 3222428d7b3dSmrg box, n); 3223428d7b3dSmrg#endif 3224428d7b3dSmrg 3225428d7b3dSmrg DBG(("%s (%d, %d)->(%d, %d) x %d\n", 3226428d7b3dSmrg __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n)); 3227428d7b3dSmrg 3228428d7b3dSmrg if (sna_blt_compare_depth(src, dst) && 3229428d7b3dSmrg sna_blt_copy_boxes(sna, alu, 3230428d7b3dSmrg src_bo, src_dx, src_dy, 3231428d7b3dSmrg dst_bo, dst_dx, dst_dy, 3232428d7b3dSmrg dst->bitsPerPixel, 3233428d7b3dSmrg box, n)) 3234428d7b3dSmrg return true; 3235428d7b3dSmrg 3236428d7b3dSmrg if (src_bo == dst_bo || /* XXX handle overlap using 3D ? 
*/ 3237428d7b3dSmrg too_large(src->width, src->height) || 3238428d7b3dSmrg src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch < 8) { 3239428d7b3dSmrgfallback: 3240428d7b3dSmrg return sna_blt_copy_boxes_fallback(sna, alu, 3241428d7b3dSmrg src, src_bo, src_dx, src_dy, 3242428d7b3dSmrg dst, dst_bo, dst_dx, dst_dy, 3243428d7b3dSmrg box, n); 3244428d7b3dSmrg } 3245428d7b3dSmrg 3246428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 3247428d7b3dSmrg kgem_submit(&sna->kgem); 3248428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 3249428d7b3dSmrg goto fallback; 3250428d7b3dSmrg } 3251428d7b3dSmrg 3252428d7b3dSmrg assert(dst_bo->pitch >= 8); 3253428d7b3dSmrg 3254428d7b3dSmrg memset(&tmp, 0, sizeof(tmp)); 3255428d7b3dSmrg tmp.op = alu; 3256428d7b3dSmrg 3257428d7b3dSmrg tmp.dst.pixmap = (PixmapPtr)dst; 3258428d7b3dSmrg tmp.dst.width = dst->width; 3259428d7b3dSmrg tmp.dst.height = dst->height; 3260428d7b3dSmrg tmp.dst.format = sna_format_for_depth(dst->depth); 3261428d7b3dSmrg tmp.dst.bo = dst_bo; 3262428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 3263428d7b3dSmrg tmp.damage = NULL; 3264428d7b3dSmrg 3265428d7b3dSmrg DBG(("%s: target=%d, format=%08x, size=%dx%d\n", 3266428d7b3dSmrg __FUNCTION__, dst_bo->handle, 3267428d7b3dSmrg (unsigned)tmp.dst.format, 3268428d7b3dSmrg tmp.dst.width, 3269428d7b3dSmrg tmp.dst.height)); 3270428d7b3dSmrg 3271428d7b3dSmrg sna_render_composite_redirect_init(&tmp); 3272428d7b3dSmrg if (too_large(tmp.dst.width, tmp.dst.height) || 3273428d7b3dSmrg dst_bo->pitch > MAX_3D_PITCH) { 3274428d7b3dSmrg BoxRec extents = box[0]; 3275428d7b3dSmrg int i; 3276428d7b3dSmrg 3277428d7b3dSmrg for (i = 1; i < n; i++) { 3278428d7b3dSmrg if (box[i].x1 < extents.x1) 3279428d7b3dSmrg extents.x1 = box[i].x1; 3280428d7b3dSmrg if (box[i].y1 < extents.y1) 3281428d7b3dSmrg extents.y1 = box[i].y1; 3282428d7b3dSmrg 3283428d7b3dSmrg if (box[i].x2 > extents.x2) 3284428d7b3dSmrg extents.x2 = box[i].x2; 3285428d7b3dSmrg if (box[i].y2 > extents.y2) 
3286428d7b3dSmrg extents.y2 = box[i].y2; 3287428d7b3dSmrg } 3288428d7b3dSmrg if (!sna_render_composite_redirect(sna, &tmp, 3289428d7b3dSmrg extents.x1 + dst_dx, 3290428d7b3dSmrg extents.y1 + dst_dy, 3291428d7b3dSmrg extents.x2 - extents.x1, 3292428d7b3dSmrg extents.y2 - extents.y1, 3293428d7b3dSmrg alu != GXcopy || n > 1)) 3294428d7b3dSmrg goto fallback_tiled; 3295428d7b3dSmrg } 3296428d7b3dSmrg 3297428d7b3dSmrg tmp.floats_per_vertex = 4; 3298428d7b3dSmrg tmp.floats_per_rect = 12; 3299428d7b3dSmrg 3300428d7b3dSmrg dst_dx += tmp.dst.x; 3301428d7b3dSmrg dst_dy += tmp.dst.y; 3302428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 3303428d7b3dSmrg 3304428d7b3dSmrg gen2_render_copy_setup_source(&tmp.src, src, src_bo); 3305428d7b3dSmrg gen2_emit_copy_state(sna, &tmp); 3306428d7b3dSmrg do { 3307428d7b3dSmrg int n_this_time; 3308428d7b3dSmrg 3309428d7b3dSmrg n_this_time = gen2_get_rectangles(sna, &tmp, n); 3310428d7b3dSmrg if (n_this_time == 0) { 3311428d7b3dSmrg gen2_emit_copy_state(sna, &tmp); 3312428d7b3dSmrg n_this_time = gen2_get_rectangles(sna, &tmp, n); 3313428d7b3dSmrg } 3314428d7b3dSmrg n -= n_this_time; 3315428d7b3dSmrg 3316428d7b3dSmrg do { 3317428d7b3dSmrg DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", 3318428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 3319428d7b3dSmrg box->x1 + dst_dx, box->y1 + dst_dy, 3320428d7b3dSmrg box->x2 - box->x1, box->y2 - box->y1)); 3321428d7b3dSmrg VERTEX(box->x2 + dst_dx); 3322428d7b3dSmrg VERTEX(box->y2 + dst_dy); 3323428d7b3dSmrg VERTEX((box->x2 + src_dx) * tmp.src.scale[0]); 3324428d7b3dSmrg VERTEX((box->y2 + src_dy) * tmp.src.scale[1]); 3325428d7b3dSmrg 3326428d7b3dSmrg VERTEX(box->x1 + dst_dx); 3327428d7b3dSmrg VERTEX(box->y2 + dst_dy); 3328428d7b3dSmrg VERTEX((box->x1 + src_dx) * tmp.src.scale[0]); 3329428d7b3dSmrg VERTEX((box->y2 + src_dy) * tmp.src.scale[1]); 3330428d7b3dSmrg 3331428d7b3dSmrg VERTEX(box->x1 + dst_dx); 3332428d7b3dSmrg VERTEX(box->y1 + dst_dy); 3333428d7b3dSmrg VERTEX((box->x1 + src_dx) * tmp.src.scale[0]); 
3334428d7b3dSmrg VERTEX((box->y1 + src_dy) * tmp.src.scale[1]); 3335428d7b3dSmrg 3336428d7b3dSmrg box++; 3337428d7b3dSmrg } while (--n_this_time); 3338428d7b3dSmrg } while (n); 3339428d7b3dSmrg 3340428d7b3dSmrg gen2_vertex_flush(sna, &tmp); 3341428d7b3dSmrg sna_render_composite_redirect_done(sna, &tmp); 3342428d7b3dSmrg return true; 3343428d7b3dSmrg 3344428d7b3dSmrgfallback_tiled: 3345428d7b3dSmrg return sna_tiling_copy_boxes(sna, alu, 3346428d7b3dSmrg src, src_bo, src_dx, src_dy, 3347428d7b3dSmrg dst, dst_bo, dst_dx, dst_dy, 3348428d7b3dSmrg box, n); 3349428d7b3dSmrg} 3350428d7b3dSmrg 3351428d7b3dSmrgstatic void 3352428d7b3dSmrggen2_render_copy_blt(struct sna *sna, 3353428d7b3dSmrg const struct sna_copy_op *op, 3354428d7b3dSmrg int16_t sx, int16_t sy, 3355428d7b3dSmrg int16_t w, int16_t h, 3356428d7b3dSmrg int16_t dx, int16_t dy) 3357428d7b3dSmrg{ 3358428d7b3dSmrg if (!gen2_get_rectangles(sna, &op->base, 1)) { 3359428d7b3dSmrg gen2_emit_copy_state(sna, &op->base); 3360428d7b3dSmrg gen2_get_rectangles(sna, &op->base, 1); 3361428d7b3dSmrg } 3362428d7b3dSmrg 3363428d7b3dSmrg VERTEX(dx+w); 3364428d7b3dSmrg VERTEX(dy+h); 3365428d7b3dSmrg VERTEX((sx+w)*op->base.src.scale[0]); 3366428d7b3dSmrg VERTEX((sy+h)*op->base.src.scale[1]); 3367428d7b3dSmrg 3368428d7b3dSmrg VERTEX(dx); 3369428d7b3dSmrg VERTEX(dy+h); 3370428d7b3dSmrg VERTEX(sx*op->base.src.scale[0]); 3371428d7b3dSmrg VERTEX((sy+h)*op->base.src.scale[1]); 3372428d7b3dSmrg 3373428d7b3dSmrg VERTEX(dx); 3374428d7b3dSmrg VERTEX(dy); 3375428d7b3dSmrg VERTEX(sx*op->base.src.scale[0]); 3376428d7b3dSmrg VERTEX(sy*op->base.src.scale[1]); 3377428d7b3dSmrg} 3378428d7b3dSmrg 3379428d7b3dSmrgstatic void 3380428d7b3dSmrggen2_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 3381428d7b3dSmrg{ 3382428d7b3dSmrg gen2_vertex_flush(sna, &op->base); 3383428d7b3dSmrg} 3384428d7b3dSmrg 3385428d7b3dSmrgstatic bool 3386428d7b3dSmrggen2_render_copy(struct sna *sna, uint8_t alu, 3387428d7b3dSmrg PixmapPtr src, struct kgem_bo 
*src_bo, 3388428d7b3dSmrg PixmapPtr dst, struct kgem_bo *dst_bo, 3389428d7b3dSmrg struct sna_copy_op *tmp) 3390428d7b3dSmrg{ 3391428d7b3dSmrg#if NO_COPY 3392428d7b3dSmrg if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3393428d7b3dSmrg return false; 3394428d7b3dSmrg 3395428d7b3dSmrg return sna_blt_copy(sna, alu, 3396428d7b3dSmrg src_bo, dst_bo, 3397428d7b3dSmrg dst->drawable.bitsPerPixel, 3398428d7b3dSmrg tmp); 3399428d7b3dSmrg#endif 3400428d7b3dSmrg 3401428d7b3dSmrg /* Prefer to use the BLT */ 3402428d7b3dSmrg if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && 3403428d7b3dSmrg sna_blt_copy(sna, alu, 3404428d7b3dSmrg src_bo, dst_bo, 3405428d7b3dSmrg dst->drawable.bitsPerPixel, 3406428d7b3dSmrg tmp)) 3407428d7b3dSmrg return true; 3408428d7b3dSmrg 3409428d7b3dSmrg /* Must use the BLT if we can't RENDER... */ 3410428d7b3dSmrg if (too_large(src->drawable.width, src->drawable.height) || 3411428d7b3dSmrg too_large(dst->drawable.width, dst->drawable.height) || 3412428d7b3dSmrg src_bo->pitch > MAX_3D_PITCH || 3413428d7b3dSmrg dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH) { 3414428d7b3dSmrgfallback: 3415428d7b3dSmrg if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3416428d7b3dSmrg return false; 3417428d7b3dSmrg 3418428d7b3dSmrg return sna_blt_copy(sna, alu, src_bo, dst_bo, 3419428d7b3dSmrg dst->drawable.bitsPerPixel, 3420428d7b3dSmrg tmp); 3421428d7b3dSmrg } 3422428d7b3dSmrg 3423428d7b3dSmrg tmp->base.op = alu; 3424428d7b3dSmrg 3425428d7b3dSmrg tmp->base.dst.pixmap = dst; 3426428d7b3dSmrg tmp->base.dst.width = dst->drawable.width; 3427428d7b3dSmrg tmp->base.dst.height = dst->drawable.height; 3428428d7b3dSmrg tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3429428d7b3dSmrg tmp->base.dst.bo = dst_bo; 3430428d7b3dSmrg 3431428d7b3dSmrg gen2_render_copy_setup_source(&tmp->base.src, &src->drawable, src_bo); 3432428d7b3dSmrg tmp->base.mask.bo = NULL; 3433428d7b3dSmrg 3434428d7b3dSmrg tmp->base.floats_per_vertex = 4; 
3435428d7b3dSmrg tmp->base.floats_per_rect = 12; 3436428d7b3dSmrg 3437428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 3438428d7b3dSmrg kgem_submit(&sna->kgem); 3439428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 3440428d7b3dSmrg goto fallback; 3441428d7b3dSmrg } 3442428d7b3dSmrg 3443428d7b3dSmrg tmp->blt = gen2_render_copy_blt; 3444428d7b3dSmrg tmp->done = gen2_render_copy_done; 3445428d7b3dSmrg 3446428d7b3dSmrg gen2_emit_composite_state(sna, &tmp->base); 3447428d7b3dSmrg return true; 3448428d7b3dSmrg} 3449428d7b3dSmrg 3450428d7b3dSmrgstatic void 3451428d7b3dSmrggen2_render_reset(struct sna *sna) 3452428d7b3dSmrg{ 3453428d7b3dSmrg sna->render_state.gen2.need_invariant = true; 3454428d7b3dSmrg sna->render_state.gen2.logic_op_enabled = 0; 3455428d7b3dSmrg sna->render_state.gen2.target = 0; 3456428d7b3dSmrg 3457428d7b3dSmrg sna->render_state.gen2.ls1 = 0; 3458428d7b3dSmrg sna->render_state.gen2.ls2 = 0; 3459428d7b3dSmrg sna->render_state.gen2.vft = 0; 3460428d7b3dSmrg 3461428d7b3dSmrg sna->render_state.gen2.diffuse = 0x0c0ffee0; 3462428d7b3dSmrg sna->render_state.gen2.specular = 0x0c0ffee0; 3463428d7b3dSmrg} 3464428d7b3dSmrg 3465428d7b3dSmrgstatic void 3466428d7b3dSmrggen2_render_flush(struct sna *sna) 3467428d7b3dSmrg{ 3468428d7b3dSmrg assert(sna->render.vertex_index == 0); 3469428d7b3dSmrg assert(sna->render.vertex_offset == 0); 3470428d7b3dSmrg} 3471428d7b3dSmrg 3472428d7b3dSmrgstatic void 3473428d7b3dSmrggen2_render_context_switch(struct kgem *kgem, 3474428d7b3dSmrg int new_mode) 3475428d7b3dSmrg{ 3476428d7b3dSmrg struct sna *sna = container_of(kgem, struct sna, kgem); 3477428d7b3dSmrg 3478428d7b3dSmrg if (!kgem->nbatch) 3479428d7b3dSmrg return; 3480428d7b3dSmrg 3481428d7b3dSmrg /* Reload BLT registers following a lost context */ 3482428d7b3dSmrg sna->blt_state.fill_bo = 0; 3483428d7b3dSmrg 3484428d7b3dSmrg if (kgem_ring_is_idle(kgem, kgem->ring)) { 3485428d7b3dSmrg DBG(("%s: GPU idle, flushing\n", __FUNCTION__)); 
3486428d7b3dSmrg _kgem_submit(kgem); 3487428d7b3dSmrg } 3488428d7b3dSmrg} 3489428d7b3dSmrg 3490428d7b3dSmrgconst char *gen2_render_init(struct sna *sna, const char *backend) 3491428d7b3dSmrg{ 3492428d7b3dSmrg struct sna_render *render = &sna->render; 3493428d7b3dSmrg 3494428d7b3dSmrg sna->kgem.context_switch = gen2_render_context_switch; 3495428d7b3dSmrg 3496428d7b3dSmrg /* Use the BLT (and overlay) for everything except when forced to 3497428d7b3dSmrg * use the texture combiners. 3498428d7b3dSmrg */ 3499428d7b3dSmrg#if !NO_COMPOSITE 3500428d7b3dSmrg render->composite = gen2_render_composite; 3501428d7b3dSmrg render->prefer_gpu |= PREFER_GPU_RENDER; 3502428d7b3dSmrg#endif 3503428d7b3dSmrg#if !NO_COMPOSITE_SPANS 3504428d7b3dSmrg render->check_composite_spans = gen2_check_composite_spans; 3505428d7b3dSmrg render->composite_spans = gen2_render_composite_spans; 3506428d7b3dSmrg render->prefer_gpu |= PREFER_GPU_SPANS; 3507428d7b3dSmrg#endif 3508428d7b3dSmrg render->fill_boxes = gen2_render_fill_boxes; 3509428d7b3dSmrg render->fill = gen2_render_fill; 3510428d7b3dSmrg render->fill_one = gen2_render_fill_one; 3511428d7b3dSmrg render->copy = gen2_render_copy; 3512428d7b3dSmrg render->copy_boxes = gen2_render_copy_boxes; 3513428d7b3dSmrg 3514428d7b3dSmrg /* XXX YUV color space conversion for video? */ 3515428d7b3dSmrg 3516428d7b3dSmrg render->reset = gen2_render_reset; 3517428d7b3dSmrg render->flush = gen2_render_flush; 3518428d7b3dSmrg 3519428d7b3dSmrg render->max_3d_size = MAX_3D_SIZE; 3520428d7b3dSmrg render->max_3d_pitch = MAX_3D_PITCH; 3521428d7b3dSmrg return "Almador (gen2)"; 3522428d7b3dSmrg} 3523