1428d7b3dSmrg/* 2428d7b3dSmrg * Copyright © 2012,2013 Intel Corporation 3428d7b3dSmrg * 4428d7b3dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 5428d7b3dSmrg * copy of this software and associated documentation files (the "Software"), 6428d7b3dSmrg * to deal in the Software without restriction, including without limitation 7428d7b3dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8428d7b3dSmrg * and/or sell copies of the Software, and to permit persons to whom the 9428d7b3dSmrg * Software is furnished to do so, subject to the following conditions: 10428d7b3dSmrg * 11428d7b3dSmrg * The above copyright notice and this permission notice (including the next 12428d7b3dSmrg * paragraph) shall be included in all copies or substantial portions of the 13428d7b3dSmrg * Software. 14428d7b3dSmrg * 15428d7b3dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16428d7b3dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17428d7b3dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18428d7b3dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19428d7b3dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20428d7b3dSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21428d7b3dSmrg * SOFTWARE. 22428d7b3dSmrg * 23428d7b3dSmrg * Authors: 24428d7b3dSmrg * Chris Wilson <chris@chris-wilson.co.uk> 25428d7b3dSmrg * 26428d7b3dSmrg */ 27428d7b3dSmrg 28428d7b3dSmrg#ifdef HAVE_CONFIG_H 29428d7b3dSmrg#include "config.h" 30428d7b3dSmrg#endif 31428d7b3dSmrg 32428d7b3dSmrg#include "sna.h" 33428d7b3dSmrg#include "sna_reg.h" 34428d7b3dSmrg#include "sna_render.h" 35428d7b3dSmrg#include "sna_render_inline.h" 36428d7b3dSmrg#include "sna_video.h" 37428d7b3dSmrg 38428d7b3dSmrg#include "gen8_render.h" 39428d7b3dSmrg#include "gen8_eu.h" 40428d7b3dSmrg#include "gen4_common.h" 41428d7b3dSmrg#include "gen4_source.h" 42428d7b3dSmrg#include "gen4_vertex.h" 43428d7b3dSmrg#include "gen6_common.h" 44428d7b3dSmrg#include "gen8_vertex.h" 45428d7b3dSmrg 46428d7b3dSmrg#define SIM 1 47428d7b3dSmrg 48428d7b3dSmrg#define ALWAYS_INVALIDATE 0 49428d7b3dSmrg#define ALWAYS_FLUSH 0 50428d7b3dSmrg#define ALWAYS_STALL 0 51428d7b3dSmrg 52428d7b3dSmrg#define NO_COMPOSITE 0 53428d7b3dSmrg#define NO_COMPOSITE_SPANS 0 54428d7b3dSmrg#define NO_COPY 0 55428d7b3dSmrg#define NO_COPY_BOXES 0 56428d7b3dSmrg#define NO_FILL 0 57428d7b3dSmrg#define NO_FILL_BOXES 0 58428d7b3dSmrg#define NO_FILL_ONE 0 59428d7b3dSmrg#define NO_FILL_CLEAR 0 60428d7b3dSmrg#define NO_VIDEO 0 61428d7b3dSmrg 62428d7b3dSmrg#define USE_8_PIXEL_DISPATCH 1 63428d7b3dSmrg#define USE_16_PIXEL_DISPATCH 1 64428d7b3dSmrg#define USE_32_PIXEL_DISPATCH 0 65428d7b3dSmrg 66428d7b3dSmrg#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH 67428d7b3dSmrg#error "Must select at least 8, 16 or 32 pixel dispatch" 68428d7b3dSmrg#endif 69428d7b3dSmrg 70428d7b3dSmrg#define GEN8_MAX_SIZE 16384 71428d7b3dSmrg 72428d7b3dSmrg/* XXX Todo 73428d7b3dSmrg * 74428d7b3dSmrg * STR (software tiled rendering) mode. No, really. 75428d7b3dSmrg * 64x32 pixel blocks align with the rendering cache. Worth considering. 76428d7b3dSmrg */ 77428d7b3dSmrg 78428d7b3dSmrg#define is_aligned(x, y) (((x) & ((y) - 1)) == 0) 79428d7b3dSmrg 80428d7b3dSmrg/* Pipeline stages: 81428d7b3dSmrg * 1. Command Streamer (CS) 82428d7b3dSmrg * 2. Vertex Fetch (VF) 83428d7b3dSmrg * 3. Vertex Shader (VS) 84428d7b3dSmrg * 4. Hull Shader (HS) 85428d7b3dSmrg * 5. Tesselation Engine (TE) 86428d7b3dSmrg * 6. Domain Shader (DS) 87428d7b3dSmrg * 7. Geometry Shader (GS) 88428d7b3dSmrg * 8. Stream Output Logic (SOL) 89428d7b3dSmrg * 9. Clipper (CLIP) 90428d7b3dSmrg * 10. Strip/Fan (SF) 91428d7b3dSmrg * 11. Windower/Masker (WM) 92428d7b3dSmrg * 12. Color Calculator (CC) 93428d7b3dSmrg */ 94428d7b3dSmrg 95428d7b3dSmrg#if !NO_VIDEO 96428d7b3dSmrgstatic const uint32_t ps_kernel_packed[][4] = { 97428d7b3dSmrg#include "exa_wm_src_affine.g8b" 98428d7b3dSmrg#include "exa_wm_src_sample_argb.g8b" 99428d7b3dSmrg#include "exa_wm_yuv_rgb.g8b" 100428d7b3dSmrg#include "exa_wm_write.g8b" 101428d7b3dSmrg}; 102428d7b3dSmrg 103428d7b3dSmrgstatic const uint32_t ps_kernel_planar[][4] = { 104428d7b3dSmrg#include "exa_wm_src_affine.g8b" 105428d7b3dSmrg#include "exa_wm_src_sample_planar.g8b" 106428d7b3dSmrg#include "exa_wm_yuv_rgb.g8b" 107428d7b3dSmrg#include "exa_wm_write.g8b" 108428d7b3dSmrg}; 109428d7b3dSmrg#endif 110428d7b3dSmrg 111428d7b3dSmrg#define SURFACE_DW (64 / sizeof(uint32_t)); 112428d7b3dSmrg 113428d7b3dSmrg#define KERNEL(kernel_enum, kernel, num_surfaces) \ 114428d7b3dSmrg [GEN8_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces} 115428d7b3dSmrg#define NOKERNEL(kernel_enum, func, num_surfaces) \ 116428d7b3dSmrg [GEN8_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces} 117428d7b3dSmrgstatic const struct wm_kernel_info { 118428d7b3dSmrg const char *name; 119428d7b3dSmrg const void *data; 120428d7b3dSmrg unsigned int size; 121428d7b3dSmrg int num_surfaces; 122428d7b3dSmrg} wm_kernels[] = { 123428d7b3dSmrg NOKERNEL(NOMASK, gen8_wm_kernel__affine, 2), 124428d7b3dSmrg NOKERNEL(NOMASK_P, gen8_wm_kernel__projective, 2), 125428d7b3dSmrg 126428d7b3dSmrg NOKERNEL(MASK, gen8_wm_kernel__affine_mask, 3), 127428d7b3dSmrg NOKERNEL(MASK_P, gen8_wm_kernel__projective_mask, 3), 128428d7b3dSmrg 129428d7b3dSmrg NOKERNEL(MASKCA, gen8_wm_kernel__affine_mask_ca, 3), 130428d7b3dSmrg NOKERNEL(MASKCA_P, gen8_wm_kernel__projective_mask_ca, 3), 131428d7b3dSmrg 132428d7b3dSmrg NOKERNEL(MASKSA, gen8_wm_kernel__affine_mask_sa, 3), 133428d7b3dSmrg NOKERNEL(MASKSA_P, gen8_wm_kernel__projective_mask_sa, 3), 134428d7b3dSmrg 135428d7b3dSmrg NOKERNEL(OPACITY, gen8_wm_kernel__affine_opacity, 2), 136428d7b3dSmrg NOKERNEL(OPACITY_P, gen8_wm_kernel__projective_opacity, 2), 137428d7b3dSmrg 138428d7b3dSmrg#if !NO_VIDEO 139428d7b3dSmrg KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), 140428d7b3dSmrg KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), 141428d7b3dSmrg#endif 142428d7b3dSmrg}; 143428d7b3dSmrg#undef KERNEL 144428d7b3dSmrg 145428d7b3dSmrgstatic const struct blendinfo { 146428d7b3dSmrg uint8_t src_alpha; 147428d7b3dSmrg uint8_t src_blend; 148428d7b3dSmrg uint8_t dst_blend; 149428d7b3dSmrg} gen8_blend_op[] = { 150428d7b3dSmrg /* Clear */ {0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO}, 151428d7b3dSmrg /* Src */ {0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO}, 152428d7b3dSmrg /* Dst */ {0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE}, 153428d7b3dSmrg /* Over */ {1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA}, 154428d7b3dSmrg /* OverReverse */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE}, 155428d7b3dSmrg /* In */ {0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO}, 156428d7b3dSmrg /* InReverse */ {1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA}, 157428d7b3dSmrg /* Out */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO}, 158428d7b3dSmrg /* OutReverse */ {1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA}, 159428d7b3dSmrg /* Atop */ {1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, 160428d7b3dSmrg /* AtopReverse */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA}, 161428d7b3dSmrg /* Xor */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, 162428d7b3dSmrg /* Add */ {0, BLENDFACTOR_ONE, BLENDFACTOR_ONE}, 163428d7b3dSmrg}; 164428d7b3dSmrg 165428d7b3dSmrg/** 166428d7b3dSmrg * Highest-valued BLENDFACTOR used in gen8_blend_op. 167428d7b3dSmrg * 168428d7b3dSmrg * This leaves out GEN8_BLENDFACTOR_INV_DST_COLOR, 169428d7b3dSmrg * GEN8_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, 170428d7b3dSmrg * GEN8_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} 171428d7b3dSmrg */ 172428d7b3dSmrg#define GEN8_BLENDFACTOR_COUNT (BLENDFACTOR_INV_DST_ALPHA + 1) 173428d7b3dSmrg 174428d7b3dSmrg#define GEN8_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen8_blend_state), 64) 175428d7b3dSmrg 176428d7b3dSmrg#define BLEND_OFFSET(s, d) \ 177428d7b3dSmrg ((d != BLENDFACTOR_ZERO) << 15 | ((s) * GEN8_BLENDFACTOR_COUNT + (d)) << 4) 178428d7b3dSmrg 179428d7b3dSmrg#define NO_BLEND BLEND_OFFSET(BLENDFACTOR_ONE, BLENDFACTOR_ZERO) 180428d7b3dSmrg#define CLEAR BLEND_OFFSET(BLENDFACTOR_ZERO, BLENDFACTOR_ZERO) 181428d7b3dSmrg 182428d7b3dSmrg#define SAMPLER_OFFSET(sf, se, mf, me) \ 183428d7b3dSmrg (((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) 184428d7b3dSmrg 185428d7b3dSmrg#define VERTEX_2s2s 0 186428d7b3dSmrg 187428d7b3dSmrg#define COPY_SAMPLER 0 188428d7b3dSmrg#define COPY_VERTEX VERTEX_2s2s 189428d7b3dSmrg#define COPY_FLAGS(a) GEN8_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN8_WM_KERNEL_NOMASK, COPY_VERTEX) 190428d7b3dSmrg 191428d7b3dSmrg#define FILL_SAMPLER 1 192428d7b3dSmrg#define FILL_VERTEX VERTEX_2s2s 193428d7b3dSmrg#define FILL_FLAGS(op, format) GEN8_SET_FLAGS(FILL_SAMPLER, gen8_get_blend((op), false, (format)), GEN8_WM_KERNEL_NOMASK, FILL_VERTEX) 194428d7b3dSmrg#define FILL_FLAGS_NOBLEND GEN8_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN8_WM_KERNEL_NOMASK, FILL_VERTEX) 195428d7b3dSmrg 196428d7b3dSmrg#define GEN8_SAMPLER(f) (((f) >> 20) & 0xfff) 197428d7b3dSmrg#define GEN8_BLEND(f) (((f) >> 4) & 0x7ff) 198428d7b3dSmrg#define GEN8_READS_DST(f) (((f) >> 15) & 1) 199428d7b3dSmrg#define GEN8_KERNEL(f) (((f) >> 16) & 0xf) 200428d7b3dSmrg#define GEN8_VERTEX(f) (((f) >> 0) & 0xf) 201428d7b3dSmrg#define GEN8_SET_FLAGS(S, B, K, V) ((S) << 20 | (K) << 16 | (B) | (V)) 202428d7b3dSmrg 203428d7b3dSmrg#define OUT_BATCH(v) batch_emit(sna, v) 204428d7b3dSmrg#define OUT_BATCH64(v) batch_emit64(sna, v) 205428d7b3dSmrg#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) 206428d7b3dSmrg#define OUT_VERTEX_F(v) vertex_emit(sna, v) 207428d7b3dSmrg 208428d7b3dSmrgstatic inline bool too_large(int width, int height) 209428d7b3dSmrg{ 210428d7b3dSmrg return width > GEN8_MAX_SIZE || height > GEN8_MAX_SIZE; 211428d7b3dSmrg} 212428d7b3dSmrg 213428d7b3dSmrgstatic inline bool unaligned(struct kgem_bo *bo, int bpp) 214428d7b3dSmrg{ 215428d7b3dSmrg /* XXX What exactly do we need to meet H_ALIGN and V_ALIGN? */ 216428d7b3dSmrg#if 0 217428d7b3dSmrg int x, y; 218428d7b3dSmrg 219428d7b3dSmrg if (bo->proxy == NULL) 220428d7b3dSmrg return false; 221428d7b3dSmrg 222428d7b3dSmrg /* Assume that all tiled proxies are constructed correctly. */ 223428d7b3dSmrg if (bo->tiling) 224428d7b3dSmrg return false; 225428d7b3dSmrg 226428d7b3dSmrg DBG(("%s: checking alignment of a linear proxy, offset=%d, pitch=%d, bpp=%d: => (%d, %d)\n", 227428d7b3dSmrg __FUNCTION__, bo->delta, bo->pitch, bpp, 228428d7b3dSmrg 8 * (bo->delta % bo->pitch) / bpp, bo->delta / bo->pitch)); 229428d7b3dSmrg 230428d7b3dSmrg /* This may be a random userptr map, check that it meets the 231428d7b3dSmrg * render alignment of SURFACE_VALIGN_4 | SURFACE_HALIGN_4. 232428d7b3dSmrg */ 233428d7b3dSmrg y = bo->delta / bo->pitch; 234428d7b3dSmrg if (y & 3) 235428d7b3dSmrg return true; 236428d7b3dSmrg 237428d7b3dSmrg x = 8 * (bo->delta - y * bo->pitch); 238428d7b3dSmrg if (x & (4*bpp - 1)) 239428d7b3dSmrg return true; 240428d7b3dSmrg 241428d7b3dSmrg return false; 242428d7b3dSmrg#else 243428d7b3dSmrg return false; 244428d7b3dSmrg#endif 245428d7b3dSmrg} 246428d7b3dSmrg 247428d7b3dSmrgstatic uint32_t gen8_get_blend(int op, 248428d7b3dSmrg bool has_component_alpha, 249428d7b3dSmrg uint32_t dst_format) 250428d7b3dSmrg{ 251428d7b3dSmrg uint32_t src, dst; 252428d7b3dSmrg 253428d7b3dSmrg COMPILE_TIME_ASSERT(BLENDFACTOR_INV_DST_ALPHA*GEN8_BLENDFACTOR_COUNT + BLENDFACTOR_INV_DST_ALPHA <= 0x7ff); 254428d7b3dSmrg 255428d7b3dSmrg src = gen8_blend_op[op].src_blend; 256428d7b3dSmrg dst = gen8_blend_op[op].dst_blend; 257428d7b3dSmrg 258428d7b3dSmrg /* If there's no dst alpha channel, adjust the blend op so that 259428d7b3dSmrg * we'll treat it always as 1. 260428d7b3dSmrg */ 261428d7b3dSmrg if (PICT_FORMAT_A(dst_format) == 0) { 262428d7b3dSmrg if (src == BLENDFACTOR_DST_ALPHA) 263428d7b3dSmrg src = BLENDFACTOR_ONE; 264428d7b3dSmrg else if (src == BLENDFACTOR_INV_DST_ALPHA) 265428d7b3dSmrg src = BLENDFACTOR_ZERO; 266428d7b3dSmrg } 267428d7b3dSmrg 268428d7b3dSmrg /* If the source alpha is being used, then we should only be in a 269428d7b3dSmrg * case where the source blend factor is 0, and the source blend 270428d7b3dSmrg * value is the mask channels multiplied by the source picture's alpha. 271428d7b3dSmrg */ 272428d7b3dSmrg if (has_component_alpha && gen8_blend_op[op].src_alpha) { 273428d7b3dSmrg if (dst == BLENDFACTOR_SRC_ALPHA) 274428d7b3dSmrg dst = BLENDFACTOR_SRC_COLOR; 275428d7b3dSmrg else if (dst == BLENDFACTOR_INV_SRC_ALPHA) 276428d7b3dSmrg dst = BLENDFACTOR_INV_SRC_COLOR; 277428d7b3dSmrg } 278428d7b3dSmrg 279428d7b3dSmrg DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", 280428d7b3dSmrg op, dst_format, PICT_FORMAT_A(dst_format), 281428d7b3dSmrg src, dst, (int)(BLEND_OFFSET(src, dst)>>4))); 282428d7b3dSmrg assert(BLEND_OFFSET(src, dst) >> 4 <= 0xfff); 283428d7b3dSmrg return BLEND_OFFSET(src, dst); 284428d7b3dSmrg} 285428d7b3dSmrg 286428d7b3dSmrgstatic uint32_t gen8_get_card_format(PictFormat format) 287428d7b3dSmrg{ 288428d7b3dSmrg switch (format) { 289428d7b3dSmrg default: 290428d7b3dSmrg return -1; 291428d7b3dSmrg case PICT_a8r8g8b8: 292428d7b3dSmrg return SURFACEFORMAT_B8G8R8A8_UNORM; 293428d7b3dSmrg case PICT_x8r8g8b8: 294428d7b3dSmrg return SURFACEFORMAT_B8G8R8X8_UNORM; 295428d7b3dSmrg case PICT_a8b8g8r8: 296428d7b3dSmrg return SURFACEFORMAT_R8G8B8A8_UNORM; 297428d7b3dSmrg case PICT_x8b8g8r8: 298428d7b3dSmrg return SURFACEFORMAT_R8G8B8X8_UNORM; 299428d7b3dSmrg#ifdef PICT_a2r10g10b10 300428d7b3dSmrg case PICT_a2r10g10b10: 301428d7b3dSmrg return SURFACEFORMAT_B10G10R10A2_UNORM; 302428d7b3dSmrg case PICT_x2r10g10b10: 303428d7b3dSmrg return SURFACEFORMAT_B10G10R10X2_UNORM; 304428d7b3dSmrg#endif 305428d7b3dSmrg case PICT_r8g8b8: 306428d7b3dSmrg return SURFACEFORMAT_R8G8B8_UNORM; 307428d7b3dSmrg case PICT_r5g6b5: 308428d7b3dSmrg return SURFACEFORMAT_B5G6R5_UNORM; 309428d7b3dSmrg case PICT_a1r5g5b5: 310428d7b3dSmrg return SURFACEFORMAT_B5G5R5A1_UNORM; 311428d7b3dSmrg case PICT_a8: 312428d7b3dSmrg return SURFACEFORMAT_A8_UNORM; 313428d7b3dSmrg case PICT_a4r4g4b4: 314428d7b3dSmrg return SURFACEFORMAT_B4G4R4A4_UNORM; 315428d7b3dSmrg } 316428d7b3dSmrg} 317428d7b3dSmrg 318428d7b3dSmrgstatic uint32_t gen8_get_dest_format(PictFormat format) 319428d7b3dSmrg{ 320428d7b3dSmrg switch (format) { 321428d7b3dSmrg default: 322428d7b3dSmrg return -1; 323428d7b3dSmrg case PICT_a8r8g8b8: 324428d7b3dSmrg case PICT_x8r8g8b8: 325428d7b3dSmrg return SURFACEFORMAT_B8G8R8A8_UNORM; 326428d7b3dSmrg case PICT_a8b8g8r8: 327428d7b3dSmrg case PICT_x8b8g8r8: 328428d7b3dSmrg return SURFACEFORMAT_R8G8B8A8_UNORM; 329428d7b3dSmrg#ifdef PICT_a2r10g10b10 330428d7b3dSmrg case PICT_a2r10g10b10: 331428d7b3dSmrg case PICT_x2r10g10b10: 332428d7b3dSmrg return SURFACEFORMAT_B10G10R10A2_UNORM; 333428d7b3dSmrg#endif 334428d7b3dSmrg case PICT_r5g6b5: 335428d7b3dSmrg return SURFACEFORMAT_B5G6R5_UNORM; 336428d7b3dSmrg case PICT_x1r5g5b5: 337428d7b3dSmrg case PICT_a1r5g5b5: 338428d7b3dSmrg return SURFACEFORMAT_B5G5R5A1_UNORM; 339428d7b3dSmrg case PICT_a8: 340428d7b3dSmrg return SURFACEFORMAT_A8_UNORM; 341428d7b3dSmrg case PICT_a4r4g4b4: 342428d7b3dSmrg case PICT_x4r4g4b4: 343428d7b3dSmrg return SURFACEFORMAT_B4G4R4A4_UNORM; 344428d7b3dSmrg } 345428d7b3dSmrg} 346428d7b3dSmrg 347428d7b3dSmrgstatic bool gen8_check_dst_format(PictFormat format) 348428d7b3dSmrg{ 349428d7b3dSmrg if (gen8_get_dest_format(format) != -1) 350428d7b3dSmrg return true; 351428d7b3dSmrg 352428d7b3dSmrg DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 353428d7b3dSmrg return false; 354428d7b3dSmrg} 355428d7b3dSmrg 356428d7b3dSmrgstatic bool gen8_check_format(uint32_t format) 357428d7b3dSmrg{ 358428d7b3dSmrg if (gen8_get_card_format(format) != -1) 359428d7b3dSmrg return true; 360428d7b3dSmrg 361428d7b3dSmrg DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 362428d7b3dSmrg return false; 363428d7b3dSmrg} 364428d7b3dSmrg 365428d7b3dSmrgstatic uint32_t gen8_filter(uint32_t filter) 366428d7b3dSmrg{ 367428d7b3dSmrg switch (filter) { 368428d7b3dSmrg default: 369428d7b3dSmrg assert(0); 370428d7b3dSmrg case PictFilterNearest: 371428d7b3dSmrg return SAMPLER_FILTER_NEAREST; 372428d7b3dSmrg case PictFilterBilinear: 373428d7b3dSmrg return SAMPLER_FILTER_BILINEAR; 374428d7b3dSmrg } 375428d7b3dSmrg} 376428d7b3dSmrg 377428d7b3dSmrgstatic uint32_t gen8_check_filter(PicturePtr picture) 378428d7b3dSmrg{ 379428d7b3dSmrg switch (picture->filter) { 380428d7b3dSmrg case PictFilterNearest: 381428d7b3dSmrg case PictFilterBilinear: 382428d7b3dSmrg return true; 383428d7b3dSmrg default: 384428d7b3dSmrg return false; 385428d7b3dSmrg } 386428d7b3dSmrg} 387428d7b3dSmrg 388428d7b3dSmrgstatic uint32_t gen8_repeat(uint32_t repeat) 389428d7b3dSmrg{ 390428d7b3dSmrg switch (repeat) { 391428d7b3dSmrg default: 392428d7b3dSmrg assert(0); 393428d7b3dSmrg case RepeatNone: 394428d7b3dSmrg return SAMPLER_EXTEND_NONE; 395428d7b3dSmrg case RepeatNormal: 396428d7b3dSmrg return SAMPLER_EXTEND_REPEAT; 397428d7b3dSmrg case RepeatPad: 398428d7b3dSmrg return SAMPLER_EXTEND_PAD; 399428d7b3dSmrg case RepeatReflect: 400428d7b3dSmrg return SAMPLER_EXTEND_REFLECT; 401428d7b3dSmrg } 402428d7b3dSmrg} 403428d7b3dSmrg 404428d7b3dSmrgstatic bool gen8_check_repeat(PicturePtr picture) 405428d7b3dSmrg{ 406428d7b3dSmrg if (!picture->repeat) 407428d7b3dSmrg return true; 408428d7b3dSmrg 409428d7b3dSmrg switch (picture->repeatType) { 410428d7b3dSmrg case RepeatNone: 411428d7b3dSmrg case RepeatNormal: 412428d7b3dSmrg case RepeatPad: 413428d7b3dSmrg case RepeatReflect: 414428d7b3dSmrg return true; 415428d7b3dSmrg default: 416428d7b3dSmrg return false; 417428d7b3dSmrg } 418428d7b3dSmrg} 419428d7b3dSmrg 420428d7b3dSmrgstatic int 421428d7b3dSmrggen8_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) 422428d7b3dSmrg{ 423428d7b3dSmrg int base; 424428d7b3dSmrg 425428d7b3dSmrg if (has_mask) { 426428d7b3dSmrg if (is_ca) { 427428d7b3dSmrg if (gen8_blend_op[op].src_alpha) 428428d7b3dSmrg base = GEN8_WM_KERNEL_MASKSA; 429428d7b3dSmrg else 430428d7b3dSmrg base = GEN8_WM_KERNEL_MASKCA; 431428d7b3dSmrg } else 432428d7b3dSmrg base = GEN8_WM_KERNEL_MASK; 433428d7b3dSmrg } else 434428d7b3dSmrg base = GEN8_WM_KERNEL_NOMASK; 435428d7b3dSmrg 436428d7b3dSmrg return base + !is_affine; 437428d7b3dSmrg} 438428d7b3dSmrg 439428d7b3dSmrgstatic void 440428d7b3dSmrggen8_emit_push_constants(struct sna *sna) 441428d7b3dSmrg{ 442428d7b3dSmrg#if SIM 443428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2)); 444428d7b3dSmrg OUT_BATCH(0); 445428d7b3dSmrg 446428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2)); 447428d7b3dSmrg OUT_BATCH(0); 448428d7b3dSmrg 449428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2)); 450428d7b3dSmrg OUT_BATCH(0); 451428d7b3dSmrg 452428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2)); 453428d7b3dSmrg OUT_BATCH(0); 454428d7b3dSmrg 455428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); 456428d7b3dSmrg OUT_BATCH(0); 457428d7b3dSmrg#endif 458428d7b3dSmrg} 459428d7b3dSmrg 460428d7b3dSmrgstatic void 461428d7b3dSmrggen8_emit_urb(struct sna *sna) 462428d7b3dSmrg{ 463428d7b3dSmrg /* num of VS entries must be divisible by 8 if size < 9 */ 464428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_URB_VS | (2 - 2)); 465428d7b3dSmrg OUT_BATCH(960 << URB_ENTRY_NUMBER_SHIFT | 466428d7b3dSmrg (2 - 1) << URB_ENTRY_SIZE_SHIFT | 467428d7b3dSmrg 4 << URB_STARTING_ADDRESS_SHIFT); 468428d7b3dSmrg 469428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_URB_HS | (2 - 2)); 470428d7b3dSmrg OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT | 471428d7b3dSmrg 4 << URB_STARTING_ADDRESS_SHIFT); 472428d7b3dSmrg 473428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_URB_DS | (2 - 2)); 474428d7b3dSmrg OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT | 475428d7b3dSmrg 4 << URB_STARTING_ADDRESS_SHIFT); 476428d7b3dSmrg 477428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_URB_GS | (2 - 2)); 478428d7b3dSmrg OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT | 479428d7b3dSmrg 4 << URB_STARTING_ADDRESS_SHIFT); 480428d7b3dSmrg} 481428d7b3dSmrg 482428d7b3dSmrgstatic void 483428d7b3dSmrggen8_emit_state_base_address(struct sna *sna) 484428d7b3dSmrg{ 485428d7b3dSmrg uint32_t num_pages; 486428d7b3dSmrg 487428d7b3dSmrg assert(sna->kgem.surface - sna->kgem.nbatch <= 16384); 488428d7b3dSmrg 489428d7b3dSmrg OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2)); 490428d7b3dSmrg OUT_BATCH64(0); /* general */ 491428d7b3dSmrg OUT_BATCH(0); /* stateless dataport */ 492428d7b3dSmrg OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* surface */ 493428d7b3dSmrg sna->kgem.nbatch, 494428d7b3dSmrg NULL, 495428d7b3dSmrg I915_GEM_DOMAIN_INSTRUCTION << 16, 496428d7b3dSmrg BASE_ADDRESS_MODIFY)); 497428d7b3dSmrg OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* dynamic */ 498428d7b3dSmrg sna->kgem.nbatch, 499428d7b3dSmrg sna->render_state.gen8.general_bo, 500428d7b3dSmrg I915_GEM_DOMAIN_INSTRUCTION << 16, 501428d7b3dSmrg BASE_ADDRESS_MODIFY)); 502428d7b3dSmrg OUT_BATCH64(0); /* indirect */ 503428d7b3dSmrg OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* instruction */ 504428d7b3dSmrg sna->kgem.nbatch, 505428d7b3dSmrg sna->render_state.gen8.general_bo, 506428d7b3dSmrg I915_GEM_DOMAIN_INSTRUCTION << 16, 507428d7b3dSmrg BASE_ADDRESS_MODIFY)); 508428d7b3dSmrg /* upper bounds */ 509428d7b3dSmrg num_pages = sna->render_state.gen8.general_bo->size.pages.count; 510428d7b3dSmrg OUT_BATCH(0); /* general */ 511428d7b3dSmrg OUT_BATCH(num_pages << 12 | 1); /* dynamic */ 512428d7b3dSmrg OUT_BATCH(0); /* indirect */ 513428d7b3dSmrg OUT_BATCH(num_pages << 12 | 1); /* instruction */ 514428d7b3dSmrg} 515428d7b3dSmrg 516428d7b3dSmrgstatic void 517428d7b3dSmrggen8_emit_vs_invariant(struct sna *sna) 518428d7b3dSmrg{ 519428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_VS | (9 - 2)); 520428d7b3dSmrg OUT_BATCH64(0); /* no VS kernel */ 521428d7b3dSmrg OUT_BATCH(0); 522428d7b3dSmrg OUT_BATCH64(0); /* scratch */ 523428d7b3dSmrg OUT_BATCH(0); 524428d7b3dSmrg OUT_BATCH(1 << 1); /* pass-through */ 525428d7b3dSmrg OUT_BATCH(1 << 16 | 1 << 21); /* urb write to SBE */ 526428d7b3dSmrg 527428d7b3dSmrg#if SIM 528428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_CONSTANT_VS | (11 - 2)); 529428d7b3dSmrg OUT_BATCH(0); 530428d7b3dSmrg OUT_BATCH(0); 531428d7b3dSmrg OUT_BATCH64(0); 532428d7b3dSmrg OUT_BATCH64(0); 533428d7b3dSmrg OUT_BATCH64(0); 534428d7b3dSmrg OUT_BATCH64(0); 535428d7b3dSmrg 536428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2)); 537428d7b3dSmrg OUT_BATCH(0); 538428d7b3dSmrg 539428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2)); 540428d7b3dSmrg OUT_BATCH(0); 541428d7b3dSmrg#endif 542428d7b3dSmrg} 543428d7b3dSmrg 544428d7b3dSmrgstatic void 545428d7b3dSmrggen8_emit_hs_invariant(struct sna *sna) 546428d7b3dSmrg{ 547428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_HS | (9 - 2)); 548428d7b3dSmrg OUT_BATCH(0); 549428d7b3dSmrg OUT_BATCH(0); 550428d7b3dSmrg OUT_BATCH64(0); /* no HS kernel */ 551428d7b3dSmrg OUT_BATCH64(0); /* scratch */ 552428d7b3dSmrg OUT_BATCH(0); 553428d7b3dSmrg OUT_BATCH(0); /* pass-through */ 554428d7b3dSmrg 555428d7b3dSmrg#if SIM 556428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_CONSTANT_HS | (11 - 2)); 557428d7b3dSmrg OUT_BATCH(0); 558428d7b3dSmrg OUT_BATCH(0); 559428d7b3dSmrg OUT_BATCH64(0); 560428d7b3dSmrg OUT_BATCH64(0); 561428d7b3dSmrg OUT_BATCH64(0); 562428d7b3dSmrg OUT_BATCH64(0); 563428d7b3dSmrg 564428d7b3dSmrg#if 1 565428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2)); 566428d7b3dSmrg OUT_BATCH(0); 567428d7b3dSmrg 568428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2)); 569428d7b3dSmrg OUT_BATCH(0); 570428d7b3dSmrg#endif 571428d7b3dSmrg#endif 572428d7b3dSmrg} 573428d7b3dSmrg 574428d7b3dSmrgstatic void 575428d7b3dSmrggen8_emit_te_invariant(struct sna *sna) 576428d7b3dSmrg{ 577428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_TE | (4 - 2)); 578428d7b3dSmrg OUT_BATCH(0); 579428d7b3dSmrg OUT_BATCH(0); 580428d7b3dSmrg OUT_BATCH(0); 581428d7b3dSmrg} 582428d7b3dSmrg 583428d7b3dSmrgstatic void 584428d7b3dSmrggen8_emit_ds_invariant(struct sna *sna) 585428d7b3dSmrg{ 586428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_DS | (9 - 2)); 587428d7b3dSmrg OUT_BATCH64(0); /* no kernel */ 588428d7b3dSmrg OUT_BATCH(0); 589428d7b3dSmrg OUT_BATCH64(0); /* scratch */ 590428d7b3dSmrg OUT_BATCH(0); 591428d7b3dSmrg OUT_BATCH(0); 592428d7b3dSmrg OUT_BATCH(0); 593428d7b3dSmrg 594428d7b3dSmrg#if SIM 595428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_CONSTANT_DS | (11 - 2)); 596428d7b3dSmrg OUT_BATCH(0); 597428d7b3dSmrg OUT_BATCH(0); 598428d7b3dSmrg OUT_BATCH64(0); 599428d7b3dSmrg OUT_BATCH64(0); 600428d7b3dSmrg OUT_BATCH64(0); 601428d7b3dSmrg OUT_BATCH64(0); 602428d7b3dSmrg 603428d7b3dSmrg#if 1 604428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2)); 605428d7b3dSmrg OUT_BATCH(0); 606428d7b3dSmrg 607428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2)); 608428d7b3dSmrg OUT_BATCH(0); 609428d7b3dSmrg#endif 610428d7b3dSmrg#endif 611428d7b3dSmrg} 612428d7b3dSmrg 613428d7b3dSmrgstatic void 614428d7b3dSmrggen8_emit_gs_invariant(struct sna *sna) 615428d7b3dSmrg{ 616428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_GS | (10 - 2)); 617428d7b3dSmrg OUT_BATCH64(0); /* no GS kernel */ 618428d7b3dSmrg OUT_BATCH(0); 619428d7b3dSmrg OUT_BATCH64(0); /* scratch */ 620428d7b3dSmrg OUT_BATCH(0); 621428d7b3dSmrg OUT_BATCH(0); /* pass-through */ 622428d7b3dSmrg OUT_BATCH(0); 623428d7b3dSmrg OUT_BATCH(0); 624428d7b3dSmrg 625428d7b3dSmrg#if SIM 626428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_CONSTANT_GS | (11 - 2)); 627428d7b3dSmrg OUT_BATCH(0); 628428d7b3dSmrg OUT_BATCH(0); 629428d7b3dSmrg OUT_BATCH64(0); 630428d7b3dSmrg OUT_BATCH64(0); 631428d7b3dSmrg OUT_BATCH64(0); 632428d7b3dSmrg OUT_BATCH64(0); 633428d7b3dSmrg 634428d7b3dSmrg#if 1 635428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2)); 636428d7b3dSmrg OUT_BATCH(0); 637428d7b3dSmrg 638428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2)); 639428d7b3dSmrg OUT_BATCH(0); 640428d7b3dSmrg#endif 641428d7b3dSmrg#endif 642428d7b3dSmrg} 643428d7b3dSmrg 644428d7b3dSmrgstatic void 645428d7b3dSmrggen8_emit_sol_invariant(struct sna *sna) 646428d7b3dSmrg{ 647428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_STREAMOUT | (5 - 2)); 648428d7b3dSmrg OUT_BATCH(0); 649428d7b3dSmrg OUT_BATCH(0); 650428d7b3dSmrg OUT_BATCH(0); 651428d7b3dSmrg OUT_BATCH(0); 652428d7b3dSmrg} 653428d7b3dSmrg 654428d7b3dSmrgstatic void 655428d7b3dSmrggen8_emit_sf_invariant(struct sna *sna) 656428d7b3dSmrg{ 657428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_SF | (4 - 2)); 658428d7b3dSmrg OUT_BATCH(0); 659428d7b3dSmrg OUT_BATCH(0); 660428d7b3dSmrg OUT_BATCH(0); 661428d7b3dSmrg} 662428d7b3dSmrg 663428d7b3dSmrgstatic void 664428d7b3dSmrggen8_emit_clip_invariant(struct sna *sna) 665428d7b3dSmrg{ 666428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_CLIP | (4 - 2)); 667428d7b3dSmrg OUT_BATCH(0); 668428d7b3dSmrg OUT_BATCH(0); /* pass-through */ 669428d7b3dSmrg OUT_BATCH(0); 670428d7b3dSmrg 671428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP | (2 - 2)); 672428d7b3dSmrg OUT_BATCH(0); 673428d7b3dSmrg 674428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); 675428d7b3dSmrg OUT_BATCH(0); 676428d7b3dSmrg} 677428d7b3dSmrg 678428d7b3dSmrgstatic void 679428d7b3dSmrggen8_emit_null_depth_buffer(struct sna *sna) 680428d7b3dSmrg{ 681428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_DEPTH_BUFFER | (8 - 2)); 682428d7b3dSmrg#if 0 683428d7b3dSmrg OUT_BATCH(SURFACE_NULL << DEPTH_BUFFER_TYPE_SHIFT | 684428d7b3dSmrg DEPTHFORMAT_D32_FLOAT << DEPTH_BUFFER_FORMAT_SHIFT); 685428d7b3dSmrg#else 686428d7b3dSmrg OUT_BATCH(SURFACE_2D << DEPTH_BUFFER_TYPE_SHIFT | 687428d7b3dSmrg DEPTHFORMAT_D16_UNORM << DEPTH_BUFFER_FORMAT_SHIFT); 688428d7b3dSmrg#endif 689428d7b3dSmrg OUT_BATCH64(0); 690428d7b3dSmrg OUT_BATCH(0); 691428d7b3dSmrg OUT_BATCH(0); 692428d7b3dSmrg OUT_BATCH(0); 693428d7b3dSmrg OUT_BATCH(0); 694428d7b3dSmrg 695428d7b3dSmrg#if SIM 696428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2)); 697428d7b3dSmrg OUT_BATCH(0); 698428d7b3dSmrg OUT_BATCH64(0); 699428d7b3dSmrg OUT_BATCH(0); 700428d7b3dSmrg#endif 701428d7b3dSmrg 702428d7b3dSmrg#if SIM 703428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_STENCIL_BUFFER | (5 - 2)); 704428d7b3dSmrg OUT_BATCH(0); 705428d7b3dSmrg OUT_BATCH64(0); 706428d7b3dSmrg OUT_BATCH(0); 707428d7b3dSmrg#endif 708428d7b3dSmrg 709428d7b3dSmrg#if SIM 710428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2)); 711428d7b3dSmrg OUT_BATCH(0); 712428d7b3dSmrg OUT_BATCH(0); 713428d7b3dSmrg#endif 714428d7b3dSmrg 715428d7b3dSmrg#if SIM 716428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_CLEAR_PARAMS | (3 - 2)); 717428d7b3dSmrg OUT_BATCH(0); 718428d7b3dSmrg OUT_BATCH(0); 719428d7b3dSmrg#endif 720428d7b3dSmrg} 721428d7b3dSmrg 722428d7b3dSmrgstatic void 723428d7b3dSmrggen8_emit_wm_invariant(struct sna *sna) 724428d7b3dSmrg{ 725428d7b3dSmrg gen8_emit_null_depth_buffer(sna); 726428d7b3dSmrg 727428d7b3dSmrg#if SIM 728428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_SCISSOR_STATE_POINTERS | (2 - 2)); 729428d7b3dSmrg OUT_BATCH(0); 730428d7b3dSmrg#endif 731428d7b3dSmrg 732428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_WM | (2 - 2)); 733428d7b3dSmrg //OUT_BATCH(WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC); /* XXX */ 734428d7b3dSmrg OUT_BATCH(WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 735428d7b3dSmrg 736428d7b3dSmrg#if SIM 737428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_WM_CHROMAKEY | (2 - 2)); 738428d7b3dSmrg OUT_BATCH(0); 739428d7b3dSmrg#endif 740428d7b3dSmrg 741428d7b3dSmrg#if 0 742428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_WM_HZ_OP | (5 - 2)); 743428d7b3dSmrg OUT_BATCH(0); 744428d7b3dSmrg OUT_BATCH(0); 745428d7b3dSmrg OUT_BATCH(0); 746428d7b3dSmrg OUT_BATCH(0); 747428d7b3dSmrg#endif 748428d7b3dSmrg 749428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_PS_EXTRA | (2 - 2)); 750428d7b3dSmrg OUT_BATCH(PSX_PIXEL_SHADER_VALID | 751428d7b3dSmrg PSX_ATTRIBUTE_ENABLE); 752428d7b3dSmrg 753428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_RASTER | (5 - 2)); 754428d7b3dSmrg OUT_BATCH(RASTER_FRONT_WINDING_CCW | 755428d7b3dSmrg RASTER_CULL_NONE); 756428d7b3dSmrg OUT_BATCH(0); 757428d7b3dSmrg OUT_BATCH(0); 758428d7b3dSmrg OUT_BATCH(0); 759428d7b3dSmrg 760428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_SBE_SWIZ | (11 - 2)); 761428d7b3dSmrg OUT_BATCH(0); 762428d7b3dSmrg OUT_BATCH(0); 763428d7b3dSmrg OUT_BATCH(0); 764428d7b3dSmrg OUT_BATCH(0); 765428d7b3dSmrg OUT_BATCH(0); 766428d7b3dSmrg OUT_BATCH(0); 767428d7b3dSmrg OUT_BATCH(0); 768428d7b3dSmrg OUT_BATCH(0); 769428d7b3dSmrg OUT_BATCH(0); 770428d7b3dSmrg OUT_BATCH(0); 771428d7b3dSmrg 772428d7b3dSmrg#if SIM 773428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_CONSTANT_PS | (11 - 2)); 774428d7b3dSmrg OUT_BATCH(0); 775428d7b3dSmrg OUT_BATCH(0); 776428d7b3dSmrg OUT_BATCH64(0); 777428d7b3dSmrg OUT_BATCH64(0); 778428d7b3dSmrg OUT_BATCH64(0); 779428d7b3dSmrg OUT_BATCH64(0); 780428d7b3dSmrg#endif 781428d7b3dSmrg} 782428d7b3dSmrg 783428d7b3dSmrgstatic void 784428d7b3dSmrggen8_emit_cc_invariant(struct sna *sna) 785428d7b3dSmrg{ 786428d7b3dSmrg} 787428d7b3dSmrg 788428d7b3dSmrgstatic void 789428d7b3dSmrggen8_emit_vf_invariant(struct sna *sna) 790428d7b3dSmrg{ 791428d7b3dSmrg int n; 792428d7b3dSmrg 793428d7b3dSmrg#if 1 794428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_VF | (2 - 2)); 795428d7b3dSmrg OUT_BATCH(0); 796428d7b3dSmrg#endif 797428d7b3dSmrg 798428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_VF_SGVS | (2 - 2)); 799428d7b3dSmrg OUT_BATCH(0); 800428d7b3dSmrg 801428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2)); 802428d7b3dSmrg OUT_BATCH(RECTLIST); 803428d7b3dSmrg 804428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_VF_STATISTICS | 0); 805428d7b3dSmrg 806428d7b3dSmrg for (n = 1; n <= 3; n++) { 807428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_VF_INSTANCING | (3 - 2)); 808428d7b3dSmrg OUT_BATCH(n); 809428d7b3dSmrg OUT_BATCH(0); 810428d7b3dSmrg } 811428d7b3dSmrg} 812428d7b3dSmrg 813428d7b3dSmrgstatic void 814428d7b3dSmrggen8_emit_invariant(struct sna *sna) 815428d7b3dSmrg{ 816428d7b3dSmrg OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_3D); 817428d7b3dSmrg 818428d7b3dSmrg#if SIM 819428d7b3dSmrg OUT_BATCH(GEN8_STATE_SIP | (3 - 2)); 820428d7b3dSmrg OUT_BATCH64(0); 821428d7b3dSmrg#endif 822428d7b3dSmrg 823428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_MULTISAMPLE | (2 - 2)); 824428d7b3dSmrg OUT_BATCH(MULTISAMPLE_PIXEL_LOCATION_CENTER | 825428d7b3dSmrg MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ 826428d7b3dSmrg 827428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_SAMPLE_MASK | (2 - 2)); 828428d7b3dSmrg OUT_BATCH(1); 829428d7b3dSmrg 830428d7b3dSmrg#if SIM 831428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_SAMPLE_PATTERN | (5 - 2)); 832428d7b3dSmrg OUT_BATCH(0); 833428d7b3dSmrg OUT_BATCH(0); 834428d7b3dSmrg OUT_BATCH(0); 835428d7b3dSmrg //OUT_BATCH(8<<20 | 8<<16); 836428d7b3dSmrg OUT_BATCH(0); 837428d7b3dSmrg#endif 838428d7b3dSmrg 839428d7b3dSmrg gen8_emit_push_constants(sna); 840428d7b3dSmrg gen8_emit_urb(sna); 841428d7b3dSmrg 842428d7b3dSmrg gen8_emit_state_base_address(sna); 843428d7b3dSmrg 844428d7b3dSmrg gen8_emit_vf_invariant(sna); 845428d7b3dSmrg gen8_emit_vs_invariant(sna); 846428d7b3dSmrg gen8_emit_hs_invariant(sna); 847428d7b3dSmrg gen8_emit_te_invariant(sna); 848428d7b3dSmrg gen8_emit_ds_invariant(sna); 849428d7b3dSmrg gen8_emit_gs_invariant(sna); 850428d7b3dSmrg gen8_emit_sol_invariant(sna); 851428d7b3dSmrg gen8_emit_clip_invariant(sna); 852428d7b3dSmrg gen8_emit_sf_invariant(sna); 853428d7b3dSmrg gen8_emit_wm_invariant(sna); 854428d7b3dSmrg gen8_emit_cc_invariant(sna); 855428d7b3dSmrg 856428d7b3dSmrg sna->render_state.gen8.needs_invariant = false; 857428d7b3dSmrg} 858428d7b3dSmrg 859428d7b3dSmrgstatic void 860428d7b3dSmrggen8_emit_cc(struct sna *sna, uint32_t blend) 861428d7b3dSmrg{ 862428d7b3dSmrg struct gen8_render_state *render = &sna->render_state.gen8; 863428d7b3dSmrg 864428d7b3dSmrg if (render->blend == blend) 865428d7b3dSmrg return; 866428d7b3dSmrg 867428d7b3dSmrg DBG(("%s: blend=%x (current=%x), src=%d, dst=%d\n", 868428d7b3dSmrg __FUNCTION__, blend, render->blend, 869428d7b3dSmrg blend / GEN8_BLENDFACTOR_COUNT, 870428d7b3dSmrg blend % GEN8_BLENDFACTOR_COUNT)); 871428d7b3dSmrg 872428d7b3dSmrg assert(blend < GEN8_BLENDFACTOR_COUNT * GEN8_BLENDFACTOR_COUNT); 873428d7b3dSmrg assert(blend / GEN8_BLENDFACTOR_COUNT > 0); 874428d7b3dSmrg assert(blend % GEN8_BLENDFACTOR_COUNT > 0); 875428d7b3dSmrg 876428d7b3dSmrg /* XXX can have upto 8 blend states preload, selectable via 877428d7b3dSmrg * Render Target Index. What other side-effects of Render Target Index? 878428d7b3dSmrg */ 879428d7b3dSmrg 880428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_PS_BLEND | (2 - 2)); 881428d7b3dSmrg if (blend != GEN8_BLEND(NO_BLEND)) { 882428d7b3dSmrg uint32_t src = blend / GEN8_BLENDFACTOR_COUNT; 883428d7b3dSmrg uint32_t dst = blend % GEN8_BLENDFACTOR_COUNT; 884428d7b3dSmrg OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT | 885428d7b3dSmrg PS_BLEND_COLOR_BLEND_ENABLE | 886428d7b3dSmrg src << PS_BLEND_SRC_ALPHA_SHIFT | 887428d7b3dSmrg dst << PS_BLEND_DST_ALPHA_SHIFT | 888428d7b3dSmrg src << PS_BLEND_SRC_SHIFT | 889428d7b3dSmrg dst << PS_BLEND_DST_SHIFT); 890428d7b3dSmrg } else 891428d7b3dSmrg OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT); 892428d7b3dSmrg 893428d7b3dSmrg assert(is_aligned(render->cc_blend + blend * GEN8_BLEND_STATE_PADDED_SIZE, 64)); 894428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); 895428d7b3dSmrg OUT_BATCH((render->cc_blend + blend * GEN8_BLEND_STATE_PADDED_SIZE) | 1); 896428d7b3dSmrg 897428d7b3dSmrg /* Force a CC_STATE pointer change to improve blend performance */ 898428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_CC_STATE_POINTERS | (2 - 2)); 899428d7b3dSmrg OUT_BATCH(0); 900428d7b3dSmrg 901428d7b3dSmrg render->blend = blend; 902428d7b3dSmrg} 903428d7b3dSmrg 904428d7b3dSmrgstatic void 905428d7b3dSmrggen8_emit_sampler(struct sna *sna, uint32_t state) 906428d7b3dSmrg{ 907428d7b3dSmrg if (sna->render_state.gen8.samplers == state) 908428d7b3dSmrg return; 909428d7b3dSmrg 910428d7b3dSmrg sna->render_state.gen8.samplers = state; 911428d7b3dSmrg 912428d7b3dSmrg DBG(("%s: sampler = %x\n", __FUNCTION__, state)); 913428d7b3dSmrg 914428d7b3dSmrg assert(2 * sizeof(struct gen8_sampler_state) == 32); 915428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2)); 916428d7b3dSmrg OUT_BATCH(sna->render_state.gen8.wm_state + state * 2 * sizeof(struct gen8_sampler_state)); 917428d7b3dSmrg} 918428d7b3dSmrg 919428d7b3dSmrgstatic void 920428d7b3dSmrggen8_emit_sf(struct sna *sna, bool has_mask) 921428d7b3dSmrg{ 922428d7b3dSmrg int num_sf_outputs = has_mask ? 2 : 1; 923428d7b3dSmrg 924428d7b3dSmrg if (sna->render_state.gen8.num_sf_outputs == num_sf_outputs) 925428d7b3dSmrg return; 926428d7b3dSmrg 927428d7b3dSmrg DBG(("%s: num_sf_outputs=%d\n", __FUNCTION__, num_sf_outputs)); 928428d7b3dSmrg 929428d7b3dSmrg sna->render_state.gen8.num_sf_outputs = num_sf_outputs; 930428d7b3dSmrg 931428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_SBE | (4 - 2)); 932428d7b3dSmrg OUT_BATCH(num_sf_outputs << SBE_NUM_OUTPUTS_SHIFT | 933428d7b3dSmrg SBE_FORCE_VERTEX_URB_READ_LENGTH | /* forced is faster */ 934428d7b3dSmrg SBE_FORCE_VERTEX_URB_READ_OFFSET | 935428d7b3dSmrg 1 << SBE_URB_ENTRY_READ_LENGTH_SHIFT | 936428d7b3dSmrg 1 << SBE_URB_ENTRY_READ_OFFSET_SHIFT); 937428d7b3dSmrg OUT_BATCH(0); 938428d7b3dSmrg OUT_BATCH(0); 939428d7b3dSmrg} 940428d7b3dSmrg 941428d7b3dSmrgstatic void 942428d7b3dSmrggen8_emit_wm(struct sna *sna, int kernel) 943428d7b3dSmrg{ 944428d7b3dSmrg const uint32_t *kernels; 945428d7b3dSmrg 946428d7b3dSmrg assert(kernel < ARRAY_SIZE(wm_kernels)); 947428d7b3dSmrg if (sna->render_state.gen8.kernel == kernel) 948428d7b3dSmrg return; 949428d7b3dSmrg 950428d7b3dSmrg sna->render_state.gen8.kernel = kernel; 951428d7b3dSmrg kernels = sna->render_state.gen8.wm_kernel[kernel]; 952428d7b3dSmrg 953428d7b3dSmrg DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n", 954428d7b3dSmrg __FUNCTION__, 955428d7b3dSmrg wm_kernels[kernel].name, 956428d7b3dSmrg wm_kernels[kernel].num_surfaces, 957428d7b3dSmrg kernels[0], kernels[1], kernels[2])); 958428d7b3dSmrg assert(is_aligned(kernels[0], 64)); 959428d7b3dSmrg assert(is_aligned(kernels[1], 64)); 960428d7b3dSmrg assert(is_aligned(kernels[2], 64)); 961428d7b3dSmrg 962428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_PS | (12 - 2)); 963428d7b3dSmrg OUT_BATCH64(kernels[0] ?: kernels[1] ?: kernels[2]); 964428d7b3dSmrg OUT_BATCH(1 << PS_SAMPLER_COUNT_SHIFT | 965428d7b3dSmrg PS_VECTOR_MASK_ENABLE | 966428d7b3dSmrg wm_kernels[kernel].num_surfaces << PS_BINDING_TABLE_ENTRY_COUNT_SHIFT); 967428d7b3dSmrg OUT_BATCH64(0); /* scratch address */ 968428d7b3dSmrg OUT_BATCH(PS_MAX_THREADS | 969428d7b3dSmrg (kernels[0] ? PS_8_DISPATCH_ENABLE : 0) | 970428d7b3dSmrg (kernels[1] ? PS_16_DISPATCH_ENABLE : 0) | 971428d7b3dSmrg (kernels[2] ? PS_32_DISPATCH_ENABLE : 0)); 972428d7b3dSmrg OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << PS_DISPATCH_START_GRF_SHIFT_0 | 973428d7b3dSmrg 8 << PS_DISPATCH_START_GRF_SHIFT_1 | 974428d7b3dSmrg 6 << PS_DISPATCH_START_GRF_SHIFT_2); 975428d7b3dSmrg OUT_BATCH64(kernels[2]); 976428d7b3dSmrg OUT_BATCH64(kernels[1]); 977428d7b3dSmrg} 978428d7b3dSmrg 979428d7b3dSmrgstatic bool 980428d7b3dSmrggen8_emit_binding_table(struct sna *sna, uint16_t offset) 981428d7b3dSmrg{ 982428d7b3dSmrg if (sna->render_state.gen8.surface_table == offset) 983428d7b3dSmrg return false; 984428d7b3dSmrg 985428d7b3dSmrg /* Binding table pointers */ 986428d7b3dSmrg assert(is_aligned(4*offset, 32)); 987428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); 988428d7b3dSmrg OUT_BATCH(offset*4); 989428d7b3dSmrg 990428d7b3dSmrg sna->render_state.gen8.surface_table = offset; 991428d7b3dSmrg return true; 992428d7b3dSmrg} 993428d7b3dSmrg 994428d7b3dSmrgstatic bool 995428d7b3dSmrggen8_emit_drawing_rectangle(struct sna *sna, 996428d7b3dSmrg const struct sna_composite_op *op) 997428d7b3dSmrg{ 998428d7b3dSmrg uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); 999428d7b3dSmrg uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; 1000428d7b3dSmrg 1001428d7b3dSmrg assert(!too_large(abs(op->dst.x), abs(op->dst.y))); 1002428d7b3dSmrg assert(!too_large(op->dst.width, op->dst.height)); 1003428d7b3dSmrg 1004428d7b3dSmrg if (sna->render_state.gen8.drawrect_limit == limit && 1005428d7b3dSmrg sna->render_state.gen8.drawrect_offset == offset) 1006428d7b3dSmrg return true; 1007428d7b3dSmrg 1008428d7b3dSmrg sna->render_state.gen8.drawrect_offset = offset; 1009428d7b3dSmrg sna->render_state.gen8.drawrect_limit = limit; 1010428d7b3dSmrg 1011428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); 1012428d7b3dSmrg OUT_BATCH(0); 1013428d7b3dSmrg OUT_BATCH(limit); 1014428d7b3dSmrg OUT_BATCH(offset); 1015428d7b3dSmrg return false; 1016428d7b3dSmrg} 1017428d7b3dSmrg 1018428d7b3dSmrgstatic void 1019428d7b3dSmrggen8_emit_vertex_elements(struct sna *sna, 1020428d7b3dSmrg const struct sna_composite_op *op) 1021428d7b3dSmrg{ 1022428d7b3dSmrg /* 1023428d7b3dSmrg * vertex data in vertex buffer 1024428d7b3dSmrg * position: (x, y) 1025428d7b3dSmrg * texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0) 1026428d7b3dSmrg * texture coordinate 1 if (has_mask is true): same as above 1027428d7b3dSmrg */ 1028428d7b3dSmrg struct gen8_render_state *render = &sna->render_state.gen8; 1029428d7b3dSmrg uint32_t src_format, dw; 1030428d7b3dSmrg int id = GEN8_VERTEX(op->u.gen8.flags); 1031428d7b3dSmrg bool has_mask; 1032428d7b3dSmrg 1033428d7b3dSmrg DBG(("%s: setup id=%d\n", __FUNCTION__, id)); 1034428d7b3dSmrg 1035428d7b3dSmrg if (render->ve_id == id) 1036428d7b3dSmrg return; 1037428d7b3dSmrg render->ve_id = id; 1038428d7b3dSmrg 1039428d7b3dSmrg /* The VUE layout 1040428d7b3dSmrg * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) 1041428d7b3dSmrg * dword 4-7: position (x, y, 1.0, 1.0), 1042428d7b3dSmrg * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) 1043428d7b3dSmrg * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) 1044428d7b3dSmrg * 1045428d7b3dSmrg * dword 4-15 are fetched from vertex buffer 1046428d7b3dSmrg */ 1047428d7b3dSmrg has_mask = (id >> 2) != 0; 1048428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_VERTEX_ELEMENTS | 1049428d7b3dSmrg ((2 * (3 + has_mask)) + 1 - 2)); 1050428d7b3dSmrg 1051428d7b3dSmrg OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | 1052428d7b3dSmrg SURFACEFORMAT_R32G32B32A32_FLOAT << VE_FORMAT_SHIFT | 1053428d7b3dSmrg 0 << VE_OFFSET_SHIFT); 1054428d7b3dSmrg OUT_BATCH(COMPONENT_STORE_0 << VE_COMPONENT_0_SHIFT | 1055428d7b3dSmrg COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT | 1056428d7b3dSmrg COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT | 1057428d7b3dSmrg COMPONENT_STORE_0 << VE_COMPONENT_3_SHIFT); 1058428d7b3dSmrg 1059428d7b3dSmrg /* x,y */ 1060428d7b3dSmrg OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | 1061428d7b3dSmrg SURFACEFORMAT_R16G16_SSCALED << VE_FORMAT_SHIFT | 1062428d7b3dSmrg 0 << VE_OFFSET_SHIFT); 1063428d7b3dSmrg OUT_BATCH(COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT | 1064428d7b3dSmrg COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT | 1065428d7b3dSmrg COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT | 1066428d7b3dSmrg COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT); 1067428d7b3dSmrg 1068428d7b3dSmrg /* u0, v0, w0 */ 1069428d7b3dSmrg DBG(("%s: first channel %d floats, offset=4\n", __FUNCTION__, id & 3)); 1070428d7b3dSmrg dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT; 1071428d7b3dSmrg switch (id & 3) { 1072428d7b3dSmrg default: 1073428d7b3dSmrg assert(0); 1074428d7b3dSmrg case 0: 1075428d7b3dSmrg src_format = SURFACEFORMAT_R16G16_SSCALED; 1076428d7b3dSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1077428d7b3dSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; 1078428d7b3dSmrg dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; 1079428d7b3dSmrg break; 1080428d7b3dSmrg case 1: 1081428d7b3dSmrg src_format = SURFACEFORMAT_R32_FLOAT; 1082428d7b3dSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1083428d7b3dSmrg dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT; 1084428d7b3dSmrg dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; 1085428d7b3dSmrg break; 1086428d7b3dSmrg case 2: 1087428d7b3dSmrg src_format = SURFACEFORMAT_R32G32_FLOAT; 1088428d7b3dSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1089428d7b3dSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; 1090428d7b3dSmrg dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; 1091428d7b3dSmrg break; 1092428d7b3dSmrg case 3: 1093428d7b3dSmrg src_format = SURFACEFORMAT_R32G32B32_FLOAT; 1094428d7b3dSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1095428d7b3dSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; 1096428d7b3dSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT; 1097428d7b3dSmrg break; 1098428d7b3dSmrg } 1099428d7b3dSmrg OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | 1100428d7b3dSmrg src_format << VE_FORMAT_SHIFT | 1101428d7b3dSmrg 4 << VE_OFFSET_SHIFT); 1102428d7b3dSmrg OUT_BATCH(dw); 1103428d7b3dSmrg 1104428d7b3dSmrg /* u1, v1, w1 */ 1105428d7b3dSmrg if (has_mask) { 1106428d7b3dSmrg unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); 1107428d7b3dSmrg DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset)); 1108428d7b3dSmrg dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT; 1109428d7b3dSmrg switch (id >> 2) { 1110428d7b3dSmrg case 1: 1111428d7b3dSmrg src_format = SURFACEFORMAT_R32_FLOAT; 1112428d7b3dSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1113428d7b3dSmrg dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT; 1114428d7b3dSmrg dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; 1115428d7b3dSmrg break; 1116428d7b3dSmrg default: 1117428d7b3dSmrg assert(0); 1118428d7b3dSmrg case 2: 1119428d7b3dSmrg src_format = SURFACEFORMAT_R32G32_FLOAT; 1120428d7b3dSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1121428d7b3dSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; 1122428d7b3dSmrg dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; 1123428d7b3dSmrg break; 1124428d7b3dSmrg case 3: 1125428d7b3dSmrg src_format = SURFACEFORMAT_R32G32B32_FLOAT; 1126428d7b3dSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1127428d7b3dSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; 1128428d7b3dSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT; 1129428d7b3dSmrg break; 1130428d7b3dSmrg } 1131428d7b3dSmrg OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | 1132428d7b3dSmrg src_format << VE_FORMAT_SHIFT | 1133428d7b3dSmrg offset << VE_OFFSET_SHIFT); 1134428d7b3dSmrg OUT_BATCH(dw); 1135428d7b3dSmrg } 1136428d7b3dSmrg} 1137428d7b3dSmrg 1138428d7b3dSmrginline static void 1139428d7b3dSmrggen8_emit_pipe_invalidate(struct sna *sna) 1140428d7b3dSmrg{ 1141428d7b3dSmrg OUT_BATCH(GEN8_PIPE_CONTROL | (6 - 2)); 1142428d7b3dSmrg OUT_BATCH(PIPE_CONTROL_WC_FLUSH | 1143428d7b3dSmrg PIPE_CONTROL_TC_FLUSH | 1144428d7b3dSmrg PIPE_CONTROL_CS_STALL); 1145428d7b3dSmrg OUT_BATCH64(0); 1146428d7b3dSmrg OUT_BATCH64(0); 1147428d7b3dSmrg} 1148428d7b3dSmrg 1149428d7b3dSmrginline static void 1150428d7b3dSmrggen8_emit_pipe_flush(struct sna *sna, bool need_stall) 1151428d7b3dSmrg{ 1152428d7b3dSmrg unsigned stall; 1153428d7b3dSmrg 1154428d7b3dSmrg stall = 0; 1155428d7b3dSmrg if (need_stall) 1156428d7b3dSmrg stall = (PIPE_CONTROL_CS_STALL | 1157428d7b3dSmrg PIPE_CONTROL_STALL_AT_SCOREBOARD); 1158428d7b3dSmrg 1159428d7b3dSmrg OUT_BATCH(GEN8_PIPE_CONTROL | (6 - 2)); 1160428d7b3dSmrg OUT_BATCH(PIPE_CONTROL_WC_FLUSH | stall); 1161428d7b3dSmrg OUT_BATCH64(0); 1162428d7b3dSmrg OUT_BATCH64(0); 1163428d7b3dSmrg} 1164428d7b3dSmrg 1165428d7b3dSmrginline static void 1166428d7b3dSmrggen8_emit_pipe_stall(struct sna *sna) 1167428d7b3dSmrg{ 1168428d7b3dSmrg OUT_BATCH(GEN8_PIPE_CONTROL | (6 - 2)); 1169428d7b3dSmrg OUT_BATCH(PIPE_CONTROL_CS_STALL | 1170428d7b3dSmrg PIPE_CONTROL_STALL_AT_SCOREBOARD); 1171428d7b3dSmrg OUT_BATCH64(0); 1172428d7b3dSmrg OUT_BATCH64(0); 1173428d7b3dSmrg} 1174428d7b3dSmrg 1175428d7b3dSmrgstatic void 1176428d7b3dSmrggen8_emit_state(struct sna *sna, 1177428d7b3dSmrg const struct sna_composite_op *op, 1178428d7b3dSmrg uint16_t wm_binding_table) 1179428d7b3dSmrg{ 1180428d7b3dSmrg bool need_invalidate; 1181428d7b3dSmrg bool need_flush; 1182428d7b3dSmrg bool need_stall; 1183428d7b3dSmrg 1184428d7b3dSmrg assert(op->dst.bo->exec); 1185428d7b3dSmrg 1186428d7b3dSmrg need_flush = wm_binding_table & 1 || 1187428d7b3dSmrg (sna->render_state.gen8.emit_flush && GEN8_READS_DST(op->u.gen8.flags)); 1188428d7b3dSmrg if (ALWAYS_FLUSH) 1189428d7b3dSmrg need_flush = true; 1190428d7b3dSmrg 1191428d7b3dSmrg wm_binding_table &= ~1; 1192428d7b3dSmrg 1193428d7b3dSmrg need_stall = sna->render_state.gen8.surface_table != wm_binding_table; 1194428d7b3dSmrg 1195428d7b3dSmrg need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo); 1196428d7b3dSmrg if (ALWAYS_INVALIDATE) 1197428d7b3dSmrg need_invalidate = true; 1198428d7b3dSmrg 1199428d7b3dSmrg need_stall &= gen8_emit_drawing_rectangle(sna, op); 1200428d7b3dSmrg if (ALWAYS_STALL) 1201428d7b3dSmrg need_stall = true; 1202428d7b3dSmrg 1203428d7b3dSmrg if (need_invalidate) { 1204428d7b3dSmrg gen8_emit_pipe_invalidate(sna); 1205428d7b3dSmrg kgem_clear_dirty(&sna->kgem); 1206428d7b3dSmrg assert(op->dst.bo->exec); 1207428d7b3dSmrg kgem_bo_mark_dirty(op->dst.bo); 1208428d7b3dSmrg 1209428d7b3dSmrg need_flush = false; 1210428d7b3dSmrg need_stall = false; 1211428d7b3dSmrg } 1212428d7b3dSmrg if (need_flush) { 1213428d7b3dSmrg gen8_emit_pipe_flush(sna, need_stall); 1214428d7b3dSmrg need_stall = false; 1215428d7b3dSmrg } 1216428d7b3dSmrg if (need_stall) 1217428d7b3dSmrg gen8_emit_pipe_stall(sna); 1218428d7b3dSmrg 1219428d7b3dSmrg gen8_emit_cc(sna, GEN8_BLEND(op->u.gen8.flags)); 1220428d7b3dSmrg gen8_emit_sampler(sna, GEN8_SAMPLER(op->u.gen8.flags)); 1221428d7b3dSmrg gen8_emit_sf(sna, GEN8_VERTEX(op->u.gen8.flags) >> 2); 1222428d7b3dSmrg gen8_emit_wm(sna, GEN8_KERNEL(op->u.gen8.flags)); 1223428d7b3dSmrg gen8_emit_vertex_elements(sna, op); 1224428d7b3dSmrg gen8_emit_binding_table(sna, wm_binding_table); 1225428d7b3dSmrg 1226428d7b3dSmrg sna->render_state.gen8.emit_flush = GEN8_READS_DST(op->u.gen8.flags); 1227428d7b3dSmrg} 1228428d7b3dSmrg 1229428d7b3dSmrgstatic bool gen8_magic_ca_pass(struct sna *sna, 1230428d7b3dSmrg const struct sna_composite_op *op) 1231428d7b3dSmrg{ 1232428d7b3dSmrg struct gen8_render_state *state = &sna->render_state.gen8; 1233428d7b3dSmrg 1234428d7b3dSmrg if (!op->need_magic_ca_pass) 1235428d7b3dSmrg return false; 1236428d7b3dSmrg 1237428d7b3dSmrg DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, 1238428d7b3dSmrg sna->render.vertex_start, sna->render.vertex_index)); 1239428d7b3dSmrg 1240428d7b3dSmrg gen8_emit_pipe_stall(sna); 1241428d7b3dSmrg 1242428d7b3dSmrg gen8_emit_cc(sna, 1243428d7b3dSmrg GEN8_BLEND(gen8_get_blend(PictOpAdd, true, 1244428d7b3dSmrg op->dst.format))); 1245428d7b3dSmrg gen8_emit_wm(sna, 1246428d7b3dSmrg gen8_choose_composite_kernel(PictOpAdd, 1247428d7b3dSmrg true, true, 1248428d7b3dSmrg op->is_affine)); 1249428d7b3dSmrg 1250428d7b3dSmrg OUT_BATCH(GEN8_3DPRIMITIVE | (7 - 2)); 1251428d7b3dSmrg OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ 1252428d7b3dSmrg OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); 1253428d7b3dSmrg OUT_BATCH(sna->render.vertex_start); 1254428d7b3dSmrg OUT_BATCH(1); /* single instance */ 1255428d7b3dSmrg OUT_BATCH(0); /* start instance location */ 1256428d7b3dSmrg OUT_BATCH(0); /* index buffer offset, ignored */ 1257428d7b3dSmrg 1258428d7b3dSmrg state->last_primitive = sna->kgem.nbatch; 1259428d7b3dSmrg return true; 1260428d7b3dSmrg} 1261428d7b3dSmrg 1262428d7b3dSmrgstatic void null_create(struct sna_static_stream *stream) 1263428d7b3dSmrg{ 1264428d7b3dSmrg /* A bunch of zeros useful for legacy border color and depth-stencil */ 1265428d7b3dSmrg sna_static_stream_map(stream, 64, 64); 1266428d7b3dSmrg} 1267428d7b3dSmrg 1268428d7b3dSmrgstatic void 1269428d7b3dSmrgsampler_state_init(struct gen8_sampler_state *sampler_state, 1270428d7b3dSmrg sampler_filter_t filter, 1271428d7b3dSmrg sampler_extend_t extend) 1272428d7b3dSmrg{ 1273428d7b3dSmrg COMPILE_TIME_ASSERT(sizeof(*sampler_state) == 4*sizeof(uint32_t)); 1274428d7b3dSmrg 1275428d7b3dSmrg sampler_state->ss0.lod_preclamp = 2; /* GL mode */ 1276428d7b3dSmrg sampler_state->ss0.default_color_mode = 1; 1277428d7b3dSmrg 1278428d7b3dSmrg switch (filter) { 1279428d7b3dSmrg default: 1280428d7b3dSmrg case SAMPLER_FILTER_NEAREST: 1281428d7b3dSmrg sampler_state->ss0.min_filter = MAPFILTER_NEAREST; 1282428d7b3dSmrg sampler_state->ss0.mag_filter = MAPFILTER_NEAREST; 1283428d7b3dSmrg break; 1284428d7b3dSmrg case SAMPLER_FILTER_BILINEAR: 1285428d7b3dSmrg sampler_state->ss0.min_filter = MAPFILTER_LINEAR; 1286428d7b3dSmrg sampler_state->ss0.mag_filter = MAPFILTER_LINEAR; 1287428d7b3dSmrg break; 1288428d7b3dSmrg } 1289428d7b3dSmrg 1290428d7b3dSmrg /* XXX bicubic filter using MAPFILTER_FLEXIBLE */ 1291428d7b3dSmrg 1292428d7b3dSmrg switch (extend) { 1293428d7b3dSmrg default: 1294428d7b3dSmrg case SAMPLER_EXTEND_NONE: 1295428d7b3dSmrg sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; 1296428d7b3dSmrg sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; 1297428d7b3dSmrg sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; 1298428d7b3dSmrg break; 1299428d7b3dSmrg case SAMPLER_EXTEND_REPEAT: 1300428d7b3dSmrg sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_WRAP; 1301428d7b3dSmrg sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_WRAP; 1302428d7b3dSmrg sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_WRAP; 1303428d7b3dSmrg break; 1304428d7b3dSmrg case SAMPLER_EXTEND_PAD: 1305428d7b3dSmrg sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP; 1306428d7b3dSmrg sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP; 1307428d7b3dSmrg sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP; 1308428d7b3dSmrg break; 1309428d7b3dSmrg case SAMPLER_EXTEND_REFLECT: 1310428d7b3dSmrg sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_MIRROR; 1311428d7b3dSmrg sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_MIRROR; 1312428d7b3dSmrg sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_MIRROR; 1313428d7b3dSmrg break; 1314428d7b3dSmrg } 1315428d7b3dSmrg} 1316428d7b3dSmrg 1317428d7b3dSmrgstatic void 1318428d7b3dSmrgsampler_copy_init(struct gen8_sampler_state *ss) 1319428d7b3dSmrg{ 1320428d7b3dSmrg sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1321428d7b3dSmrg ss->ss3.non_normalized_coord = 1; 1322428d7b3dSmrg 1323428d7b3dSmrg sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1324428d7b3dSmrg} 1325428d7b3dSmrg 1326428d7b3dSmrgstatic void 1327428d7b3dSmrgsampler_fill_init(struct gen8_sampler_state *ss) 1328428d7b3dSmrg{ 1329428d7b3dSmrg sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT); 1330428d7b3dSmrg ss->ss3.non_normalized_coord = 1; 1331428d7b3dSmrg 1332428d7b3dSmrg sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1333428d7b3dSmrg} 1334428d7b3dSmrg 1335428d7b3dSmrgstatic uint32_t 1336428d7b3dSmrggen8_tiling_bits(uint32_t tiling) 1337428d7b3dSmrg{ 1338428d7b3dSmrg switch (tiling) { 1339428d7b3dSmrg default: assert(0); 1340428d7b3dSmrg case I915_TILING_NONE: return 0; 1341428d7b3dSmrg case I915_TILING_X: return SURFACE_TILED; 1342428d7b3dSmrg case I915_TILING_Y: return SURFACE_TILED | SURFACE_TILED_Y; 1343428d7b3dSmrg } 1344428d7b3dSmrg} 1345428d7b3dSmrg 1346428d7b3dSmrg#define MOCS_WT (2 << 5) 1347428d7b3dSmrg#define MOCS_WB (3 << 5) 1348428d7b3dSmrg#define MOCS_eLLC_ONLY (0 << 3) 1349428d7b3dSmrg#define MOCS_LLC_ONLY (1 << 3) 1350428d7b3dSmrg#define MOCS_eLLC_LLC (2 << 3) 1351428d7b3dSmrg#define MOCS_ALL_CACHES (3 << 3) 1352428d7b3dSmrg 1353428d7b3dSmrg/** 1354428d7b3dSmrg * Sets up the common fields for a surface state buffer for the given 1355428d7b3dSmrg * picture in the given surface state buffer. 1356428d7b3dSmrg */ 1357428d7b3dSmrgstatic uint32_t 1358428d7b3dSmrggen8_bind_bo(struct sna *sna, 1359428d7b3dSmrg struct kgem_bo *bo, 1360428d7b3dSmrg uint32_t width, 1361428d7b3dSmrg uint32_t height, 1362428d7b3dSmrg uint32_t format, 1363428d7b3dSmrg bool is_dst) 1364428d7b3dSmrg{ 1365428d7b3dSmrg uint32_t *ss; 1366428d7b3dSmrg uint32_t domains; 1367428d7b3dSmrg int offset; 1368428d7b3dSmrg uint32_t is_scanout = is_dst && bo->scanout; 1369428d7b3dSmrg 1370428d7b3dSmrg /* After the first bind, we manage the cache domains within the batch */ 1371428d7b3dSmrg offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31); 1372428d7b3dSmrg if (offset) { 1373428d7b3dSmrg if (is_dst) 1374428d7b3dSmrg kgem_bo_mark_dirty(bo); 1375428d7b3dSmrg assert(offset >= sna->kgem.surface); 1376428d7b3dSmrg return offset * sizeof(uint32_t); 1377428d7b3dSmrg } 1378428d7b3dSmrg 1379428d7b3dSmrg offset = sna->kgem.surface -= SURFACE_DW; 1380428d7b3dSmrg ss = sna->kgem.batch + offset; 1381428d7b3dSmrg ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT | 1382428d7b3dSmrg gen8_tiling_bits(bo->tiling) | 1383428d7b3dSmrg format << SURFACE_FORMAT_SHIFT | 1384428d7b3dSmrg SURFACE_VALIGN_4 | SURFACE_HALIGN_4); 1385428d7b3dSmrg if (is_dst) { 1386428d7b3dSmrg ss[0] |= SURFACE_RC_READ_WRITE; 1387428d7b3dSmrg domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER; 1388428d7b3dSmrg } else 1389428d7b3dSmrg domains = I915_GEM_DOMAIN_SAMPLER << 16; 1390428d7b3dSmrg ss[1] = (is_dst && is_uncached(sna, bo)) ? 0 : is_scanout ? (MOCS_WT | MOCS_ALL_CACHES) << 24 : (MOCS_WB | MOCS_ALL_CACHES) << 24; 1391428d7b3dSmrg ss[2] = ((width - 1) << SURFACE_WIDTH_SHIFT | 1392428d7b3dSmrg (height - 1) << SURFACE_HEIGHT_SHIFT); 1393428d7b3dSmrg ss[3] = (bo->pitch - 1) << SURFACE_PITCH_SHIFT; 1394428d7b3dSmrg ss[4] = 0; 1395428d7b3dSmrg ss[5] = 0; 1396428d7b3dSmrg ss[6] = 0; 1397428d7b3dSmrg ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); 1398428d7b3dSmrg *(uint64_t *)(ss+8) = kgem_add_reloc64(&sna->kgem, offset + 8, bo, domains, 0); 1399428d7b3dSmrg ss[10] = 0; 1400428d7b3dSmrg ss[11] = 0; 1401428d7b3dSmrg ss[12] = 0; 1402428d7b3dSmrg ss[13] = 0; 1403428d7b3dSmrg ss[14] = 0; 1404428d7b3dSmrg ss[15] = 0; 1405428d7b3dSmrg 1406428d7b3dSmrg kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset); 1407428d7b3dSmrg 1408428d7b3dSmrg DBG(("[%x] bind bo(handle=%d, addr=%lx), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", 1409428d7b3dSmrg offset, bo->handle, *(uint64_t *)(ss+8), 1410428d7b3dSmrg format, width, height, bo->pitch, bo->tiling, 1411428d7b3dSmrg domains & 0xffff ? "render" : "sampler")); 1412428d7b3dSmrg 1413428d7b3dSmrg return offset * sizeof(uint32_t); 1414428d7b3dSmrg} 1415428d7b3dSmrg 1416428d7b3dSmrgstatic void gen8_emit_vertex_buffer(struct sna *sna, 1417428d7b3dSmrg const struct sna_composite_op *op) 1418428d7b3dSmrg{ 1419428d7b3dSmrg int id = GEN8_VERTEX(op->u.gen8.flags); 1420428d7b3dSmrg 1421428d7b3dSmrg OUT_BATCH(GEN8_3DSTATE_VERTEX_BUFFERS | (5 - 2)); 1422428d7b3dSmrg OUT_BATCH(id << VB_INDEX_SHIFT | VB_MODIFY_ENABLE | 1423428d7b3dSmrg 4*op->floats_per_vertex); 1424428d7b3dSmrg sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; 1425428d7b3dSmrg OUT_BATCH64(0); 1426428d7b3dSmrg OUT_BATCH(~0); /* buffer size: disabled */ 1427428d7b3dSmrg 1428428d7b3dSmrg sna->render.vb_id |= 1 << id; 1429428d7b3dSmrg} 1430428d7b3dSmrg 1431428d7b3dSmrgstatic void gen8_emit_primitive(struct sna *sna) 1432428d7b3dSmrg{ 1433428d7b3dSmrg if (sna->kgem.nbatch == sna->render_state.gen8.last_primitive) { 1434428d7b3dSmrg sna->render.vertex_offset = sna->kgem.nbatch - 5; 1435428d7b3dSmrg return; 1436428d7b3dSmrg } 1437428d7b3dSmrg 1438428d7b3dSmrg OUT_BATCH(GEN8_3DPRIMITIVE | (7 - 2)); 1439428d7b3dSmrg OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ 1440428d7b3dSmrg sna->render.vertex_offset = sna->kgem.nbatch; 1441428d7b3dSmrg OUT_BATCH(0); /* vertex count, to be filled in later */ 1442428d7b3dSmrg OUT_BATCH(sna->render.vertex_index); 1443428d7b3dSmrg OUT_BATCH(1); /* single instance */ 1444428d7b3dSmrg OUT_BATCH(0); /* start instance location */ 1445428d7b3dSmrg OUT_BATCH(0); /* index buffer offset, ignored */ 1446428d7b3dSmrg sna->render.vertex_start = sna->render.vertex_index; 1447428d7b3dSmrg 1448428d7b3dSmrg sna->render_state.gen8.last_primitive = sna->kgem.nbatch; 1449428d7b3dSmrg} 1450428d7b3dSmrg 1451428d7b3dSmrgstatic bool gen8_rectangle_begin(struct sna *sna, 1452428d7b3dSmrg const struct sna_composite_op *op) 1453428d7b3dSmrg{ 1454428d7b3dSmrg int id = 1 << GEN8_VERTEX(op->u.gen8.flags); 1455428d7b3dSmrg int ndwords; 1456428d7b3dSmrg 1457428d7b3dSmrg if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) 1458428d7b3dSmrg return true; 1459428d7b3dSmrg 1460428d7b3dSmrg ndwords = op->need_magic_ca_pass ? 60 : 6; 1461428d7b3dSmrg if ((sna->render.vb_id & id) == 0) 1462428d7b3dSmrg ndwords += 5; 1463428d7b3dSmrg if (!kgem_check_batch(&sna->kgem, ndwords)) 1464428d7b3dSmrg return false; 1465428d7b3dSmrg 1466428d7b3dSmrg if ((sna->render.vb_id & id) == 0) 1467428d7b3dSmrg gen8_emit_vertex_buffer(sna, op); 1468428d7b3dSmrg 1469428d7b3dSmrg gen8_emit_primitive(sna); 1470428d7b3dSmrg return true; 1471428d7b3dSmrg} 1472428d7b3dSmrg 1473428d7b3dSmrgstatic int gen8_get_rectangles__flush(struct sna *sna, 1474428d7b3dSmrg const struct sna_composite_op *op) 1475428d7b3dSmrg{ 1476428d7b3dSmrg /* Preventing discarding new vbo after lock contention */ 1477428d7b3dSmrg if (sna_vertex_wait__locked(&sna->render)) { 1478428d7b3dSmrg int rem = vertex_space(sna); 1479428d7b3dSmrg if (rem > op->floats_per_rect) 1480428d7b3dSmrg return rem; 1481428d7b3dSmrg } 1482428d7b3dSmrg 1483428d7b3dSmrg if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6)) 1484428d7b3dSmrg return 0; 1485428d7b3dSmrg if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) 1486428d7b3dSmrg return 0; 1487428d7b3dSmrg 1488428d7b3dSmrg if (sna->render.vertex_offset) { 1489428d7b3dSmrg gen8_vertex_flush(sna); 1490428d7b3dSmrg if (gen8_magic_ca_pass(sna, op)) { 1491428d7b3dSmrg gen8_emit_pipe_invalidate(sna); 1492428d7b3dSmrg gen8_emit_cc(sna, GEN8_BLEND(op->u.gen8.flags)); 1493428d7b3dSmrg gen8_emit_wm(sna, GEN8_KERNEL(op->u.gen8.flags)); 1494428d7b3dSmrg } 1495428d7b3dSmrg } 1496428d7b3dSmrg 1497428d7b3dSmrg return gen8_vertex_finish(sna); 1498428d7b3dSmrg} 1499428d7b3dSmrg 1500428d7b3dSmrginline static int gen8_get_rectangles(struct sna *sna, 1501428d7b3dSmrg const struct sna_composite_op *op, 1502428d7b3dSmrg int want, 1503428d7b3dSmrg void (*emit_state)(struct sna *sna, const struct sna_composite_op *op)) 1504428d7b3dSmrg{ 1505428d7b3dSmrg int rem; 1506428d7b3dSmrg 1507428d7b3dSmrg assert(want); 1508428d7b3dSmrg 1509428d7b3dSmrgstart: 1510428d7b3dSmrg rem = vertex_space(sna); 1511428d7b3dSmrg if (unlikely(rem < op->floats_per_rect)) { 1512428d7b3dSmrg DBG(("flushing vbo for %s: %d < %d\n", 1513428d7b3dSmrg __FUNCTION__, rem, op->floats_per_rect)); 1514428d7b3dSmrg rem = gen8_get_rectangles__flush(sna, op); 1515428d7b3dSmrg if (unlikely(rem == 0)) 1516428d7b3dSmrg goto flush; 1517428d7b3dSmrg } 1518428d7b3dSmrg 1519428d7b3dSmrg if (unlikely(sna->render.vertex_offset == 0)) { 1520428d7b3dSmrg if (!gen8_rectangle_begin(sna, op)) 1521428d7b3dSmrg goto flush; 1522428d7b3dSmrg else 1523428d7b3dSmrg goto start; 1524428d7b3dSmrg } 1525428d7b3dSmrg 1526428d7b3dSmrg assert(rem <= vertex_space(sna)); 1527428d7b3dSmrg assert(op->floats_per_rect <= rem); 1528428d7b3dSmrg if (want > 1 && want * op->floats_per_rect > rem) 1529428d7b3dSmrg want = rem / op->floats_per_rect; 1530428d7b3dSmrg 1531428d7b3dSmrg assert(want > 0); 1532428d7b3dSmrg sna->render.vertex_index += 3*want; 1533428d7b3dSmrg return want; 1534428d7b3dSmrg 1535428d7b3dSmrgflush: 1536428d7b3dSmrg if (sna->render.vertex_offset) { 1537428d7b3dSmrg gen8_vertex_flush(sna); 1538428d7b3dSmrg gen8_magic_ca_pass(sna, op); 1539428d7b3dSmrg } 1540428d7b3dSmrg sna_vertex_wait__locked(&sna->render); 1541428d7b3dSmrg _kgem_submit(&sna->kgem); 1542428d7b3dSmrg emit_state(sna, op); 1543428d7b3dSmrg goto start; 1544428d7b3dSmrg} 1545428d7b3dSmrg 1546428d7b3dSmrginline static uint32_t *gen8_composite_get_binding_table(struct sna *sna, 1547428d7b3dSmrg uint16_t *offset) 1548428d7b3dSmrg{ 1549428d7b3dSmrg uint32_t *table; 1550428d7b3dSmrg 1551428d7b3dSmrg assert(sna->kgem.surface <= 16384); 1552428d7b3dSmrg sna->kgem.surface -= SURFACE_DW; 1553428d7b3dSmrg /* Clear all surplus entries to zero in case of prefetch */ 1554428d7b3dSmrg table = memset(sna->kgem.batch + sna->kgem.surface, 0, 64); 1555428d7b3dSmrg 1556428d7b3dSmrg DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); 1557428d7b3dSmrg 1558428d7b3dSmrg *offset = sna->kgem.surface; 1559428d7b3dSmrg return table; 1560428d7b3dSmrg} 1561428d7b3dSmrg 1562428d7b3dSmrgstatic void 1563428d7b3dSmrggen8_get_batch(struct sna *sna, const struct sna_composite_op *op) 1564428d7b3dSmrg{ 1565428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); 1566428d7b3dSmrg 1567428d7b3dSmrg if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 2*(1+3))) { 1568428d7b3dSmrg DBG(("%s: flushing batch: %d < %d+%d\n", 1569428d7b3dSmrg __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, 1570428d7b3dSmrg 150, 4*8*2)); 1571428d7b3dSmrg _kgem_submit(&sna->kgem); 1572428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1573428d7b3dSmrg } 1574428d7b3dSmrg 1575428d7b3dSmrg assert(sna->kgem.mode == KGEM_RENDER); 1576428d7b3dSmrg assert(sna->kgem.ring == KGEM_RENDER); 1577428d7b3dSmrg 1578428d7b3dSmrg if (sna->render_state.gen8.needs_invariant) 1579428d7b3dSmrg gen8_emit_invariant(sna); 1580428d7b3dSmrg} 1581428d7b3dSmrg 1582428d7b3dSmrgstatic void gen8_emit_composite_state(struct sna *sna, 1583428d7b3dSmrg const struct sna_composite_op *op) 1584428d7b3dSmrg{ 1585428d7b3dSmrg uint32_t *binding_table; 1586428d7b3dSmrg uint16_t offset, dirty; 1587428d7b3dSmrg 1588428d7b3dSmrg gen8_get_batch(sna, op); 1589428d7b3dSmrg 1590428d7b3dSmrg binding_table = gen8_composite_get_binding_table(sna, &offset); 1591428d7b3dSmrg 1592428d7b3dSmrg dirty = kgem_bo_is_dirty(op->dst.bo); 1593428d7b3dSmrg 1594428d7b3dSmrg binding_table[0] = 1595428d7b3dSmrg gen8_bind_bo(sna, 1596428d7b3dSmrg op->dst.bo, op->dst.width, op->dst.height, 1597428d7b3dSmrg gen8_get_dest_format(op->dst.format), 1598428d7b3dSmrg true); 1599428d7b3dSmrg binding_table[1] = 1600428d7b3dSmrg gen8_bind_bo(sna, 1601428d7b3dSmrg op->src.bo, op->src.width, op->src.height, 1602428d7b3dSmrg op->src.card_format, 1603428d7b3dSmrg false); 1604428d7b3dSmrg if (op->mask.bo) { 1605428d7b3dSmrg binding_table[2] = 1606428d7b3dSmrg gen8_bind_bo(sna, 1607428d7b3dSmrg op->mask.bo, 1608428d7b3dSmrg op->mask.width, 1609428d7b3dSmrg op->mask.height, 1610428d7b3dSmrg op->mask.card_format, 1611428d7b3dSmrg false); 1612428d7b3dSmrg } 1613428d7b3dSmrg 1614428d7b3dSmrg if (sna->kgem.surface == offset && 1615428d7b3dSmrg *(uint64_t *)(sna->kgem.batch + sna->render_state.gen8.surface_table) == *(uint64_t*)binding_table && 1616428d7b3dSmrg (op->mask.bo == NULL || 1617428d7b3dSmrg sna->kgem.batch[sna->render_state.gen8.surface_table+2] == binding_table[2])) { 1618428d7b3dSmrg sna->kgem.surface += SURFACE_DW; 1619428d7b3dSmrg offset = sna->render_state.gen8.surface_table; 1620428d7b3dSmrg } 1621428d7b3dSmrg 1622428d7b3dSmrg if (sna->kgem.batch[sna->render_state.gen8.surface_table] == binding_table[0]) 1623428d7b3dSmrg dirty = 0; 1624428d7b3dSmrg 1625428d7b3dSmrg gen8_emit_state(sna, op, offset | dirty); 1626428d7b3dSmrg} 1627428d7b3dSmrg 1628428d7b3dSmrgstatic void 1629428d7b3dSmrggen8_align_vertex(struct sna *sna, const struct sna_composite_op *op) 1630428d7b3dSmrg{ 1631428d7b3dSmrg if (op->floats_per_vertex != sna->render_state.gen8.floats_per_vertex) { 1632428d7b3dSmrg DBG(("aligning vertex: was %d, now %d floats per vertex\n", 1633428d7b3dSmrg sna->render_state.gen8.floats_per_vertex, op->floats_per_vertex)); 1634428d7b3dSmrg gen8_vertex_align(sna, op); 1635428d7b3dSmrg sna->render_state.gen8.floats_per_vertex = op->floats_per_vertex; 1636428d7b3dSmrg } 1637428d7b3dSmrg} 1638428d7b3dSmrg 1639428d7b3dSmrgfastcall static void 1640428d7b3dSmrggen8_render_composite_blt(struct sna *sna, 1641428d7b3dSmrg const struct sna_composite_op *op, 1642428d7b3dSmrg const struct sna_composite_rectangles *r) 1643428d7b3dSmrg{ 1644428d7b3dSmrg gen8_get_rectangles(sna, op, 1, gen8_emit_composite_state); 1645428d7b3dSmrg op->prim_emit(sna, op, r); 1646428d7b3dSmrg} 1647428d7b3dSmrg 1648428d7b3dSmrgfastcall static void 1649428d7b3dSmrggen8_render_composite_box(struct sna *sna, 1650428d7b3dSmrg const struct sna_composite_op *op, 1651428d7b3dSmrg const BoxRec *box) 1652428d7b3dSmrg{ 1653428d7b3dSmrg struct sna_composite_rectangles r; 1654428d7b3dSmrg 1655428d7b3dSmrg gen8_get_rectangles(sna, op, 1, gen8_emit_composite_state); 1656428d7b3dSmrg 1657428d7b3dSmrg DBG((" %s: (%d, %d), (%d, %d)\n", 1658428d7b3dSmrg __FUNCTION__, 1659428d7b3dSmrg box->x1, box->y1, box->x2, box->y2)); 1660428d7b3dSmrg 1661428d7b3dSmrg r.dst.x = box->x1; 1662428d7b3dSmrg r.dst.y = box->y1; 1663428d7b3dSmrg r.width = box->x2 - box->x1; 1664428d7b3dSmrg r.height = box->y2 - box->y1; 1665428d7b3dSmrg r.src = r.mask = r.dst; 1666428d7b3dSmrg 1667428d7b3dSmrg op->prim_emit(sna, op, &r); 1668428d7b3dSmrg} 1669428d7b3dSmrg 1670428d7b3dSmrgstatic void 1671428d7b3dSmrggen8_render_composite_boxes__blt(struct sna *sna, 1672428d7b3dSmrg const struct sna_composite_op *op, 1673428d7b3dSmrg const BoxRec *box, int nbox) 1674428d7b3dSmrg{ 1675428d7b3dSmrg DBG(("composite_boxes(%d)\n", nbox)); 1676428d7b3dSmrg 1677428d7b3dSmrg do { 1678428d7b3dSmrg int nbox_this_time; 1679428d7b3dSmrg 1680428d7b3dSmrg nbox_this_time = gen8_get_rectangles(sna, op, nbox, 1681428d7b3dSmrg gen8_emit_composite_state); 1682428d7b3dSmrg nbox -= nbox_this_time; 1683428d7b3dSmrg 1684428d7b3dSmrg do { 1685428d7b3dSmrg struct sna_composite_rectangles r; 1686428d7b3dSmrg 1687428d7b3dSmrg DBG((" %s: (%d, %d), (%d, %d)\n", 1688428d7b3dSmrg __FUNCTION__, 1689428d7b3dSmrg box->x1, box->y1, box->x2, box->y2)); 1690428d7b3dSmrg 1691428d7b3dSmrg r.dst.x = box->x1; 1692428d7b3dSmrg r.dst.y = box->y1; 1693428d7b3dSmrg r.width = box->x2 - box->x1; 1694428d7b3dSmrg r.height = box->y2 - box->y1; 1695428d7b3dSmrg r.src = r.mask = r.dst; 1696428d7b3dSmrg 1697428d7b3dSmrg op->prim_emit(sna, op, &r); 1698428d7b3dSmrg box++; 1699428d7b3dSmrg } while (--nbox_this_time); 1700428d7b3dSmrg } while (nbox); 1701428d7b3dSmrg} 1702428d7b3dSmrg 1703428d7b3dSmrgstatic void 1704428d7b3dSmrggen8_render_composite_boxes(struct sna *sna, 1705428d7b3dSmrg const struct sna_composite_op *op, 1706428d7b3dSmrg const BoxRec *box, int nbox) 1707428d7b3dSmrg{ 1708428d7b3dSmrg DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1709428d7b3dSmrg 1710428d7b3dSmrg do { 1711428d7b3dSmrg int nbox_this_time; 1712428d7b3dSmrg float *v; 1713428d7b3dSmrg 1714428d7b3dSmrg nbox_this_time = gen8_get_rectangles(sna, op, nbox, 1715428d7b3dSmrg gen8_emit_composite_state); 1716428d7b3dSmrg assert(nbox_this_time); 1717428d7b3dSmrg nbox -= nbox_this_time; 1718428d7b3dSmrg 1719428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1720428d7b3dSmrg sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1721428d7b3dSmrg 1722428d7b3dSmrg op->emit_boxes(op, box, nbox_this_time, v); 1723428d7b3dSmrg box += nbox_this_time; 1724428d7b3dSmrg } while (nbox); 1725428d7b3dSmrg} 1726428d7b3dSmrg 1727428d7b3dSmrgstatic void 1728428d7b3dSmrggen8_render_composite_boxes__thread(struct sna *sna, 1729428d7b3dSmrg const struct sna_composite_op *op, 1730428d7b3dSmrg const BoxRec *box, int nbox) 1731428d7b3dSmrg{ 1732428d7b3dSmrg DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1733428d7b3dSmrg 1734428d7b3dSmrg sna_vertex_lock(&sna->render); 1735428d7b3dSmrg do { 1736428d7b3dSmrg int nbox_this_time; 1737428d7b3dSmrg float *v; 1738428d7b3dSmrg 1739428d7b3dSmrg nbox_this_time = gen8_get_rectangles(sna, op, nbox, 1740428d7b3dSmrg gen8_emit_composite_state); 1741428d7b3dSmrg assert(nbox_this_time); 1742428d7b3dSmrg nbox -= nbox_this_time; 1743428d7b3dSmrg 1744428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1745428d7b3dSmrg sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1746428d7b3dSmrg 1747428d7b3dSmrg sna_vertex_acquire__locked(&sna->render); 1748428d7b3dSmrg sna_vertex_unlock(&sna->render); 1749428d7b3dSmrg 1750428d7b3dSmrg op->emit_boxes(op, box, nbox_this_time, v); 1751428d7b3dSmrg box += nbox_this_time; 1752428d7b3dSmrg 1753428d7b3dSmrg sna_vertex_lock(&sna->render); 1754428d7b3dSmrg sna_vertex_release__locked(&sna->render); 1755428d7b3dSmrg } while (nbox); 1756428d7b3dSmrg sna_vertex_unlock(&sna->render); 1757428d7b3dSmrg} 1758428d7b3dSmrg 1759428d7b3dSmrgstatic uint32_t 1760428d7b3dSmrggen8_create_blend_state(struct sna_static_stream *stream) 1761428d7b3dSmrg{ 1762428d7b3dSmrg char *base, *ptr; 1763428d7b3dSmrg int src, dst; 1764428d7b3dSmrg 1765428d7b3dSmrg COMPILE_TIME_ASSERT(((GEN8_BLENDFACTOR_COUNT * GEN8_BLENDFACTOR_COUNT << 4) & (1 << 15)) == 0); 1766428d7b3dSmrg 1767428d7b3dSmrg base = sna_static_stream_map(stream, 1768428d7b3dSmrg GEN8_BLENDFACTOR_COUNT * GEN8_BLENDFACTOR_COUNT * GEN8_BLEND_STATE_PADDED_SIZE, 1769428d7b3dSmrg 64); 1770428d7b3dSmrg 1771428d7b3dSmrg ptr = base; 1772428d7b3dSmrg for (src = 0; src < GEN8_BLENDFACTOR_COUNT; src++) { 1773428d7b3dSmrg for (dst = 0; dst < GEN8_BLENDFACTOR_COUNT; dst++) { 1774428d7b3dSmrg struct gen8_blend_state *blend = 1775428d7b3dSmrg (struct gen8_blend_state *)ptr; 1776428d7b3dSmrg 1777428d7b3dSmrg assert(((ptr - base) & 63) == 0); 1778428d7b3dSmrg COMPILE_TIME_ASSERT(sizeof(blend->common) == 4); 1779428d7b3dSmrg COMPILE_TIME_ASSERT(sizeof(blend->rt) == 8); 1780428d7b3dSmrg COMPILE_TIME_ASSERT((char *)&blend->rt - (char *)blend == 4); 1781428d7b3dSmrg 1782428d7b3dSmrg blend->rt.post_blend_clamp = 1; 1783428d7b3dSmrg blend->rt.pre_blend_clamp = 1; 1784428d7b3dSmrg 1785428d7b3dSmrg blend->rt.color_blend = 1786428d7b3dSmrg !(dst == BLENDFACTOR_ZERO && src == BLENDFACTOR_ONE); 1787428d7b3dSmrg blend->rt.dest_blend_factor = dst; 1788428d7b3dSmrg blend->rt.source_blend_factor = src; 1789428d7b3dSmrg blend->rt.color_blend_function = BLENDFUNCTION_ADD; 1790428d7b3dSmrg 1791428d7b3dSmrg blend->rt.dest_alpha_blend_factor = dst; 1792428d7b3dSmrg blend->rt.source_alpha_blend_factor = src; 1793428d7b3dSmrg blend->rt.alpha_blend_function = BLENDFUNCTION_ADD; 1794428d7b3dSmrg 1795428d7b3dSmrg ptr += GEN8_BLEND_STATE_PADDED_SIZE; 1796428d7b3dSmrg } 1797428d7b3dSmrg } 1798428d7b3dSmrg 1799428d7b3dSmrg return sna_static_stream_offsetof(stream, base); 1800428d7b3dSmrg} 1801428d7b3dSmrg 1802428d7b3dSmrgstatic int 1803428d7b3dSmrggen8_composite_picture(struct sna *sna, 1804428d7b3dSmrg PicturePtr picture, 1805428d7b3dSmrg struct sna_composite_channel *channel, 1806428d7b3dSmrg int x, int y, 1807428d7b3dSmrg int w, int h, 1808428d7b3dSmrg int dst_x, int dst_y, 1809428d7b3dSmrg bool precise) 1810428d7b3dSmrg{ 1811428d7b3dSmrg PixmapPtr pixmap; 1812428d7b3dSmrg uint32_t color; 1813428d7b3dSmrg int16_t dx, dy; 1814428d7b3dSmrg 1815428d7b3dSmrg DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", 1816428d7b3dSmrg __FUNCTION__, x, y, w, h, dst_x, dst_y)); 1817428d7b3dSmrg 1818428d7b3dSmrg channel->is_solid = false; 1819428d7b3dSmrg channel->card_format = -1; 1820428d7b3dSmrg 1821428d7b3dSmrg if (sna_picture_is_solid(picture, &color)) 1822428d7b3dSmrg return gen4_channel_init_solid(sna, channel, color); 1823428d7b3dSmrg 1824428d7b3dSmrg if (picture->pDrawable == NULL) { 1825428d7b3dSmrg int ret; 1826428d7b3dSmrg 1827428d7b3dSmrg if (picture->pSourcePict->type == SourcePictTypeLinear) 1828428d7b3dSmrg return gen4_channel_init_linear(sna, picture, channel, 1829428d7b3dSmrg x, y, 1830428d7b3dSmrg w, h, 1831428d7b3dSmrg dst_x, dst_y); 1832428d7b3dSmrg 1833428d7b3dSmrg DBG(("%s -- fixup, gradient\n", __FUNCTION__)); 1834428d7b3dSmrg ret = -1; 1835428d7b3dSmrg if (!precise) 1836428d7b3dSmrg ret = sna_render_picture_approximate_gradient(sna, picture, channel, 1837428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1838428d7b3dSmrg if (ret == -1) 1839428d7b3dSmrg ret = sna_render_picture_fixup(sna, picture, channel, 1840428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1841428d7b3dSmrg return ret; 1842428d7b3dSmrg } 1843428d7b3dSmrg 1844428d7b3dSmrg if (picture->alphaMap) { 1845428d7b3dSmrg DBG(("%s -- fallback, alphamap\n", __FUNCTION__)); 1846428d7b3dSmrg return sna_render_picture_fixup(sna, picture, channel, 1847428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1848428d7b3dSmrg } 1849428d7b3dSmrg 1850428d7b3dSmrg if (!gen8_check_repeat(picture)) 1851428d7b3dSmrg return sna_render_picture_fixup(sna, picture, channel, 1852428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1853428d7b3dSmrg 1854428d7b3dSmrg if (!gen8_check_filter(picture)) 1855428d7b3dSmrg return sna_render_picture_fixup(sna, picture, channel, 1856428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1857428d7b3dSmrg 1858428d7b3dSmrg channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; 1859428d7b3dSmrg channel->filter = picture->filter; 1860428d7b3dSmrg 1861428d7b3dSmrg pixmap = get_drawable_pixmap(picture->pDrawable); 1862428d7b3dSmrg get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); 1863428d7b3dSmrg 1864428d7b3dSmrg x += dx + picture->pDrawable->x; 1865428d7b3dSmrg y += dy + picture->pDrawable->y; 1866428d7b3dSmrg 1867428d7b3dSmrg channel->is_affine = sna_transform_is_affine(picture->transform); 1868428d7b3dSmrg if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) { 1869428d7b3dSmrg DBG(("%s: integer translation (%d, %d), removing\n", 1870428d7b3dSmrg __FUNCTION__, dx, dy)); 1871428d7b3dSmrg x += dx; 1872428d7b3dSmrg y += dy; 1873428d7b3dSmrg channel->transform = NULL; 1874428d7b3dSmrg channel->filter = PictFilterNearest; 1875428d7b3dSmrg 1876428d7b3dSmrg if (channel->repeat || 1877428d7b3dSmrg (x >= 0 && 1878428d7b3dSmrg y >= 0 && 1879428d7b3dSmrg x + w < pixmap->drawable.width && 1880428d7b3dSmrg y + h < pixmap->drawable.height)) { 1881428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(pixmap); 1882428d7b3dSmrg if (priv && priv->clear) { 1883428d7b3dSmrg DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); 1884428d7b3dSmrg return gen4_channel_init_solid(sna, channel, priv->clear_color); 1885428d7b3dSmrg } 1886428d7b3dSmrg } 1887428d7b3dSmrg } else 1888428d7b3dSmrg channel->transform = picture->transform; 1889428d7b3dSmrg 1890428d7b3dSmrg channel->pict_format = picture->format; 1891428d7b3dSmrg channel->card_format = gen8_get_card_format(picture->format); 1892428d7b3dSmrg if (channel->card_format == (unsigned)-1) 1893428d7b3dSmrg return sna_render_picture_convert(sna, picture, channel, pixmap, 1894428d7b3dSmrg x, y, w, h, dst_x, dst_y, 1895428d7b3dSmrg false); 1896428d7b3dSmrg 1897428d7b3dSmrg if (too_large(pixmap->drawable.width, pixmap->drawable.height)) { 1898428d7b3dSmrg DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__, 1899428d7b3dSmrg pixmap->drawable.width, pixmap->drawable.height)); 1900428d7b3dSmrg return sna_render_picture_extract(sna, picture, channel, 1901428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1902428d7b3dSmrg } 1903428d7b3dSmrg 1904428d7b3dSmrg return sna_render_pixmap_bo(sna, channel, pixmap, 1905428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1906428d7b3dSmrg} 1907428d7b3dSmrg 1908428d7b3dSmrginline static bool gen8_composite_channel_convert(struct sna_composite_channel *channel) 1909428d7b3dSmrg{ 1910428d7b3dSmrg if (unaligned(channel->bo, PICT_FORMAT_BPP(channel->pict_format))) 1911428d7b3dSmrg return false; 1912428d7b3dSmrg 1913428d7b3dSmrg channel->repeat = gen8_repeat(channel->repeat); 1914428d7b3dSmrg channel->filter = gen8_filter(channel->filter); 1915428d7b3dSmrg if (channel->card_format == (unsigned)-1) 1916428d7b3dSmrg channel->card_format = gen8_get_card_format(channel->pict_format); 1917428d7b3dSmrg assert(channel->card_format != (unsigned)-1); 1918428d7b3dSmrg 1919428d7b3dSmrg return true; 1920428d7b3dSmrg} 1921428d7b3dSmrg 1922428d7b3dSmrgstatic void gen8_render_composite_done(struct sna *sna, 1923428d7b3dSmrg const struct sna_composite_op *op) 1924428d7b3dSmrg{ 1925428d7b3dSmrg if (sna->render.vertex_offset) { 1926428d7b3dSmrg gen8_vertex_flush(sna); 1927428d7b3dSmrg gen8_magic_ca_pass(sna, op); 1928428d7b3dSmrg } 1929428d7b3dSmrg 1930428d7b3dSmrg if (op->mask.bo) 1931428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->mask.bo); 1932428d7b3dSmrg if (op->src.bo) 1933428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->src.bo); 1934428d7b3dSmrg 1935428d7b3dSmrg sna_render_composite_redirect_done(sna, op); 1936428d7b3dSmrg} 1937428d7b3dSmrg 1938428d7b3dSmrginline static bool 1939428d7b3dSmrggen8_composite_set_target(struct sna *sna, 1940428d7b3dSmrg struct sna_composite_op *op, 1941428d7b3dSmrg PicturePtr dst, 1942428d7b3dSmrg int x, int y, int w, int h, 1943428d7b3dSmrg bool partial) 1944428d7b3dSmrg{ 1945428d7b3dSmrg BoxRec box; 1946428d7b3dSmrg unsigned int hint; 1947428d7b3dSmrg 1948428d7b3dSmrg DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial)); 1949428d7b3dSmrg 1950428d7b3dSmrg op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); 1951428d7b3dSmrg op->dst.format = dst->format; 1952428d7b3dSmrg op->dst.width = op->dst.pixmap->drawable.width; 1953428d7b3dSmrg op->dst.height = op->dst.pixmap->drawable.height; 1954428d7b3dSmrg 1955428d7b3dSmrg if (w | h) { 1956428d7b3dSmrg assert(w && h); 1957428d7b3dSmrg box.x1 = x; 1958428d7b3dSmrg box.y1 = y; 1959428d7b3dSmrg box.x2 = x + w; 1960428d7b3dSmrg box.y2 = y + h; 1961428d7b3dSmrg } else 1962428d7b3dSmrg sna_render_picture_extents(dst, &box); 1963428d7b3dSmrg 1964428d7b3dSmrg hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; 1965428d7b3dSmrg if (!partial) { 1966428d7b3dSmrg hint |= IGNORE_DAMAGE; 1967428d7b3dSmrg if (w == op->dst.width && h == op->dst.height) 1968428d7b3dSmrg hint |= REPLACES; 1969428d7b3dSmrg } 1970428d7b3dSmrg 1971428d7b3dSmrg op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 1972428d7b3dSmrg if (op->dst.bo == NULL) 1973428d7b3dSmrg return false; 1974428d7b3dSmrg 1975428d7b3dSmrg if (unaligned(op->dst.bo, dst->pDrawable->bitsPerPixel)) 1976428d7b3dSmrg return false; 1977428d7b3dSmrg 1978428d7b3dSmrg if (hint & REPLACES) { 1979428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 1980428d7b3dSmrg kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 1981428d7b3dSmrg } 1982428d7b3dSmrg 1983428d7b3dSmrg get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 1984428d7b3dSmrg &op->dst.x, &op->dst.y); 1985428d7b3dSmrg 1986428d7b3dSmrg DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 1987428d7b3dSmrg __FUNCTION__, 1988428d7b3dSmrg op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 1989428d7b3dSmrg op->dst.width, op->dst.height, 1990428d7b3dSmrg op->dst.bo->pitch, 1991428d7b3dSmrg op->dst.x, op->dst.y, 1992428d7b3dSmrg op->damage ? *op->damage : (void *)-1)); 1993428d7b3dSmrg 1994428d7b3dSmrg assert(op->dst.bo->proxy == NULL); 1995428d7b3dSmrg 1996428d7b3dSmrg if (too_large(op->dst.width, op->dst.height) && 1997428d7b3dSmrg !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 1998428d7b3dSmrg return false; 1999428d7b3dSmrg 2000428d7b3dSmrg return true; 2001428d7b3dSmrg} 2002428d7b3dSmrg 2003428d7b3dSmrgstatic bool 2004428d7b3dSmrgtry_blt(struct sna *sna, 2005428d7b3dSmrg PicturePtr dst, PicturePtr src, 2006428d7b3dSmrg int width, int height) 2007428d7b3dSmrg{ 2008428d7b3dSmrg struct kgem_bo *bo; 2009428d7b3dSmrg 2010428d7b3dSmrg if (sna->kgem.mode == KGEM_BLT) { 2011428d7b3dSmrg DBG(("%s: already performing BLT\n", __FUNCTION__)); 2012428d7b3dSmrg return true; 2013428d7b3dSmrg } 2014428d7b3dSmrg 2015428d7b3dSmrg if (too_large(width, height)) { 2016428d7b3dSmrg DBG(("%s: operation too large for 3D pipe (%d, %d)\n", 2017428d7b3dSmrg __FUNCTION__, width, height)); 2018428d7b3dSmrg return true; 2019428d7b3dSmrg } 2020428d7b3dSmrg 2021428d7b3dSmrg bo = __sna_drawable_peek_bo(dst->pDrawable); 2022428d7b3dSmrg if (bo == NULL) 2023428d7b3dSmrg return true; 2024428d7b3dSmrg if (bo->rq) 2025428d7b3dSmrg return RQ_IS_BLT(bo->rq); 2026428d7b3dSmrg 2027428d7b3dSmrg if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) 2028428d7b3dSmrg return true; 2029428d7b3dSmrg 2030428d7b3dSmrg if (src->pDrawable) { 2031428d7b3dSmrg bo = __sna_drawable_peek_bo(src->pDrawable); 2032428d7b3dSmrg if (bo == NULL) 2033428d7b3dSmrg return true; 2034428d7b3dSmrg 2035428d7b3dSmrg if (prefer_blt_bo(sna, bo)) 2036428d7b3dSmrg return RQ_IS_BLT(bo->rq); 2037428d7b3dSmrg } 2038428d7b3dSmrg 2039428d7b3dSmrg if (sna->kgem.ring == KGEM_BLT) { 2040428d7b3dSmrg DBG(("%s: already performing BLT\n", __FUNCTION__)); 2041428d7b3dSmrg return true; 2042428d7b3dSmrg } 2043428d7b3dSmrg 2044428d7b3dSmrg return false; 2045428d7b3dSmrg} 2046428d7b3dSmrg 2047428d7b3dSmrgstatic bool 2048428d7b3dSmrgcheck_gradient(PicturePtr picture, bool precise) 2049428d7b3dSmrg{ 2050428d7b3dSmrg if (picture->pDrawable) 2051428d7b3dSmrg return false; 2052428d7b3dSmrg 2053428d7b3dSmrg switch (picture->pSourcePict->type) { 2054428d7b3dSmrg case SourcePictTypeSolidFill: 2055428d7b3dSmrg case SourcePictTypeLinear: 2056428d7b3dSmrg return false; 2057428d7b3dSmrg default: 2058428d7b3dSmrg return precise; 2059428d7b3dSmrg } 2060428d7b3dSmrg} 2061428d7b3dSmrg 2062428d7b3dSmrgstatic bool 2063428d7b3dSmrghas_alphamap(PicturePtr p) 2064428d7b3dSmrg{ 2065428d7b3dSmrg return p->alphaMap != NULL; 2066428d7b3dSmrg} 2067428d7b3dSmrg 2068428d7b3dSmrgstatic bool 2069428d7b3dSmrgneed_upload(PicturePtr p) 2070428d7b3dSmrg{ 2071428d7b3dSmrg return p->pDrawable && unattached(p->pDrawable) && untransformed(p); 2072428d7b3dSmrg} 2073428d7b3dSmrg 2074428d7b3dSmrgstatic bool 2075428d7b3dSmrgsource_is_busy(PixmapPtr pixmap) 2076428d7b3dSmrg{ 2077428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(pixmap); 2078428d7b3dSmrg if (priv == NULL || priv->clear) 2079428d7b3dSmrg return false; 2080428d7b3dSmrg 2081428d7b3dSmrg if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 2082428d7b3dSmrg return true; 2083428d7b3dSmrg 2084428d7b3dSmrg if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 2085428d7b3dSmrg return true; 2086428d7b3dSmrg 2087428d7b3dSmrg return priv->gpu_damage && !priv->cpu_damage; 2088428d7b3dSmrg} 2089428d7b3dSmrg 2090428d7b3dSmrgstatic bool 2091428d7b3dSmrgsource_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) 2092428d7b3dSmrg{ 2093428d7b3dSmrg if (sna_picture_is_solid(p, NULL)) 2094428d7b3dSmrg return false; 2095428d7b3dSmrg 2096428d7b3dSmrg if (p->pSourcePict) 2097428d7b3dSmrg return check_gradient(p, precise); 2098428d7b3dSmrg 2099428d7b3dSmrg if (!gen8_check_repeat(p) || !gen8_check_format(p->format)) 2100428d7b3dSmrg return true; 2101428d7b3dSmrg 2102428d7b3dSmrg if (pixmap && source_is_busy(pixmap)) 2103428d7b3dSmrg return false; 2104428d7b3dSmrg 2105428d7b3dSmrg return has_alphamap(p) || !gen8_check_filter(p) || need_upload(p); 2106428d7b3dSmrg} 2107428d7b3dSmrg 2108428d7b3dSmrgstatic bool 2109428d7b3dSmrggen8_composite_fallback(struct sna *sna, 2110428d7b3dSmrg PicturePtr src, 2111428d7b3dSmrg PicturePtr mask, 2112428d7b3dSmrg PicturePtr dst) 2113428d7b3dSmrg{ 2114428d7b3dSmrg PixmapPtr src_pixmap; 2115428d7b3dSmrg PixmapPtr mask_pixmap; 2116428d7b3dSmrg PixmapPtr dst_pixmap; 2117428d7b3dSmrg bool src_fallback, mask_fallback; 2118428d7b3dSmrg 2119428d7b3dSmrg if (!gen8_check_dst_format(dst->format)) { 2120428d7b3dSmrg DBG(("%s: unknown destination format: %d\n", 2121428d7b3dSmrg __FUNCTION__, dst->format)); 2122428d7b3dSmrg return true; 2123428d7b3dSmrg } 2124428d7b3dSmrg 2125428d7b3dSmrg dst_pixmap = get_drawable_pixmap(dst->pDrawable); 2126428d7b3dSmrg 2127428d7b3dSmrg src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; 2128428d7b3dSmrg src_fallback = source_fallback(src, src_pixmap, 2129428d7b3dSmrg dst->polyMode == PolyModePrecise); 2130428d7b3dSmrg 2131428d7b3dSmrg if (mask) { 2132428d7b3dSmrg mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; 2133428d7b3dSmrg mask_fallback = source_fallback(mask, mask_pixmap, 2134428d7b3dSmrg dst->polyMode == PolyModePrecise); 2135428d7b3dSmrg } else { 2136428d7b3dSmrg mask_pixmap = NULL; 2137428d7b3dSmrg mask_fallback = false; 2138428d7b3dSmrg } 2139428d7b3dSmrg 2140428d7b3dSmrg /* If we are using the destination as a source and need to 2141428d7b3dSmrg * readback in order to upload the source, do it all 2142428d7b3dSmrg * on the cpu. 2143428d7b3dSmrg */ 2144428d7b3dSmrg if (src_pixmap == dst_pixmap && src_fallback) { 2145428d7b3dSmrg DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); 2146428d7b3dSmrg return true; 2147428d7b3dSmrg } 2148428d7b3dSmrg if (mask_pixmap == dst_pixmap && mask_fallback) { 2149428d7b3dSmrg DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); 2150428d7b3dSmrg return true; 2151428d7b3dSmrg } 2152428d7b3dSmrg 2153428d7b3dSmrg /* If anything is on the GPU, push everything out to the GPU */ 2154428d7b3dSmrg if (dst_use_gpu(dst_pixmap)) { 2155428d7b3dSmrg DBG(("%s: dst is already on the GPU, try to use GPU\n", 2156428d7b3dSmrg __FUNCTION__)); 2157428d7b3dSmrg return false; 2158428d7b3dSmrg } 2159428d7b3dSmrg 2160428d7b3dSmrg if (src_pixmap && !src_fallback) { 2161428d7b3dSmrg DBG(("%s: src is already on the GPU, try to use GPU\n", 2162428d7b3dSmrg __FUNCTION__)); 2163428d7b3dSmrg return false; 2164428d7b3dSmrg } 2165428d7b3dSmrg if (mask_pixmap && !mask_fallback) { 2166428d7b3dSmrg DBG(("%s: mask is already on the GPU, try to use GPU\n", 2167428d7b3dSmrg __FUNCTION__)); 2168428d7b3dSmrg return false; 2169428d7b3dSmrg } 2170428d7b3dSmrg 2171428d7b3dSmrg /* However if the dst is not on the GPU and we need to 2172428d7b3dSmrg * render one of the sources using the CPU, we may 2173428d7b3dSmrg * as well do the entire operation in place onthe CPU. 2174428d7b3dSmrg */ 2175428d7b3dSmrg if (src_fallback) { 2176428d7b3dSmrg DBG(("%s: dst is on the CPU and src will fallback\n", 2177428d7b3dSmrg __FUNCTION__)); 2178428d7b3dSmrg return true; 2179428d7b3dSmrg } 2180428d7b3dSmrg 2181428d7b3dSmrg if (mask && mask_fallback) { 2182428d7b3dSmrg DBG(("%s: dst is on the CPU and mask will fallback\n", 2183428d7b3dSmrg __FUNCTION__)); 2184428d7b3dSmrg return true; 2185428d7b3dSmrg } 2186428d7b3dSmrg 2187428d7b3dSmrg if (too_large(dst_pixmap->drawable.width, 2188428d7b3dSmrg dst_pixmap->drawable.height) && 2189428d7b3dSmrg dst_is_cpu(dst_pixmap)) { 2190428d7b3dSmrg DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 2191428d7b3dSmrg return true; 2192428d7b3dSmrg } 2193428d7b3dSmrg 2194428d7b3dSmrg DBG(("%s: dst is not on the GPU and the operation should not fallback\n", 2195428d7b3dSmrg __FUNCTION__)); 2196428d7b3dSmrg return dst_use_cpu(dst_pixmap); 2197428d7b3dSmrg} 2198428d7b3dSmrg 2199428d7b3dSmrgstatic int 2200428d7b3dSmrgreuse_source(struct sna *sna, 2201428d7b3dSmrg PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 2202428d7b3dSmrg PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 2203428d7b3dSmrg{ 2204428d7b3dSmrg uint32_t color; 2205428d7b3dSmrg 2206428d7b3dSmrg if (src_x != msk_x || src_y != msk_y) 2207428d7b3dSmrg return false; 2208428d7b3dSmrg 2209428d7b3dSmrg if (src == mask) { 2210428d7b3dSmrg DBG(("%s: mask is source\n", __FUNCTION__)); 2211428d7b3dSmrg *mc = *sc; 2212428d7b3dSmrg mc->bo = kgem_bo_reference(mc->bo); 2213428d7b3dSmrg return true; 2214428d7b3dSmrg } 2215428d7b3dSmrg 2216428d7b3dSmrg if (sna_picture_is_solid(mask, &color)) 2217428d7b3dSmrg return gen4_channel_init_solid(sna, mc, color); 2218428d7b3dSmrg 2219428d7b3dSmrg if (sc->is_solid) 2220428d7b3dSmrg return false; 2221428d7b3dSmrg 2222428d7b3dSmrg if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) 2223428d7b3dSmrg return false; 2224428d7b3dSmrg 2225428d7b3dSmrg DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 2226428d7b3dSmrg 2227428d7b3dSmrg if (!sna_transform_equal(src->transform, mask->transform)) 2228428d7b3dSmrg return false; 2229428d7b3dSmrg 2230428d7b3dSmrg if (!sna_picture_alphamap_equal(src, mask)) 2231428d7b3dSmrg return false; 2232428d7b3dSmrg 2233428d7b3dSmrg if (!gen8_check_repeat(mask)) 2234428d7b3dSmrg return false; 2235428d7b3dSmrg 2236428d7b3dSmrg if (!gen8_check_filter(mask)) 2237428d7b3dSmrg return false; 2238428d7b3dSmrg 2239428d7b3dSmrg if (!gen8_check_format(mask->format)) 2240428d7b3dSmrg return false; 2241428d7b3dSmrg 2242428d7b3dSmrg DBG(("%s: reusing source channel for mask with a twist\n", 2243428d7b3dSmrg __FUNCTION__)); 2244428d7b3dSmrg 2245428d7b3dSmrg *mc = *sc; 2246428d7b3dSmrg mc->repeat = gen8_repeat(mask->repeat ? mask->repeatType : RepeatNone); 2247428d7b3dSmrg mc->filter = gen8_filter(mask->filter); 2248428d7b3dSmrg mc->pict_format = mask->format; 2249428d7b3dSmrg mc->card_format = gen8_get_card_format(mask->format); 2250428d7b3dSmrg mc->bo = kgem_bo_reference(mc->bo); 2251428d7b3dSmrg return true; 2252428d7b3dSmrg} 2253428d7b3dSmrg 2254428d7b3dSmrgstatic bool 2255428d7b3dSmrggen8_render_composite(struct sna *sna, 2256428d7b3dSmrg uint8_t op, 2257428d7b3dSmrg PicturePtr src, 2258428d7b3dSmrg PicturePtr mask, 2259428d7b3dSmrg PicturePtr dst, 2260428d7b3dSmrg int16_t src_x, int16_t src_y, 2261428d7b3dSmrg int16_t msk_x, int16_t msk_y, 2262428d7b3dSmrg int16_t dst_x, int16_t dst_y, 2263428d7b3dSmrg int16_t width, int16_t height, 2264428d7b3dSmrg unsigned flags, 2265428d7b3dSmrg struct sna_composite_op *tmp) 2266428d7b3dSmrg{ 2267428d7b3dSmrg if (op >= ARRAY_SIZE(gen8_blend_op)) 2268428d7b3dSmrg return false; 2269428d7b3dSmrg 2270428d7b3dSmrg DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__, 2271428d7b3dSmrg width, height, sna->kgem.mode, sna->kgem.ring)); 2272428d7b3dSmrg 2273428d7b3dSmrg if (mask == NULL && 2274428d7b3dSmrg try_blt(sna, dst, src, width, height) && 2275428d7b3dSmrg sna_blt_composite(sna, op, 2276428d7b3dSmrg src, dst, 2277428d7b3dSmrg src_x, src_y, 2278428d7b3dSmrg dst_x, dst_y, 2279428d7b3dSmrg width, height, 2280428d7b3dSmrg flags, tmp)) 2281428d7b3dSmrg return true; 2282428d7b3dSmrg 2283428d7b3dSmrg if (gen8_composite_fallback(sna, src, mask, dst)) 2284428d7b3dSmrg goto fallback; 2285428d7b3dSmrg 2286428d7b3dSmrg if (need_tiling(sna, width, height)) 2287428d7b3dSmrg return sna_tiling_composite(op, src, mask, dst, 2288428d7b3dSmrg src_x, src_y, 2289428d7b3dSmrg msk_x, msk_y, 2290428d7b3dSmrg dst_x, dst_y, 2291428d7b3dSmrg width, height, 2292428d7b3dSmrg tmp); 2293428d7b3dSmrg 2294428d7b3dSmrg if (op == PictOpClear && src == sna->clear) 2295428d7b3dSmrg op = PictOpSrc; 2296428d7b3dSmrg tmp->op = op; 2297428d7b3dSmrg if (!gen8_composite_set_target(sna, tmp, dst, 2298428d7b3dSmrg dst_x, dst_y, width, height, 2299428d7b3dSmrg flags & COMPOSITE_PARTIAL || op > PictOpSrc)) 2300428d7b3dSmrg goto fallback; 2301428d7b3dSmrg 2302428d7b3dSmrg switch (gen8_composite_picture(sna, src, &tmp->src, 2303428d7b3dSmrg src_x, src_y, 2304428d7b3dSmrg width, height, 2305428d7b3dSmrg dst_x, dst_y, 2306428d7b3dSmrg dst->polyMode == PolyModePrecise)) { 2307428d7b3dSmrg case -1: 2308428d7b3dSmrg goto cleanup_dst; 2309428d7b3dSmrg case 0: 2310428d7b3dSmrg if (!gen4_channel_init_solid(sna, &tmp->src, 0)) 2311428d7b3dSmrg goto cleanup_dst; 2312428d7b3dSmrg /* fall through to fixup */ 2313428d7b3dSmrg case 1: 2314428d7b3dSmrg /* Did we just switch rings to prepare the source? */ 2315428d7b3dSmrg if (mask == NULL && 2316428d7b3dSmrg (prefer_blt_composite(sna, tmp) || 2317428d7b3dSmrg unaligned(tmp->src.bo, PICT_FORMAT_BPP(tmp->src.pict_format))) && 2318428d7b3dSmrg sna_blt_composite__convert(sna, 2319428d7b3dSmrg dst_x, dst_y, width, height, 2320428d7b3dSmrg tmp)) 2321428d7b3dSmrg return true; 2322428d7b3dSmrg 2323428d7b3dSmrg if (!gen8_composite_channel_convert(&tmp->src)) 2324428d7b3dSmrg goto cleanup_src; 2325428d7b3dSmrg 2326428d7b3dSmrg break; 2327428d7b3dSmrg } 2328428d7b3dSmrg 2329428d7b3dSmrg tmp->is_affine = tmp->src.is_affine; 2330428d7b3dSmrg tmp->has_component_alpha = false; 2331428d7b3dSmrg tmp->need_magic_ca_pass = false; 2332428d7b3dSmrg 2333428d7b3dSmrg tmp->mask.bo = NULL; 2334428d7b3dSmrg tmp->mask.filter = SAMPLER_FILTER_NEAREST; 2335428d7b3dSmrg tmp->mask.repeat = SAMPLER_EXTEND_NONE; 2336428d7b3dSmrg 2337428d7b3dSmrg if (mask) { 2338428d7b3dSmrg if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { 2339428d7b3dSmrg tmp->has_component_alpha = true; 2340428d7b3dSmrg 2341428d7b3dSmrg /* Check if it's component alpha that relies on a source alpha and on 2342428d7b3dSmrg * the source value. We can only get one of those into the single 2343428d7b3dSmrg * source value that we get to blend with. 2344428d7b3dSmrg */ 2345428d7b3dSmrg if (gen8_blend_op[op].src_alpha && 2346428d7b3dSmrg (gen8_blend_op[op].src_blend != BLENDFACTOR_ZERO)) { 2347428d7b3dSmrg if (op != PictOpOver) 2348428d7b3dSmrg goto cleanup_src; 2349428d7b3dSmrg 2350428d7b3dSmrg tmp->need_magic_ca_pass = true; 2351428d7b3dSmrg tmp->op = PictOpOutReverse; 2352428d7b3dSmrg } 2353428d7b3dSmrg } 2354428d7b3dSmrg 2355428d7b3dSmrg if (!reuse_source(sna, 2356428d7b3dSmrg src, &tmp->src, src_x, src_y, 2357428d7b3dSmrg mask, &tmp->mask, msk_x, msk_y)) { 2358428d7b3dSmrg switch (gen8_composite_picture(sna, mask, &tmp->mask, 2359428d7b3dSmrg msk_x, msk_y, 2360428d7b3dSmrg width, height, 2361428d7b3dSmrg dst_x, dst_y, 2362428d7b3dSmrg dst->polyMode == PolyModePrecise)) { 2363428d7b3dSmrg case -1: 2364428d7b3dSmrg goto cleanup_src; 2365428d7b3dSmrg case 0: 2366428d7b3dSmrg if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) 2367428d7b3dSmrg goto cleanup_src; 2368428d7b3dSmrg /* fall through to fixup */ 2369428d7b3dSmrg case 1: 2370428d7b3dSmrg if (!gen8_composite_channel_convert(&tmp->mask)) 2371428d7b3dSmrg goto cleanup_mask; 2372428d7b3dSmrg break; 2373428d7b3dSmrg } 2374428d7b3dSmrg } 2375428d7b3dSmrg 2376428d7b3dSmrg tmp->is_affine &= tmp->mask.is_affine; 2377428d7b3dSmrg } 2378428d7b3dSmrg 2379428d7b3dSmrg tmp->u.gen8.flags = 2380428d7b3dSmrg GEN8_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, 2381428d7b3dSmrg tmp->src.repeat, 2382428d7b3dSmrg tmp->mask.filter, 2383428d7b3dSmrg tmp->mask.repeat), 2384428d7b3dSmrg gen8_get_blend(tmp->op, 2385428d7b3dSmrg tmp->has_component_alpha, 2386428d7b3dSmrg tmp->dst.format), 2387428d7b3dSmrg gen8_choose_composite_kernel(tmp->op, 2388428d7b3dSmrg tmp->mask.bo != NULL, 2389428d7b3dSmrg tmp->has_component_alpha, 2390428d7b3dSmrg tmp->is_affine), 2391428d7b3dSmrg gen4_choose_composite_emitter(sna, tmp)); 2392428d7b3dSmrg 2393428d7b3dSmrg tmp->blt = gen8_render_composite_blt; 2394428d7b3dSmrg tmp->box = gen8_render_composite_box; 2395428d7b3dSmrg tmp->boxes = gen8_render_composite_boxes__blt; 2396428d7b3dSmrg if (tmp->emit_boxes){ 2397428d7b3dSmrg tmp->boxes = gen8_render_composite_boxes; 2398428d7b3dSmrg tmp->thread_boxes = gen8_render_composite_boxes__thread; 2399428d7b3dSmrg } 2400428d7b3dSmrg tmp->done = gen8_render_composite_done; 2401428d7b3dSmrg 2402428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); 2403428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2404428d7b3dSmrg tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2405428d7b3dSmrg NULL)) { 2406428d7b3dSmrg kgem_submit(&sna->kgem); 2407428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2408428d7b3dSmrg tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2409428d7b3dSmrg NULL)) 2410428d7b3dSmrg goto cleanup_mask; 2411428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2412428d7b3dSmrg } 2413428d7b3dSmrg 2414428d7b3dSmrg gen8_align_vertex(sna, tmp); 2415428d7b3dSmrg gen8_emit_composite_state(sna, tmp); 2416428d7b3dSmrg return true; 2417428d7b3dSmrg 2418428d7b3dSmrgcleanup_mask: 2419428d7b3dSmrg if (tmp->mask.bo) { 2420428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 2421428d7b3dSmrg tmp->mask.bo = NULL; 2422428d7b3dSmrg } 2423428d7b3dSmrgcleanup_src: 2424428d7b3dSmrg if (tmp->src.bo) { 2425428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->src.bo); 2426428d7b3dSmrg tmp->src.bo = NULL; 2427428d7b3dSmrg } 2428428d7b3dSmrgcleanup_dst: 2429428d7b3dSmrg if (tmp->redirect.real_bo) { 2430428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 2431428d7b3dSmrg tmp->redirect.real_bo = NULL; 2432428d7b3dSmrg } 2433428d7b3dSmrgfallback: 2434428d7b3dSmrg return (mask == NULL && 2435428d7b3dSmrg sna_blt_composite(sna, op, 2436428d7b3dSmrg src, dst, 2437428d7b3dSmrg src_x, src_y, 2438428d7b3dSmrg dst_x, dst_y, 2439428d7b3dSmrg width, height, 2440428d7b3dSmrg flags | COMPOSITE_FALLBACK, tmp)); 2441428d7b3dSmrg} 2442428d7b3dSmrg 2443428d7b3dSmrg#if !NO_COMPOSITE_SPANS 2444428d7b3dSmrgfastcall static void 2445428d7b3dSmrggen8_render_composite_spans_box(struct sna *sna, 2446428d7b3dSmrg const struct sna_composite_spans_op *op, 2447428d7b3dSmrg const BoxRec *box, float opacity) 2448428d7b3dSmrg{ 2449428d7b3dSmrg DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2450428d7b3dSmrg __FUNCTION__, 2451428d7b3dSmrg op->base.src.offset[0], op->base.src.offset[1], 2452428d7b3dSmrg opacity, 2453428d7b3dSmrg op->base.dst.x, op->base.dst.y, 2454428d7b3dSmrg box->x1, box->y1, 2455428d7b3dSmrg box->x2 - box->x1, 2456428d7b3dSmrg box->y2 - box->y1)); 2457428d7b3dSmrg 2458428d7b3dSmrg gen8_get_rectangles(sna, &op->base, 1, gen8_emit_composite_state); 2459428d7b3dSmrg op->prim_emit(sna, op, box, opacity); 2460428d7b3dSmrg} 2461428d7b3dSmrg 2462428d7b3dSmrgstatic void 2463428d7b3dSmrggen8_render_composite_spans_boxes(struct sna *sna, 2464428d7b3dSmrg const struct sna_composite_spans_op *op, 2465428d7b3dSmrg const BoxRec *box, int nbox, 2466428d7b3dSmrg float opacity) 2467428d7b3dSmrg{ 2468428d7b3dSmrg DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 2469428d7b3dSmrg __FUNCTION__, nbox, 2470428d7b3dSmrg op->base.src.offset[0], op->base.src.offset[1], 2471428d7b3dSmrg opacity, 2472428d7b3dSmrg op->base.dst.x, op->base.dst.y)); 2473428d7b3dSmrg 2474428d7b3dSmrg do { 2475428d7b3dSmrg int nbox_this_time; 2476428d7b3dSmrg 2477428d7b3dSmrg nbox_this_time = gen8_get_rectangles(sna, &op->base, nbox, 2478428d7b3dSmrg gen8_emit_composite_state); 2479428d7b3dSmrg nbox -= nbox_this_time; 2480428d7b3dSmrg 2481428d7b3dSmrg do { 2482428d7b3dSmrg DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2483428d7b3dSmrg box->x1, box->y1, 2484428d7b3dSmrg box->x2 - box->x1, 2485428d7b3dSmrg box->y2 - box->y1)); 2486428d7b3dSmrg 2487428d7b3dSmrg op->prim_emit(sna, op, box++, opacity); 2488428d7b3dSmrg } while (--nbox_this_time); 2489428d7b3dSmrg } while (nbox); 2490428d7b3dSmrg} 2491428d7b3dSmrg 2492428d7b3dSmrgfastcall static void 2493428d7b3dSmrggen8_render_composite_spans_boxes__thread(struct sna *sna, 2494428d7b3dSmrg const struct sna_composite_spans_op *op, 2495428d7b3dSmrg const struct sna_opacity_box *box, 2496428d7b3dSmrg int nbox) 2497428d7b3dSmrg{ 2498428d7b3dSmrg DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", 2499428d7b3dSmrg __FUNCTION__, nbox, 2500428d7b3dSmrg op->base.src.offset[0], op->base.src.offset[1], 2501428d7b3dSmrg op->base.dst.x, op->base.dst.y)); 2502428d7b3dSmrg 2503428d7b3dSmrg sna_vertex_lock(&sna->render); 2504428d7b3dSmrg do { 2505428d7b3dSmrg int nbox_this_time; 2506428d7b3dSmrg float *v; 2507428d7b3dSmrg 2508428d7b3dSmrg nbox_this_time = gen8_get_rectangles(sna, &op->base, nbox, 2509428d7b3dSmrg gen8_emit_composite_state); 2510428d7b3dSmrg assert(nbox_this_time); 2511428d7b3dSmrg nbox -= nbox_this_time; 2512428d7b3dSmrg 2513428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 2514428d7b3dSmrg sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; 2515428d7b3dSmrg 2516428d7b3dSmrg sna_vertex_acquire__locked(&sna->render); 2517428d7b3dSmrg sna_vertex_unlock(&sna->render); 2518428d7b3dSmrg 2519428d7b3dSmrg op->emit_boxes(op, box, nbox_this_time, v); 2520428d7b3dSmrg box += nbox_this_time; 2521428d7b3dSmrg 2522428d7b3dSmrg sna_vertex_lock(&sna->render); 2523428d7b3dSmrg sna_vertex_release__locked(&sna->render); 2524428d7b3dSmrg } while (nbox); 2525428d7b3dSmrg sna_vertex_unlock(&sna->render); 2526428d7b3dSmrg} 2527428d7b3dSmrg 2528428d7b3dSmrgfastcall static void 2529428d7b3dSmrggen8_render_composite_spans_done(struct sna *sna, 2530428d7b3dSmrg const struct sna_composite_spans_op *op) 2531428d7b3dSmrg{ 2532428d7b3dSmrg if (sna->render.vertex_offset) 2533428d7b3dSmrg gen8_vertex_flush(sna); 2534428d7b3dSmrg 2535428d7b3dSmrg DBG(("%s()\n", __FUNCTION__)); 2536428d7b3dSmrg 2537428d7b3dSmrg if (op->base.src.bo) 2538428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2539428d7b3dSmrg 2540428d7b3dSmrg sna_render_composite_redirect_done(sna, &op->base); 2541428d7b3dSmrg} 2542428d7b3dSmrg 2543428d7b3dSmrgstatic bool 2544428d7b3dSmrggen8_check_composite_spans(struct sna *sna, 2545428d7b3dSmrg uint8_t op, PicturePtr src, PicturePtr dst, 2546428d7b3dSmrg int16_t width, int16_t height, unsigned flags) 2547428d7b3dSmrg{ 2548428d7b3dSmrg if (op >= ARRAY_SIZE(gen8_blend_op)) 2549428d7b3dSmrg return false; 2550428d7b3dSmrg 2551428d7b3dSmrg if (gen8_composite_fallback(sna, src, NULL, dst)) 2552428d7b3dSmrg return false; 2553428d7b3dSmrg 2554428d7b3dSmrg if (need_tiling(sna, width, height) && 2555428d7b3dSmrg !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 2556428d7b3dSmrg DBG(("%s: fallback, tiled operation not on GPU\n", 2557428d7b3dSmrg __FUNCTION__)); 2558428d7b3dSmrg return false; 2559428d7b3dSmrg } 2560428d7b3dSmrg 2561428d7b3dSmrg return true; 2562428d7b3dSmrg} 2563428d7b3dSmrg 2564428d7b3dSmrgstatic bool 2565428d7b3dSmrggen8_render_composite_spans(struct sna *sna, 2566428d7b3dSmrg uint8_t op, 2567428d7b3dSmrg PicturePtr src, 2568428d7b3dSmrg PicturePtr dst, 2569428d7b3dSmrg int16_t src_x, int16_t src_y, 2570428d7b3dSmrg int16_t dst_x, int16_t dst_y, 2571428d7b3dSmrg int16_t width, int16_t height, 2572428d7b3dSmrg unsigned flags, 2573428d7b3dSmrg struct sna_composite_spans_op *tmp) 2574428d7b3dSmrg{ 2575428d7b3dSmrg DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, 2576428d7b3dSmrg width, height, flags, sna->kgem.ring)); 2577428d7b3dSmrg 2578428d7b3dSmrg assert(gen8_check_composite_spans(sna, op, src, dst, width, height, flags)); 2579428d7b3dSmrg 2580428d7b3dSmrg if (need_tiling(sna, width, height)) { 2581428d7b3dSmrg DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 2582428d7b3dSmrg __FUNCTION__, width, height)); 2583428d7b3dSmrg return sna_tiling_composite_spans(op, src, dst, 2584428d7b3dSmrg src_x, src_y, dst_x, dst_y, 2585428d7b3dSmrg width, height, flags, tmp); 2586428d7b3dSmrg } 2587428d7b3dSmrg 2588428d7b3dSmrg tmp->base.op = op; 2589428d7b3dSmrg if (!gen8_composite_set_target(sna, &tmp->base, dst, 2590428d7b3dSmrg dst_x, dst_y, width, height, true)) 2591428d7b3dSmrg return false; 2592428d7b3dSmrg 2593428d7b3dSmrg switch (gen8_composite_picture(sna, src, &tmp->base.src, 2594428d7b3dSmrg src_x, src_y, 2595428d7b3dSmrg width, height, 2596428d7b3dSmrg dst_x, dst_y, 2597428d7b3dSmrg dst->polyMode == PolyModePrecise)) { 2598428d7b3dSmrg case -1: 2599428d7b3dSmrg goto cleanup_dst; 2600428d7b3dSmrg case 0: 2601428d7b3dSmrg if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) 2602428d7b3dSmrg goto cleanup_dst; 2603428d7b3dSmrg /* fall through to fixup */ 2604428d7b3dSmrg case 1: 2605428d7b3dSmrg if (!gen8_composite_channel_convert(&tmp->base.src)) 2606428d7b3dSmrg goto cleanup_src; 2607428d7b3dSmrg break; 2608428d7b3dSmrg } 2609428d7b3dSmrg tmp->base.mask.bo = NULL; 2610428d7b3dSmrg 2611428d7b3dSmrg tmp->base.is_affine = tmp->base.src.is_affine; 2612428d7b3dSmrg tmp->base.need_magic_ca_pass = false; 2613428d7b3dSmrg 2614428d7b3dSmrg tmp->base.u.gen8.flags = 2615428d7b3dSmrg GEN8_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, 2616428d7b3dSmrg tmp->base.src.repeat, 2617428d7b3dSmrg SAMPLER_FILTER_NEAREST, 2618428d7b3dSmrg SAMPLER_EXTEND_PAD), 2619428d7b3dSmrg gen8_get_blend(tmp->base.op, false, tmp->base.dst.format), 2620428d7b3dSmrg GEN8_WM_KERNEL_OPACITY | !tmp->base.is_affine, 2621428d7b3dSmrg gen4_choose_spans_emitter(sna, tmp)); 2622428d7b3dSmrg 2623428d7b3dSmrg tmp->box = gen8_render_composite_spans_box; 2624428d7b3dSmrg tmp->boxes = gen8_render_composite_spans_boxes; 2625428d7b3dSmrg if (tmp->emit_boxes) 2626428d7b3dSmrg tmp->thread_boxes = gen8_render_composite_spans_boxes__thread; 2627428d7b3dSmrg tmp->done = gen8_render_composite_spans_done; 2628428d7b3dSmrg 2629428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); 2630428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2631428d7b3dSmrg tmp->base.dst.bo, tmp->base.src.bo, 2632428d7b3dSmrg NULL)) { 2633428d7b3dSmrg kgem_submit(&sna->kgem); 2634428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2635428d7b3dSmrg tmp->base.dst.bo, tmp->base.src.bo, 2636428d7b3dSmrg NULL)) 2637428d7b3dSmrg goto cleanup_src; 2638428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2639428d7b3dSmrg } 2640428d7b3dSmrg 2641428d7b3dSmrg gen8_align_vertex(sna, &tmp->base); 2642428d7b3dSmrg gen8_emit_composite_state(sna, &tmp->base); 2643428d7b3dSmrg return true; 2644428d7b3dSmrg 2645428d7b3dSmrgcleanup_src: 2646428d7b3dSmrg if (tmp->base.src.bo) 2647428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); 2648428d7b3dSmrgcleanup_dst: 2649428d7b3dSmrg if (tmp->base.redirect.real_bo) 2650428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); 2651428d7b3dSmrg return false; 2652428d7b3dSmrg} 2653428d7b3dSmrg#endif 2654428d7b3dSmrg 2655428d7b3dSmrgstatic void 2656428d7b3dSmrggen8_emit_copy_state(struct sna *sna, 2657428d7b3dSmrg const struct sna_composite_op *op) 2658428d7b3dSmrg{ 2659428d7b3dSmrg uint32_t *binding_table; 2660428d7b3dSmrg uint16_t offset, dirty; 2661428d7b3dSmrg 2662428d7b3dSmrg gen8_get_batch(sna, op); 2663428d7b3dSmrg 2664428d7b3dSmrg binding_table = gen8_composite_get_binding_table(sna, &offset); 2665428d7b3dSmrg 2666428d7b3dSmrg dirty = kgem_bo_is_dirty(op->dst.bo); 2667428d7b3dSmrg 2668428d7b3dSmrg binding_table[0] = 2669428d7b3dSmrg gen8_bind_bo(sna, 2670428d7b3dSmrg op->dst.bo, op->dst.width, op->dst.height, 2671428d7b3dSmrg gen8_get_dest_format(op->dst.format), 2672428d7b3dSmrg true); 2673428d7b3dSmrg binding_table[1] = 2674428d7b3dSmrg gen8_bind_bo(sna, 2675428d7b3dSmrg op->src.bo, op->src.width, op->src.height, 2676428d7b3dSmrg op->src.card_format, 2677428d7b3dSmrg false); 2678428d7b3dSmrg 2679428d7b3dSmrg if (sna->kgem.surface == offset && 2680428d7b3dSmrg *(uint64_t *)(sna->kgem.batch + sna->render_state.gen8.surface_table) == *(uint64_t*)binding_table) { 2681428d7b3dSmrg sna->kgem.surface += SURFACE_DW; 2682428d7b3dSmrg offset = sna->render_state.gen8.surface_table; 2683428d7b3dSmrg } 2684428d7b3dSmrg 2685428d7b3dSmrg if (sna->kgem.batch[sna->render_state.gen8.surface_table] == binding_table[0]) 2686428d7b3dSmrg dirty = 0; 2687428d7b3dSmrg 2688428d7b3dSmrg assert(!GEN8_READS_DST(op->u.gen8.flags)); 2689428d7b3dSmrg gen8_emit_state(sna, op, offset | dirty); 2690428d7b3dSmrg} 2691428d7b3dSmrg 2692428d7b3dSmrgstatic inline bool 2693428d7b3dSmrgprefer_blt_copy(struct sna *sna, 2694428d7b3dSmrg struct kgem_bo *src_bo, 2695428d7b3dSmrg struct kgem_bo *dst_bo, 2696428d7b3dSmrg unsigned flags) 2697428d7b3dSmrg{ 2698428d7b3dSmrg if (sna->kgem.mode == KGEM_BLT) 2699428d7b3dSmrg return true; 2700428d7b3dSmrg 2701428d7b3dSmrg assert((flags & COPY_SYNC) == 0); 2702428d7b3dSmrg 2703428d7b3dSmrg if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) 2704428d7b3dSmrg return true; 2705428d7b3dSmrg 2706428d7b3dSmrg if (untiled_tlb_miss(src_bo) || 2707428d7b3dSmrg untiled_tlb_miss(dst_bo)) 2708428d7b3dSmrg return true; 2709428d7b3dSmrg 2710428d7b3dSmrg if (force_blt_ring(sna)) 2711428d7b3dSmrg return true; 2712428d7b3dSmrg 2713428d7b3dSmrg if (kgem_bo_is_render(dst_bo) || 2714428d7b3dSmrg kgem_bo_is_render(src_bo)) 2715428d7b3dSmrg return false; 2716428d7b3dSmrg 2717428d7b3dSmrg if (prefer_render_ring(sna, dst_bo)) 2718428d7b3dSmrg return false; 2719428d7b3dSmrg 2720428d7b3dSmrg if (!prefer_blt_ring(sna, dst_bo, flags)) 2721428d7b3dSmrg return false; 2722428d7b3dSmrg 2723428d7b3dSmrg return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); 2724428d7b3dSmrg} 2725428d7b3dSmrg 2726428d7b3dSmrgstatic bool 2727428d7b3dSmrggen8_render_copy_boxes(struct sna *sna, uint8_t alu, 2728428d7b3dSmrg const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 2729428d7b3dSmrg const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 2730428d7b3dSmrg const BoxRec *box, int n, unsigned flags) 2731428d7b3dSmrg{ 2732428d7b3dSmrg struct sna_composite_op tmp; 2733428d7b3dSmrg BoxRec extents; 2734428d7b3dSmrg 2735428d7b3dSmrg DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? %d\n", 2736428d7b3dSmrg __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags, 2737428d7b3dSmrg src_bo == dst_bo, 2738428d7b3dSmrg overlaps(sna, 2739428d7b3dSmrg src_bo, src_dx, src_dy, 2740428d7b3dSmrg dst_bo, dst_dx, dst_dy, 2741428d7b3dSmrg box, n, flags, &extents))); 2742428d7b3dSmrg 2743428d7b3dSmrg if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && 2744428d7b3dSmrg sna_blt_compare_depth(src, dst) && 2745428d7b3dSmrg sna_blt_copy_boxes(sna, alu, 2746428d7b3dSmrg src_bo, src_dx, src_dy, 2747428d7b3dSmrg dst_bo, dst_dx, dst_dy, 2748428d7b3dSmrg dst->bitsPerPixel, 2749428d7b3dSmrg box, n)) 2750428d7b3dSmrg return true; 2751428d7b3dSmrg 2752428d7b3dSmrg if (!(alu == GXcopy || alu == GXclear) || 2753428d7b3dSmrg unaligned(src_bo, src->bitsPerPixel) || 2754428d7b3dSmrg unaligned(dst_bo, dst->bitsPerPixel)) { 2755428d7b3dSmrgfallback_blt: 2756428d7b3dSmrg DBG(("%s: fallback blt\n", __FUNCTION__)); 2757428d7b3dSmrg if (!sna_blt_compare_depth(src, dst)) 2758428d7b3dSmrg return false; 2759428d7b3dSmrg 2760428d7b3dSmrg return sna_blt_copy_boxes_fallback(sna, alu, 2761428d7b3dSmrg src, src_bo, src_dx, src_dy, 2762428d7b3dSmrg dst, dst_bo, dst_dx, dst_dy, 2763428d7b3dSmrg box, n); 2764428d7b3dSmrg } 2765428d7b3dSmrg 2766428d7b3dSmrg if (overlaps(sna, 2767428d7b3dSmrg src_bo, src_dx, src_dy, 2768428d7b3dSmrg dst_bo, dst_dx, dst_dy, 2769428d7b3dSmrg box, n, flags, 2770428d7b3dSmrg &extents)) { 2771428d7b3dSmrg bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); 2772428d7b3dSmrg 2773428d7b3dSmrg if ((big || can_switch_to_blt(sna, dst_bo, flags)) && 2774428d7b3dSmrg sna_blt_copy_boxes(sna, alu, 2775428d7b3dSmrg src_bo, src_dx, src_dy, 2776428d7b3dSmrg dst_bo, dst_dx, dst_dy, 2777428d7b3dSmrg dst->bitsPerPixel, 2778428d7b3dSmrg box, n)) 2779428d7b3dSmrg return true; 2780428d7b3dSmrg 2781428d7b3dSmrg if (big) 2782428d7b3dSmrg goto fallback_blt; 2783428d7b3dSmrg 2784428d7b3dSmrg assert(src_bo == dst_bo); 2785428d7b3dSmrg assert(src->depth == dst->depth); 2786428d7b3dSmrg assert(src->width == dst->width); 2787428d7b3dSmrg assert(src->height == dst->height); 2788428d7b3dSmrg return sna_render_copy_boxes__overlap(sna, alu, 2789428d7b3dSmrg src, src_bo, 2790428d7b3dSmrg src_dx, src_dy, 2791428d7b3dSmrg dst_dx, dst_dy, 2792428d7b3dSmrg box, n, &extents); 2793428d7b3dSmrg } 2794428d7b3dSmrg 2795428d7b3dSmrg if (dst->depth == src->depth) { 2796428d7b3dSmrg tmp.dst.format = sna_render_format_for_depth(dst->depth); 2797428d7b3dSmrg tmp.src.pict_format = tmp.dst.format; 2798428d7b3dSmrg } else { 2799428d7b3dSmrg tmp.dst.format = sna_format_for_depth(dst->depth); 2800428d7b3dSmrg tmp.src.pict_format = sna_format_for_depth(src->depth); 2801428d7b3dSmrg } 2802428d7b3dSmrg if (!gen8_check_format(tmp.src.pict_format)) 2803428d7b3dSmrg goto fallback_blt; 2804428d7b3dSmrg 2805428d7b3dSmrg tmp.dst.pixmap = (PixmapPtr)dst; 2806428d7b3dSmrg tmp.dst.width = dst->width; 2807428d7b3dSmrg tmp.dst.height = dst->height; 2808428d7b3dSmrg tmp.dst.bo = dst_bo; 2809428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 2810428d7b3dSmrg tmp.damage = NULL; 2811428d7b3dSmrg 2812428d7b3dSmrg sna_render_composite_redirect_init(&tmp); 2813428d7b3dSmrg if (too_large(tmp.dst.width, tmp.dst.height)) { 2814428d7b3dSmrg int i; 2815428d7b3dSmrg 2816428d7b3dSmrg extents = box[0]; 2817428d7b3dSmrg for (i = 1; i < n; i++) { 2818428d7b3dSmrg if (box[i].x1 < extents.x1) 2819428d7b3dSmrg extents.x1 = box[i].x1; 2820428d7b3dSmrg if (box[i].y1 < extents.y1) 2821428d7b3dSmrg extents.y1 = box[i].y1; 2822428d7b3dSmrg 2823428d7b3dSmrg if (box[i].x2 > extents.x2) 2824428d7b3dSmrg extents.x2 = box[i].x2; 2825428d7b3dSmrg if (box[i].y2 > extents.y2) 2826428d7b3dSmrg extents.y2 = box[i].y2; 2827428d7b3dSmrg } 2828428d7b3dSmrg 2829428d7b3dSmrg if (!sna_render_composite_redirect(sna, &tmp, 2830428d7b3dSmrg extents.x1 + dst_dx, 2831428d7b3dSmrg extents.y1 + dst_dy, 2832428d7b3dSmrg extents.x2 - extents.x1, 2833428d7b3dSmrg extents.y2 - extents.y1, 2834428d7b3dSmrg n > 1)) 2835428d7b3dSmrg goto fallback_tiled; 2836428d7b3dSmrg } 2837428d7b3dSmrg 2838428d7b3dSmrg tmp.src.card_format = gen8_get_card_format(tmp.src.pict_format); 2839428d7b3dSmrg if (too_large(src->width, src->height)) { 2840428d7b3dSmrg int i; 2841428d7b3dSmrg 2842428d7b3dSmrg extents = box[0]; 2843428d7b3dSmrg for (i = 1; i < n; i++) { 2844428d7b3dSmrg if (box[i].x1 < extents.x1) 2845428d7b3dSmrg extents.x1 = box[i].x1; 2846428d7b3dSmrg if (box[i].y1 < extents.y1) 2847428d7b3dSmrg extents.y1 = box[i].y1; 2848428d7b3dSmrg 2849428d7b3dSmrg if (box[i].x2 > extents.x2) 2850428d7b3dSmrg extents.x2 = box[i].x2; 2851428d7b3dSmrg if (box[i].y2 > extents.y2) 2852428d7b3dSmrg extents.y2 = box[i].y2; 2853428d7b3dSmrg } 2854428d7b3dSmrg 2855428d7b3dSmrg if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, 2856428d7b3dSmrg extents.x1 + src_dx, 2857428d7b3dSmrg extents.y1 + src_dy, 2858428d7b3dSmrg extents.x2 - extents.x1, 2859428d7b3dSmrg extents.y2 - extents.y1)) 2860428d7b3dSmrg goto fallback_tiled_dst; 2861428d7b3dSmrg } else { 2862428d7b3dSmrg tmp.src.bo = src_bo; 2863428d7b3dSmrg tmp.src.width = src->width; 2864428d7b3dSmrg tmp.src.height = src->height; 2865428d7b3dSmrg tmp.src.offset[0] = tmp.src.offset[1] = 0; 2866428d7b3dSmrg } 2867428d7b3dSmrg 2868428d7b3dSmrg tmp.mask.bo = NULL; 2869428d7b3dSmrg 2870428d7b3dSmrg tmp.floats_per_vertex = 2; 2871428d7b3dSmrg tmp.floats_per_rect = 6; 2872428d7b3dSmrg tmp.need_magic_ca_pass = 0; 2873428d7b3dSmrg 2874428d7b3dSmrg tmp.u.gen8.flags = COPY_FLAGS(alu); 2875428d7b3dSmrg 2876428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 2877428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 2878428d7b3dSmrg kgem_submit(&sna->kgem); 2879428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 2880428d7b3dSmrg if (tmp.src.bo != src_bo) 2881428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2882428d7b3dSmrg if (tmp.redirect.real_bo) 2883428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2884428d7b3dSmrg goto fallback_blt; 2885428d7b3dSmrg } 2886428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2887428d7b3dSmrg } 2888428d7b3dSmrg 2889428d7b3dSmrg src_dx += tmp.src.offset[0]; 2890428d7b3dSmrg src_dy += tmp.src.offset[1]; 2891428d7b3dSmrg 2892428d7b3dSmrg dst_dx += tmp.dst.x; 2893428d7b3dSmrg dst_dy += tmp.dst.y; 2894428d7b3dSmrg 2895428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 2896428d7b3dSmrg 2897428d7b3dSmrg gen8_align_vertex(sna, &tmp); 2898428d7b3dSmrg gen8_emit_copy_state(sna, &tmp); 2899428d7b3dSmrg 2900428d7b3dSmrg do { 2901428d7b3dSmrg int16_t *v; 2902428d7b3dSmrg int n_this_time; 2903428d7b3dSmrg 2904428d7b3dSmrg n_this_time = gen8_get_rectangles(sna, &tmp, n, 2905428d7b3dSmrg gen8_emit_copy_state); 2906428d7b3dSmrg n -= n_this_time; 2907428d7b3dSmrg 2908428d7b3dSmrg v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 2909428d7b3dSmrg sna->render.vertex_used += 6 * n_this_time; 2910428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 2911428d7b3dSmrg do { 2912428d7b3dSmrg 2913428d7b3dSmrg DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", 2914428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 2915428d7b3dSmrg box->x1 + dst_dx, box->y1 + dst_dy, 2916428d7b3dSmrg box->x2 - box->x1, box->y2 - box->y1)); 2917428d7b3dSmrg v[0] = box->x2 + dst_dx; 2918428d7b3dSmrg v[2] = box->x2 + src_dx; 2919428d7b3dSmrg v[1] = v[5] = box->y2 + dst_dy; 2920428d7b3dSmrg v[3] = v[7] = box->y2 + src_dy; 2921428d7b3dSmrg v[8] = v[4] = box->x1 + dst_dx; 2922428d7b3dSmrg v[10] = v[6] = box->x1 + src_dx; 2923428d7b3dSmrg v[9] = box->y1 + dst_dy; 2924428d7b3dSmrg v[11] = box->y1 + src_dy; 2925428d7b3dSmrg v += 12; box++; 2926428d7b3dSmrg } while (--n_this_time); 2927428d7b3dSmrg } while (n); 2928428d7b3dSmrg 2929428d7b3dSmrg gen8_vertex_flush(sna); 2930428d7b3dSmrg sna_render_composite_redirect_done(sna, &tmp); 2931428d7b3dSmrg if (tmp.src.bo != src_bo) 2932428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2933428d7b3dSmrg return true; 2934428d7b3dSmrg 2935428d7b3dSmrgfallback_tiled_dst: 2936428d7b3dSmrg if (tmp.redirect.real_bo) 2937428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2938428d7b3dSmrgfallback_tiled: 2939428d7b3dSmrg DBG(("%s: fallback tiled\n", __FUNCTION__)); 2940428d7b3dSmrg if (sna_blt_compare_depth(src, dst) && 2941428d7b3dSmrg sna_blt_copy_boxes(sna, alu, 2942428d7b3dSmrg src_bo, src_dx, src_dy, 2943428d7b3dSmrg dst_bo, dst_dx, dst_dy, 2944428d7b3dSmrg dst->bitsPerPixel, 2945428d7b3dSmrg box, n)) 2946428d7b3dSmrg return true; 2947428d7b3dSmrg 2948428d7b3dSmrg return sna_tiling_copy_boxes(sna, alu, 2949428d7b3dSmrg src, src_bo, src_dx, src_dy, 2950428d7b3dSmrg dst, dst_bo, dst_dx, dst_dy, 2951428d7b3dSmrg box, n); 2952428d7b3dSmrg} 2953428d7b3dSmrg 2954428d7b3dSmrgstatic void 2955428d7b3dSmrggen8_render_copy_blt(struct sna *sna, 2956428d7b3dSmrg const struct sna_copy_op *op, 2957428d7b3dSmrg int16_t sx, int16_t sy, 2958428d7b3dSmrg int16_t w, int16_t h, 2959428d7b3dSmrg int16_t dx, int16_t dy) 2960428d7b3dSmrg{ 2961428d7b3dSmrg int16_t *v; 2962428d7b3dSmrg 2963428d7b3dSmrg gen8_get_rectangles(sna, &op->base, 1, gen8_emit_copy_state); 2964428d7b3dSmrg 2965428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 2966428d7b3dSmrg sna->render.vertex_used += 6; 2967428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 2968428d7b3dSmrg 2969428d7b3dSmrg v[0] = dx+w; v[1] = dy+h; 2970428d7b3dSmrg v[2] = sx+w; v[3] = sy+h; 2971428d7b3dSmrg v[4] = dx; v[5] = dy+h; 2972428d7b3dSmrg v[6] = sx; v[7] = sy+h; 2973428d7b3dSmrg v[8] = dx; v[9] = dy; 2974428d7b3dSmrg v[10] = sx; v[11] = sy; 2975428d7b3dSmrg} 2976428d7b3dSmrg 2977428d7b3dSmrgstatic void 2978428d7b3dSmrggen8_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 2979428d7b3dSmrg{ 2980428d7b3dSmrg if (sna->render.vertex_offset) 2981428d7b3dSmrg gen8_vertex_flush(sna); 2982428d7b3dSmrg} 2983428d7b3dSmrg 2984428d7b3dSmrgstatic bool 2985428d7b3dSmrggen8_render_copy(struct sna *sna, uint8_t alu, 2986428d7b3dSmrg PixmapPtr src, struct kgem_bo *src_bo, 2987428d7b3dSmrg PixmapPtr dst, struct kgem_bo *dst_bo, 2988428d7b3dSmrg struct sna_copy_op *op) 2989428d7b3dSmrg{ 2990428d7b3dSmrg DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n", 2991428d7b3dSmrg __FUNCTION__, alu, 2992428d7b3dSmrg src->drawable.width, src->drawable.height, 2993428d7b3dSmrg dst->drawable.width, dst->drawable.height)); 2994428d7b3dSmrg 2995428d7b3dSmrg if (prefer_blt_copy(sna, src_bo, dst_bo, 0) && 2996428d7b3dSmrg sna_blt_compare_depth(&src->drawable, &dst->drawable) && 2997428d7b3dSmrg sna_blt_copy(sna, alu, 2998428d7b3dSmrg src_bo, dst_bo, 2999428d7b3dSmrg dst->drawable.bitsPerPixel, 3000428d7b3dSmrg op)) 3001428d7b3dSmrg return true; 3002428d7b3dSmrg 3003428d7b3dSmrg if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || 3004428d7b3dSmrg too_large(src->drawable.width, src->drawable.height) || 3005428d7b3dSmrg too_large(dst->drawable.width, dst->drawable.height) || 3006428d7b3dSmrg unaligned(src_bo, src->drawable.bitsPerPixel) || 3007428d7b3dSmrg unaligned(dst_bo, dst->drawable.bitsPerPixel)) { 3008428d7b3dSmrgfallback: 3009428d7b3dSmrg if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3010428d7b3dSmrg return false; 3011428d7b3dSmrg 3012428d7b3dSmrg return sna_blt_copy(sna, alu, src_bo, dst_bo, 3013428d7b3dSmrg dst->drawable.bitsPerPixel, 3014428d7b3dSmrg op); 3015428d7b3dSmrg } 3016428d7b3dSmrg 3017428d7b3dSmrg if (dst->drawable.depth == src->drawable.depth) { 3018428d7b3dSmrg op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); 3019428d7b3dSmrg op->base.src.pict_format = op->base.dst.format; 3020428d7b3dSmrg } else { 3021428d7b3dSmrg op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3022428d7b3dSmrg op->base.src.pict_format = sna_format_for_depth(src->drawable.depth); 3023428d7b3dSmrg } 3024428d7b3dSmrg if (!gen8_check_format(op->base.src.pict_format)) 3025428d7b3dSmrg goto fallback; 3026428d7b3dSmrg 3027428d7b3dSmrg op->base.dst.pixmap = dst; 3028428d7b3dSmrg op->base.dst.width = dst->drawable.width; 3029428d7b3dSmrg op->base.dst.height = dst->drawable.height; 3030428d7b3dSmrg op->base.dst.bo = dst_bo; 3031428d7b3dSmrg 3032428d7b3dSmrg op->base.src.bo = src_bo; 3033428d7b3dSmrg op->base.src.card_format = 3034428d7b3dSmrg gen8_get_card_format(op->base.src.pict_format); 3035428d7b3dSmrg op->base.src.width = src->drawable.width; 3036428d7b3dSmrg op->base.src.height = src->drawable.height; 3037428d7b3dSmrg 3038428d7b3dSmrg op->base.mask.bo = NULL; 3039428d7b3dSmrg 3040428d7b3dSmrg op->base.floats_per_vertex = 2; 3041428d7b3dSmrg op->base.floats_per_rect = 6; 3042428d7b3dSmrg 3043428d7b3dSmrg op->base.u.gen8.flags = COPY_FLAGS(alu); 3044428d7b3dSmrg 3045428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3046428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 3047428d7b3dSmrg kgem_submit(&sna->kgem); 3048428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 3049428d7b3dSmrg goto fallback; 3050428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3051428d7b3dSmrg } 3052428d7b3dSmrg 3053428d7b3dSmrg gen8_align_vertex(sna, &op->base); 3054428d7b3dSmrg gen8_emit_copy_state(sna, &op->base); 3055428d7b3dSmrg 3056428d7b3dSmrg op->blt = gen8_render_copy_blt; 3057428d7b3dSmrg op->done = gen8_render_copy_done; 3058428d7b3dSmrg return true; 3059428d7b3dSmrg} 3060428d7b3dSmrg 3061428d7b3dSmrgstatic void 3062428d7b3dSmrggen8_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) 3063428d7b3dSmrg{ 3064428d7b3dSmrg uint32_t *binding_table; 3065428d7b3dSmrg uint16_t offset, dirty; 3066428d7b3dSmrg 3067428d7b3dSmrg /* XXX Render Target Fast Clear 3068428d7b3dSmrg * Set RTFC Enable in PS and render a rectangle. 3069428d7b3dSmrg * Limited to a clearing the full MSC surface only with a 3070428d7b3dSmrg * specific kernel. 3071428d7b3dSmrg */ 3072428d7b3dSmrg 3073428d7b3dSmrg gen8_get_batch(sna, op); 3074428d7b3dSmrg 3075428d7b3dSmrg binding_table = gen8_composite_get_binding_table(sna, &offset); 3076428d7b3dSmrg 3077428d7b3dSmrg dirty = kgem_bo_is_dirty(op->dst.bo); 3078428d7b3dSmrg 3079428d7b3dSmrg binding_table[0] = 3080428d7b3dSmrg gen8_bind_bo(sna, 3081428d7b3dSmrg op->dst.bo, op->dst.width, op->dst.height, 3082428d7b3dSmrg gen8_get_dest_format(op->dst.format), 3083428d7b3dSmrg true); 3084428d7b3dSmrg binding_table[1] = 3085428d7b3dSmrg gen8_bind_bo(sna, 3086428d7b3dSmrg op->src.bo, 1, 1, 3087428d7b3dSmrg SURFACEFORMAT_B8G8R8A8_UNORM, 3088428d7b3dSmrg false); 3089428d7b3dSmrg 3090428d7b3dSmrg if (sna->kgem.surface == offset && 3091428d7b3dSmrg *(uint64_t *)(sna->kgem.batch + sna->render_state.gen8.surface_table) == *(uint64_t*)binding_table) { 3092428d7b3dSmrg sna->kgem.surface += SURFACE_DW; 3093428d7b3dSmrg offset = sna->render_state.gen8.surface_table; 3094428d7b3dSmrg } 3095428d7b3dSmrg 3096428d7b3dSmrg if (sna->kgem.batch[sna->render_state.gen8.surface_table] == binding_table[0]) 3097428d7b3dSmrg dirty = 0; 3098428d7b3dSmrg 3099428d7b3dSmrg gen8_emit_state(sna, op, offset | dirty); 3100428d7b3dSmrg} 3101428d7b3dSmrg 3102428d7b3dSmrgstatic bool 3103428d7b3dSmrggen8_render_fill_boxes(struct sna *sna, 3104428d7b3dSmrg CARD8 op, 3105428d7b3dSmrg PictFormat format, 3106428d7b3dSmrg const xRenderColor *color, 3107428d7b3dSmrg const DrawableRec *dst, struct kgem_bo *dst_bo, 3108428d7b3dSmrg const BoxRec *box, int n) 3109428d7b3dSmrg{ 3110428d7b3dSmrg struct sna_composite_op tmp; 3111428d7b3dSmrg uint32_t pixel; 3112428d7b3dSmrg 3113428d7b3dSmrg DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", 3114428d7b3dSmrg __FUNCTION__, op, 3115428d7b3dSmrg color->red, color->green, color->blue, color->alpha, (int)format)); 3116428d7b3dSmrg 3117428d7b3dSmrg if (op >= ARRAY_SIZE(gen8_blend_op)) { 3118428d7b3dSmrg DBG(("%s: fallback due to unhandled blend op: %d\n", 3119428d7b3dSmrg __FUNCTION__, op)); 3120428d7b3dSmrg return false; 3121428d7b3dSmrg } 3122428d7b3dSmrg 3123428d7b3dSmrg if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) || 3124428d7b3dSmrg !gen8_check_dst_format(format) || 3125428d7b3dSmrg unaligned(dst_bo, PICT_FORMAT_BPP(format))) { 3126428d7b3dSmrg uint8_t alu = GXinvalid; 3127428d7b3dSmrg 3128428d7b3dSmrg if (op <= PictOpSrc) { 3129428d7b3dSmrg pixel = 0; 3130428d7b3dSmrg if (op == PictOpClear) 3131428d7b3dSmrg alu = GXclear; 3132428d7b3dSmrg else if (sna_get_pixel_from_rgba(&pixel, 3133428d7b3dSmrg color->red, 3134428d7b3dSmrg color->green, 3135428d7b3dSmrg color->blue, 3136428d7b3dSmrg color->alpha, 3137428d7b3dSmrg format)) 3138428d7b3dSmrg alu = GXcopy; 3139428d7b3dSmrg } 3140428d7b3dSmrg 3141428d7b3dSmrg if (alu != GXinvalid && 3142428d7b3dSmrg sna_blt_fill_boxes(sna, alu, 3143428d7b3dSmrg dst_bo, dst->bitsPerPixel, 3144428d7b3dSmrg pixel, box, n)) 3145428d7b3dSmrg return true; 3146428d7b3dSmrg 3147428d7b3dSmrg if (!gen8_check_dst_format(format)) 3148428d7b3dSmrg return false; 3149428d7b3dSmrg } 3150428d7b3dSmrg 3151428d7b3dSmrg if (op == PictOpClear) { 3152428d7b3dSmrg pixel = 0; 3153428d7b3dSmrg op = PictOpSrc; 3154428d7b3dSmrg } else if (!sna_get_pixel_from_rgba(&pixel, 3155428d7b3dSmrg color->red, 3156428d7b3dSmrg color->green, 3157428d7b3dSmrg color->blue, 3158428d7b3dSmrg color->alpha, 3159428d7b3dSmrg PICT_a8r8g8b8)) 3160428d7b3dSmrg return false; 3161428d7b3dSmrg 3162428d7b3dSmrg DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", 3163428d7b3dSmrg __FUNCTION__, pixel, n, 3164428d7b3dSmrg box[0].x1, box[0].y1, box[0].x2, box[0].y2)); 3165428d7b3dSmrg 3166428d7b3dSmrg tmp.dst.pixmap = (PixmapPtr)dst; 3167428d7b3dSmrg tmp.dst.width = dst->width; 3168428d7b3dSmrg tmp.dst.height = dst->height; 3169428d7b3dSmrg tmp.dst.format = format; 3170428d7b3dSmrg tmp.dst.bo = dst_bo; 3171428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 3172428d7b3dSmrg tmp.damage = NULL; 3173428d7b3dSmrg 3174428d7b3dSmrg sna_render_composite_redirect_init(&tmp); 3175428d7b3dSmrg if (too_large(dst->width, dst->height)) { 3176428d7b3dSmrg BoxRec extents; 3177428d7b3dSmrg 3178428d7b3dSmrg boxes_extents(box, n, &extents); 3179428d7b3dSmrg if (!sna_render_composite_redirect(sna, &tmp, 3180428d7b3dSmrg extents.x1, extents.y1, 3181428d7b3dSmrg extents.x2 - extents.x1, 3182428d7b3dSmrg extents.y2 - extents.y1, 3183428d7b3dSmrg n > 1)) 3184428d7b3dSmrg return sna_tiling_fill_boxes(sna, op, format, color, 3185428d7b3dSmrg dst, dst_bo, box, n); 3186428d7b3dSmrg } 3187428d7b3dSmrg 3188428d7b3dSmrg tmp.src.bo = sna_render_get_solid(sna, pixel); 3189428d7b3dSmrg tmp.mask.bo = NULL; 3190428d7b3dSmrg 3191428d7b3dSmrg tmp.floats_per_vertex = 2; 3192428d7b3dSmrg tmp.floats_per_rect = 6; 3193428d7b3dSmrg tmp.need_magic_ca_pass = false; 3194428d7b3dSmrg 3195428d7b3dSmrg tmp.u.gen8.flags = FILL_FLAGS(op, format); 3196428d7b3dSmrg 3197428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3198428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3199428d7b3dSmrg kgem_submit(&sna->kgem); 3200428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3201428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3202428d7b3dSmrg tmp.src.bo = NULL; 3203428d7b3dSmrg 3204428d7b3dSmrg if (tmp.redirect.real_bo) { 3205428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 3206428d7b3dSmrg tmp.redirect.real_bo = NULL; 3207428d7b3dSmrg } 3208428d7b3dSmrg 3209428d7b3dSmrg return false; 3210428d7b3dSmrg } 3211428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3212428d7b3dSmrg } 3213428d7b3dSmrg 3214428d7b3dSmrg gen8_align_vertex(sna, &tmp); 3215428d7b3dSmrg gen8_emit_fill_state(sna, &tmp); 3216428d7b3dSmrg 3217428d7b3dSmrg do { 3218428d7b3dSmrg int n_this_time; 3219428d7b3dSmrg int16_t *v; 3220428d7b3dSmrg 3221428d7b3dSmrg n_this_time = gen8_get_rectangles(sna, &tmp, n, 3222428d7b3dSmrg gen8_emit_fill_state); 3223428d7b3dSmrg n -= n_this_time; 3224428d7b3dSmrg 3225428d7b3dSmrg v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 3226428d7b3dSmrg sna->render.vertex_used += 6 * n_this_time; 3227428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3228428d7b3dSmrg do { 3229428d7b3dSmrg DBG((" (%d, %d), (%d, %d)\n", 3230428d7b3dSmrg box->x1, box->y1, box->x2, box->y2)); 3231428d7b3dSmrg 3232428d7b3dSmrg v[0] = box->x2; 3233428d7b3dSmrg v[5] = v[1] = box->y2; 3234428d7b3dSmrg v[8] = v[4] = box->x1; 3235428d7b3dSmrg v[9] = box->y1; 3236428d7b3dSmrg v[2] = v[3] = v[7] = 1; 3237428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3238428d7b3dSmrg v += 12; box++; 3239428d7b3dSmrg } while (--n_this_time); 3240428d7b3dSmrg } while (n); 3241428d7b3dSmrg 3242428d7b3dSmrg gen8_vertex_flush(sna); 3243428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3244428d7b3dSmrg sna_render_composite_redirect_done(sna, &tmp); 3245428d7b3dSmrg return true; 3246428d7b3dSmrg} 3247428d7b3dSmrg 3248428d7b3dSmrgstatic void 3249428d7b3dSmrggen8_render_fill_op_blt(struct sna *sna, 3250428d7b3dSmrg const struct sna_fill_op *op, 3251428d7b3dSmrg int16_t x, int16_t y, int16_t w, int16_t h) 3252428d7b3dSmrg{ 3253428d7b3dSmrg int16_t *v; 3254428d7b3dSmrg 3255428d7b3dSmrg DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); 3256428d7b3dSmrg 3257428d7b3dSmrg gen8_get_rectangles(sna, &op->base, 1, gen8_emit_fill_state); 3258428d7b3dSmrg 3259428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3260428d7b3dSmrg sna->render.vertex_used += 6; 3261428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3262428d7b3dSmrg 3263428d7b3dSmrg v[0] = x+w; 3264428d7b3dSmrg v[4] = v[8] = x; 3265428d7b3dSmrg v[1] = v[5] = y+h; 3266428d7b3dSmrg v[9] = y; 3267428d7b3dSmrg 3268428d7b3dSmrg v[2] = v[3] = v[7] = 1; 3269428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3270428d7b3dSmrg} 3271428d7b3dSmrg 3272428d7b3dSmrgfastcall static void 3273428d7b3dSmrggen8_render_fill_op_box(struct sna *sna, 3274428d7b3dSmrg const struct sna_fill_op *op, 3275428d7b3dSmrg const BoxRec *box) 3276428d7b3dSmrg{ 3277428d7b3dSmrg int16_t *v; 3278428d7b3dSmrg 3279428d7b3dSmrg DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, 3280428d7b3dSmrg box->x1, box->y1, box->x2, box->y2)); 3281428d7b3dSmrg 3282428d7b3dSmrg gen8_get_rectangles(sna, &op->base, 1, gen8_emit_fill_state); 3283428d7b3dSmrg 3284428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3285428d7b3dSmrg sna->render.vertex_used += 6; 3286428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3287428d7b3dSmrg 3288428d7b3dSmrg v[0] = box->x2; 3289428d7b3dSmrg v[8] = v[4] = box->x1; 3290428d7b3dSmrg v[5] = v[1] = box->y2; 3291428d7b3dSmrg v[9] = box->y1; 3292428d7b3dSmrg 3293428d7b3dSmrg v[7] = v[2] = v[3] = 1; 3294428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3295428d7b3dSmrg} 3296428d7b3dSmrg 3297428d7b3dSmrgfastcall static void 3298428d7b3dSmrggen8_render_fill_op_boxes(struct sna *sna, 3299428d7b3dSmrg const struct sna_fill_op *op, 3300428d7b3dSmrg const BoxRec *box, 3301428d7b3dSmrg int nbox) 3302428d7b3dSmrg{ 3303428d7b3dSmrg DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__, 3304428d7b3dSmrg box->x1, box->y1, box->x2, box->y2, nbox)); 3305428d7b3dSmrg 3306428d7b3dSmrg do { 3307428d7b3dSmrg int nbox_this_time; 3308428d7b3dSmrg int16_t *v; 3309428d7b3dSmrg 3310428d7b3dSmrg nbox_this_time = gen8_get_rectangles(sna, &op->base, nbox, 3311428d7b3dSmrg gen8_emit_fill_state); 3312428d7b3dSmrg nbox -= nbox_this_time; 3313428d7b3dSmrg 3314428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3315428d7b3dSmrg sna->render.vertex_used += 6 * nbox_this_time; 3316428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3317428d7b3dSmrg 3318428d7b3dSmrg do { 3319428d7b3dSmrg v[0] = box->x2; 3320428d7b3dSmrg v[8] = v[4] = box->x1; 3321428d7b3dSmrg v[5] = v[1] = box->y2; 3322428d7b3dSmrg v[9] = box->y1; 3323428d7b3dSmrg v[7] = v[2] = v[3] = 1; 3324428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3325428d7b3dSmrg box++; v += 12; 3326428d7b3dSmrg } while (--nbox_this_time); 3327428d7b3dSmrg } while (nbox); 3328428d7b3dSmrg} 3329428d7b3dSmrg 3330428d7b3dSmrgstatic void 3331428d7b3dSmrggen8_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) 3332428d7b3dSmrg{ 3333428d7b3dSmrg if (sna->render.vertex_offset) 3334428d7b3dSmrg gen8_vertex_flush(sna); 3335428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->base.src.bo); 3336428d7b3dSmrg} 3337428d7b3dSmrg 3338428d7b3dSmrgstatic bool 3339428d7b3dSmrggen8_render_fill(struct sna *sna, uint8_t alu, 3340428d7b3dSmrg PixmapPtr dst, struct kgem_bo *dst_bo, 3341428d7b3dSmrg uint32_t color, unsigned flags, 3342428d7b3dSmrg struct sna_fill_op *op) 3343428d7b3dSmrg{ 3344428d7b3dSmrg DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); 3345428d7b3dSmrg 3346428d7b3dSmrg if (prefer_blt_fill(sna, dst_bo, flags) && 3347428d7b3dSmrg sna_blt_fill(sna, alu, 3348428d7b3dSmrg dst_bo, dst->drawable.bitsPerPixel, 3349428d7b3dSmrg color, 3350428d7b3dSmrg op)) 3351428d7b3dSmrg return true; 3352428d7b3dSmrg 3353428d7b3dSmrg if (!(alu == GXcopy || alu == GXclear) || 3354428d7b3dSmrg too_large(dst->drawable.width, dst->drawable.height) || 3355428d7b3dSmrg unaligned(dst_bo, dst->drawable.bitsPerPixel)) 3356428d7b3dSmrg return sna_blt_fill(sna, alu, 3357428d7b3dSmrg dst_bo, dst->drawable.bitsPerPixel, 3358428d7b3dSmrg color, 3359428d7b3dSmrg op); 3360428d7b3dSmrg 3361428d7b3dSmrg if (alu == GXclear) 3362428d7b3dSmrg color = 0; 3363428d7b3dSmrg 3364428d7b3dSmrg op->base.dst.pixmap = dst; 3365428d7b3dSmrg op->base.dst.width = dst->drawable.width; 3366428d7b3dSmrg op->base.dst.height = dst->drawable.height; 3367428d7b3dSmrg op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3368428d7b3dSmrg op->base.dst.bo = dst_bo; 3369428d7b3dSmrg op->base.dst.x = op->base.dst.y = 0; 3370428d7b3dSmrg 3371428d7b3dSmrg op->base.src.bo = 3372428d7b3dSmrg sna_render_get_solid(sna, 3373428d7b3dSmrg sna_rgba_for_color(color, 3374428d7b3dSmrg dst->drawable.depth)); 3375428d7b3dSmrg op->base.mask.bo = NULL; 3376428d7b3dSmrg 3377428d7b3dSmrg op->base.need_magic_ca_pass = false; 3378428d7b3dSmrg op->base.floats_per_vertex = 2; 3379428d7b3dSmrg op->base.floats_per_rect = 6; 3380428d7b3dSmrg 3381428d7b3dSmrg op->base.u.gen8.flags = FILL_FLAGS_NOBLEND; 3382428d7b3dSmrg 3383428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3384428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3385428d7b3dSmrg kgem_submit(&sna->kgem); 3386428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3387428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->base.src.bo); 3388428d7b3dSmrg return false; 3389428d7b3dSmrg } 3390428d7b3dSmrg 3391428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3392428d7b3dSmrg } 3393428d7b3dSmrg 3394428d7b3dSmrg gen8_align_vertex(sna, &op->base); 3395428d7b3dSmrg gen8_emit_fill_state(sna, &op->base); 3396428d7b3dSmrg 3397428d7b3dSmrg op->blt = gen8_render_fill_op_blt; 3398428d7b3dSmrg op->box = gen8_render_fill_op_box; 3399428d7b3dSmrg op->boxes = gen8_render_fill_op_boxes; 3400428d7b3dSmrg op->points = NULL; 3401428d7b3dSmrg op->done = gen8_render_fill_op_done; 3402428d7b3dSmrg return true; 3403428d7b3dSmrg} 3404428d7b3dSmrg 3405428d7b3dSmrgstatic bool 3406428d7b3dSmrggen8_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3407428d7b3dSmrg uint32_t color, 3408428d7b3dSmrg int16_t x1, int16_t y1, int16_t x2, int16_t y2, 3409428d7b3dSmrg uint8_t alu) 3410428d7b3dSmrg{ 3411428d7b3dSmrg BoxRec box; 3412428d7b3dSmrg 3413428d7b3dSmrg box.x1 = x1; 3414428d7b3dSmrg box.y1 = y1; 3415428d7b3dSmrg box.x2 = x2; 3416428d7b3dSmrg box.y2 = y2; 3417428d7b3dSmrg 3418428d7b3dSmrg return sna_blt_fill_boxes(sna, alu, 3419428d7b3dSmrg bo, dst->drawable.bitsPerPixel, 3420428d7b3dSmrg color, &box, 1); 3421428d7b3dSmrg} 3422428d7b3dSmrg 3423428d7b3dSmrgstatic bool 3424428d7b3dSmrggen8_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3425428d7b3dSmrg uint32_t color, 3426428d7b3dSmrg int16_t x1, int16_t y1, 3427428d7b3dSmrg int16_t x2, int16_t y2, 3428428d7b3dSmrg uint8_t alu) 3429428d7b3dSmrg{ 3430428d7b3dSmrg struct sna_composite_op tmp; 3431428d7b3dSmrg int16_t *v; 3432428d7b3dSmrg 3433428d7b3dSmrg /* Prefer to use the BLT if already engaged */ 3434428d7b3dSmrg if (prefer_blt_fill(sna, bo, FILL_BOXES) && 3435428d7b3dSmrg gen8_render_fill_one_try_blt(sna, dst, bo, color, 3436428d7b3dSmrg x1, y1, x2, y2, alu)) 3437428d7b3dSmrg return true; 3438428d7b3dSmrg 3439428d7b3dSmrg /* Must use the BLT if we can't RENDER... */ 3440428d7b3dSmrg if (!(alu == GXcopy || alu == GXclear) || 3441428d7b3dSmrg too_large(dst->drawable.width, dst->drawable.height) || 3442428d7b3dSmrg unaligned(bo, dst->drawable.bitsPerPixel)) 3443428d7b3dSmrg return gen8_render_fill_one_try_blt(sna, dst, bo, color, 3444428d7b3dSmrg x1, y1, x2, y2, alu); 3445428d7b3dSmrg 3446428d7b3dSmrg if (alu == GXclear) 3447428d7b3dSmrg color = 0; 3448428d7b3dSmrg 3449428d7b3dSmrg tmp.dst.pixmap = dst; 3450428d7b3dSmrg tmp.dst.width = dst->drawable.width; 3451428d7b3dSmrg tmp.dst.height = dst->drawable.height; 3452428d7b3dSmrg tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3453428d7b3dSmrg tmp.dst.bo = bo; 3454428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 3455428d7b3dSmrg 3456428d7b3dSmrg tmp.src.bo = 3457428d7b3dSmrg sna_render_get_solid(sna, 3458428d7b3dSmrg sna_rgba_for_color(color, 3459428d7b3dSmrg dst->drawable.depth)); 3460428d7b3dSmrg tmp.mask.bo = NULL; 3461428d7b3dSmrg 3462428d7b3dSmrg tmp.floats_per_vertex = 2; 3463428d7b3dSmrg tmp.floats_per_rect = 6; 3464428d7b3dSmrg tmp.need_magic_ca_pass = false; 3465428d7b3dSmrg 3466428d7b3dSmrg tmp.u.gen8.flags = FILL_FLAGS_NOBLEND; 3467428d7b3dSmrg 3468428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 3469428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3470428d7b3dSmrg kgem_submit(&sna->kgem); 3471428d7b3dSmrg if (kgem_check_bo(&sna->kgem, bo, NULL)) { 3472428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3473428d7b3dSmrg return false; 3474428d7b3dSmrg } 3475428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3476428d7b3dSmrg } 3477428d7b3dSmrg 3478428d7b3dSmrg gen8_align_vertex(sna, &tmp); 3479428d7b3dSmrg gen8_emit_fill_state(sna, &tmp); 3480428d7b3dSmrg 3481428d7b3dSmrg gen8_get_rectangles(sna, &tmp, 1, gen8_emit_fill_state); 3482428d7b3dSmrg 3483428d7b3dSmrg DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2)); 3484428d7b3dSmrg 3485428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3486428d7b3dSmrg sna->render.vertex_used += 6; 3487428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3488428d7b3dSmrg 3489428d7b3dSmrg v[0] = x2; 3490428d7b3dSmrg v[8] = v[4] = x1; 3491428d7b3dSmrg v[5] = v[1] = y2; 3492428d7b3dSmrg v[9] = y1; 3493428d7b3dSmrg v[7] = v[2] = v[3] = 1; 3494428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3495428d7b3dSmrg 3496428d7b3dSmrg gen8_vertex_flush(sna); 3497428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3498428d7b3dSmrg 3499428d7b3dSmrg return true; 3500428d7b3dSmrg} 3501428d7b3dSmrg 3502428d7b3dSmrgstatic bool 3503428d7b3dSmrggen8_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) 3504428d7b3dSmrg{ 3505428d7b3dSmrg BoxRec box; 3506428d7b3dSmrg 3507428d7b3dSmrg box.x1 = 0; 3508428d7b3dSmrg box.y1 = 0; 3509428d7b3dSmrg box.x2 = dst->drawable.width; 3510428d7b3dSmrg box.y2 = dst->drawable.height; 3511428d7b3dSmrg 3512428d7b3dSmrg return sna_blt_fill_boxes(sna, GXclear, 3513428d7b3dSmrg bo, dst->drawable.bitsPerPixel, 3514428d7b3dSmrg 0, &box, 1); 3515428d7b3dSmrg} 3516428d7b3dSmrg 3517428d7b3dSmrgstatic bool 3518428d7b3dSmrggen8_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) 3519428d7b3dSmrg{ 3520428d7b3dSmrg struct sna_composite_op tmp; 3521428d7b3dSmrg int16_t *v; 3522428d7b3dSmrg 3523428d7b3dSmrg DBG(("%s: %dx%d\n", 3524428d7b3dSmrg __FUNCTION__, 3525428d7b3dSmrg dst->drawable.width, 3526428d7b3dSmrg dst->drawable.height)); 3527428d7b3dSmrg 3528428d7b3dSmrg /* Prefer to use the BLT if already engaged */ 3529428d7b3dSmrg if (sna->kgem.mode == KGEM_BLT && 3530428d7b3dSmrg gen8_render_clear_try_blt(sna, dst, bo)) 3531428d7b3dSmrg return true; 3532428d7b3dSmrg 3533428d7b3dSmrg /* Must use the BLT if we can't RENDER... */ 3534428d7b3dSmrg if (too_large(dst->drawable.width, dst->drawable.height) || 3535428d7b3dSmrg unaligned(bo, dst->drawable.bitsPerPixel)) 3536428d7b3dSmrg return gen8_render_clear_try_blt(sna, dst, bo); 3537428d7b3dSmrg 3538428d7b3dSmrg tmp.dst.pixmap = dst; 3539428d7b3dSmrg tmp.dst.width = dst->drawable.width; 3540428d7b3dSmrg tmp.dst.height = dst->drawable.height; 3541428d7b3dSmrg tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3542428d7b3dSmrg tmp.dst.bo = bo; 3543428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 3544428d7b3dSmrg 3545428d7b3dSmrg tmp.src.bo = sna_render_get_solid(sna, 0); 3546428d7b3dSmrg tmp.mask.bo = NULL; 3547428d7b3dSmrg 3548428d7b3dSmrg tmp.floats_per_vertex = 2; 3549428d7b3dSmrg tmp.floats_per_rect = 6; 3550428d7b3dSmrg tmp.need_magic_ca_pass = false; 3551428d7b3dSmrg 3552428d7b3dSmrg tmp.u.gen8.flags = FILL_FLAGS_NOBLEND; 3553428d7b3dSmrg 3554428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 3555428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3556428d7b3dSmrg kgem_submit(&sna->kgem); 3557428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3558428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3559428d7b3dSmrg return false; 3560428d7b3dSmrg } 3561428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3562428d7b3dSmrg } 3563428d7b3dSmrg 3564428d7b3dSmrg gen8_align_vertex(sna, &tmp); 3565428d7b3dSmrg gen8_emit_fill_state(sna, &tmp); 3566428d7b3dSmrg 3567428d7b3dSmrg gen8_get_rectangles(sna, &tmp, 1, gen8_emit_fill_state); 3568428d7b3dSmrg 3569428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3570428d7b3dSmrg sna->render.vertex_used += 6; 3571428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3572428d7b3dSmrg 3573428d7b3dSmrg v[0] = dst->drawable.width; 3574428d7b3dSmrg v[5] = v[1] = dst->drawable.height; 3575428d7b3dSmrg v[8] = v[4] = 0; 3576428d7b3dSmrg v[9] = 0; 3577428d7b3dSmrg 3578428d7b3dSmrg v[7] = v[2] = v[3] = 1; 3579428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3580428d7b3dSmrg 3581428d7b3dSmrg gen8_vertex_flush(sna); 3582428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3583428d7b3dSmrg 3584428d7b3dSmrg return true; 3585428d7b3dSmrg} 3586428d7b3dSmrg 3587428d7b3dSmrg#if !NO_VIDEO 3588428d7b3dSmrgstatic uint32_t gen8_bind_video_source(struct sna *sna, 3589428d7b3dSmrg struct kgem_bo *bo, 3590428d7b3dSmrg uint32_t delta, 3591428d7b3dSmrg int width, 3592428d7b3dSmrg int height, 3593428d7b3dSmrg int pitch, 3594428d7b3dSmrg uint32_t format) 3595428d7b3dSmrg{ 3596428d7b3dSmrg uint32_t *ss; 3597428d7b3dSmrg int offset; 3598428d7b3dSmrg 3599428d7b3dSmrg offset = sna->kgem.surface -= SURFACE_DW; 3600428d7b3dSmrg ss = sna->kgem.batch + offset; 3601428d7b3dSmrg ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT | 3602428d7b3dSmrg gen8_tiling_bits(bo->tiling) | 3603428d7b3dSmrg format << SURFACE_FORMAT_SHIFT | 3604428d7b3dSmrg SURFACE_VALIGN_4 | SURFACE_HALIGN_4); 3605428d7b3dSmrg ss[1] = 0; 3606428d7b3dSmrg ss[2] = ((width - 1) << SURFACE_WIDTH_SHIFT | 3607428d7b3dSmrg (height - 1) << SURFACE_HEIGHT_SHIFT); 3608428d7b3dSmrg ss[3] = (pitch - 1) << SURFACE_PITCH_SHIFT; 3609428d7b3dSmrg ss[4] = 0; 3610428d7b3dSmrg ss[5] = 0; 3611428d7b3dSmrg ss[6] = 0; 3612428d7b3dSmrg ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); 3613428d7b3dSmrg *(uint64_t *)(ss+8) = 3614428d7b3dSmrg kgem_add_reloc64(&sna->kgem, offset + 8, bo, 3615428d7b3dSmrg I915_GEM_DOMAIN_SAMPLER << 16, 3616428d7b3dSmrg delta); 3617428d7b3dSmrg ss[10] = 0; 3618428d7b3dSmrg ss[11] = 0; 3619428d7b3dSmrg ss[12] = 0; 3620428d7b3dSmrg ss[13] = 0; 3621428d7b3dSmrg ss[14] = 0; 3622428d7b3dSmrg ss[15] = 0; 3623428d7b3dSmrg 3624428d7b3dSmrg DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> sampler\n", 3625428d7b3dSmrg offset, bo->handle, ss[1], 3626428d7b3dSmrg format, width, height, bo->pitch, bo->tiling)); 3627428d7b3dSmrg 3628428d7b3dSmrg return offset * sizeof(uint32_t); 3629428d7b3dSmrg} 3630428d7b3dSmrg 3631428d7b3dSmrgstatic void gen8_emit_video_state(struct sna *sna, 3632428d7b3dSmrg const struct sna_composite_op *op) 3633428d7b3dSmrg{ 3634428d7b3dSmrg struct sna_video_frame *frame = op->priv; 3635428d7b3dSmrg uint32_t src_surf_format; 3636428d7b3dSmrg uint32_t src_surf_base[6]; 3637428d7b3dSmrg int src_width[6]; 3638428d7b3dSmrg int src_height[6]; 3639428d7b3dSmrg int src_pitch[6]; 3640428d7b3dSmrg uint32_t *binding_table; 3641428d7b3dSmrg uint16_t offset; 3642428d7b3dSmrg int n_src, n; 3643428d7b3dSmrg 3644428d7b3dSmrg /* XXX VeBox, bicubic */ 3645428d7b3dSmrg 3646428d7b3dSmrg gen8_get_batch(sna, op); 3647428d7b3dSmrg 3648428d7b3dSmrg src_surf_base[0] = 0; 3649428d7b3dSmrg src_surf_base[1] = 0; 3650428d7b3dSmrg src_surf_base[2] = frame->VBufOffset; 3651428d7b3dSmrg src_surf_base[3] = frame->VBufOffset; 3652428d7b3dSmrg src_surf_base[4] = frame->UBufOffset; 3653428d7b3dSmrg src_surf_base[5] = frame->UBufOffset; 3654428d7b3dSmrg 3655428d7b3dSmrg if (is_planar_fourcc(frame->id)) { 3656428d7b3dSmrg src_surf_format = SURFACEFORMAT_R8_UNORM; 3657428d7b3dSmrg src_width[1] = src_width[0] = frame->width; 3658428d7b3dSmrg src_height[1] = src_height[0] = frame->height; 3659428d7b3dSmrg src_pitch[1] = src_pitch[0] = frame->pitch[1]; 3660428d7b3dSmrg src_width[4] = src_width[5] = src_width[2] = src_width[3] = 3661428d7b3dSmrg frame->width / 2; 3662428d7b3dSmrg src_height[4] = src_height[5] = src_height[2] = src_height[3] = 3663428d7b3dSmrg frame->height / 2; 3664428d7b3dSmrg src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = 3665428d7b3dSmrg frame->pitch[0]; 3666428d7b3dSmrg n_src = 6; 3667428d7b3dSmrg } else { 3668428d7b3dSmrg if (frame->id == FOURCC_UYVY) 3669428d7b3dSmrg src_surf_format = SURFACEFORMAT_YCRCB_SWAPY; 3670428d7b3dSmrg else 3671428d7b3dSmrg src_surf_format = SURFACEFORMAT_YCRCB_NORMAL; 3672428d7b3dSmrg 3673428d7b3dSmrg src_width[0] = frame->width; 3674428d7b3dSmrg src_height[0] = frame->height; 3675428d7b3dSmrg src_pitch[0] = frame->pitch[0]; 3676428d7b3dSmrg n_src = 1; 3677428d7b3dSmrg } 3678428d7b3dSmrg 3679428d7b3dSmrg binding_table = gen8_composite_get_binding_table(sna, &offset); 3680428d7b3dSmrg 3681428d7b3dSmrg binding_table[0] = 3682428d7b3dSmrg gen8_bind_bo(sna, 3683428d7b3dSmrg op->dst.bo, op->dst.width, op->dst.height, 3684428d7b3dSmrg gen8_get_dest_format(op->dst.format), 3685428d7b3dSmrg true); 3686428d7b3dSmrg for (n = 0; n < n_src; n++) { 3687428d7b3dSmrg binding_table[1+n] = 3688428d7b3dSmrg gen8_bind_video_source(sna, 3689428d7b3dSmrg frame->bo, 3690428d7b3dSmrg src_surf_base[n], 3691428d7b3dSmrg src_width[n], 3692428d7b3dSmrg src_height[n], 3693428d7b3dSmrg src_pitch[n], 3694428d7b3dSmrg src_surf_format); 3695428d7b3dSmrg } 3696428d7b3dSmrg 3697428d7b3dSmrg gen8_emit_state(sna, op, offset); 3698428d7b3dSmrg} 3699428d7b3dSmrg 3700428d7b3dSmrgstatic bool 3701428d7b3dSmrggen8_render_video(struct sna *sna, 3702428d7b3dSmrg struct sna_video *video, 3703428d7b3dSmrg struct sna_video_frame *frame, 3704428d7b3dSmrg RegionPtr dstRegion, 3705428d7b3dSmrg PixmapPtr pixmap) 3706428d7b3dSmrg{ 3707428d7b3dSmrg struct sna_composite_op tmp; 3708428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(pixmap); 3709428d7b3dSmrg int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; 3710428d7b3dSmrg int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; 3711428d7b3dSmrg int src_width = frame->src.x2 - frame->src.x1; 3712428d7b3dSmrg int src_height = frame->src.y2 - frame->src.y1; 3713428d7b3dSmrg float src_offset_x, src_offset_y; 3714428d7b3dSmrg float src_scale_x, src_scale_y; 3715428d7b3dSmrg int nbox, pix_xoff, pix_yoff; 3716428d7b3dSmrg unsigned filter; 3717428d7b3dSmrg const BoxRec *box; 3718428d7b3dSmrg 3719428d7b3dSmrg DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", 3720428d7b3dSmrg __FUNCTION__, 3721428d7b3dSmrg src_width, src_height, dst_width, dst_height, 3722428d7b3dSmrg region_num_rects(dstRegion), 3723428d7b3dSmrg REGION_EXTENTS(NULL, dstRegion)->x1, 3724428d7b3dSmrg REGION_EXTENTS(NULL, dstRegion)->y1, 3725428d7b3dSmrg REGION_EXTENTS(NULL, dstRegion)->x2, 3726428d7b3dSmrg REGION_EXTENTS(NULL, dstRegion)->y2)); 3727428d7b3dSmrg 3728428d7b3dSmrg assert(priv->gpu_bo); 3729428d7b3dSmrg assert(!too_large(pixmap->drawable.width, pixmap->drawable.height)); 3730428d7b3dSmrg assert(!unaligned(priv->gpu_bo, pixmap->drawable.bitsPerPixel)); 3731428d7b3dSmrg 3732428d7b3dSmrg memset(&tmp, 0, sizeof(tmp)); 3733428d7b3dSmrg 3734428d7b3dSmrg tmp.dst.pixmap = pixmap; 3735428d7b3dSmrg tmp.dst.width = pixmap->drawable.width; 3736428d7b3dSmrg tmp.dst.height = pixmap->drawable.height; 3737428d7b3dSmrg tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth); 3738428d7b3dSmrg tmp.dst.bo = priv->gpu_bo; 3739428d7b3dSmrg 3740428d7b3dSmrg tmp.src.bo = frame->bo; 3741428d7b3dSmrg tmp.mask.bo = NULL; 3742428d7b3dSmrg 3743428d7b3dSmrg tmp.floats_per_vertex = 3; 3744428d7b3dSmrg tmp.floats_per_rect = 9; 3745428d7b3dSmrg 3746428d7b3dSmrg if (src_width == dst_width && src_height == dst_height) 3747428d7b3dSmrg filter = SAMPLER_FILTER_NEAREST; 3748428d7b3dSmrg else 3749428d7b3dSmrg filter = SAMPLER_FILTER_BILINEAR; 3750428d7b3dSmrg 3751428d7b3dSmrg tmp.u.gen8.flags = 3752428d7b3dSmrg GEN8_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, 3753428d7b3dSmrg SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), 3754428d7b3dSmrg NO_BLEND, 3755428d7b3dSmrg is_planar_fourcc(frame->id) ? 3756428d7b3dSmrg GEN8_WM_KERNEL_VIDEO_PLANAR : 3757428d7b3dSmrg GEN8_WM_KERNEL_VIDEO_PACKED, 3758428d7b3dSmrg 2); 3759428d7b3dSmrg tmp.priv = frame; 3760428d7b3dSmrg 3761428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 3762428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { 3763428d7b3dSmrg kgem_submit(&sna->kgem); 3764428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) 3765428d7b3dSmrg return false; 3766428d7b3dSmrg 3767428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3768428d7b3dSmrg } 3769428d7b3dSmrg 3770428d7b3dSmrg gen8_align_vertex(sna, &tmp); 3771428d7b3dSmrg gen8_emit_video_state(sna, &tmp); 3772428d7b3dSmrg 3773428d7b3dSmrg /* Set up the offset for translating from the given region (in screen 3774428d7b3dSmrg * coordinates) to the backing pixmap. 3775428d7b3dSmrg */ 3776428d7b3dSmrg#ifdef COMPOSITE 3777428d7b3dSmrg pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 3778428d7b3dSmrg pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 3779428d7b3dSmrg#else 3780428d7b3dSmrg pix_xoff = 0; 3781428d7b3dSmrg pix_yoff = 0; 3782428d7b3dSmrg#endif 3783428d7b3dSmrg 3784428d7b3dSmrg DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n", 3785428d7b3dSmrg __FUNCTION__, 3786428d7b3dSmrg frame->src.x1, frame->src.y1, 3787428d7b3dSmrg src_width, src_height, 3788428d7b3dSmrg dst_width, dst_height, 3789428d7b3dSmrg frame->width, frame->height)); 3790428d7b3dSmrg 3791428d7b3dSmrg src_scale_x = (float)src_width / dst_width / frame->width; 3792428d7b3dSmrg src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; 3793428d7b3dSmrg 3794428d7b3dSmrg src_scale_y = (float)src_height / dst_height / frame->height; 3795428d7b3dSmrg src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; 3796428d7b3dSmrg 3797428d7b3dSmrg DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n", 3798428d7b3dSmrg __FUNCTION__, 3799428d7b3dSmrg src_scale_x, src_scale_y, 3800428d7b3dSmrg src_offset_x, src_offset_y)); 3801428d7b3dSmrg 3802428d7b3dSmrg box = region_rects(dstRegion); 3803428d7b3dSmrg nbox = region_num_rects(dstRegion); 3804428d7b3dSmrg while (nbox--) { 3805428d7b3dSmrg BoxRec r; 3806428d7b3dSmrg 3807428d7b3dSmrg DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n", 3808428d7b3dSmrg __FUNCTION__, 3809428d7b3dSmrg box->x1, box->y1, 3810428d7b3dSmrg box->x2, box->y2, 3811428d7b3dSmrg pix_xoff, pix_yoff, 3812428d7b3dSmrg box->x1 * src_scale_x + src_offset_x, 3813428d7b3dSmrg box->y1 * src_scale_y + src_offset_y, 3814428d7b3dSmrg box->x2 * src_scale_x + src_offset_x, 3815428d7b3dSmrg box->y2 * src_scale_y + src_offset_y)); 3816428d7b3dSmrg 3817428d7b3dSmrg r.x1 = box->x1 + pix_xoff; 3818428d7b3dSmrg r.x2 = box->x2 + pix_xoff; 3819428d7b3dSmrg r.y1 = box->y1 + pix_yoff; 3820428d7b3dSmrg r.y2 = box->y2 + pix_yoff; 3821428d7b3dSmrg 3822428d7b3dSmrg gen8_get_rectangles(sna, &tmp, 1, gen8_emit_video_state); 3823428d7b3dSmrg 3824428d7b3dSmrg OUT_VERTEX(r.x2, r.y2); 3825428d7b3dSmrg OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); 3826428d7b3dSmrg OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 3827428d7b3dSmrg 3828428d7b3dSmrg OUT_VERTEX(r.x1, r.y2); 3829428d7b3dSmrg OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 3830428d7b3dSmrg OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 3831428d7b3dSmrg 3832428d7b3dSmrg OUT_VERTEX(r.x1, r.y1); 3833428d7b3dSmrg OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 3834428d7b3dSmrg OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); 3835428d7b3dSmrg 3836428d7b3dSmrg if (!DAMAGE_IS_ALL(priv->gpu_damage)) { 3837428d7b3dSmrg sna_damage_add_box(&priv->gpu_damage, &r); 3838428d7b3dSmrg sna_damage_subtract_box(&priv->cpu_damage, &r); 3839428d7b3dSmrg } 3840428d7b3dSmrg box++; 3841428d7b3dSmrg } 3842428d7b3dSmrg 3843428d7b3dSmrg gen8_vertex_flush(sna); 3844428d7b3dSmrg return true; 3845428d7b3dSmrg} 3846428d7b3dSmrg#endif 3847428d7b3dSmrg 3848428d7b3dSmrgstatic void gen8_render_flush(struct sna *sna) 3849428d7b3dSmrg{ 3850428d7b3dSmrg gen8_vertex_close(sna); 3851428d7b3dSmrg 3852428d7b3dSmrg assert(sna->render.vb_id == 0); 3853428d7b3dSmrg assert(sna->render.vertex_offset == 0); 3854428d7b3dSmrg} 3855428d7b3dSmrg 3856428d7b3dSmrgstatic void gen8_render_reset(struct sna *sna) 3857428d7b3dSmrg{ 3858428d7b3dSmrg sna->render_state.gen8.emit_flush = false; 3859428d7b3dSmrg sna->render_state.gen8.needs_invariant = true; 3860428d7b3dSmrg sna->render_state.gen8.ve_id = 3 << 2; 3861428d7b3dSmrg sna->render_state.gen8.last_primitive = -1; 3862428d7b3dSmrg 3863428d7b3dSmrg sna->render_state.gen8.num_sf_outputs = 0; 3864428d7b3dSmrg sna->render_state.gen8.samplers = -1; 3865428d7b3dSmrg sna->render_state.gen8.blend = -1; 3866428d7b3dSmrg sna->render_state.gen8.kernel = -1; 3867428d7b3dSmrg sna->render_state.gen8.drawrect_offset = -1; 3868428d7b3dSmrg sna->render_state.gen8.drawrect_limit = -1; 3869428d7b3dSmrg sna->render_state.gen8.surface_table = 0; 3870428d7b3dSmrg 3871428d7b3dSmrg if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { 3872428d7b3dSmrg DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); 3873428d7b3dSmrg discard_vbo(sna); 3874428d7b3dSmrg } 3875428d7b3dSmrg 3876428d7b3dSmrg sna->render.vertex_offset = 0; 3877428d7b3dSmrg sna->render.nvertex_reloc = 0; 3878428d7b3dSmrg sna->render.vb_id = 0; 3879428d7b3dSmrg} 3880428d7b3dSmrg 3881428d7b3dSmrgstatic void gen8_render_fini(struct sna *sna) 3882428d7b3dSmrg{ 3883428d7b3dSmrg kgem_bo_destroy(&sna->kgem, sna->render_state.gen8.general_bo); 3884428d7b3dSmrg} 3885428d7b3dSmrg 3886428d7b3dSmrgstatic bool gen8_render_setup(struct sna *sna) 3887428d7b3dSmrg{ 3888428d7b3dSmrg struct gen8_render_state *state = &sna->render_state.gen8; 3889428d7b3dSmrg struct sna_static_stream general; 3890428d7b3dSmrg struct gen8_sampler_state *ss; 3891428d7b3dSmrg int i, j, k, l, m; 3892428d7b3dSmrg uint32_t devid; 3893428d7b3dSmrg 3894428d7b3dSmrg devid = intel_get_device_id(sna->dev); 3895428d7b3dSmrg if (devid & 0xf) 3896428d7b3dSmrg state->gt = ((devid >> 4) & 0xf) + 1; 3897428d7b3dSmrg DBG(("%s: gt=%d\n", __FUNCTION__, state->gt)); 3898428d7b3dSmrg 3899428d7b3dSmrg sna_static_stream_init(&general); 3900428d7b3dSmrg 3901428d7b3dSmrg /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer 3902428d7b3dSmrg * dumps, you know it points to zero. 3903428d7b3dSmrg */ 3904428d7b3dSmrg null_create(&general); 3905428d7b3dSmrg 3906428d7b3dSmrg for (m = 0; m < ARRAY_SIZE(wm_kernels); m++) { 3907428d7b3dSmrg if (wm_kernels[m].size) { 3908428d7b3dSmrg state->wm_kernel[m][1] = 3909428d7b3dSmrg sna_static_stream_add(&general, 3910428d7b3dSmrg wm_kernels[m].data, 3911428d7b3dSmrg wm_kernels[m].size, 3912428d7b3dSmrg 64); 3913428d7b3dSmrg } else { 3914428d7b3dSmrg if (USE_8_PIXEL_DISPATCH) { 3915428d7b3dSmrg state->wm_kernel[m][0] = 3916428d7b3dSmrg sna_static_stream_compile_wm(sna, &general, 3917428d7b3dSmrg wm_kernels[m].data, 8); 3918428d7b3dSmrg } 3919428d7b3dSmrg 3920428d7b3dSmrg if (USE_16_PIXEL_DISPATCH) { 3921428d7b3dSmrg state->wm_kernel[m][1] = 3922428d7b3dSmrg sna_static_stream_compile_wm(sna, &general, 3923428d7b3dSmrg wm_kernels[m].data, 16); 3924428d7b3dSmrg } 3925428d7b3dSmrg 3926428d7b3dSmrg if (USE_32_PIXEL_DISPATCH) { 3927428d7b3dSmrg state->wm_kernel[m][2] = 3928428d7b3dSmrg sna_static_stream_compile_wm(sna, &general, 3929428d7b3dSmrg wm_kernels[m].data, 32); 3930428d7b3dSmrg } 3931428d7b3dSmrg } 3932428d7b3dSmrg assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]); 3933428d7b3dSmrg } 3934428d7b3dSmrg 3935428d7b3dSmrg COMPILE_TIME_ASSERT(SAMPLER_OFFSET(FILTER_COUNT, EXTEND_COUNT, FILTER_COUNT, EXTEND_COUNT) <= 0x7ff); 3936428d7b3dSmrg ss = sna_static_stream_map(&general, 3937428d7b3dSmrg 2 * sizeof(*ss) * 3938428d7b3dSmrg (2 + 3939428d7b3dSmrg FILTER_COUNT * EXTEND_COUNT * 3940428d7b3dSmrg FILTER_COUNT * EXTEND_COUNT), 3941428d7b3dSmrg 32); 3942428d7b3dSmrg state->wm_state = sna_static_stream_offsetof(&general, ss); 3943428d7b3dSmrg sampler_copy_init(ss); ss += 2; 3944428d7b3dSmrg sampler_fill_init(ss); ss += 2; 3945428d7b3dSmrg for (i = 0; i < FILTER_COUNT; i++) { 3946428d7b3dSmrg for (j = 0; j < EXTEND_COUNT; j++) { 3947428d7b3dSmrg for (k = 0; k < FILTER_COUNT; k++) { 3948428d7b3dSmrg for (l = 0; l < EXTEND_COUNT; l++) { 3949428d7b3dSmrg sampler_state_init(ss++, i, j); 3950428d7b3dSmrg sampler_state_init(ss++, k, l); 3951428d7b3dSmrg } 3952428d7b3dSmrg } 3953428d7b3dSmrg } 3954428d7b3dSmrg } 3955428d7b3dSmrg 3956428d7b3dSmrg state->cc_blend = gen8_create_blend_state(&general); 3957428d7b3dSmrg 3958428d7b3dSmrg state->general_bo = sna_static_stream_fini(sna, &general); 3959428d7b3dSmrg return state->general_bo != NULL; 3960428d7b3dSmrg} 3961428d7b3dSmrg 3962428d7b3dSmrgconst char *gen8_render_init(struct sna *sna, const char *backend) 3963428d7b3dSmrg{ 3964428d7b3dSmrg if (!gen8_render_setup(sna)) 3965428d7b3dSmrg return backend; 3966428d7b3dSmrg 3967428d7b3dSmrg sna->kgem.context_switch = gen6_render_context_switch; 3968428d7b3dSmrg sna->kgem.retire = gen6_render_retire; 3969428d7b3dSmrg sna->kgem.expire = gen4_render_expire; 3970428d7b3dSmrg 3971428d7b3dSmrg#if !NO_COMPOSITE 3972428d7b3dSmrg sna->render.composite = gen8_render_composite; 3973428d7b3dSmrg sna->render.prefer_gpu |= PREFER_GPU_RENDER; 3974428d7b3dSmrg#endif 3975428d7b3dSmrg#if !NO_COMPOSITE_SPANS 3976428d7b3dSmrg sna->render.check_composite_spans = gen8_check_composite_spans; 3977428d7b3dSmrg sna->render.composite_spans = gen8_render_composite_spans; 3978428d7b3dSmrg sna->render.prefer_gpu |= PREFER_GPU_SPANS; 3979428d7b3dSmrg#endif 3980428d7b3dSmrg#if !NO_VIDEO 3981428d7b3dSmrg sna->render.video = gen8_render_video; 3982428d7b3dSmrg#endif 3983428d7b3dSmrg 3984428d7b3dSmrg#if !NO_COPY_BOXES 3985428d7b3dSmrg sna->render.copy_boxes = gen8_render_copy_boxes; 3986428d7b3dSmrg#endif 3987428d7b3dSmrg#if !NO_COPY 3988428d7b3dSmrg sna->render.copy = gen8_render_copy; 3989428d7b3dSmrg#endif 3990428d7b3dSmrg 3991428d7b3dSmrg#if !NO_FILL_BOXES 3992428d7b3dSmrg sna->render.fill_boxes = gen8_render_fill_boxes; 3993428d7b3dSmrg#endif 3994428d7b3dSmrg#if !NO_FILL 3995428d7b3dSmrg sna->render.fill = gen8_render_fill; 3996428d7b3dSmrg#endif 3997428d7b3dSmrg#if !NO_FILL_ONE 3998428d7b3dSmrg sna->render.fill_one = gen8_render_fill_one; 3999428d7b3dSmrg#endif 4000428d7b3dSmrg#if !NO_FILL_CLEAR 4001428d7b3dSmrg sna->render.clear = gen8_render_clear; 4002428d7b3dSmrg#endif 4003428d7b3dSmrg 4004428d7b3dSmrg sna->render.flush = gen8_render_flush; 4005428d7b3dSmrg sna->render.reset = gen8_render_reset; 4006428d7b3dSmrg sna->render.fini = gen8_render_fini; 4007428d7b3dSmrg 4008428d7b3dSmrg sna->render.max_3d_size = GEN8_MAX_SIZE; 4009428d7b3dSmrg sna->render.max_3d_pitch = 1 << 18; 4010428d7b3dSmrg return "Broadwell"; 4011428d7b3dSmrg} 4012