1428d7b3dSmrg/* 2428d7b3dSmrg * Copyright © 2006,2008,2011 Intel Corporation 3428d7b3dSmrg * Copyright © 2007 Red Hat, Inc. 4428d7b3dSmrg * 5428d7b3dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 6428d7b3dSmrg * copy of this software and associated documentation files (the "Software"), 7428d7b3dSmrg * to deal in the Software without restriction, including without limitation 8428d7b3dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9428d7b3dSmrg * and/or sell copies of the Software, and to permit persons to whom the 10428d7b3dSmrg * Software is furnished to do so, subject to the following conditions: 11428d7b3dSmrg * 12428d7b3dSmrg * The above copyright notice and this permission notice (including the next 13428d7b3dSmrg * paragraph) shall be included in all copies or substantial portions of the 14428d7b3dSmrg * Software. 15428d7b3dSmrg * 16428d7b3dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17428d7b3dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18428d7b3dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19428d7b3dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20428d7b3dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21428d7b3dSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22428d7b3dSmrg * SOFTWARE. 23428d7b3dSmrg * 24428d7b3dSmrg * Authors: 25428d7b3dSmrg * Wang Zhenyu <zhenyu.z.wang@sna.com> 26428d7b3dSmrg * Eric Anholt <eric@anholt.net> 27428d7b3dSmrg * Carl Worth <cworth@redhat.com> 28428d7b3dSmrg * Keith Packard <keithp@keithp.com> 29428d7b3dSmrg * Chris Wilson <chris@chris-wilson.co.uk> 30428d7b3dSmrg * 31428d7b3dSmrg */ 32428d7b3dSmrg 33428d7b3dSmrg#ifdef HAVE_CONFIG_H 34428d7b3dSmrg#include "config.h" 35428d7b3dSmrg#endif 36428d7b3dSmrg 37428d7b3dSmrg#include "sna.h" 38428d7b3dSmrg#include "sna_reg.h" 39428d7b3dSmrg#include "sna_render.h" 40428d7b3dSmrg#include "sna_render_inline.h" 41428d7b3dSmrg#include "sna_video.h" 42428d7b3dSmrg 43428d7b3dSmrg#include "brw/brw.h" 44428d7b3dSmrg#include "gen7_render.h" 45428d7b3dSmrg#include "gen4_common.h" 46428d7b3dSmrg#include "gen4_source.h" 47428d7b3dSmrg#include "gen4_vertex.h" 48428d7b3dSmrg#include "gen6_common.h" 49428d7b3dSmrg 50428d7b3dSmrg#define ALWAYS_INVALIDATE 0 51428d7b3dSmrg#define ALWAYS_FLUSH 0 52428d7b3dSmrg#define ALWAYS_STALL 0 53428d7b3dSmrg 54428d7b3dSmrg#define NO_COMPOSITE 0 55428d7b3dSmrg#define NO_COMPOSITE_SPANS 0 56428d7b3dSmrg#define NO_COPY 0 57428d7b3dSmrg#define NO_COPY_BOXES 0 58428d7b3dSmrg#define NO_FILL 0 59428d7b3dSmrg#define NO_FILL_BOXES 0 60428d7b3dSmrg#define NO_FILL_ONE 0 61428d7b3dSmrg#define NO_FILL_CLEAR 0 62428d7b3dSmrg 63428d7b3dSmrg#define NO_RING_SWITCH 0 64428d7b3dSmrg 65428d7b3dSmrg#define USE_8_PIXEL_DISPATCH 1 66428d7b3dSmrg#define USE_16_PIXEL_DISPATCH 1 67428d7b3dSmrg#define USE_32_PIXEL_DISPATCH 0 68428d7b3dSmrg 69428d7b3dSmrg#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH 70428d7b3dSmrg#error "Must select at least 8, 16 or 32 pixel dispatch" 71428d7b3dSmrg#endif 72428d7b3dSmrg 73428d7b3dSmrg#define GEN7_MAX_SIZE 16384 74428d7b3dSmrg 75428d7b3dSmrg/* XXX Todo 76428d7b3dSmrg * 77428d7b3dSmrg * STR (software tiled rendering) mode. No, really. 78428d7b3dSmrg * 64x32 pixel blocks align with the rendering cache. Worth considering. 79428d7b3dSmrg */ 80428d7b3dSmrg 81428d7b3dSmrg#define is_aligned(x, y) (((x) & ((y) - 1)) == 0) 82428d7b3dSmrg 83428d7b3dSmrgstruct gt_info { 84428d7b3dSmrg const char *name; 85428d7b3dSmrg uint32_t max_vs_threads; 86428d7b3dSmrg uint32_t max_gs_threads; 87428d7b3dSmrg uint32_t max_wm_threads; 88428d7b3dSmrg struct { 89428d7b3dSmrg int size; 90428d7b3dSmrg int max_vs_entries; 91428d7b3dSmrg int max_gs_entries; 92428d7b3dSmrg int push_ps_size; /* in 1KBs */ 93428d7b3dSmrg } urb; 94428d7b3dSmrg int gt; 95428d7b3dSmrg uint32_t mocs; 96428d7b3dSmrg}; 97428d7b3dSmrg 98428d7b3dSmrgstatic const struct gt_info ivb_gt_info = { 99428d7b3dSmrg .name = "Ivybridge (gen7)", 100428d7b3dSmrg .max_vs_threads = 16, 101428d7b3dSmrg .max_gs_threads = 16, 102428d7b3dSmrg .max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT, 103428d7b3dSmrg .urb = { 128, 64, 64, 8 }, 104428d7b3dSmrg .gt = 0, 105428d7b3dSmrg}; 106428d7b3dSmrg 107428d7b3dSmrgstatic const struct gt_info ivb_gt1_info = { 108428d7b3dSmrg .name = "Ivybridge (gen7, gt1)", 109428d7b3dSmrg .max_vs_threads = 36, 110428d7b3dSmrg .max_gs_threads = 36, 111428d7b3dSmrg .max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT, 112428d7b3dSmrg .urb = { 128, 512, 192, 8 }, 113428d7b3dSmrg .gt = 1, 114428d7b3dSmrg .mocs = 3, 115428d7b3dSmrg}; 116428d7b3dSmrg 117428d7b3dSmrgstatic const struct gt_info ivb_gt2_info = { 118428d7b3dSmrg .name = "Ivybridge (gen7, gt2)", 119428d7b3dSmrg .max_vs_threads = 128, 120428d7b3dSmrg .max_gs_threads = 128, 121428d7b3dSmrg .max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT, 122428d7b3dSmrg .urb = { 256, 704, 320, 8 }, 123428d7b3dSmrg .gt = 2, 124428d7b3dSmrg .mocs = 3, 125428d7b3dSmrg}; 126428d7b3dSmrg 127428d7b3dSmrgstatic const struct gt_info byt_gt_info = { 128428d7b3dSmrg .name = "Baytrail (gen7)", 129428d7b3dSmrg .max_vs_threads = 36, 130428d7b3dSmrg .max_gs_threads = 36, 131428d7b3dSmrg .max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT, 132428d7b3dSmrg .urb = { 128, 512, 192, 8 }, 133428d7b3dSmrg .gt = 1, 134428d7b3dSmrg}; 135428d7b3dSmrg 136428d7b3dSmrgstatic const struct gt_info hsw_gt_info = { 137428d7b3dSmrg .name = "Haswell (gen7.5)", 138428d7b3dSmrg .max_vs_threads = 8, 139428d7b3dSmrg .max_gs_threads = 8, 140428d7b3dSmrg .max_wm_threads = 141428d7b3dSmrg (8 - 1) << HSW_PS_MAX_THREADS_SHIFT | 142428d7b3dSmrg 1 << HSW_PS_SAMPLE_MASK_SHIFT, 143428d7b3dSmrg .urb = { 128, 64, 64, 8 }, 144428d7b3dSmrg .gt = 0, 145428d7b3dSmrg}; 146428d7b3dSmrg 147428d7b3dSmrgstatic const struct gt_info hsw_gt1_info = { 148428d7b3dSmrg .name = "Haswell (gen7.5, gt1)", 149428d7b3dSmrg .max_vs_threads = 70, 150428d7b3dSmrg .max_gs_threads = 70, 151428d7b3dSmrg .max_wm_threads = 152428d7b3dSmrg (102 - 1) << HSW_PS_MAX_THREADS_SHIFT | 153428d7b3dSmrg 1 << HSW_PS_SAMPLE_MASK_SHIFT, 154428d7b3dSmrg .urb = { 128, 640, 256, 8 }, 155428d7b3dSmrg .gt = 1, 156428d7b3dSmrg .mocs = 5, 157428d7b3dSmrg}; 158428d7b3dSmrg 159428d7b3dSmrgstatic const struct gt_info hsw_gt2_info = { 160428d7b3dSmrg .name = "Haswell (gen7.5, gt2)", 161428d7b3dSmrg .max_vs_threads = 140, 162428d7b3dSmrg .max_gs_threads = 140, 163428d7b3dSmrg .max_wm_threads = 164428d7b3dSmrg (140 - 1) << HSW_PS_MAX_THREADS_SHIFT | 165428d7b3dSmrg 1 << HSW_PS_SAMPLE_MASK_SHIFT, 166428d7b3dSmrg .urb = { 256, 1664, 640, 8 }, 167428d7b3dSmrg .gt = 2, 168428d7b3dSmrg .mocs = 5, 169428d7b3dSmrg}; 170428d7b3dSmrg 171428d7b3dSmrgstatic const struct gt_info hsw_gt3_info = { 172428d7b3dSmrg .name = "Haswell (gen7.5, gt3)", 173428d7b3dSmrg .max_vs_threads = 280, 174428d7b3dSmrg .max_gs_threads = 280, 175428d7b3dSmrg .max_wm_threads = 176428d7b3dSmrg (280 - 1) << HSW_PS_MAX_THREADS_SHIFT | 177428d7b3dSmrg 1 << HSW_PS_SAMPLE_MASK_SHIFT, 178428d7b3dSmrg .urb = { 512, 3328, 1280, 16 }, 179428d7b3dSmrg .gt = 3, 180428d7b3dSmrg .mocs = 5, 181428d7b3dSmrg}; 182428d7b3dSmrg 183428d7b3dSmrginline static bool is_ivb(struct sna *sna) 184428d7b3dSmrg{ 185428d7b3dSmrg return sna->kgem.gen == 070; 186428d7b3dSmrg} 187428d7b3dSmrg 188428d7b3dSmrginline static bool is_byt(struct sna *sna) 189428d7b3dSmrg{ 190428d7b3dSmrg return sna->kgem.gen == 071; 191428d7b3dSmrg} 192428d7b3dSmrg 193428d7b3dSmrginline static bool is_hsw(struct sna *sna) 194428d7b3dSmrg{ 195428d7b3dSmrg return sna->kgem.gen == 075; 196428d7b3dSmrg} 197428d7b3dSmrg 198428d7b3dSmrgstatic const uint32_t ps_kernel_packed[][4] = { 199428d7b3dSmrg#include "exa_wm_src_affine.g7b" 200428d7b3dSmrg#include "exa_wm_src_sample_argb.g7b" 201428d7b3dSmrg#include "exa_wm_yuv_rgb.g7b" 202428d7b3dSmrg#include "exa_wm_write.g7b" 203428d7b3dSmrg}; 204428d7b3dSmrg 205428d7b3dSmrgstatic const uint32_t ps_kernel_planar[][4] = { 206428d7b3dSmrg#include "exa_wm_src_affine.g7b" 207428d7b3dSmrg#include "exa_wm_src_sample_planar.g7b" 208428d7b3dSmrg#include "exa_wm_yuv_rgb.g7b" 209428d7b3dSmrg#include "exa_wm_write.g7b" 210428d7b3dSmrg}; 211428d7b3dSmrg 212428d7b3dSmrg#define KERNEL(kernel_enum, kernel, num_surfaces) \ 213428d7b3dSmrg [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces} 214428d7b3dSmrg#define NOKERNEL(kernel_enum, func, num_surfaces) \ 215428d7b3dSmrg [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces} 216428d7b3dSmrgstatic const struct wm_kernel_info { 217428d7b3dSmrg const char *name; 218428d7b3dSmrg const void *data; 219428d7b3dSmrg unsigned int size; 220428d7b3dSmrg int num_surfaces; 221428d7b3dSmrg} wm_kernels[] = { 222428d7b3dSmrg NOKERNEL(NOMASK, brw_wm_kernel__affine, 2), 223428d7b3dSmrg NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2), 224428d7b3dSmrg 225428d7b3dSmrg NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3), 226428d7b3dSmrg NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3), 227428d7b3dSmrg 228428d7b3dSmrg NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3), 229428d7b3dSmrg NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3), 230428d7b3dSmrg 231428d7b3dSmrg NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3), 232428d7b3dSmrg NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3), 233428d7b3dSmrg 234428d7b3dSmrg NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2), 235428d7b3dSmrg NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2), 236428d7b3dSmrg 237428d7b3dSmrg KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), 238428d7b3dSmrg KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), 239428d7b3dSmrg}; 240428d7b3dSmrg#undef KERNEL 241428d7b3dSmrg 242428d7b3dSmrgstatic const struct blendinfo { 243428d7b3dSmrg bool src_alpha; 244428d7b3dSmrg uint32_t src_blend; 245428d7b3dSmrg uint32_t dst_blend; 246428d7b3dSmrg} gen7_blend_op[] = { 247428d7b3dSmrg /* Clear */ {0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO}, 248428d7b3dSmrg /* Src */ {0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO}, 249428d7b3dSmrg /* Dst */ {0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ONE}, 250428d7b3dSmrg /* Over */ {1, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_INV_SRC_ALPHA}, 251428d7b3dSmrg /* OverReverse */ {0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ONE}, 252428d7b3dSmrg /* In */ {0, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_ZERO}, 253428d7b3dSmrg /* InReverse */ {1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_SRC_ALPHA}, 254428d7b3dSmrg /* Out */ {0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ZERO}, 255428d7b3dSmrg /* OutReverse */ {1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_INV_SRC_ALPHA}, 256428d7b3dSmrg /* Atop */ {1, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA}, 257428d7b3dSmrg /* AtopReverse */ {1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_SRC_ALPHA}, 258428d7b3dSmrg /* Xor */ {1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA}, 259428d7b3dSmrg /* Add */ {0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ONE}, 260428d7b3dSmrg}; 261428d7b3dSmrg 262428d7b3dSmrg/** 263428d7b3dSmrg * Highest-valued BLENDFACTOR used in gen7_blend_op. 264428d7b3dSmrg * 265428d7b3dSmrg * This leaves out GEN7_BLENDFACTOR_INV_DST_COLOR, 266428d7b3dSmrg * GEN7_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, 267428d7b3dSmrg * GEN7_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} 268428d7b3dSmrg */ 269428d7b3dSmrg#define GEN7_BLENDFACTOR_COUNT (GEN7_BLENDFACTOR_INV_DST_ALPHA + 1) 270428d7b3dSmrg 271428d7b3dSmrg#define GEN7_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen7_blend_state), 64) 272428d7b3dSmrg 273428d7b3dSmrg#define BLEND_OFFSET(s, d) \ 274428d7b3dSmrg ((d != GEN7_BLENDFACTOR_ZERO) << 15 | \ 275428d7b3dSmrg (((s) * GEN7_BLENDFACTOR_COUNT + (d)) * GEN7_BLEND_STATE_PADDED_SIZE)) 276428d7b3dSmrg 277428d7b3dSmrg#define NO_BLEND BLEND_OFFSET(GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO) 278428d7b3dSmrg#define CLEAR BLEND_OFFSET(GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO) 279428d7b3dSmrg 280428d7b3dSmrg#define SAMPLER_OFFSET(sf, se, mf, me) \ 281428d7b3dSmrg ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) * 2 * sizeof(struct gen7_sampler_state)) 282428d7b3dSmrg 283428d7b3dSmrg#define VERTEX_2s2s 0 284428d7b3dSmrg 285428d7b3dSmrg#define COPY_SAMPLER 0 286428d7b3dSmrg#define COPY_VERTEX VERTEX_2s2s 287428d7b3dSmrg#define COPY_FLAGS(a) GEN7_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN7_WM_KERNEL_NOMASK, COPY_VERTEX) 288428d7b3dSmrg 289428d7b3dSmrg#define FILL_SAMPLER (2 * sizeof(struct gen7_sampler_state)) 290428d7b3dSmrg#define FILL_VERTEX VERTEX_2s2s 291428d7b3dSmrg#define FILL_FLAGS(op, format) GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), GEN7_WM_KERNEL_NOMASK, FILL_VERTEX) 292428d7b3dSmrg#define FILL_FLAGS_NOBLEND GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK, FILL_VERTEX) 293428d7b3dSmrg 294428d7b3dSmrg#define GEN7_SAMPLER(f) (((f) >> 16) & 0xfff0) 295428d7b3dSmrg#define GEN7_BLEND(f) (((f) >> 0) & 0x7ff0) 296428d7b3dSmrg#define GEN7_READS_DST(f) (((f) >> 15) & 1) 297428d7b3dSmrg#define GEN7_KERNEL(f) (((f) >> 16) & 0xf) 298428d7b3dSmrg#define GEN7_VERTEX(f) (((f) >> 0) & 0xf) 299428d7b3dSmrg#define GEN7_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V))) 300428d7b3dSmrg 301428d7b3dSmrg#define OUT_BATCH(v) batch_emit(sna, v) 302428d7b3dSmrg#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) 303428d7b3dSmrg#define OUT_VERTEX_F(v) vertex_emit(sna, v) 304428d7b3dSmrg 305428d7b3dSmrgstatic inline bool too_large(int width, int height) 306428d7b3dSmrg{ 307428d7b3dSmrg return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE; 308428d7b3dSmrg} 309428d7b3dSmrg 310428d7b3dSmrgstatic uint32_t gen7_get_blend(int op, 311428d7b3dSmrg bool has_component_alpha, 312428d7b3dSmrg uint32_t dst_format) 313428d7b3dSmrg{ 314428d7b3dSmrg uint32_t src, dst; 315428d7b3dSmrg 316428d7b3dSmrg src = gen7_blend_op[op].src_blend; 317428d7b3dSmrg dst = gen7_blend_op[op].dst_blend; 318428d7b3dSmrg 319428d7b3dSmrg /* If there's no dst alpha channel, adjust the blend op so that 320428d7b3dSmrg * we'll treat it always as 1. 321428d7b3dSmrg */ 322428d7b3dSmrg if (PICT_FORMAT_A(dst_format) == 0) { 323428d7b3dSmrg if (src == GEN7_BLENDFACTOR_DST_ALPHA) 324428d7b3dSmrg src = GEN7_BLENDFACTOR_ONE; 325428d7b3dSmrg else if (src == GEN7_BLENDFACTOR_INV_DST_ALPHA) 326428d7b3dSmrg src = GEN7_BLENDFACTOR_ZERO; 327428d7b3dSmrg } 328428d7b3dSmrg 329428d7b3dSmrg /* If the source alpha is being used, then we should only be in a 330428d7b3dSmrg * case where the source blend factor is 0, and the source blend 331428d7b3dSmrg * value is the mask channels multiplied by the source picture's alpha. 332428d7b3dSmrg */ 333428d7b3dSmrg if (has_component_alpha && gen7_blend_op[op].src_alpha) { 334428d7b3dSmrg if (dst == GEN7_BLENDFACTOR_SRC_ALPHA) 335428d7b3dSmrg dst = GEN7_BLENDFACTOR_SRC_COLOR; 336428d7b3dSmrg else if (dst == GEN7_BLENDFACTOR_INV_SRC_ALPHA) 337428d7b3dSmrg dst = GEN7_BLENDFACTOR_INV_SRC_COLOR; 338428d7b3dSmrg } 339428d7b3dSmrg 340428d7b3dSmrg DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", 341428d7b3dSmrg op, dst_format, PICT_FORMAT_A(dst_format), 342428d7b3dSmrg src, dst, (int)BLEND_OFFSET(src, dst))); 343428d7b3dSmrg return BLEND_OFFSET(src, dst); 344428d7b3dSmrg} 345428d7b3dSmrg 346428d7b3dSmrgstatic uint32_t gen7_get_card_format(PictFormat format) 347428d7b3dSmrg{ 348428d7b3dSmrg switch (format) { 349428d7b3dSmrg default: 350428d7b3dSmrg return -1; 351428d7b3dSmrg case PICT_a8r8g8b8: 352428d7b3dSmrg return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM; 353428d7b3dSmrg case PICT_x8r8g8b8: 354428d7b3dSmrg return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM; 355428d7b3dSmrg case PICT_a8b8g8r8: 356428d7b3dSmrg return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM; 357428d7b3dSmrg case PICT_x8b8g8r8: 358428d7b3dSmrg return GEN7_SURFACEFORMAT_R8G8B8X8_UNORM; 359428d7b3dSmrg#ifdef PICT_a2r10g10b10 360428d7b3dSmrg case PICT_a2r10g10b10: 361428d7b3dSmrg return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM; 362428d7b3dSmrg case PICT_x2r10g10b10: 363428d7b3dSmrg return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM; 364428d7b3dSmrg#endif 365428d7b3dSmrg case PICT_r8g8b8: 366428d7b3dSmrg return GEN7_SURFACEFORMAT_R8G8B8_UNORM; 367428d7b3dSmrg case PICT_r5g6b5: 368428d7b3dSmrg return GEN7_SURFACEFORMAT_B5G6R5_UNORM; 369428d7b3dSmrg case PICT_a1r5g5b5: 370428d7b3dSmrg return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM; 371428d7b3dSmrg case PICT_a8: 372428d7b3dSmrg return GEN7_SURFACEFORMAT_A8_UNORM; 373428d7b3dSmrg case PICT_a4r4g4b4: 374428d7b3dSmrg return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM; 375428d7b3dSmrg } 376428d7b3dSmrg} 377428d7b3dSmrg 378428d7b3dSmrgstatic uint32_t gen7_get_dest_format(PictFormat format) 379428d7b3dSmrg{ 380428d7b3dSmrg switch (format) { 381428d7b3dSmrg default: 382428d7b3dSmrg return -1; 383428d7b3dSmrg case PICT_a8r8g8b8: 384428d7b3dSmrg case PICT_x8r8g8b8: 385428d7b3dSmrg return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM; 386428d7b3dSmrg case PICT_a8b8g8r8: 387428d7b3dSmrg case PICT_x8b8g8r8: 388428d7b3dSmrg return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM; 389428d7b3dSmrg#ifdef PICT_a2r10g10b10 390428d7b3dSmrg case PICT_a2r10g10b10: 391428d7b3dSmrg case PICT_x2r10g10b10: 392428d7b3dSmrg return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM; 393428d7b3dSmrg#endif 394428d7b3dSmrg case PICT_r5g6b5: 395428d7b3dSmrg return GEN7_SURFACEFORMAT_B5G6R5_UNORM; 396428d7b3dSmrg case PICT_x1r5g5b5: 397428d7b3dSmrg case PICT_a1r5g5b5: 398428d7b3dSmrg return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM; 399428d7b3dSmrg case PICT_a8: 400428d7b3dSmrg return GEN7_SURFACEFORMAT_A8_UNORM; 401428d7b3dSmrg case PICT_a4r4g4b4: 402428d7b3dSmrg case PICT_x4r4g4b4: 403428d7b3dSmrg return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM; 404428d7b3dSmrg } 405428d7b3dSmrg} 406428d7b3dSmrg 407428d7b3dSmrgstatic bool gen7_check_dst_format(PictFormat format) 408428d7b3dSmrg{ 409428d7b3dSmrg if (gen7_get_dest_format(format) != -1) 410428d7b3dSmrg return true; 411428d7b3dSmrg 412428d7b3dSmrg DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 413428d7b3dSmrg return false; 414428d7b3dSmrg} 415428d7b3dSmrg 416428d7b3dSmrgstatic bool gen7_check_format(uint32_t format) 417428d7b3dSmrg{ 418428d7b3dSmrg if (gen7_get_card_format(format) != -1) 419428d7b3dSmrg return true; 420428d7b3dSmrg 421428d7b3dSmrg DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 422428d7b3dSmrg return false; 423428d7b3dSmrg} 424428d7b3dSmrg 425428d7b3dSmrgstatic uint32_t gen7_filter(uint32_t filter) 426428d7b3dSmrg{ 427428d7b3dSmrg switch (filter) { 428428d7b3dSmrg default: 429428d7b3dSmrg assert(0); 430428d7b3dSmrg case PictFilterNearest: 431428d7b3dSmrg return SAMPLER_FILTER_NEAREST; 432428d7b3dSmrg case PictFilterBilinear: 433428d7b3dSmrg return SAMPLER_FILTER_BILINEAR; 434428d7b3dSmrg } 435428d7b3dSmrg} 436428d7b3dSmrg 437428d7b3dSmrgstatic uint32_t gen7_check_filter(PicturePtr picture) 438428d7b3dSmrg{ 439428d7b3dSmrg switch (picture->filter) { 440428d7b3dSmrg case PictFilterNearest: 441428d7b3dSmrg case PictFilterBilinear: 442428d7b3dSmrg return true; 443428d7b3dSmrg default: 444428d7b3dSmrg return false; 445428d7b3dSmrg } 446428d7b3dSmrg} 447428d7b3dSmrg 448428d7b3dSmrgstatic uint32_t gen7_repeat(uint32_t repeat) 449428d7b3dSmrg{ 450428d7b3dSmrg switch (repeat) { 451428d7b3dSmrg default: 452428d7b3dSmrg assert(0); 453428d7b3dSmrg case RepeatNone: 454428d7b3dSmrg return SAMPLER_EXTEND_NONE; 455428d7b3dSmrg case RepeatNormal: 456428d7b3dSmrg return SAMPLER_EXTEND_REPEAT; 457428d7b3dSmrg case RepeatPad: 458428d7b3dSmrg return SAMPLER_EXTEND_PAD; 459428d7b3dSmrg case RepeatReflect: 460428d7b3dSmrg return SAMPLER_EXTEND_REFLECT; 461428d7b3dSmrg } 462428d7b3dSmrg} 463428d7b3dSmrg 464428d7b3dSmrgstatic bool gen7_check_repeat(PicturePtr picture) 465428d7b3dSmrg{ 466428d7b3dSmrg if (!picture->repeat) 467428d7b3dSmrg return true; 468428d7b3dSmrg 469428d7b3dSmrg switch (picture->repeatType) { 470428d7b3dSmrg case RepeatNone: 471428d7b3dSmrg case RepeatNormal: 472428d7b3dSmrg case RepeatPad: 473428d7b3dSmrg case RepeatReflect: 474428d7b3dSmrg return true; 475428d7b3dSmrg default: 476428d7b3dSmrg return false; 477428d7b3dSmrg } 478428d7b3dSmrg} 479428d7b3dSmrg 480428d7b3dSmrgstatic int 481428d7b3dSmrggen7_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) 482428d7b3dSmrg{ 483428d7b3dSmrg int base; 484428d7b3dSmrg 485428d7b3dSmrg if (has_mask) { 486428d7b3dSmrg if (is_ca) { 487428d7b3dSmrg if (gen7_blend_op[op].src_alpha) 488428d7b3dSmrg base = GEN7_WM_KERNEL_MASKSA; 489428d7b3dSmrg else 490428d7b3dSmrg base = GEN7_WM_KERNEL_MASKCA; 491428d7b3dSmrg } else 492428d7b3dSmrg base = GEN7_WM_KERNEL_MASK; 493428d7b3dSmrg } else 494428d7b3dSmrg base = GEN7_WM_KERNEL_NOMASK; 495428d7b3dSmrg 496428d7b3dSmrg return base + !is_affine; 497428d7b3dSmrg} 498428d7b3dSmrg 499428d7b3dSmrgstatic void 500428d7b3dSmrggen7_emit_urb(struct sna *sna) 501428d7b3dSmrg{ 502428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); 503428d7b3dSmrg OUT_BATCH(sna->render_state.gen7.info->urb.push_ps_size); 504428d7b3dSmrg 505428d7b3dSmrg /* num of VS entries must be divisible by 8 if size < 9 */ 506428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2)); 507428d7b3dSmrg OUT_BATCH((sna->render_state.gen7.info->urb.max_vs_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) | 508428d7b3dSmrg (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT | 509428d7b3dSmrg (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); 510428d7b3dSmrg 511428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2)); 512428d7b3dSmrg OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | 513428d7b3dSmrg (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); 514428d7b3dSmrg 515428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2)); 516428d7b3dSmrg OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | 517428d7b3dSmrg (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); 518428d7b3dSmrg 519428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2)); 520428d7b3dSmrg OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | 521428d7b3dSmrg (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); 522428d7b3dSmrg} 523428d7b3dSmrg 524428d7b3dSmrgstatic void 525428d7b3dSmrggen7_emit_state_base_address(struct sna *sna) 526428d7b3dSmrg{ 527428d7b3dSmrg uint32_t mocs = sna->render_state.gen7.info->mocs << 8; 528428d7b3dSmrg 529428d7b3dSmrg OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2)); 530428d7b3dSmrg OUT_BATCH(0); /* general */ 531428d7b3dSmrg OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */ 532428d7b3dSmrg sna->kgem.nbatch, 533428d7b3dSmrg NULL, 534428d7b3dSmrg I915_GEM_DOMAIN_INSTRUCTION << 16, 535428d7b3dSmrg BASE_ADDRESS_MODIFY)); 536428d7b3dSmrg OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic */ 537428d7b3dSmrg sna->kgem.nbatch, 538428d7b3dSmrg sna->render_state.gen7.general_bo, 539428d7b3dSmrg I915_GEM_DOMAIN_INSTRUCTION << 16, 540428d7b3dSmrg mocs | BASE_ADDRESS_MODIFY)); 541428d7b3dSmrg OUT_BATCH(0); /* indirect */ 542428d7b3dSmrg OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */ 543428d7b3dSmrg sna->kgem.nbatch, 544428d7b3dSmrg sna->render_state.gen7.general_bo, 545428d7b3dSmrg I915_GEM_DOMAIN_INSTRUCTION << 16, 546428d7b3dSmrg mocs | BASE_ADDRESS_MODIFY)); 547428d7b3dSmrg 548428d7b3dSmrg /* upper bounds, disable */ 549428d7b3dSmrg OUT_BATCH(0); 550428d7b3dSmrg OUT_BATCH(BASE_ADDRESS_MODIFY); 551428d7b3dSmrg OUT_BATCH(0); 552428d7b3dSmrg OUT_BATCH(BASE_ADDRESS_MODIFY); 553428d7b3dSmrg} 554428d7b3dSmrg 555428d7b3dSmrgstatic void 556428d7b3dSmrggen7_disable_vs(struct sna *sna) 557428d7b3dSmrg{ 558428d7b3dSmrg /* For future reference: 559428d7b3dSmrg * A PIPE_CONTROL with post-sync op set to 1 and a depth stall needs 560428d7b3dSmrg * to be emitted just prior to change VS state, i.e. 3DSTATE_VS, 561428d7b3dSmrg * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS, 562428d7b3dSmrg * 3DSTATE_BINDING_TABLE_POINTER_VS, 3DSTATE_SAMPLER_STATE_POINTER_VS. 563428d7b3dSmrg * 564428d7b3dSmrg * Here we saved by the full-flush incurred when emitting 565428d7b3dSmrg * the batchbuffer. 566428d7b3dSmrg */ 567428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_VS | (6 - 2)); 568428d7b3dSmrg OUT_BATCH(0); /* no VS kernel */ 569428d7b3dSmrg OUT_BATCH(0); 570428d7b3dSmrg OUT_BATCH(0); 571428d7b3dSmrg OUT_BATCH(0); 572428d7b3dSmrg OUT_BATCH(0); /* pass-through */ 573428d7b3dSmrg 574428d7b3dSmrg#if 0 575428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_CONSTANT_VS | (7 - 2)); 576428d7b3dSmrg OUT_BATCH(0); 577428d7b3dSmrg OUT_BATCH(0); 578428d7b3dSmrg OUT_BATCH(0); 579428d7b3dSmrg OUT_BATCH(0); 580428d7b3dSmrg OUT_BATCH(0); 581428d7b3dSmrg OUT_BATCH(0); 582428d7b3dSmrg 583428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2)); 584428d7b3dSmrg OUT_BATCH(0); 585428d7b3dSmrg 586428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2)); 587428d7b3dSmrg OUT_BATCH(0); 588428d7b3dSmrg#endif 589428d7b3dSmrg} 590428d7b3dSmrg 591428d7b3dSmrgstatic void 592428d7b3dSmrggen7_disable_hs(struct sna *sna) 593428d7b3dSmrg{ 594428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2)); 595428d7b3dSmrg OUT_BATCH(0); /* no HS kernel */ 596428d7b3dSmrg OUT_BATCH(0); 597428d7b3dSmrg OUT_BATCH(0); 598428d7b3dSmrg OUT_BATCH(0); 599428d7b3dSmrg OUT_BATCH(0); 600428d7b3dSmrg OUT_BATCH(0); /* pass-through */ 601428d7b3dSmrg 602428d7b3dSmrg#if 0 603428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2)); 604428d7b3dSmrg OUT_BATCH(0); 605428d7b3dSmrg OUT_BATCH(0); 606428d7b3dSmrg OUT_BATCH(0); 607428d7b3dSmrg OUT_BATCH(0); 608428d7b3dSmrg OUT_BATCH(0); 609428d7b3dSmrg OUT_BATCH(0); 610428d7b3dSmrg 611428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2)); 612428d7b3dSmrg OUT_BATCH(0); 613428d7b3dSmrg 614428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2)); 615428d7b3dSmrg OUT_BATCH(0); 616428d7b3dSmrg#endif 617428d7b3dSmrg} 618428d7b3dSmrg 619428d7b3dSmrgstatic void 620428d7b3dSmrggen7_disable_te(struct sna *sna) 621428d7b3dSmrg{ 622428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2)); 623428d7b3dSmrg OUT_BATCH(0); 624428d7b3dSmrg OUT_BATCH(0); 625428d7b3dSmrg OUT_BATCH(0); 626428d7b3dSmrg} 627428d7b3dSmrg 628428d7b3dSmrgstatic void 629428d7b3dSmrggen7_disable_ds(struct sna *sna) 630428d7b3dSmrg{ 631428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2)); 632428d7b3dSmrg OUT_BATCH(0); 633428d7b3dSmrg OUT_BATCH(0); 634428d7b3dSmrg OUT_BATCH(0); 635428d7b3dSmrg OUT_BATCH(0); 636428d7b3dSmrg OUT_BATCH(0); 637428d7b3dSmrg 638428d7b3dSmrg#if 0 639428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2)); 640428d7b3dSmrg OUT_BATCH(0); 641428d7b3dSmrg OUT_BATCH(0); 642428d7b3dSmrg OUT_BATCH(0); 643428d7b3dSmrg OUT_BATCH(0); 644428d7b3dSmrg OUT_BATCH(0); 645428d7b3dSmrg OUT_BATCH(0); 646428d7b3dSmrg 647428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2)); 648428d7b3dSmrg OUT_BATCH(0); 649428d7b3dSmrg 650428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2)); 651428d7b3dSmrg OUT_BATCH(0); 652428d7b3dSmrg#endif 653428d7b3dSmrg} 654428d7b3dSmrg 655428d7b3dSmrgstatic void 656428d7b3dSmrggen7_disable_gs(struct sna *sna) 657428d7b3dSmrg{ 658428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_GS | (7 - 2)); 659428d7b3dSmrg OUT_BATCH(0); /* no GS kernel */ 660428d7b3dSmrg OUT_BATCH(0); 661428d7b3dSmrg OUT_BATCH(0); 662428d7b3dSmrg OUT_BATCH(0); 663428d7b3dSmrg OUT_BATCH(0); 664428d7b3dSmrg OUT_BATCH(0); /* pass-through */ 665428d7b3dSmrg 666428d7b3dSmrg#if 0 667428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_CONSTANT_GS | (7 - 2)); 668428d7b3dSmrg OUT_BATCH(0); 669428d7b3dSmrg OUT_BATCH(0); 670428d7b3dSmrg OUT_BATCH(0); 671428d7b3dSmrg OUT_BATCH(0); 672428d7b3dSmrg OUT_BATCH(0); 673428d7b3dSmrg OUT_BATCH(0); 674428d7b3dSmrg 675428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2)); 676428d7b3dSmrg OUT_BATCH(0); 677428d7b3dSmrg 678428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2)); 679428d7b3dSmrg OUT_BATCH(0); 680428d7b3dSmrg#endif 681428d7b3dSmrg} 682428d7b3dSmrg 683428d7b3dSmrgstatic void 684428d7b3dSmrggen7_disable_streamout(struct sna *sna) 685428d7b3dSmrg{ 686428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2)); 687428d7b3dSmrg OUT_BATCH(0); 688428d7b3dSmrg OUT_BATCH(0); 689428d7b3dSmrg} 690428d7b3dSmrg 691428d7b3dSmrgstatic void 692428d7b3dSmrggen7_emit_sf_invariant(struct sna *sna) 693428d7b3dSmrg{ 694428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_SF | (7 - 2)); 695428d7b3dSmrg OUT_BATCH(0); 696428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_SF_CULL_NONE); 697428d7b3dSmrg OUT_BATCH(2 << GEN7_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); 698428d7b3dSmrg OUT_BATCH(0); 699428d7b3dSmrg OUT_BATCH(0); 700428d7b3dSmrg OUT_BATCH(0); 701428d7b3dSmrg} 702428d7b3dSmrg 703428d7b3dSmrgstatic void 704428d7b3dSmrggen7_emit_cc_invariant(struct sna *sna) 705428d7b3dSmrg{ 706428d7b3dSmrg#if 0 /* unused, no change */ 707428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_CC_STATE_POINTERS | (2 - 2)); 708428d7b3dSmrg OUT_BATCH(0); 709428d7b3dSmrg 710428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2)); 711428d7b3dSmrg OUT_BATCH(0); 712428d7b3dSmrg#endif 713428d7b3dSmrg 714428d7b3dSmrg /* XXX clear to be safe */ 715428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); 716428d7b3dSmrg OUT_BATCH(0); 717428d7b3dSmrg} 718428d7b3dSmrg 719428d7b3dSmrgstatic void 720428d7b3dSmrggen7_disable_clip(struct sna *sna) 721428d7b3dSmrg{ 722428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_CLIP | (4 - 2)); 723428d7b3dSmrg OUT_BATCH(0); 724428d7b3dSmrg OUT_BATCH(0); /* pass-through */ 725428d7b3dSmrg OUT_BATCH(0); 726428d7b3dSmrg 727428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2)); 728428d7b3dSmrg OUT_BATCH(0); 729428d7b3dSmrg} 730428d7b3dSmrg 731428d7b3dSmrgstatic void 732428d7b3dSmrggen7_emit_wm_invariant(struct sna *sna) 733428d7b3dSmrg{ 734428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_WM | (3 - 2)); 735428d7b3dSmrg OUT_BATCH(GEN7_WM_DISPATCH_ENABLE | 736428d7b3dSmrg GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 737428d7b3dSmrg OUT_BATCH(0); 738428d7b3dSmrg 739428d7b3dSmrg#if 0 740428d7b3dSmrg /* XXX length bias of 7 in old spec? */ 741428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_CONSTANT_PS | (7 - 2)); 742428d7b3dSmrg OUT_BATCH(0); 743428d7b3dSmrg OUT_BATCH(0); 744428d7b3dSmrg OUT_BATCH(0); 745428d7b3dSmrg OUT_BATCH(0); 746428d7b3dSmrg OUT_BATCH(0); 747428d7b3dSmrg OUT_BATCH(0); 748428d7b3dSmrg#endif 749428d7b3dSmrg} 750428d7b3dSmrg 751428d7b3dSmrgstatic void 752428d7b3dSmrggen7_emit_null_depth_buffer(struct sna *sna) 753428d7b3dSmrg{ 754428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2)); 755428d7b3dSmrg OUT_BATCH(GEN7_SURFACE_NULL << GEN7_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT | 756428d7b3dSmrg GEN7_DEPTHFORMAT_D32_FLOAT << GEN7_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT); 757428d7b3dSmrg OUT_BATCH(0); /* disable depth, stencil and hiz */ 758428d7b3dSmrg OUT_BATCH(0); 759428d7b3dSmrg OUT_BATCH(0); 760428d7b3dSmrg OUT_BATCH(0); 761428d7b3dSmrg OUT_BATCH(0); 762428d7b3dSmrg 763428d7b3dSmrg#if 0 764428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2)); 765428d7b3dSmrg OUT_BATCH(0); 766428d7b3dSmrg OUT_BATCH(0); 767428d7b3dSmrg#endif 768428d7b3dSmrg} 769428d7b3dSmrg 770428d7b3dSmrgstatic void 771428d7b3dSmrggen7_emit_invariant(struct sna *sna) 772428d7b3dSmrg{ 773428d7b3dSmrg OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D); 774428d7b3dSmrg 775428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE | (4 - 2)); 776428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | 777428d7b3dSmrg GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ 778428d7b3dSmrg OUT_BATCH(0); 779428d7b3dSmrg OUT_BATCH(0); 780428d7b3dSmrg 781428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_SAMPLE_MASK | (2 - 2)); 782428d7b3dSmrg OUT_BATCH(1); 783428d7b3dSmrg 784428d7b3dSmrg gen7_emit_urb(sna); 785428d7b3dSmrg 786428d7b3dSmrg gen7_emit_state_base_address(sna); 787428d7b3dSmrg 788428d7b3dSmrg gen7_disable_vs(sna); 789428d7b3dSmrg gen7_disable_hs(sna); 790428d7b3dSmrg gen7_disable_te(sna); 791428d7b3dSmrg gen7_disable_ds(sna); 792428d7b3dSmrg gen7_disable_gs(sna); 793428d7b3dSmrg gen7_disable_clip(sna); 794428d7b3dSmrg gen7_emit_sf_invariant(sna); 795428d7b3dSmrg gen7_emit_wm_invariant(sna); 796428d7b3dSmrg gen7_emit_cc_invariant(sna); 797428d7b3dSmrg gen7_disable_streamout(sna); 798428d7b3dSmrg gen7_emit_null_depth_buffer(sna); 799428d7b3dSmrg 800428d7b3dSmrg sna->render_state.gen7.needs_invariant = false; 801428d7b3dSmrg} 802428d7b3dSmrg 803428d7b3dSmrgstatic void 804428d7b3dSmrggen7_emit_cc(struct sna *sna, uint32_t blend_offset) 805428d7b3dSmrg{ 806428d7b3dSmrg struct gen7_render_state *render = &sna->render_state.gen7; 807428d7b3dSmrg 808428d7b3dSmrg if (render->blend == blend_offset) 809428d7b3dSmrg return; 810428d7b3dSmrg 811428d7b3dSmrg DBG(("%s: blend = %x\n", __FUNCTION__, blend_offset)); 812428d7b3dSmrg 813428d7b3dSmrg /* XXX can have upto 8 blend states preload, selectable via 814428d7b3dSmrg * Render Target Index. What other side-effects of Render Target Index? 815428d7b3dSmrg */ 816428d7b3dSmrg 817428d7b3dSmrg assert (is_aligned(render->cc_blend + blend_offset, 64)); 818428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); 819428d7b3dSmrg OUT_BATCH((render->cc_blend + blend_offset) | 1); 820428d7b3dSmrg 821428d7b3dSmrg render->blend = blend_offset; 822428d7b3dSmrg} 823428d7b3dSmrg 824428d7b3dSmrgstatic void 825428d7b3dSmrggen7_emit_sampler(struct sna *sna, uint32_t state) 826428d7b3dSmrg{ 827428d7b3dSmrg if (sna->render_state.gen7.samplers == state) 828428d7b3dSmrg return; 829428d7b3dSmrg 830428d7b3dSmrg sna->render_state.gen7.samplers = state; 831428d7b3dSmrg 832428d7b3dSmrg DBG(("%s: sampler = %x\n", __FUNCTION__, state)); 833428d7b3dSmrg 834428d7b3dSmrg assert (is_aligned(sna->render_state.gen7.wm_state + state, 32)); 835428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2)); 836428d7b3dSmrg OUT_BATCH(sna->render_state.gen7.wm_state + state); 837428d7b3dSmrg} 838428d7b3dSmrg 839428d7b3dSmrgstatic void 840428d7b3dSmrggen7_emit_sf(struct sna *sna, bool has_mask) 841428d7b3dSmrg{ 842428d7b3dSmrg int num_sf_outputs = has_mask ? 2 : 1; 843428d7b3dSmrg 844428d7b3dSmrg if (sna->render_state.gen7.num_sf_outputs == num_sf_outputs) 845428d7b3dSmrg return; 846428d7b3dSmrg 847428d7b3dSmrg DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n", 848428d7b3dSmrg __FUNCTION__, num_sf_outputs, 1, 0)); 849428d7b3dSmrg 850428d7b3dSmrg sna->render_state.gen7.num_sf_outputs = num_sf_outputs; 851428d7b3dSmrg 852428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2)); 853428d7b3dSmrg OUT_BATCH(num_sf_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT | 854428d7b3dSmrg 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | 855428d7b3dSmrg 1 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT); 856428d7b3dSmrg OUT_BATCH(0); 857428d7b3dSmrg OUT_BATCH(0); /* dw4 */ 858428d7b3dSmrg OUT_BATCH(0); 859428d7b3dSmrg OUT_BATCH(0); 860428d7b3dSmrg OUT_BATCH(0); 861428d7b3dSmrg OUT_BATCH(0); /* dw8 */ 862428d7b3dSmrg OUT_BATCH(0); 863428d7b3dSmrg OUT_BATCH(0); 864428d7b3dSmrg OUT_BATCH(0); 865428d7b3dSmrg OUT_BATCH(0); /* dw12 */ 866428d7b3dSmrg OUT_BATCH(0); 867428d7b3dSmrg OUT_BATCH(0); 868428d7b3dSmrg} 869428d7b3dSmrg 870428d7b3dSmrgstatic void 871428d7b3dSmrggen7_emit_wm(struct sna *sna, int kernel) 872428d7b3dSmrg{ 873428d7b3dSmrg const uint32_t *kernels; 874428d7b3dSmrg 875428d7b3dSmrg if (sna->render_state.gen7.kernel == kernel) 876428d7b3dSmrg return; 877428d7b3dSmrg 878428d7b3dSmrg sna->render_state.gen7.kernel = kernel; 879428d7b3dSmrg kernels = sna->render_state.gen7.wm_kernel[kernel]; 880428d7b3dSmrg 881428d7b3dSmrg DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n", 882428d7b3dSmrg __FUNCTION__, 883428d7b3dSmrg wm_kernels[kernel].name, 884428d7b3dSmrg wm_kernels[kernel].num_surfaces, 885428d7b3dSmrg kernels[0], kernels[1], kernels[2])); 886428d7b3dSmrg 887428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2)); 888428d7b3dSmrg OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]); 889428d7b3dSmrg OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT | 890428d7b3dSmrg wm_kernels[kernel].num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT); 891428d7b3dSmrg OUT_BATCH(0); /* scratch address */ 892428d7b3dSmrg OUT_BATCH(sna->render_state.gen7.info->max_wm_threads | 893428d7b3dSmrg (kernels[0] ? GEN7_PS_8_DISPATCH_ENABLE : 0) | 894428d7b3dSmrg (kernels[1] ? GEN7_PS_16_DISPATCH_ENABLE : 0) | 895428d7b3dSmrg (kernels[2] ? GEN7_PS_32_DISPATCH_ENABLE : 0) | 896428d7b3dSmrg GEN7_PS_ATTRIBUTE_ENABLE); 897428d7b3dSmrg OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 | 898428d7b3dSmrg 8 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 | 899428d7b3dSmrg 6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2); 900428d7b3dSmrg OUT_BATCH(kernels[2]); 901428d7b3dSmrg OUT_BATCH(kernels[1]); 902428d7b3dSmrg} 903428d7b3dSmrg 904428d7b3dSmrgstatic bool 905428d7b3dSmrggen7_emit_binding_table(struct sna *sna, uint16_t offset) 906428d7b3dSmrg{ 907428d7b3dSmrg if (sna->render_state.gen7.surface_table == offset) 908428d7b3dSmrg return false; 909428d7b3dSmrg 910428d7b3dSmrg /* Binding table pointers */ 911428d7b3dSmrg assert(is_aligned(4*offset, 32)); 912428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); 913428d7b3dSmrg OUT_BATCH(offset*4); 914428d7b3dSmrg 915428d7b3dSmrg sna->render_state.gen7.surface_table = offset; 916428d7b3dSmrg return true; 917428d7b3dSmrg} 918428d7b3dSmrg 919428d7b3dSmrgstatic bool 920428d7b3dSmrggen7_emit_drawing_rectangle(struct sna *sna, 921428d7b3dSmrg const struct sna_composite_op *op) 922428d7b3dSmrg{ 923428d7b3dSmrg uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); 924428d7b3dSmrg uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; 925428d7b3dSmrg 926428d7b3dSmrg assert(!too_large(abs(op->dst.x), abs(op->dst.y))); 927428d7b3dSmrg assert(!too_large(op->dst.width, op->dst.height)); 928428d7b3dSmrg 929428d7b3dSmrg if (sna->render_state.gen7.drawrect_limit == limit && 930428d7b3dSmrg sna->render_state.gen7.drawrect_offset == offset) 931428d7b3dSmrg return true; 932428d7b3dSmrg 933428d7b3dSmrg sna->render_state.gen7.drawrect_offset = offset; 934428d7b3dSmrg sna->render_state.gen7.drawrect_limit = limit; 935428d7b3dSmrg 936428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); 937428d7b3dSmrg OUT_BATCH(0); 938428d7b3dSmrg OUT_BATCH(limit); 939428d7b3dSmrg OUT_BATCH(offset); 940428d7b3dSmrg return false; 941428d7b3dSmrg} 942428d7b3dSmrg 943428d7b3dSmrgstatic void 944428d7b3dSmrggen7_emit_vertex_elements(struct sna *sna, 945428d7b3dSmrg const struct sna_composite_op *op) 946428d7b3dSmrg{ 947428d7b3dSmrg /* 948428d7b3dSmrg * vertex data in vertex buffer 949428d7b3dSmrg * position: (x, y) 950428d7b3dSmrg * texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0) 951428d7b3dSmrg * texture coordinate 1 if (has_mask is true): same as above 952428d7b3dSmrg */ 953428d7b3dSmrg struct gen7_render_state *render = &sna->render_state.gen7; 954428d7b3dSmrg uint32_t src_format, dw; 955428d7b3dSmrg int id = GEN7_VERTEX(op->u.gen7.flags); 956428d7b3dSmrg bool has_mask; 957428d7b3dSmrg 958428d7b3dSmrg DBG(("%s: setup id=%d\n", __FUNCTION__, id)); 959428d7b3dSmrg 960428d7b3dSmrg if (render->ve_id == id) 961428d7b3dSmrg return; 962428d7b3dSmrg render->ve_id = id; 963428d7b3dSmrg 964428d7b3dSmrg /* The VUE layout 965428d7b3dSmrg * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) 966428d7b3dSmrg * dword 4-7: position (x, y, 1.0, 1.0), 967428d7b3dSmrg * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) 968428d7b3dSmrg * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) 969428d7b3dSmrg * 970428d7b3dSmrg * dword 4-15 are fetched from vertex buffer 971428d7b3dSmrg */ 972428d7b3dSmrg has_mask = (id >> 2) != 0; 973428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS | 974428d7b3dSmrg ((2 * (3 + has_mask)) + 1 - 2)); 975428d7b3dSmrg 976428d7b3dSmrg OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | 977428d7b3dSmrg GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT | 978428d7b3dSmrg 0 << GEN7_VE0_OFFSET_SHIFT); 979428d7b3dSmrg OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT | 980428d7b3dSmrg GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT | 981428d7b3dSmrg GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT | 982428d7b3dSmrg GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT); 983428d7b3dSmrg 984428d7b3dSmrg /* x,y */ 985428d7b3dSmrg OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | 986428d7b3dSmrg GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT | 987428d7b3dSmrg 0 << GEN7_VE0_OFFSET_SHIFT); 988428d7b3dSmrg OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT | 989428d7b3dSmrg GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT | 990428d7b3dSmrg GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT | 991428d7b3dSmrg GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT); 992428d7b3dSmrg 993428d7b3dSmrg /* u0, v0, w0 */ 994428d7b3dSmrg DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3)); 995428d7b3dSmrg dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT; 996428d7b3dSmrg switch (id & 3) { 997428d7b3dSmrg default: 998428d7b3dSmrg assert(0); 999428d7b3dSmrg case 0: 1000428d7b3dSmrg src_format = GEN7_SURFACEFORMAT_R16G16_SSCALED; 1001428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1002428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1003428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1004428d7b3dSmrg break; 1005428d7b3dSmrg case 1: 1006428d7b3dSmrg src_format = GEN7_SURFACEFORMAT_R32_FLOAT; 1007428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1008428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1009428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1010428d7b3dSmrg break; 1011428d7b3dSmrg case 2: 1012428d7b3dSmrg src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT; 1013428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1014428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1015428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1016428d7b3dSmrg break; 1017428d7b3dSmrg case 3: 1018428d7b3dSmrg src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT; 1019428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1020428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1021428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1022428d7b3dSmrg break; 1023428d7b3dSmrg } 1024428d7b3dSmrg OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | 1025428d7b3dSmrg src_format << GEN7_VE0_FORMAT_SHIFT | 1026428d7b3dSmrg 4 << GEN7_VE0_OFFSET_SHIFT); 1027428d7b3dSmrg OUT_BATCH(dw); 1028428d7b3dSmrg 1029428d7b3dSmrg /* u1, v1, w1 */ 1030428d7b3dSmrg if (has_mask) { 1031428d7b3dSmrg unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); 1032428d7b3dSmrg DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset)); 1033428d7b3dSmrg dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT; 1034428d7b3dSmrg switch (id >> 2) { 1035428d7b3dSmrg case 1: 1036428d7b3dSmrg src_format = GEN7_SURFACEFORMAT_R32_FLOAT; 1037428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1038428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1039428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1040428d7b3dSmrg break; 1041428d7b3dSmrg default: 1042428d7b3dSmrg assert(0); 1043428d7b3dSmrg case 2: 1044428d7b3dSmrg src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT; 1045428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1046428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1047428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1048428d7b3dSmrg break; 1049428d7b3dSmrg case 3: 1050428d7b3dSmrg src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT; 1051428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; 1052428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; 1053428d7b3dSmrg dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT; 1054428d7b3dSmrg break; 1055428d7b3dSmrg } 1056428d7b3dSmrg OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | 1057428d7b3dSmrg src_format << GEN7_VE0_FORMAT_SHIFT | 1058428d7b3dSmrg offset << GEN7_VE0_OFFSET_SHIFT); 1059428d7b3dSmrg OUT_BATCH(dw); 1060428d7b3dSmrg } 1061428d7b3dSmrg} 1062428d7b3dSmrg 1063428d7b3dSmrginline static void 1064428d7b3dSmrggen7_emit_pipe_invalidate(struct sna *sna) 1065428d7b3dSmrg{ 1066428d7b3dSmrg OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); 1067428d7b3dSmrg OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | 1068428d7b3dSmrg GEN7_PIPE_CONTROL_TC_FLUSH | 1069428d7b3dSmrg GEN7_PIPE_CONTROL_CS_STALL); 1070428d7b3dSmrg OUT_BATCH(0); 1071428d7b3dSmrg OUT_BATCH(0); 1072428d7b3dSmrg sna->render_state.gen7.pipe_controls_since_stall = 0; 1073428d7b3dSmrg} 1074428d7b3dSmrg 1075428d7b3dSmrginline static void 1076428d7b3dSmrggen7_emit_pipe_flush(struct sna *sna, bool need_stall) 1077428d7b3dSmrg{ 1078428d7b3dSmrg unsigned stall; 1079428d7b3dSmrg 1080428d7b3dSmrg stall = 0; 1081428d7b3dSmrg if (need_stall) { 1082428d7b3dSmrg stall = GEN7_PIPE_CONTROL_CS_STALL; 1083428d7b3dSmrg sna->render_state.gen7.pipe_controls_since_stall = 0; 1084428d7b3dSmrg } else 1085428d7b3dSmrg sna->render_state.gen7.pipe_controls_since_stall++; 1086428d7b3dSmrg 1087428d7b3dSmrg OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); 1088428d7b3dSmrg OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | stall); 1089428d7b3dSmrg OUT_BATCH(0); 1090428d7b3dSmrg OUT_BATCH(0); 1091428d7b3dSmrg} 1092428d7b3dSmrg 1093428d7b3dSmrginline static void 1094428d7b3dSmrggen7_emit_pipe_stall(struct sna *sna) 1095428d7b3dSmrg{ 1096428d7b3dSmrg OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); 1097428d7b3dSmrg OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL | 1098428d7b3dSmrg GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD); 1099428d7b3dSmrg OUT_BATCH(0); 1100428d7b3dSmrg OUT_BATCH(0); 1101428d7b3dSmrg sna->render_state.gen7.pipe_controls_since_stall = 0; 1102428d7b3dSmrg} 1103428d7b3dSmrg 1104428d7b3dSmrgstatic void 1105428d7b3dSmrggen7_emit_state(struct sna *sna, 1106428d7b3dSmrg const struct sna_composite_op *op, 1107428d7b3dSmrg uint16_t wm_binding_table) 1108428d7b3dSmrg{ 1109428d7b3dSmrg bool need_invalidate; 1110428d7b3dSmrg bool need_flush; 1111428d7b3dSmrg bool need_stall; 1112428d7b3dSmrg 1113428d7b3dSmrg assert(op->dst.bo->exec); 1114428d7b3dSmrg 1115428d7b3dSmrg need_flush = wm_binding_table & 1 || 1116428d7b3dSmrg (sna->render_state.gen7.emit_flush && GEN7_READS_DST(op->u.gen7.flags)); 1117428d7b3dSmrg if (ALWAYS_FLUSH) 1118428d7b3dSmrg need_flush = true; 1119428d7b3dSmrg 1120428d7b3dSmrg wm_binding_table &= ~1; 1121428d7b3dSmrg 1122428d7b3dSmrg need_stall = sna->render_state.gen7.surface_table != wm_binding_table; 1123428d7b3dSmrg 1124428d7b3dSmrg need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo); 1125428d7b3dSmrg if (ALWAYS_INVALIDATE) 1126428d7b3dSmrg need_invalidate = true; 1127428d7b3dSmrg 1128428d7b3dSmrg need_stall &= gen7_emit_drawing_rectangle(sna, op); 1129428d7b3dSmrg if (ALWAYS_STALL) 1130428d7b3dSmrg need_stall = true; 1131428d7b3dSmrg if (sna->kgem.gen < 075 && 1132428d7b3dSmrg sna->render_state.gen7.pipe_controls_since_stall >= 3) 1133428d7b3dSmrg need_stall = true; 1134428d7b3dSmrg 1135428d7b3dSmrg if (need_invalidate) { 1136428d7b3dSmrg gen7_emit_pipe_invalidate(sna); 1137428d7b3dSmrg kgem_clear_dirty(&sna->kgem); 1138428d7b3dSmrg assert(op->dst.bo->exec); 1139428d7b3dSmrg kgem_bo_mark_dirty(op->dst.bo); 1140428d7b3dSmrg 1141428d7b3dSmrg need_flush = false; 1142428d7b3dSmrg need_stall = false; 1143428d7b3dSmrg } 1144428d7b3dSmrg if (need_flush) { 1145428d7b3dSmrg gen7_emit_pipe_flush(sna, need_stall); 1146428d7b3dSmrg need_stall = false; 1147428d7b3dSmrg } 1148428d7b3dSmrg if (need_stall) 1149428d7b3dSmrg gen7_emit_pipe_stall(sna); 1150428d7b3dSmrg 1151428d7b3dSmrg gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags)); 1152428d7b3dSmrg gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags)); 1153428d7b3dSmrg gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2); 1154428d7b3dSmrg gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags)); 1155428d7b3dSmrg gen7_emit_vertex_elements(sna, op); 1156428d7b3dSmrg gen7_emit_binding_table(sna, wm_binding_table); 1157428d7b3dSmrg 1158428d7b3dSmrg sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags); 1159428d7b3dSmrg} 1160428d7b3dSmrg 1161428d7b3dSmrgstatic bool gen7_magic_ca_pass(struct sna *sna, 1162428d7b3dSmrg const struct sna_composite_op *op) 1163428d7b3dSmrg{ 1164428d7b3dSmrg struct gen7_render_state *state = &sna->render_state.gen7; 1165428d7b3dSmrg 1166428d7b3dSmrg if (!op->need_magic_ca_pass) 1167428d7b3dSmrg return false; 1168428d7b3dSmrg 1169428d7b3dSmrg DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, 1170428d7b3dSmrg sna->render.vertex_start, sna->render.vertex_index)); 1171428d7b3dSmrg 1172428d7b3dSmrg gen7_emit_pipe_stall(sna); 1173428d7b3dSmrg 1174428d7b3dSmrg gen7_emit_cc(sna, 1175428d7b3dSmrg GEN7_BLEND(gen7_get_blend(PictOpAdd, true, 1176428d7b3dSmrg op->dst.format))); 1177428d7b3dSmrg gen7_emit_wm(sna, 1178428d7b3dSmrg gen7_choose_composite_kernel(PictOpAdd, 1179428d7b3dSmrg true, true, 1180428d7b3dSmrg op->is_affine)); 1181428d7b3dSmrg 1182428d7b3dSmrg OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2)); 1183428d7b3dSmrg OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST); 1184428d7b3dSmrg OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); 1185428d7b3dSmrg OUT_BATCH(sna->render.vertex_start); 1186428d7b3dSmrg OUT_BATCH(1); /* single instance */ 1187428d7b3dSmrg OUT_BATCH(0); /* start instance location */ 1188428d7b3dSmrg OUT_BATCH(0); /* index buffer offset, ignored */ 1189428d7b3dSmrg 1190428d7b3dSmrg state->last_primitive = sna->kgem.nbatch; 1191428d7b3dSmrg return true; 1192428d7b3dSmrg} 1193428d7b3dSmrg 1194428d7b3dSmrgstatic void null_create(struct sna_static_stream *stream) 1195428d7b3dSmrg{ 1196428d7b3dSmrg /* A bunch of zeros useful for legacy border color and depth-stencil */ 1197428d7b3dSmrg sna_static_stream_map(stream, 64, 64); 1198428d7b3dSmrg} 1199428d7b3dSmrg 1200428d7b3dSmrgstatic void 1201428d7b3dSmrgsampler_state_init(struct gen7_sampler_state *sampler_state, 1202428d7b3dSmrg sampler_filter_t filter, 1203428d7b3dSmrg sampler_extend_t extend) 1204428d7b3dSmrg{ 1205428d7b3dSmrg sampler_state->ss0.lod_preclamp = 1; /* GL mode */ 1206428d7b3dSmrg 1207428d7b3dSmrg /* We use the legacy mode to get the semantics specified by 1208428d7b3dSmrg * the Render extension. */ 1209428d7b3dSmrg sampler_state->ss0.default_color_mode = GEN7_BORDER_COLOR_MODE_LEGACY; 1210428d7b3dSmrg 1211428d7b3dSmrg switch (filter) { 1212428d7b3dSmrg default: 1213428d7b3dSmrg case SAMPLER_FILTER_NEAREST: 1214428d7b3dSmrg sampler_state->ss0.min_filter = GEN7_MAPFILTER_NEAREST; 1215428d7b3dSmrg sampler_state->ss0.mag_filter = GEN7_MAPFILTER_NEAREST; 1216428d7b3dSmrg break; 1217428d7b3dSmrg case SAMPLER_FILTER_BILINEAR: 1218428d7b3dSmrg sampler_state->ss0.min_filter = GEN7_MAPFILTER_LINEAR; 1219428d7b3dSmrg sampler_state->ss0.mag_filter = GEN7_MAPFILTER_LINEAR; 1220428d7b3dSmrg break; 1221428d7b3dSmrg } 1222428d7b3dSmrg 1223428d7b3dSmrg switch (extend) { 1224428d7b3dSmrg default: 1225428d7b3dSmrg case SAMPLER_EXTEND_NONE: 1226428d7b3dSmrg sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER; 1227428d7b3dSmrg sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER; 1228428d7b3dSmrg sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER; 1229428d7b3dSmrg break; 1230428d7b3dSmrg case SAMPLER_EXTEND_REPEAT: 1231428d7b3dSmrg sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_WRAP; 1232428d7b3dSmrg sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_WRAP; 1233428d7b3dSmrg sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_WRAP; 1234428d7b3dSmrg break; 1235428d7b3dSmrg case SAMPLER_EXTEND_PAD: 1236428d7b3dSmrg sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP; 1237428d7b3dSmrg sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP; 1238428d7b3dSmrg sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP; 1239428d7b3dSmrg break; 1240428d7b3dSmrg case SAMPLER_EXTEND_REFLECT: 1241428d7b3dSmrg sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_MIRROR; 1242428d7b3dSmrg sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_MIRROR; 1243428d7b3dSmrg sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_MIRROR; 1244428d7b3dSmrg break; 1245428d7b3dSmrg } 1246428d7b3dSmrg} 1247428d7b3dSmrg 1248428d7b3dSmrgstatic void 1249428d7b3dSmrgsampler_copy_init(struct gen7_sampler_state *ss) 1250428d7b3dSmrg{ 1251428d7b3dSmrg sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1252428d7b3dSmrg ss->ss3.non_normalized_coord = 1; 1253428d7b3dSmrg 1254428d7b3dSmrg sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1255428d7b3dSmrg} 1256428d7b3dSmrg 1257428d7b3dSmrgstatic void 1258428d7b3dSmrgsampler_fill_init(struct gen7_sampler_state *ss) 1259428d7b3dSmrg{ 1260428d7b3dSmrg sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT); 1261428d7b3dSmrg ss->ss3.non_normalized_coord = 1; 1262428d7b3dSmrg 1263428d7b3dSmrg sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1264428d7b3dSmrg} 1265428d7b3dSmrg 1266428d7b3dSmrgstatic uint32_t 1267428d7b3dSmrggen7_tiling_bits(uint32_t tiling) 1268428d7b3dSmrg{ 1269428d7b3dSmrg switch (tiling) { 1270428d7b3dSmrg default: assert(0); 1271428d7b3dSmrg case I915_TILING_NONE: return 0; 1272428d7b3dSmrg case I915_TILING_X: return GEN7_SURFACE_TILED; 1273428d7b3dSmrg case I915_TILING_Y: return GEN7_SURFACE_TILED | GEN7_SURFACE_TILED_Y; 1274428d7b3dSmrg } 1275428d7b3dSmrg} 1276428d7b3dSmrg 1277428d7b3dSmrg/** 1278428d7b3dSmrg * Sets up the common fields for a surface state buffer for the given 1279428d7b3dSmrg * picture in the given surface state buffer. 1280428d7b3dSmrg */ 1281428d7b3dSmrgstatic uint32_t 1282428d7b3dSmrggen7_bind_bo(struct sna *sna, 1283428d7b3dSmrg struct kgem_bo *bo, 1284428d7b3dSmrg uint32_t width, 1285428d7b3dSmrg uint32_t height, 1286428d7b3dSmrg uint32_t format, 1287428d7b3dSmrg bool is_dst) 1288428d7b3dSmrg{ 1289428d7b3dSmrg uint32_t *ss; 1290428d7b3dSmrg uint32_t domains; 1291428d7b3dSmrg int offset; 1292428d7b3dSmrg uint32_t is_scanout = is_dst && bo->scanout; 1293428d7b3dSmrg 1294428d7b3dSmrg COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32); 1295428d7b3dSmrg 1296428d7b3dSmrg /* After the first bind, we manage the cache domains within the batch */ 1297428d7b3dSmrg offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31); 1298428d7b3dSmrg if (offset) { 1299428d7b3dSmrg assert(offset >= sna->kgem.surface); 1300428d7b3dSmrg if (is_dst) 1301428d7b3dSmrg kgem_bo_mark_dirty(bo); 1302428d7b3dSmrg return offset * sizeof(uint32_t); 1303428d7b3dSmrg } 1304428d7b3dSmrg 1305428d7b3dSmrg offset = sna->kgem.surface -= 1306428d7b3dSmrg sizeof(struct gen7_surface_state) / sizeof(uint32_t); 1307428d7b3dSmrg ss = sna->kgem.batch + offset; 1308428d7b3dSmrg ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT | 1309428d7b3dSmrg gen7_tiling_bits(bo->tiling) | 1310428d7b3dSmrg format << GEN7_SURFACE_FORMAT_SHIFT); 1311428d7b3dSmrg if (bo->tiling == I915_TILING_Y) 1312428d7b3dSmrg ss[0] |= GEN7_SURFACE_VALIGN_4; 1313428d7b3dSmrg if (is_dst) { 1314428d7b3dSmrg ss[0] |= GEN7_SURFACE_RC_READ_WRITE; 1315428d7b3dSmrg domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER; 1316428d7b3dSmrg } else 1317428d7b3dSmrg domains = I915_GEM_DOMAIN_SAMPLER << 16; 1318428d7b3dSmrg ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); 1319428d7b3dSmrg ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT | 1320428d7b3dSmrg (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT); 1321428d7b3dSmrg ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT; 1322428d7b3dSmrg ss[4] = 0; 1323428d7b3dSmrg ss[5] = (is_scanout || bo->io) ? 0 : sna->render_state.gen7.info->mocs << 16; 1324428d7b3dSmrg ss[6] = 0; 1325428d7b3dSmrg ss[7] = 0; 1326428d7b3dSmrg if (is_hsw(sna)) 1327428d7b3dSmrg ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); 1328428d7b3dSmrg 1329428d7b3dSmrg kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset); 1330428d7b3dSmrg 1331428d7b3dSmrg DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", 1332428d7b3dSmrg offset, bo->handle, ss[1], 1333428d7b3dSmrg format, width, height, bo->pitch, bo->tiling, 1334428d7b3dSmrg domains & 0xffff ? "render" : "sampler")); 1335428d7b3dSmrg 1336428d7b3dSmrg return offset * sizeof(uint32_t); 1337428d7b3dSmrg} 1338428d7b3dSmrg 1339428d7b3dSmrgstatic void gen7_emit_vertex_buffer(struct sna *sna, 1340428d7b3dSmrg const struct sna_composite_op *op) 1341428d7b3dSmrg{ 1342428d7b3dSmrg int id = GEN7_VERTEX(op->u.gen7.flags); 1343428d7b3dSmrg 1344428d7b3dSmrg OUT_BATCH(GEN7_3DSTATE_VERTEX_BUFFERS | (5 - 2)); 1345428d7b3dSmrg OUT_BATCH(id << GEN7_VB0_BUFFER_INDEX_SHIFT | 1346428d7b3dSmrg GEN7_VB0_VERTEXDATA | 1347428d7b3dSmrg GEN7_VB0_ADDRESS_MODIFY_ENABLE | 1348428d7b3dSmrg 4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT); 1349428d7b3dSmrg sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; 1350428d7b3dSmrg OUT_BATCH(0); 1351428d7b3dSmrg OUT_BATCH(~0); /* max address: disabled */ 1352428d7b3dSmrg OUT_BATCH(0); 1353428d7b3dSmrg 1354428d7b3dSmrg sna->render.vb_id |= 1 << id; 1355428d7b3dSmrg} 1356428d7b3dSmrg 1357428d7b3dSmrgstatic void gen7_emit_primitive(struct sna *sna) 1358428d7b3dSmrg{ 1359428d7b3dSmrg if (sna->kgem.nbatch == sna->render_state.gen7.last_primitive) { 1360428d7b3dSmrg sna->render.vertex_offset = sna->kgem.nbatch - 5; 1361428d7b3dSmrg return; 1362428d7b3dSmrg } 1363428d7b3dSmrg 1364428d7b3dSmrg OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2)); 1365428d7b3dSmrg OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST); 1366428d7b3dSmrg sna->render.vertex_offset = sna->kgem.nbatch; 1367428d7b3dSmrg OUT_BATCH(0); /* vertex count, to be filled in later */ 1368428d7b3dSmrg OUT_BATCH(sna->render.vertex_index); 1369428d7b3dSmrg OUT_BATCH(1); /* single instance */ 1370428d7b3dSmrg OUT_BATCH(0); /* start instance location */ 1371428d7b3dSmrg OUT_BATCH(0); /* index buffer offset, ignored */ 1372428d7b3dSmrg sna->render.vertex_start = sna->render.vertex_index; 1373428d7b3dSmrg 1374428d7b3dSmrg sna->render_state.gen7.last_primitive = sna->kgem.nbatch; 1375428d7b3dSmrg} 1376428d7b3dSmrg 1377428d7b3dSmrgstatic bool gen7_rectangle_begin(struct sna *sna, 1378428d7b3dSmrg const struct sna_composite_op *op) 1379428d7b3dSmrg{ 1380428d7b3dSmrg int id = 1 << GEN7_VERTEX(op->u.gen7.flags); 1381428d7b3dSmrg int ndwords; 1382428d7b3dSmrg 1383428d7b3dSmrg if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) 1384428d7b3dSmrg return true; 1385428d7b3dSmrg 1386428d7b3dSmrg ndwords = op->need_magic_ca_pass ? 60 : 6; 1387428d7b3dSmrg if ((sna->render.vb_id & id) == 0) 1388428d7b3dSmrg ndwords += 5; 1389428d7b3dSmrg if (!kgem_check_batch(&sna->kgem, ndwords)) 1390428d7b3dSmrg return false; 1391428d7b3dSmrg 1392428d7b3dSmrg if ((sna->render.vb_id & id) == 0) 1393428d7b3dSmrg gen7_emit_vertex_buffer(sna, op); 1394428d7b3dSmrg 1395428d7b3dSmrg gen7_emit_primitive(sna); 1396428d7b3dSmrg return true; 1397428d7b3dSmrg} 1398428d7b3dSmrg 1399428d7b3dSmrgstatic int gen7_get_rectangles__flush(struct sna *sna, 1400428d7b3dSmrg const struct sna_composite_op *op) 1401428d7b3dSmrg{ 1402428d7b3dSmrg /* Preventing discarding new vbo after lock contention */ 1403428d7b3dSmrg if (sna_vertex_wait__locked(&sna->render)) { 1404428d7b3dSmrg int rem = vertex_space(sna); 1405428d7b3dSmrg if (rem > op->floats_per_rect) 1406428d7b3dSmrg return rem; 1407428d7b3dSmrg } 1408428d7b3dSmrg 1409428d7b3dSmrg if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6)) 1410428d7b3dSmrg return 0; 1411428d7b3dSmrg if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) 1412428d7b3dSmrg return 0; 1413428d7b3dSmrg 1414428d7b3dSmrg if (sna->render.vertex_offset) { 1415428d7b3dSmrg gen4_vertex_flush(sna); 1416428d7b3dSmrg if (gen7_magic_ca_pass(sna, op)) { 1417428d7b3dSmrg gen7_emit_pipe_stall(sna); 1418428d7b3dSmrg gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags)); 1419428d7b3dSmrg gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags)); 1420428d7b3dSmrg } 1421428d7b3dSmrg } 1422428d7b3dSmrg 1423428d7b3dSmrg return gen4_vertex_finish(sna); 1424428d7b3dSmrg} 1425428d7b3dSmrg 1426428d7b3dSmrginline static int gen7_get_rectangles(struct sna *sna, 1427428d7b3dSmrg const struct sna_composite_op *op, 1428428d7b3dSmrg int want, 1429428d7b3dSmrg void (*emit_state)(struct sna *sna, const struct sna_composite_op *op)) 1430428d7b3dSmrg{ 1431428d7b3dSmrg int rem; 1432428d7b3dSmrg 1433428d7b3dSmrg assert(want); 1434428d7b3dSmrg 1435428d7b3dSmrgstart: 1436428d7b3dSmrg rem = vertex_space(sna); 1437428d7b3dSmrg if (unlikely(rem < op->floats_per_rect)) { 1438428d7b3dSmrg DBG(("flushing vbo for %s: %d < %d\n", 1439428d7b3dSmrg __FUNCTION__, rem, op->floats_per_rect)); 1440428d7b3dSmrg rem = gen7_get_rectangles__flush(sna, op); 1441428d7b3dSmrg if (unlikely(rem == 0)) 1442428d7b3dSmrg goto flush; 1443428d7b3dSmrg } 1444428d7b3dSmrg 1445428d7b3dSmrg if (unlikely(sna->render.vertex_offset == 0)) { 1446428d7b3dSmrg if (!gen7_rectangle_begin(sna, op)) 1447428d7b3dSmrg goto flush; 1448428d7b3dSmrg else 1449428d7b3dSmrg goto start; 1450428d7b3dSmrg } 1451428d7b3dSmrg 1452428d7b3dSmrg assert(rem <= vertex_space(sna)); 1453428d7b3dSmrg assert(op->floats_per_rect <= rem); 1454428d7b3dSmrg if (want > 1 && want * op->floats_per_rect > rem) 1455428d7b3dSmrg want = rem / op->floats_per_rect; 1456428d7b3dSmrg 1457428d7b3dSmrg assert(want > 0); 1458428d7b3dSmrg sna->render.vertex_index += 3*want; 1459428d7b3dSmrg return want; 1460428d7b3dSmrg 1461428d7b3dSmrgflush: 1462428d7b3dSmrg if (sna->render.vertex_offset) { 1463428d7b3dSmrg gen4_vertex_flush(sna); 1464428d7b3dSmrg gen7_magic_ca_pass(sna, op); 1465428d7b3dSmrg } 1466428d7b3dSmrg sna_vertex_wait__locked(&sna->render); 1467428d7b3dSmrg _kgem_submit(&sna->kgem); 1468428d7b3dSmrg emit_state(sna, op); 1469428d7b3dSmrg goto start; 1470428d7b3dSmrg} 1471428d7b3dSmrg 1472428d7b3dSmrginline static uint32_t *gen7_composite_get_binding_table(struct sna *sna, 1473428d7b3dSmrg uint16_t *offset) 1474428d7b3dSmrg{ 1475428d7b3dSmrg uint32_t *table; 1476428d7b3dSmrg 1477428d7b3dSmrg sna->kgem.surface -= 1478428d7b3dSmrg sizeof(struct gen7_surface_state) / sizeof(uint32_t); 1479428d7b3dSmrg /* Clear all surplus entries to zero in case of prefetch */ 1480428d7b3dSmrg table = memset(sna->kgem.batch + sna->kgem.surface, 1481428d7b3dSmrg 0, sizeof(struct gen7_surface_state)); 1482428d7b3dSmrg 1483428d7b3dSmrg DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); 1484428d7b3dSmrg 1485428d7b3dSmrg *offset = sna->kgem.surface; 1486428d7b3dSmrg return table; 1487428d7b3dSmrg} 1488428d7b3dSmrg 1489428d7b3dSmrgstatic void 1490428d7b3dSmrggen7_get_batch(struct sna *sna, const struct sna_composite_op *op) 1491428d7b3dSmrg{ 1492428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); 1493428d7b3dSmrg 1494428d7b3dSmrg if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) { 1495428d7b3dSmrg DBG(("%s: flushing batch: %d < %d+%d\n", 1496428d7b3dSmrg __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, 1497428d7b3dSmrg 150, 4*8)); 1498428d7b3dSmrg _kgem_submit(&sna->kgem); 1499428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1500428d7b3dSmrg } 1501428d7b3dSmrg 1502428d7b3dSmrg assert(sna->kgem.mode == KGEM_RENDER); 1503428d7b3dSmrg assert(sna->kgem.ring == KGEM_RENDER); 1504428d7b3dSmrg 1505428d7b3dSmrg if (sna->render_state.gen7.needs_invariant) 1506428d7b3dSmrg gen7_emit_invariant(sna); 1507428d7b3dSmrg} 1508428d7b3dSmrg 1509428d7b3dSmrgstatic void gen7_emit_composite_state(struct sna *sna, 1510428d7b3dSmrg const struct sna_composite_op *op) 1511428d7b3dSmrg{ 1512428d7b3dSmrg uint32_t *binding_table; 1513428d7b3dSmrg uint16_t offset, dirty; 1514428d7b3dSmrg 1515428d7b3dSmrg gen7_get_batch(sna, op); 1516428d7b3dSmrg 1517428d7b3dSmrg binding_table = gen7_composite_get_binding_table(sna, &offset); 1518428d7b3dSmrg 1519428d7b3dSmrg dirty = kgem_bo_is_dirty(op->dst.bo); 1520428d7b3dSmrg 1521428d7b3dSmrg binding_table[0] = 1522428d7b3dSmrg gen7_bind_bo(sna, 1523428d7b3dSmrg op->dst.bo, op->dst.width, op->dst.height, 1524428d7b3dSmrg gen7_get_dest_format(op->dst.format), 1525428d7b3dSmrg true); 1526428d7b3dSmrg binding_table[1] = 1527428d7b3dSmrg gen7_bind_bo(sna, 1528428d7b3dSmrg op->src.bo, op->src.width, op->src.height, 1529428d7b3dSmrg op->src.card_format, 1530428d7b3dSmrg false); 1531428d7b3dSmrg if (op->mask.bo) { 1532428d7b3dSmrg binding_table[2] = 1533428d7b3dSmrg gen7_bind_bo(sna, 1534428d7b3dSmrg op->mask.bo, 1535428d7b3dSmrg op->mask.width, 1536428d7b3dSmrg op->mask.height, 1537428d7b3dSmrg op->mask.card_format, 1538428d7b3dSmrg false); 1539428d7b3dSmrg } 1540428d7b3dSmrg 1541428d7b3dSmrg if (sna->kgem.surface == offset && 1542428d7b3dSmrg *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table && 1543428d7b3dSmrg (op->mask.bo == NULL || 1544428d7b3dSmrg sna->kgem.batch[sna->render_state.gen7.surface_table+2] == binding_table[2])) { 1545428d7b3dSmrg sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t); 1546428d7b3dSmrg offset = sna->render_state.gen7.surface_table; 1547428d7b3dSmrg } 1548428d7b3dSmrg 1549428d7b3dSmrg if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0]) 1550428d7b3dSmrg dirty = 0; 1551428d7b3dSmrg 1552428d7b3dSmrg gen7_emit_state(sna, op, offset | dirty); 1553428d7b3dSmrg} 1554428d7b3dSmrg 1555428d7b3dSmrgstatic void 1556428d7b3dSmrggen7_align_vertex(struct sna *sna, const struct sna_composite_op *op) 1557428d7b3dSmrg{ 1558428d7b3dSmrg if (op->floats_per_vertex != sna->render_state.gen7.floats_per_vertex) { 1559428d7b3dSmrg DBG(("aligning vertex: was %d, now %d floats per vertex\n", 1560428d7b3dSmrg sna->render_state.gen7.floats_per_vertex, op->floats_per_vertex)); 1561428d7b3dSmrg gen4_vertex_align(sna, op); 1562428d7b3dSmrg sna->render_state.gen7.floats_per_vertex = op->floats_per_vertex; 1563428d7b3dSmrg } 1564428d7b3dSmrg} 1565428d7b3dSmrg 1566428d7b3dSmrgfastcall static void 1567428d7b3dSmrggen7_render_composite_blt(struct sna *sna, 1568428d7b3dSmrg const struct sna_composite_op *op, 1569428d7b3dSmrg const struct sna_composite_rectangles *r) 1570428d7b3dSmrg{ 1571428d7b3dSmrg gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state); 1572428d7b3dSmrg op->prim_emit(sna, op, r); 1573428d7b3dSmrg} 1574428d7b3dSmrg 1575428d7b3dSmrgfastcall static void 1576428d7b3dSmrggen7_render_composite_box(struct sna *sna, 1577428d7b3dSmrg const struct sna_composite_op *op, 1578428d7b3dSmrg const BoxRec *box) 1579428d7b3dSmrg{ 1580428d7b3dSmrg struct sna_composite_rectangles r; 1581428d7b3dSmrg 1582428d7b3dSmrg gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state); 1583428d7b3dSmrg 1584428d7b3dSmrg DBG((" %s: (%d, %d), (%d, %d)\n", 1585428d7b3dSmrg __FUNCTION__, 1586428d7b3dSmrg box->x1, box->y1, box->x2, box->y2)); 1587428d7b3dSmrg 1588428d7b3dSmrg r.dst.x = box->x1; 1589428d7b3dSmrg r.dst.y = box->y1; 1590428d7b3dSmrg r.width = box->x2 - box->x1; 1591428d7b3dSmrg r.height = box->y2 - box->y1; 1592428d7b3dSmrg r.src = r.mask = r.dst; 1593428d7b3dSmrg 1594428d7b3dSmrg op->prim_emit(sna, op, &r); 1595428d7b3dSmrg} 1596428d7b3dSmrg 1597428d7b3dSmrgstatic void 1598428d7b3dSmrggen7_render_composite_boxes__blt(struct sna *sna, 1599428d7b3dSmrg const struct sna_composite_op *op, 1600428d7b3dSmrg const BoxRec *box, int nbox) 1601428d7b3dSmrg{ 1602428d7b3dSmrg DBG(("composite_boxes(%d)\n", nbox)); 1603428d7b3dSmrg 1604428d7b3dSmrg do { 1605428d7b3dSmrg int nbox_this_time; 1606428d7b3dSmrg 1607428d7b3dSmrg nbox_this_time = gen7_get_rectangles(sna, op, nbox, 1608428d7b3dSmrg gen7_emit_composite_state); 1609428d7b3dSmrg nbox -= nbox_this_time; 1610428d7b3dSmrg 1611428d7b3dSmrg do { 1612428d7b3dSmrg struct sna_composite_rectangles r; 1613428d7b3dSmrg 1614428d7b3dSmrg DBG((" %s: (%d, %d), (%d, %d)\n", 1615428d7b3dSmrg __FUNCTION__, 1616428d7b3dSmrg box->x1, box->y1, box->x2, box->y2)); 1617428d7b3dSmrg 1618428d7b3dSmrg r.dst.x = box->x1; 1619428d7b3dSmrg r.dst.y = box->y1; 1620428d7b3dSmrg r.width = box->x2 - box->x1; 1621428d7b3dSmrg r.height = box->y2 - box->y1; 1622428d7b3dSmrg r.src = r.mask = r.dst; 1623428d7b3dSmrg 1624428d7b3dSmrg op->prim_emit(sna, op, &r); 1625428d7b3dSmrg box++; 1626428d7b3dSmrg } while (--nbox_this_time); 1627428d7b3dSmrg } while (nbox); 1628428d7b3dSmrg} 1629428d7b3dSmrg 1630428d7b3dSmrgstatic void 1631428d7b3dSmrggen7_render_composite_boxes(struct sna *sna, 1632428d7b3dSmrg const struct sna_composite_op *op, 1633428d7b3dSmrg const BoxRec *box, int nbox) 1634428d7b3dSmrg{ 1635428d7b3dSmrg DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1636428d7b3dSmrg 1637428d7b3dSmrg do { 1638428d7b3dSmrg int nbox_this_time; 1639428d7b3dSmrg float *v; 1640428d7b3dSmrg 1641428d7b3dSmrg nbox_this_time = gen7_get_rectangles(sna, op, nbox, 1642428d7b3dSmrg gen7_emit_composite_state); 1643428d7b3dSmrg assert(nbox_this_time); 1644428d7b3dSmrg nbox -= nbox_this_time; 1645428d7b3dSmrg 1646428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1647428d7b3dSmrg sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1648428d7b3dSmrg 1649428d7b3dSmrg op->emit_boxes(op, box, nbox_this_time, v); 1650428d7b3dSmrg box += nbox_this_time; 1651428d7b3dSmrg } while (nbox); 1652428d7b3dSmrg} 1653428d7b3dSmrg 1654428d7b3dSmrgstatic void 1655428d7b3dSmrggen7_render_composite_boxes__thread(struct sna *sna, 1656428d7b3dSmrg const struct sna_composite_op *op, 1657428d7b3dSmrg const BoxRec *box, int nbox) 1658428d7b3dSmrg{ 1659428d7b3dSmrg DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1660428d7b3dSmrg 1661428d7b3dSmrg sna_vertex_lock(&sna->render); 1662428d7b3dSmrg do { 1663428d7b3dSmrg int nbox_this_time; 1664428d7b3dSmrg float *v; 1665428d7b3dSmrg 1666428d7b3dSmrg nbox_this_time = gen7_get_rectangles(sna, op, nbox, 1667428d7b3dSmrg gen7_emit_composite_state); 1668428d7b3dSmrg assert(nbox_this_time); 1669428d7b3dSmrg nbox -= nbox_this_time; 1670428d7b3dSmrg 1671428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1672428d7b3dSmrg sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1673428d7b3dSmrg 1674428d7b3dSmrg sna_vertex_acquire__locked(&sna->render); 1675428d7b3dSmrg sna_vertex_unlock(&sna->render); 1676428d7b3dSmrg 1677428d7b3dSmrg op->emit_boxes(op, box, nbox_this_time, v); 1678428d7b3dSmrg box += nbox_this_time; 1679428d7b3dSmrg 1680428d7b3dSmrg sna_vertex_lock(&sna->render); 1681428d7b3dSmrg sna_vertex_release__locked(&sna->render); 1682428d7b3dSmrg } while (nbox); 1683428d7b3dSmrg sna_vertex_unlock(&sna->render); 1684428d7b3dSmrg} 1685428d7b3dSmrg 1686428d7b3dSmrg#ifndef MAX 1687428d7b3dSmrg#define MAX(a,b) ((a) > (b) ? (a) : (b)) 1688428d7b3dSmrg#endif 1689428d7b3dSmrg 1690428d7b3dSmrgstatic uint32_t 1691428d7b3dSmrggen7_composite_create_blend_state(struct sna_static_stream *stream) 1692428d7b3dSmrg{ 1693428d7b3dSmrg char *base, *ptr; 1694428d7b3dSmrg int src, dst; 1695428d7b3dSmrg 1696428d7b3dSmrg base = sna_static_stream_map(stream, 1697428d7b3dSmrg GEN7_BLENDFACTOR_COUNT * GEN7_BLENDFACTOR_COUNT * GEN7_BLEND_STATE_PADDED_SIZE, 1698428d7b3dSmrg 64); 1699428d7b3dSmrg 1700428d7b3dSmrg ptr = base; 1701428d7b3dSmrg for (src = 0; src < GEN7_BLENDFACTOR_COUNT; src++) { 1702428d7b3dSmrg for (dst= 0; dst < GEN7_BLENDFACTOR_COUNT; dst++) { 1703428d7b3dSmrg struct gen7_blend_state *blend = 1704428d7b3dSmrg (struct gen7_blend_state *)ptr; 1705428d7b3dSmrg 1706428d7b3dSmrg blend->blend0.dest_blend_factor = dst; 1707428d7b3dSmrg blend->blend0.source_blend_factor = src; 1708428d7b3dSmrg blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD; 1709428d7b3dSmrg blend->blend0.blend_enable = 1710428d7b3dSmrg !(dst == GEN7_BLENDFACTOR_ZERO && src == GEN7_BLENDFACTOR_ONE); 1711428d7b3dSmrg 1712428d7b3dSmrg blend->blend1.post_blend_clamp_enable = 1; 1713428d7b3dSmrg blend->blend1.pre_blend_clamp_enable = 1; 1714428d7b3dSmrg 1715428d7b3dSmrg ptr += GEN7_BLEND_STATE_PADDED_SIZE; 1716428d7b3dSmrg } 1717428d7b3dSmrg } 1718428d7b3dSmrg 1719428d7b3dSmrg return sna_static_stream_offsetof(stream, base); 1720428d7b3dSmrg} 1721428d7b3dSmrg 1722428d7b3dSmrgstatic uint32_t gen7_bind_video_source(struct sna *sna, 1723428d7b3dSmrg struct kgem_bo *bo, 1724428d7b3dSmrg uint32_t offset, 1725428d7b3dSmrg int width, 1726428d7b3dSmrg int height, 1727428d7b3dSmrg int pitch, 1728428d7b3dSmrg uint32_t format) 1729428d7b3dSmrg{ 1730428d7b3dSmrg uint32_t *ss, bind; 1731428d7b3dSmrg 1732428d7b3dSmrg bind = sna->kgem.surface -= 1733428d7b3dSmrg sizeof(struct gen7_surface_state) / sizeof(uint32_t); 1734428d7b3dSmrg 1735428d7b3dSmrg assert(bo->tiling == I915_TILING_NONE); 1736428d7b3dSmrg 1737428d7b3dSmrg ss = sna->kgem.batch + bind; 1738428d7b3dSmrg ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT | 1739428d7b3dSmrg format << GEN7_SURFACE_FORMAT_SHIFT); 1740428d7b3dSmrg ss[1] = kgem_add_reloc(&sna->kgem, bind + 1, bo, 1741428d7b3dSmrg I915_GEM_DOMAIN_SAMPLER << 16, 1742428d7b3dSmrg offset); 1743428d7b3dSmrg ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT | 1744428d7b3dSmrg (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT); 1745428d7b3dSmrg ss[3] = (pitch - 1) << GEN7_SURFACE_PITCH_SHIFT; 1746428d7b3dSmrg ss[4] = 0; 1747428d7b3dSmrg ss[5] = 0; 1748428d7b3dSmrg ss[6] = 0; 1749428d7b3dSmrg ss[7] = 0; 1750428d7b3dSmrg if (is_hsw(sna)) 1751428d7b3dSmrg ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); 1752428d7b3dSmrg 1753428d7b3dSmrg DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, offset=%d\n", 1754428d7b3dSmrg bind, bo->handle, ss[1], 1755428d7b3dSmrg format, width, height, pitch, offset)); 1756428d7b3dSmrg 1757428d7b3dSmrg return bind * sizeof(uint32_t); 1758428d7b3dSmrg} 1759428d7b3dSmrg 1760428d7b3dSmrgstatic void gen7_emit_video_state(struct sna *sna, 1761428d7b3dSmrg const struct sna_composite_op *op) 1762428d7b3dSmrg{ 1763428d7b3dSmrg struct sna_video_frame *frame = op->priv; 1764428d7b3dSmrg uint32_t src_surf_format; 1765428d7b3dSmrg uint32_t src_surf_base[6]; 1766428d7b3dSmrg int src_width[6]; 1767428d7b3dSmrg int src_height[6]; 1768428d7b3dSmrg int src_pitch[6]; 1769428d7b3dSmrg uint32_t *binding_table; 1770428d7b3dSmrg uint16_t offset, dirty; 1771428d7b3dSmrg int n_src, n; 1772428d7b3dSmrg 1773428d7b3dSmrg gen7_get_batch(sna, op); 1774428d7b3dSmrg 1775428d7b3dSmrg src_surf_base[0] = 0; 1776428d7b3dSmrg src_surf_base[1] = 0; 1777428d7b3dSmrg src_surf_base[2] = frame->VBufOffset; 1778428d7b3dSmrg src_surf_base[3] = frame->VBufOffset; 1779428d7b3dSmrg src_surf_base[4] = frame->UBufOffset; 1780428d7b3dSmrg src_surf_base[5] = frame->UBufOffset; 1781428d7b3dSmrg 1782428d7b3dSmrg if (is_planar_fourcc(frame->id)) { 1783428d7b3dSmrg src_surf_format = GEN7_SURFACEFORMAT_R8_UNORM; 1784428d7b3dSmrg src_width[1] = src_width[0] = frame->width; 1785428d7b3dSmrg src_height[1] = src_height[0] = frame->height; 1786428d7b3dSmrg src_pitch[1] = src_pitch[0] = frame->pitch[1]; 1787428d7b3dSmrg src_width[4] = src_width[5] = src_width[2] = src_width[3] = 1788428d7b3dSmrg frame->width / 2; 1789428d7b3dSmrg src_height[4] = src_height[5] = src_height[2] = src_height[3] = 1790428d7b3dSmrg frame->height / 2; 1791428d7b3dSmrg src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = 1792428d7b3dSmrg frame->pitch[0]; 1793428d7b3dSmrg n_src = 6; 1794428d7b3dSmrg } else { 1795428d7b3dSmrg if (frame->id == FOURCC_UYVY) 1796428d7b3dSmrg src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY; 1797428d7b3dSmrg else 1798428d7b3dSmrg src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL; 1799428d7b3dSmrg 1800428d7b3dSmrg src_width[0] = frame->width; 1801428d7b3dSmrg src_height[0] = frame->height; 1802428d7b3dSmrg src_pitch[0] = frame->pitch[0]; 1803428d7b3dSmrg n_src = 1; 1804428d7b3dSmrg } 1805428d7b3dSmrg 1806428d7b3dSmrg binding_table = gen7_composite_get_binding_table(sna, &offset); 1807428d7b3dSmrg 1808428d7b3dSmrg dirty = kgem_bo_is_dirty(op->dst.bo); 1809428d7b3dSmrg 1810428d7b3dSmrg binding_table[0] = 1811428d7b3dSmrg gen7_bind_bo(sna, 1812428d7b3dSmrg op->dst.bo, op->dst.width, op->dst.height, 1813428d7b3dSmrg gen7_get_dest_format(op->dst.format), 1814428d7b3dSmrg true); 1815428d7b3dSmrg for (n = 0; n < n_src; n++) { 1816428d7b3dSmrg binding_table[1+n] = 1817428d7b3dSmrg gen7_bind_video_source(sna, 1818428d7b3dSmrg frame->bo, 1819428d7b3dSmrg src_surf_base[n], 1820428d7b3dSmrg src_width[n], 1821428d7b3dSmrg src_height[n], 1822428d7b3dSmrg src_pitch[n], 1823428d7b3dSmrg src_surf_format); 1824428d7b3dSmrg } 1825428d7b3dSmrg 1826428d7b3dSmrg gen7_emit_state(sna, op, offset | dirty); 1827428d7b3dSmrg} 1828428d7b3dSmrg 1829428d7b3dSmrgstatic bool 1830428d7b3dSmrggen7_render_video(struct sna *sna, 1831428d7b3dSmrg struct sna_video *video, 1832428d7b3dSmrg struct sna_video_frame *frame, 1833428d7b3dSmrg RegionPtr dstRegion, 1834428d7b3dSmrg PixmapPtr pixmap) 1835428d7b3dSmrg{ 1836428d7b3dSmrg struct sna_composite_op tmp; 1837428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(pixmap); 1838428d7b3dSmrg int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; 1839428d7b3dSmrg int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; 1840428d7b3dSmrg int src_width = frame->src.x2 - frame->src.x1; 1841428d7b3dSmrg int src_height = frame->src.y2 - frame->src.y1; 1842428d7b3dSmrg float src_offset_x, src_offset_y; 1843428d7b3dSmrg float src_scale_x, src_scale_y; 1844428d7b3dSmrg int nbox, pix_xoff, pix_yoff; 1845428d7b3dSmrg unsigned filter; 1846428d7b3dSmrg const BoxRec *box; 1847428d7b3dSmrg 1848428d7b3dSmrg DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", 1849428d7b3dSmrg __FUNCTION__, 1850428d7b3dSmrg src_width, src_height, dst_width, dst_height, 1851428d7b3dSmrg region_num_rects(dstRegion), 1852428d7b3dSmrg REGION_EXTENTS(NULL, dstRegion)->x1, 1853428d7b3dSmrg REGION_EXTENTS(NULL, dstRegion)->y1, 1854428d7b3dSmrg REGION_EXTENTS(NULL, dstRegion)->x2, 1855428d7b3dSmrg REGION_EXTENTS(NULL, dstRegion)->y2)); 1856428d7b3dSmrg 1857428d7b3dSmrg assert(priv->gpu_bo); 1858428d7b3dSmrg memset(&tmp, 0, sizeof(tmp)); 1859428d7b3dSmrg 1860428d7b3dSmrg tmp.dst.pixmap = pixmap; 1861428d7b3dSmrg tmp.dst.width = pixmap->drawable.width; 1862428d7b3dSmrg tmp.dst.height = pixmap->drawable.height; 1863428d7b3dSmrg tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth); 1864428d7b3dSmrg tmp.dst.bo = priv->gpu_bo; 1865428d7b3dSmrg 1866428d7b3dSmrg tmp.src.bo = frame->bo; 1867428d7b3dSmrg tmp.mask.bo = NULL; 1868428d7b3dSmrg 1869428d7b3dSmrg tmp.floats_per_vertex = 3; 1870428d7b3dSmrg tmp.floats_per_rect = 9; 1871428d7b3dSmrg 1872428d7b3dSmrg if (src_width == dst_width && src_height == dst_height) 1873428d7b3dSmrg filter = SAMPLER_FILTER_NEAREST; 1874428d7b3dSmrg else 1875428d7b3dSmrg filter = SAMPLER_FILTER_BILINEAR; 1876428d7b3dSmrg 1877428d7b3dSmrg tmp.u.gen7.flags = 1878428d7b3dSmrg GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, 1879428d7b3dSmrg SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), 1880428d7b3dSmrg NO_BLEND, 1881428d7b3dSmrg is_planar_fourcc(frame->id) ? 1882428d7b3dSmrg GEN7_WM_KERNEL_VIDEO_PLANAR : 1883428d7b3dSmrg GEN7_WM_KERNEL_VIDEO_PACKED, 1884428d7b3dSmrg 2); 1885428d7b3dSmrg tmp.priv = frame; 1886428d7b3dSmrg 1887428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 1888428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { 1889428d7b3dSmrg kgem_submit(&sna->kgem); 1890428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) 1891428d7b3dSmrg return false; 1892428d7b3dSmrg 1893428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1894428d7b3dSmrg } 1895428d7b3dSmrg 1896428d7b3dSmrg gen7_align_vertex(sna, &tmp); 1897428d7b3dSmrg gen7_emit_video_state(sna, &tmp); 1898428d7b3dSmrg 1899428d7b3dSmrg /* Set up the offset for translating from the given region (in screen 1900428d7b3dSmrg * coordinates) to the backing pixmap. 1901428d7b3dSmrg */ 1902428d7b3dSmrg#ifdef COMPOSITE 1903428d7b3dSmrg pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 1904428d7b3dSmrg pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 1905428d7b3dSmrg#else 1906428d7b3dSmrg pix_xoff = 0; 1907428d7b3dSmrg pix_yoff = 0; 1908428d7b3dSmrg#endif 1909428d7b3dSmrg 1910428d7b3dSmrg DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n", 1911428d7b3dSmrg __FUNCTION__, 1912428d7b3dSmrg frame->src.x1, frame->src.y1, 1913428d7b3dSmrg src_width, src_height, 1914428d7b3dSmrg dst_width, dst_height, 1915428d7b3dSmrg frame->width, frame->height)); 1916428d7b3dSmrg 1917428d7b3dSmrg src_scale_x = (float)src_width / dst_width / frame->width; 1918428d7b3dSmrg src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; 1919428d7b3dSmrg 1920428d7b3dSmrg src_scale_y = (float)src_height / dst_height / frame->height; 1921428d7b3dSmrg src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; 1922428d7b3dSmrg 1923428d7b3dSmrg DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n", 1924428d7b3dSmrg __FUNCTION__, 1925428d7b3dSmrg src_scale_x, src_scale_y, 1926428d7b3dSmrg src_offset_x, src_offset_y)); 1927428d7b3dSmrg 1928428d7b3dSmrg box = region_rects(dstRegion); 1929428d7b3dSmrg nbox = region_num_rects(dstRegion); 1930428d7b3dSmrg while (nbox--) { 1931428d7b3dSmrg BoxRec r; 1932428d7b3dSmrg 1933428d7b3dSmrg DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n", 1934428d7b3dSmrg __FUNCTION__, 1935428d7b3dSmrg box->x1, box->y1, 1936428d7b3dSmrg box->x2, box->y2, 1937428d7b3dSmrg pix_xoff, pix_yoff, 1938428d7b3dSmrg box->x1 * src_scale_x + src_offset_x, 1939428d7b3dSmrg box->y1 * src_scale_y + src_offset_y, 1940428d7b3dSmrg box->x2 * src_scale_x + src_offset_x, 1941428d7b3dSmrg box->y2 * src_scale_y + src_offset_y)); 1942428d7b3dSmrg 1943428d7b3dSmrg r.x1 = box->x1 + pix_xoff; 1944428d7b3dSmrg r.x2 = box->x2 + pix_xoff; 1945428d7b3dSmrg r.y1 = box->y1 + pix_yoff; 1946428d7b3dSmrg r.y2 = box->y2 + pix_yoff; 1947428d7b3dSmrg 1948428d7b3dSmrg gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state); 1949428d7b3dSmrg 1950428d7b3dSmrg OUT_VERTEX(r.x2, r.y2); 1951428d7b3dSmrg OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); 1952428d7b3dSmrg OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 1953428d7b3dSmrg 1954428d7b3dSmrg OUT_VERTEX(r.x1, r.y2); 1955428d7b3dSmrg OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 1956428d7b3dSmrg OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 1957428d7b3dSmrg 1958428d7b3dSmrg OUT_VERTEX(r.x1, r.y1); 1959428d7b3dSmrg OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 1960428d7b3dSmrg OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); 1961428d7b3dSmrg 1962428d7b3dSmrg if (!DAMAGE_IS_ALL(priv->gpu_damage)) { 1963428d7b3dSmrg sna_damage_add_box(&priv->gpu_damage, &r); 1964428d7b3dSmrg sna_damage_subtract_box(&priv->cpu_damage, &r); 1965428d7b3dSmrg } 1966428d7b3dSmrg box++; 1967428d7b3dSmrg } 1968428d7b3dSmrg 1969428d7b3dSmrg gen4_vertex_flush(sna); 1970428d7b3dSmrg return true; 1971428d7b3dSmrg} 1972428d7b3dSmrg 1973428d7b3dSmrgstatic int 1974428d7b3dSmrggen7_composite_picture(struct sna *sna, 1975428d7b3dSmrg PicturePtr picture, 1976428d7b3dSmrg struct sna_composite_channel *channel, 1977428d7b3dSmrg int x, int y, 1978428d7b3dSmrg int w, int h, 1979428d7b3dSmrg int dst_x, int dst_y, 1980428d7b3dSmrg bool precise) 1981428d7b3dSmrg{ 1982428d7b3dSmrg PixmapPtr pixmap; 1983428d7b3dSmrg uint32_t color; 1984428d7b3dSmrg int16_t dx, dy; 1985428d7b3dSmrg 1986428d7b3dSmrg DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", 1987428d7b3dSmrg __FUNCTION__, x, y, w, h, dst_x, dst_y)); 1988428d7b3dSmrg 1989428d7b3dSmrg channel->is_solid = false; 1990428d7b3dSmrg channel->card_format = -1; 1991428d7b3dSmrg 1992428d7b3dSmrg if (sna_picture_is_solid(picture, &color)) 1993428d7b3dSmrg return gen4_channel_init_solid(sna, channel, color); 1994428d7b3dSmrg 1995428d7b3dSmrg if (picture->pDrawable == NULL) { 1996428d7b3dSmrg int ret; 1997428d7b3dSmrg 1998428d7b3dSmrg if (picture->pSourcePict->type == SourcePictTypeLinear) 1999428d7b3dSmrg return gen4_channel_init_linear(sna, picture, channel, 2000428d7b3dSmrg x, y, 2001428d7b3dSmrg w, h, 2002428d7b3dSmrg dst_x, dst_y); 2003428d7b3dSmrg 2004428d7b3dSmrg DBG(("%s -- fixup, gradient\n", __FUNCTION__)); 2005428d7b3dSmrg ret = -1; 2006428d7b3dSmrg if (!precise) 2007428d7b3dSmrg ret = sna_render_picture_approximate_gradient(sna, picture, channel, 2008428d7b3dSmrg x, y, w, h, dst_x, dst_y); 2009428d7b3dSmrg if (ret == -1) 2010428d7b3dSmrg ret = sna_render_picture_fixup(sna, picture, channel, 2011428d7b3dSmrg x, y, w, h, dst_x, dst_y); 2012428d7b3dSmrg return ret; 2013428d7b3dSmrg } 2014428d7b3dSmrg 2015428d7b3dSmrg if (picture->alphaMap) { 2016428d7b3dSmrg DBG(("%s -- fallback, alphamap\n", __FUNCTION__)); 2017428d7b3dSmrg return sna_render_picture_fixup(sna, picture, channel, 2018428d7b3dSmrg x, y, w, h, dst_x, dst_y); 2019428d7b3dSmrg } 2020428d7b3dSmrg 2021428d7b3dSmrg if (!gen7_check_repeat(picture)) 2022428d7b3dSmrg return sna_render_picture_fixup(sna, picture, channel, 2023428d7b3dSmrg x, y, w, h, dst_x, dst_y); 2024428d7b3dSmrg 2025428d7b3dSmrg if (!gen7_check_filter(picture)) 2026428d7b3dSmrg return sna_render_picture_fixup(sna, picture, channel, 2027428d7b3dSmrg x, y, w, h, dst_x, dst_y); 2028428d7b3dSmrg 2029428d7b3dSmrg channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; 2030428d7b3dSmrg channel->filter = picture->filter; 2031428d7b3dSmrg 2032428d7b3dSmrg assert(picture->pDrawable); 2033428d7b3dSmrg pixmap = get_drawable_pixmap(picture->pDrawable); 2034428d7b3dSmrg get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); 2035428d7b3dSmrg 2036428d7b3dSmrg x += dx + picture->pDrawable->x; 2037428d7b3dSmrg y += dy + picture->pDrawable->y; 2038428d7b3dSmrg 2039428d7b3dSmrg channel->is_affine = sna_transform_is_affine(picture->transform); 2040428d7b3dSmrg if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) { 2041428d7b3dSmrg DBG(("%s: integer translation (%d, %d), removing\n", 2042428d7b3dSmrg __FUNCTION__, dx, dy)); 2043428d7b3dSmrg x += dx; 2044428d7b3dSmrg y += dy; 2045428d7b3dSmrg channel->transform = NULL; 2046428d7b3dSmrg channel->filter = PictFilterNearest; 2047428d7b3dSmrg 2048428d7b3dSmrg if (channel->repeat || 2049428d7b3dSmrg (x >= 0 && 2050428d7b3dSmrg y >= 0 && 2051428d7b3dSmrg x + w < pixmap->drawable.width && 2052428d7b3dSmrg y + h < pixmap->drawable.height)) { 2053428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(pixmap); 2054428d7b3dSmrg if (priv && priv->clear) { 2055428d7b3dSmrg DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); 2056428d7b3dSmrg return gen4_channel_init_solid(sna, channel, priv->clear_color); 2057428d7b3dSmrg } 2058428d7b3dSmrg } 2059428d7b3dSmrg } else 2060428d7b3dSmrg channel->transform = picture->transform; 2061428d7b3dSmrg 2062428d7b3dSmrg channel->pict_format = picture->format; 2063428d7b3dSmrg channel->card_format = gen7_get_card_format(picture->format); 2064428d7b3dSmrg if (channel->card_format == (unsigned)-1) 2065428d7b3dSmrg return sna_render_picture_convert(sna, picture, channel, pixmap, 2066428d7b3dSmrg x, y, w, h, dst_x, dst_y, 2067428d7b3dSmrg false); 2068428d7b3dSmrg 2069428d7b3dSmrg if (too_large(pixmap->drawable.width, pixmap->drawable.height)) { 2070428d7b3dSmrg DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__, 2071428d7b3dSmrg pixmap->drawable.width, pixmap->drawable.height)); 2072428d7b3dSmrg return sna_render_picture_extract(sna, picture, channel, 2073428d7b3dSmrg x, y, w, h, dst_x, dst_y); 2074428d7b3dSmrg } 2075428d7b3dSmrg 2076428d7b3dSmrg DBG(("%s: pixmap, repeat=%d, filter=%d, transform?=%d [affine? %d], format=%08x\n", 2077428d7b3dSmrg __FUNCTION__, 2078428d7b3dSmrg channel->repeat, channel->filter, 2079428d7b3dSmrg channel->transform != NULL, channel->is_affine, 2080428d7b3dSmrg channel->pict_format)); 2081428d7b3dSmrg if (channel->transform) { 2082428d7b3dSmrg DBG(("%s: transform=[%f %f %f, %f %f %f, %f %f %f]\n", 2083428d7b3dSmrg __FUNCTION__, 2084428d7b3dSmrg channel->transform->matrix[0][0] / 65536., 2085428d7b3dSmrg channel->transform->matrix[0][1] / 65536., 2086428d7b3dSmrg channel->transform->matrix[0][2] / 65536., 2087428d7b3dSmrg channel->transform->matrix[1][0] / 65536., 2088428d7b3dSmrg channel->transform->matrix[1][1] / 65536., 2089428d7b3dSmrg channel->transform->matrix[1][2] / 65536., 2090428d7b3dSmrg channel->transform->matrix[2][0] / 65536., 2091428d7b3dSmrg channel->transform->matrix[2][1] / 65536., 2092428d7b3dSmrg channel->transform->matrix[2][2] / 65536.)); 2093428d7b3dSmrg } 2094428d7b3dSmrg 2095428d7b3dSmrg return sna_render_pixmap_bo(sna, channel, pixmap, 2096428d7b3dSmrg x, y, w, h, dst_x, dst_y); 2097428d7b3dSmrg} 2098428d7b3dSmrg 2099428d7b3dSmrginline static void gen7_composite_channel_convert(struct sna_composite_channel *channel) 2100428d7b3dSmrg{ 2101428d7b3dSmrg channel->repeat = gen7_repeat(channel->repeat); 2102428d7b3dSmrg channel->filter = gen7_filter(channel->filter); 2103428d7b3dSmrg if (channel->card_format == (unsigned)-1) 2104428d7b3dSmrg channel->card_format = gen7_get_card_format(channel->pict_format); 2105428d7b3dSmrg assert(channel->card_format != (unsigned)-1); 2106428d7b3dSmrg} 2107428d7b3dSmrg 2108428d7b3dSmrgstatic void gen7_render_composite_done(struct sna *sna, 2109428d7b3dSmrg const struct sna_composite_op *op) 2110428d7b3dSmrg{ 2111428d7b3dSmrg if (sna->render.vertex_offset) { 2112428d7b3dSmrg gen4_vertex_flush(sna); 2113428d7b3dSmrg gen7_magic_ca_pass(sna, op); 2114428d7b3dSmrg } 2115428d7b3dSmrg 2116428d7b3dSmrg if (op->mask.bo) 2117428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->mask.bo); 2118428d7b3dSmrg if (op->src.bo) 2119428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->src.bo); 2120428d7b3dSmrg 2121428d7b3dSmrg sna_render_composite_redirect_done(sna, op); 2122428d7b3dSmrg} 2123428d7b3dSmrg 2124428d7b3dSmrginline static bool 2125428d7b3dSmrggen7_composite_set_target(struct sna *sna, 2126428d7b3dSmrg struct sna_composite_op *op, 2127428d7b3dSmrg PicturePtr dst, 2128428d7b3dSmrg int x, int y, int w, int h, 2129428d7b3dSmrg bool partial) 2130428d7b3dSmrg{ 2131428d7b3dSmrg BoxRec box; 2132428d7b3dSmrg unsigned int hint; 2133428d7b3dSmrg 2134428d7b3dSmrg DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial)); 2135428d7b3dSmrg 2136428d7b3dSmrg op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); 2137428d7b3dSmrg op->dst.format = dst->format; 2138428d7b3dSmrg op->dst.width = op->dst.pixmap->drawable.width; 2139428d7b3dSmrg op->dst.height = op->dst.pixmap->drawable.height; 2140428d7b3dSmrg 2141428d7b3dSmrg if (w | h) { 2142428d7b3dSmrg assert(w && h); 2143428d7b3dSmrg box.x1 = x; 2144428d7b3dSmrg box.y1 = y; 2145428d7b3dSmrg box.x2 = x + w; 2146428d7b3dSmrg box.y2 = y + h; 2147428d7b3dSmrg } else 2148428d7b3dSmrg sna_render_picture_extents(dst, &box); 2149428d7b3dSmrg 2150428d7b3dSmrg hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; 2151428d7b3dSmrg if (!partial) { 2152428d7b3dSmrg hint |= IGNORE_DAMAGE; 2153428d7b3dSmrg if (w == op->dst.width && h == op->dst.height) 2154428d7b3dSmrg hint |= REPLACES; 2155428d7b3dSmrg } 2156428d7b3dSmrg 2157428d7b3dSmrg op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 2158428d7b3dSmrg if (op->dst.bo == NULL) 2159428d7b3dSmrg return false; 2160428d7b3dSmrg 2161428d7b3dSmrg if (hint & REPLACES) { 2162428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 2163428d7b3dSmrg kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 2164428d7b3dSmrg } 2165428d7b3dSmrg 2166428d7b3dSmrg get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 2167428d7b3dSmrg &op->dst.x, &op->dst.y); 2168428d7b3dSmrg 2169428d7b3dSmrg DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 2170428d7b3dSmrg __FUNCTION__, 2171428d7b3dSmrg op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 2172428d7b3dSmrg op->dst.width, op->dst.height, 2173428d7b3dSmrg op->dst.bo->pitch, 2174428d7b3dSmrg op->dst.x, op->dst.y, 2175428d7b3dSmrg op->damage ? *op->damage : (void *)-1)); 2176428d7b3dSmrg 2177428d7b3dSmrg assert(op->dst.bo->proxy == NULL); 2178428d7b3dSmrg 2179428d7b3dSmrg if (too_large(op->dst.width, op->dst.height) && 2180428d7b3dSmrg !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 2181428d7b3dSmrg return false; 2182428d7b3dSmrg 2183428d7b3dSmrg return true; 2184428d7b3dSmrg} 2185428d7b3dSmrg 2186428d7b3dSmrgstatic bool 2187428d7b3dSmrgtry_blt(struct sna *sna, 2188428d7b3dSmrg PicturePtr dst, PicturePtr src, 2189428d7b3dSmrg int width, int height) 2190428d7b3dSmrg{ 2191428d7b3dSmrg struct kgem_bo *bo; 2192428d7b3dSmrg 2193428d7b3dSmrg if (sna->kgem.mode == KGEM_BLT) { 2194428d7b3dSmrg DBG(("%s: already performing BLT\n", __FUNCTION__)); 2195428d7b3dSmrg return true; 2196428d7b3dSmrg } 2197428d7b3dSmrg 2198428d7b3dSmrg if (too_large(width, height)) { 2199428d7b3dSmrg DBG(("%s: operation too large for 3D pipe (%d, %d)\n", 2200428d7b3dSmrg __FUNCTION__, width, height)); 2201428d7b3dSmrg return true; 2202428d7b3dSmrg } 2203428d7b3dSmrg 2204428d7b3dSmrg bo = __sna_drawable_peek_bo(dst->pDrawable); 2205428d7b3dSmrg if (bo == NULL) 2206428d7b3dSmrg return true; 2207428d7b3dSmrg if (bo->rq) 2208428d7b3dSmrg return RQ_IS_BLT(bo->rq); 2209428d7b3dSmrg 2210428d7b3dSmrg if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) 2211428d7b3dSmrg return true; 2212428d7b3dSmrg 2213428d7b3dSmrg if (src->pDrawable) { 2214428d7b3dSmrg bo = __sna_drawable_peek_bo(src->pDrawable); 2215428d7b3dSmrg if (bo == NULL) 2216428d7b3dSmrg return true; 2217428d7b3dSmrg 2218428d7b3dSmrg if (prefer_blt_bo(sna, bo)) 2219428d7b3dSmrg return true; 2220428d7b3dSmrg } 2221428d7b3dSmrg 2222428d7b3dSmrg if (sna->kgem.ring == KGEM_BLT) { 2223428d7b3dSmrg DBG(("%s: already performing BLT\n", __FUNCTION__)); 2224428d7b3dSmrg return true; 2225428d7b3dSmrg } 2226428d7b3dSmrg 2227428d7b3dSmrg return false; 2228428d7b3dSmrg} 2229428d7b3dSmrg 2230428d7b3dSmrgstatic bool 2231428d7b3dSmrgcheck_gradient(PicturePtr picture, bool precise) 2232428d7b3dSmrg{ 2233428d7b3dSmrg if (picture->pDrawable) 2234428d7b3dSmrg return false; 2235428d7b3dSmrg 2236428d7b3dSmrg switch (picture->pSourcePict->type) { 2237428d7b3dSmrg case SourcePictTypeSolidFill: 2238428d7b3dSmrg case SourcePictTypeLinear: 2239428d7b3dSmrg return false; 2240428d7b3dSmrg default: 2241428d7b3dSmrg return precise; 2242428d7b3dSmrg } 2243428d7b3dSmrg} 2244428d7b3dSmrg 2245428d7b3dSmrgstatic bool 2246428d7b3dSmrghas_alphamap(PicturePtr p) 2247428d7b3dSmrg{ 2248428d7b3dSmrg return p->alphaMap != NULL; 2249428d7b3dSmrg} 2250428d7b3dSmrg 2251428d7b3dSmrgstatic bool 2252428d7b3dSmrgneed_upload(PicturePtr p) 2253428d7b3dSmrg{ 2254428d7b3dSmrg return p->pDrawable && unattached(p->pDrawable) && untransformed(p); 2255428d7b3dSmrg} 2256428d7b3dSmrg 2257428d7b3dSmrgstatic bool 2258428d7b3dSmrgsource_is_busy(PixmapPtr pixmap) 2259428d7b3dSmrg{ 2260428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(pixmap); 2261428d7b3dSmrg if (priv == NULL || priv->clear) 2262428d7b3dSmrg return false; 2263428d7b3dSmrg 2264428d7b3dSmrg if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 2265428d7b3dSmrg return true; 2266428d7b3dSmrg 2267428d7b3dSmrg if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 2268428d7b3dSmrg return true; 2269428d7b3dSmrg 2270428d7b3dSmrg return priv->gpu_damage && !priv->cpu_damage; 2271428d7b3dSmrg} 2272428d7b3dSmrg 2273428d7b3dSmrgstatic bool 2274428d7b3dSmrgsource_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) 2275428d7b3dSmrg{ 2276428d7b3dSmrg if (sna_picture_is_solid(p, NULL)) 2277428d7b3dSmrg return false; 2278428d7b3dSmrg 2279428d7b3dSmrg if (p->pSourcePict) 2280428d7b3dSmrg return check_gradient(p, precise); 2281428d7b3dSmrg 2282428d7b3dSmrg if (!gen7_check_repeat(p) || !gen7_check_format(p->format)) 2283428d7b3dSmrg return true; 2284428d7b3dSmrg 2285428d7b3dSmrg if (pixmap && source_is_busy(pixmap)) 2286428d7b3dSmrg return false; 2287428d7b3dSmrg 2288428d7b3dSmrg return has_alphamap(p) || !gen7_check_filter(p) || need_upload(p); 2289428d7b3dSmrg} 2290428d7b3dSmrg 2291428d7b3dSmrgstatic bool 2292428d7b3dSmrggen7_composite_fallback(struct sna *sna, 2293428d7b3dSmrg PicturePtr src, 2294428d7b3dSmrg PicturePtr mask, 2295428d7b3dSmrg PicturePtr dst) 2296428d7b3dSmrg{ 2297428d7b3dSmrg PixmapPtr src_pixmap; 2298428d7b3dSmrg PixmapPtr mask_pixmap; 2299428d7b3dSmrg PixmapPtr dst_pixmap; 2300428d7b3dSmrg bool src_fallback, mask_fallback; 2301428d7b3dSmrg 2302428d7b3dSmrg if (!gen7_check_dst_format(dst->format)) { 2303428d7b3dSmrg DBG(("%s: unknown destination format: %d\n", 2304428d7b3dSmrg __FUNCTION__, dst->format)); 2305428d7b3dSmrg return true; 2306428d7b3dSmrg } 2307428d7b3dSmrg 2308428d7b3dSmrg dst_pixmap = get_drawable_pixmap(dst->pDrawable); 2309428d7b3dSmrg 2310428d7b3dSmrg src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; 2311428d7b3dSmrg src_fallback = source_fallback(src, src_pixmap, 2312428d7b3dSmrg dst->polyMode == PolyModePrecise); 2313428d7b3dSmrg 2314428d7b3dSmrg if (mask) { 2315428d7b3dSmrg mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; 2316428d7b3dSmrg mask_fallback = source_fallback(mask, mask_pixmap, 2317428d7b3dSmrg dst->polyMode == PolyModePrecise); 2318428d7b3dSmrg } else { 2319428d7b3dSmrg mask_pixmap = NULL; 2320428d7b3dSmrg mask_fallback = false; 2321428d7b3dSmrg } 2322428d7b3dSmrg 2323428d7b3dSmrg /* If we are using the destination as a source and need to 2324428d7b3dSmrg * readback in order to upload the source, do it all 2325428d7b3dSmrg * on the cpu. 2326428d7b3dSmrg */ 2327428d7b3dSmrg if (src_pixmap == dst_pixmap && src_fallback) { 2328428d7b3dSmrg DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); 2329428d7b3dSmrg return true; 2330428d7b3dSmrg } 2331428d7b3dSmrg if (mask_pixmap == dst_pixmap && mask_fallback) { 2332428d7b3dSmrg DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); 2333428d7b3dSmrg return true; 2334428d7b3dSmrg } 2335428d7b3dSmrg 2336428d7b3dSmrg /* If anything is on the GPU, push everything out to the GPU */ 2337428d7b3dSmrg if (dst_use_gpu(dst_pixmap)) { 2338428d7b3dSmrg DBG(("%s: dst is already on the GPU, try to use GPU\n", 2339428d7b3dSmrg __FUNCTION__)); 2340428d7b3dSmrg return false; 2341428d7b3dSmrg } 2342428d7b3dSmrg 2343428d7b3dSmrg if (src_pixmap && !src_fallback) { 2344428d7b3dSmrg DBG(("%s: src is already on the GPU, try to use GPU\n", 2345428d7b3dSmrg __FUNCTION__)); 2346428d7b3dSmrg return false; 2347428d7b3dSmrg } 2348428d7b3dSmrg if (mask_pixmap && !mask_fallback) { 2349428d7b3dSmrg DBG(("%s: mask is already on the GPU, try to use GPU\n", 2350428d7b3dSmrg __FUNCTION__)); 2351428d7b3dSmrg return false; 2352428d7b3dSmrg } 2353428d7b3dSmrg 2354428d7b3dSmrg /* However if the dst is not on the GPU and we need to 2355428d7b3dSmrg * render one of the sources using the CPU, we may 2356428d7b3dSmrg * as well do the entire operation in place onthe CPU. 2357428d7b3dSmrg */ 2358428d7b3dSmrg if (src_fallback) { 2359428d7b3dSmrg DBG(("%s: dst is on the CPU and src will fallback\n", 2360428d7b3dSmrg __FUNCTION__)); 2361428d7b3dSmrg return true; 2362428d7b3dSmrg } 2363428d7b3dSmrg 2364428d7b3dSmrg if (mask && mask_fallback) { 2365428d7b3dSmrg DBG(("%s: dst is on the CPU and mask will fallback\n", 2366428d7b3dSmrg __FUNCTION__)); 2367428d7b3dSmrg return true; 2368428d7b3dSmrg } 2369428d7b3dSmrg 2370428d7b3dSmrg if (too_large(dst_pixmap->drawable.width, 2371428d7b3dSmrg dst_pixmap->drawable.height) && 2372428d7b3dSmrg dst_is_cpu(dst_pixmap)) { 2373428d7b3dSmrg DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 2374428d7b3dSmrg return true; 2375428d7b3dSmrg } 2376428d7b3dSmrg 2377428d7b3dSmrg DBG(("%s: dst is not on the GPU and the operation should not fallback\n", 2378428d7b3dSmrg __FUNCTION__)); 2379428d7b3dSmrg return dst_use_cpu(dst_pixmap); 2380428d7b3dSmrg} 2381428d7b3dSmrg 2382428d7b3dSmrgstatic int 2383428d7b3dSmrgreuse_source(struct sna *sna, 2384428d7b3dSmrg PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 2385428d7b3dSmrg PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 2386428d7b3dSmrg{ 2387428d7b3dSmrg uint32_t color; 2388428d7b3dSmrg 2389428d7b3dSmrg if (src_x != msk_x || src_y != msk_y) 2390428d7b3dSmrg return false; 2391428d7b3dSmrg 2392428d7b3dSmrg if (src == mask) { 2393428d7b3dSmrg DBG(("%s: mask is source\n", __FUNCTION__)); 2394428d7b3dSmrg *mc = *sc; 2395428d7b3dSmrg mc->bo = kgem_bo_reference(mc->bo); 2396428d7b3dSmrg return true; 2397428d7b3dSmrg } 2398428d7b3dSmrg 2399428d7b3dSmrg if (sna_picture_is_solid(mask, &color)) 2400428d7b3dSmrg return gen4_channel_init_solid(sna, mc, color); 2401428d7b3dSmrg 2402428d7b3dSmrg if (sc->is_solid) 2403428d7b3dSmrg return false; 2404428d7b3dSmrg 2405428d7b3dSmrg if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) 2406428d7b3dSmrg return false; 2407428d7b3dSmrg 2408428d7b3dSmrg DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 2409428d7b3dSmrg 2410428d7b3dSmrg if (!sna_transform_equal(src->transform, mask->transform)) 2411428d7b3dSmrg return false; 2412428d7b3dSmrg 2413428d7b3dSmrg if (!sna_picture_alphamap_equal(src, mask)) 2414428d7b3dSmrg return false; 2415428d7b3dSmrg 2416428d7b3dSmrg if (!gen7_check_repeat(mask)) 2417428d7b3dSmrg return false; 2418428d7b3dSmrg 2419428d7b3dSmrg if (!gen7_check_filter(mask)) 2420428d7b3dSmrg return false; 2421428d7b3dSmrg 2422428d7b3dSmrg if (!gen7_check_format(mask->format)) 2423428d7b3dSmrg return false; 2424428d7b3dSmrg 2425428d7b3dSmrg DBG(("%s: reusing source channel for mask with a twist\n", 2426428d7b3dSmrg __FUNCTION__)); 2427428d7b3dSmrg 2428428d7b3dSmrg *mc = *sc; 2429428d7b3dSmrg mc->repeat = gen7_repeat(mask->repeat ? mask->repeatType : RepeatNone); 2430428d7b3dSmrg mc->filter = gen7_filter(mask->filter); 2431428d7b3dSmrg mc->pict_format = mask->format; 2432428d7b3dSmrg mc->card_format = gen7_get_card_format(mask->format); 2433428d7b3dSmrg mc->bo = kgem_bo_reference(mc->bo); 2434428d7b3dSmrg return true; 2435428d7b3dSmrg} 2436428d7b3dSmrg 2437428d7b3dSmrgstatic bool 2438428d7b3dSmrggen7_render_composite(struct sna *sna, 2439428d7b3dSmrg uint8_t op, 2440428d7b3dSmrg PicturePtr src, 2441428d7b3dSmrg PicturePtr mask, 2442428d7b3dSmrg PicturePtr dst, 2443428d7b3dSmrg int16_t src_x, int16_t src_y, 2444428d7b3dSmrg int16_t msk_x, int16_t msk_y, 2445428d7b3dSmrg int16_t dst_x, int16_t dst_y, 2446428d7b3dSmrg int16_t width, int16_t height, 2447428d7b3dSmrg unsigned flags, 2448428d7b3dSmrg struct sna_composite_op *tmp) 2449428d7b3dSmrg{ 2450428d7b3dSmrg if (op >= ARRAY_SIZE(gen7_blend_op)) 2451428d7b3dSmrg return false; 2452428d7b3dSmrg 2453428d7b3dSmrg DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__, 2454428d7b3dSmrg width, height, sna->kgem.mode, sna->kgem.ring)); 2455428d7b3dSmrg 2456428d7b3dSmrg if (mask == NULL && 2457428d7b3dSmrg try_blt(sna, dst, src, width, height) && 2458428d7b3dSmrg sna_blt_composite(sna, op, 2459428d7b3dSmrg src, dst, 2460428d7b3dSmrg src_x, src_y, 2461428d7b3dSmrg dst_x, dst_y, 2462428d7b3dSmrg width, height, 2463428d7b3dSmrg flags, tmp)) 2464428d7b3dSmrg return true; 2465428d7b3dSmrg 2466428d7b3dSmrg if (gen7_composite_fallback(sna, src, mask, dst)) 2467428d7b3dSmrg goto fallback; 2468428d7b3dSmrg 2469428d7b3dSmrg if (need_tiling(sna, width, height)) 2470428d7b3dSmrg return sna_tiling_composite(op, src, mask, dst, 2471428d7b3dSmrg src_x, src_y, 2472428d7b3dSmrg msk_x, msk_y, 2473428d7b3dSmrg dst_x, dst_y, 2474428d7b3dSmrg width, height, 2475428d7b3dSmrg tmp); 2476428d7b3dSmrg 2477428d7b3dSmrg if (op == PictOpClear && src == sna->clear) 2478428d7b3dSmrg op = PictOpSrc; 2479428d7b3dSmrg tmp->op = op; 2480428d7b3dSmrg if (!gen7_composite_set_target(sna, tmp, dst, 2481428d7b3dSmrg dst_x, dst_y, width, height, 2482428d7b3dSmrg flags & COMPOSITE_PARTIAL || op > PictOpSrc)) 2483428d7b3dSmrg goto fallback; 2484428d7b3dSmrg 2485428d7b3dSmrg switch (gen7_composite_picture(sna, src, &tmp->src, 2486428d7b3dSmrg src_x, src_y, 2487428d7b3dSmrg width, height, 2488428d7b3dSmrg dst_x, dst_y, 2489428d7b3dSmrg dst->polyMode == PolyModePrecise)) { 2490428d7b3dSmrg case -1: 2491428d7b3dSmrg goto cleanup_dst; 2492428d7b3dSmrg case 0: 2493428d7b3dSmrg if (!gen4_channel_init_solid(sna, &tmp->src, 0)) 2494428d7b3dSmrg goto cleanup_dst; 2495428d7b3dSmrg /* fall through to fixup */ 2496428d7b3dSmrg case 1: 2497428d7b3dSmrg /* Did we just switch rings to prepare the source? */ 2498428d7b3dSmrg if (mask == NULL && 2499428d7b3dSmrg prefer_blt_composite(sna, tmp) && 2500428d7b3dSmrg sna_blt_composite__convert(sna, 2501428d7b3dSmrg dst_x, dst_y, width, height, 2502428d7b3dSmrg tmp)) 2503428d7b3dSmrg return true; 2504428d7b3dSmrg 2505428d7b3dSmrg gen7_composite_channel_convert(&tmp->src); 2506428d7b3dSmrg break; 2507428d7b3dSmrg } 2508428d7b3dSmrg 2509428d7b3dSmrg tmp->is_affine = tmp->src.is_affine; 2510428d7b3dSmrg tmp->has_component_alpha = false; 2511428d7b3dSmrg tmp->need_magic_ca_pass = false; 2512428d7b3dSmrg 2513428d7b3dSmrg tmp->mask.bo = NULL; 2514428d7b3dSmrg tmp->mask.filter = SAMPLER_FILTER_NEAREST; 2515428d7b3dSmrg tmp->mask.repeat = SAMPLER_EXTEND_NONE; 2516428d7b3dSmrg 2517428d7b3dSmrg if (mask) { 2518428d7b3dSmrg if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { 2519428d7b3dSmrg tmp->has_component_alpha = true; 2520428d7b3dSmrg 2521428d7b3dSmrg /* Check if it's component alpha that relies on a source alpha and on 2522428d7b3dSmrg * the source value. We can only get one of those into the single 2523428d7b3dSmrg * source value that we get to blend with. 2524428d7b3dSmrg */ 2525428d7b3dSmrg if (gen7_blend_op[op].src_alpha && 2526428d7b3dSmrg (gen7_blend_op[op].src_blend != GEN7_BLENDFACTOR_ZERO)) { 2527428d7b3dSmrg if (op != PictOpOver) 2528428d7b3dSmrg goto cleanup_src; 2529428d7b3dSmrg 2530428d7b3dSmrg tmp->need_magic_ca_pass = true; 2531428d7b3dSmrg tmp->op = PictOpOutReverse; 2532428d7b3dSmrg } 2533428d7b3dSmrg } 2534428d7b3dSmrg 2535428d7b3dSmrg if (!reuse_source(sna, 2536428d7b3dSmrg src, &tmp->src, src_x, src_y, 2537428d7b3dSmrg mask, &tmp->mask, msk_x, msk_y)) { 2538428d7b3dSmrg switch (gen7_composite_picture(sna, mask, &tmp->mask, 2539428d7b3dSmrg msk_x, msk_y, 2540428d7b3dSmrg width, height, 2541428d7b3dSmrg dst_x, dst_y, 2542428d7b3dSmrg dst->polyMode == PolyModePrecise)) { 2543428d7b3dSmrg case -1: 2544428d7b3dSmrg goto cleanup_src; 2545428d7b3dSmrg case 0: 2546428d7b3dSmrg if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) 2547428d7b3dSmrg goto cleanup_src; 2548428d7b3dSmrg /* fall through to fixup */ 2549428d7b3dSmrg case 1: 2550428d7b3dSmrg gen7_composite_channel_convert(&tmp->mask); 2551428d7b3dSmrg break; 2552428d7b3dSmrg } 2553428d7b3dSmrg } 2554428d7b3dSmrg 2555428d7b3dSmrg tmp->is_affine &= tmp->mask.is_affine; 2556428d7b3dSmrg } 2557428d7b3dSmrg 2558428d7b3dSmrg tmp->u.gen7.flags = 2559428d7b3dSmrg GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, 2560428d7b3dSmrg tmp->src.repeat, 2561428d7b3dSmrg tmp->mask.filter, 2562428d7b3dSmrg tmp->mask.repeat), 2563428d7b3dSmrg gen7_get_blend(tmp->op, 2564428d7b3dSmrg tmp->has_component_alpha, 2565428d7b3dSmrg tmp->dst.format), 2566428d7b3dSmrg gen7_choose_composite_kernel(tmp->op, 2567428d7b3dSmrg tmp->mask.bo != NULL, 2568428d7b3dSmrg tmp->has_component_alpha, 2569428d7b3dSmrg tmp->is_affine), 2570428d7b3dSmrg gen4_choose_composite_emitter(sna, tmp)); 2571428d7b3dSmrg 2572428d7b3dSmrg tmp->blt = gen7_render_composite_blt; 2573428d7b3dSmrg tmp->box = gen7_render_composite_box; 2574428d7b3dSmrg tmp->boxes = gen7_render_composite_boxes__blt; 2575428d7b3dSmrg if (tmp->emit_boxes){ 2576428d7b3dSmrg tmp->boxes = gen7_render_composite_boxes; 2577428d7b3dSmrg tmp->thread_boxes = gen7_render_composite_boxes__thread; 2578428d7b3dSmrg } 2579428d7b3dSmrg tmp->done = gen7_render_composite_done; 2580428d7b3dSmrg 2581428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); 2582428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2583428d7b3dSmrg tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2584428d7b3dSmrg NULL)) { 2585428d7b3dSmrg kgem_submit(&sna->kgem); 2586428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2587428d7b3dSmrg tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2588428d7b3dSmrg NULL)) 2589428d7b3dSmrg goto cleanup_mask; 2590428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2591428d7b3dSmrg } 2592428d7b3dSmrg 2593428d7b3dSmrg gen7_align_vertex(sna, tmp); 2594428d7b3dSmrg gen7_emit_composite_state(sna, tmp); 2595428d7b3dSmrg return true; 2596428d7b3dSmrg 2597428d7b3dSmrgcleanup_mask: 2598428d7b3dSmrg if (tmp->mask.bo) { 2599428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 2600428d7b3dSmrg tmp->mask.bo = NULL; 2601428d7b3dSmrg } 2602428d7b3dSmrgcleanup_src: 2603428d7b3dSmrg if (tmp->src.bo) { 2604428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->src.bo); 2605428d7b3dSmrg tmp->src.bo = NULL; 2606428d7b3dSmrg } 2607428d7b3dSmrgcleanup_dst: 2608428d7b3dSmrg if (tmp->redirect.real_bo) { 2609428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 2610428d7b3dSmrg tmp->redirect.real_bo = NULL; 2611428d7b3dSmrg } 2612428d7b3dSmrgfallback: 2613428d7b3dSmrg return (mask == NULL && 2614428d7b3dSmrg sna_blt_composite(sna, op, 2615428d7b3dSmrg src, dst, 2616428d7b3dSmrg src_x, src_y, 2617428d7b3dSmrg dst_x, dst_y, 2618428d7b3dSmrg width, height, 2619428d7b3dSmrg flags | COMPOSITE_FALLBACK, tmp)); 2620428d7b3dSmrg} 2621428d7b3dSmrg 2622428d7b3dSmrg#if !NO_COMPOSITE_SPANS 2623428d7b3dSmrgfastcall static void 2624428d7b3dSmrggen7_render_composite_spans_box(struct sna *sna, 2625428d7b3dSmrg const struct sna_composite_spans_op *op, 2626428d7b3dSmrg const BoxRec *box, float opacity) 2627428d7b3dSmrg{ 2628428d7b3dSmrg DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2629428d7b3dSmrg __FUNCTION__, 2630428d7b3dSmrg op->base.src.offset[0], op->base.src.offset[1], 2631428d7b3dSmrg opacity, 2632428d7b3dSmrg op->base.dst.x, op->base.dst.y, 2633428d7b3dSmrg box->x1, box->y1, 2634428d7b3dSmrg box->x2 - box->x1, 2635428d7b3dSmrg box->y2 - box->y1)); 2636428d7b3dSmrg 2637428d7b3dSmrg gen7_get_rectangles(sna, &op->base, 1, gen7_emit_composite_state); 2638428d7b3dSmrg op->prim_emit(sna, op, box, opacity); 2639428d7b3dSmrg} 2640428d7b3dSmrg 2641428d7b3dSmrgstatic void 2642428d7b3dSmrggen7_render_composite_spans_boxes(struct sna *sna, 2643428d7b3dSmrg const struct sna_composite_spans_op *op, 2644428d7b3dSmrg const BoxRec *box, int nbox, 2645428d7b3dSmrg float opacity) 2646428d7b3dSmrg{ 2647428d7b3dSmrg DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 2648428d7b3dSmrg __FUNCTION__, nbox, 2649428d7b3dSmrg op->base.src.offset[0], op->base.src.offset[1], 2650428d7b3dSmrg opacity, 2651428d7b3dSmrg op->base.dst.x, op->base.dst.y)); 2652428d7b3dSmrg 2653428d7b3dSmrg do { 2654428d7b3dSmrg int nbox_this_time; 2655428d7b3dSmrg 2656428d7b3dSmrg nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox, 2657428d7b3dSmrg gen7_emit_composite_state); 2658428d7b3dSmrg nbox -= nbox_this_time; 2659428d7b3dSmrg 2660428d7b3dSmrg do { 2661428d7b3dSmrg DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2662428d7b3dSmrg box->x1, box->y1, 2663428d7b3dSmrg box->x2 - box->x1, 2664428d7b3dSmrg box->y2 - box->y1)); 2665428d7b3dSmrg 2666428d7b3dSmrg op->prim_emit(sna, op, box++, opacity); 2667428d7b3dSmrg } while (--nbox_this_time); 2668428d7b3dSmrg } while (nbox); 2669428d7b3dSmrg} 2670428d7b3dSmrg 2671428d7b3dSmrgfastcall static void 2672428d7b3dSmrggen7_render_composite_spans_boxes__thread(struct sna *sna, 2673428d7b3dSmrg const struct sna_composite_spans_op *op, 2674428d7b3dSmrg const struct sna_opacity_box *box, 2675428d7b3dSmrg int nbox) 2676428d7b3dSmrg{ 2677428d7b3dSmrg DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", 2678428d7b3dSmrg __FUNCTION__, nbox, 2679428d7b3dSmrg op->base.src.offset[0], op->base.src.offset[1], 2680428d7b3dSmrg op->base.dst.x, op->base.dst.y)); 2681428d7b3dSmrg 2682428d7b3dSmrg sna_vertex_lock(&sna->render); 2683428d7b3dSmrg do { 2684428d7b3dSmrg int nbox_this_time; 2685428d7b3dSmrg float *v; 2686428d7b3dSmrg 2687428d7b3dSmrg nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox, 2688428d7b3dSmrg gen7_emit_composite_state); 2689428d7b3dSmrg assert(nbox_this_time); 2690428d7b3dSmrg nbox -= nbox_this_time; 2691428d7b3dSmrg 2692428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 2693428d7b3dSmrg sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; 2694428d7b3dSmrg 2695428d7b3dSmrg sna_vertex_acquire__locked(&sna->render); 2696428d7b3dSmrg sna_vertex_unlock(&sna->render); 2697428d7b3dSmrg 2698428d7b3dSmrg op->emit_boxes(op, box, nbox_this_time, v); 2699428d7b3dSmrg box += nbox_this_time; 2700428d7b3dSmrg 2701428d7b3dSmrg sna_vertex_lock(&sna->render); 2702428d7b3dSmrg sna_vertex_release__locked(&sna->render); 2703428d7b3dSmrg } while (nbox); 2704428d7b3dSmrg sna_vertex_unlock(&sna->render); 2705428d7b3dSmrg} 2706428d7b3dSmrg 2707428d7b3dSmrgfastcall static void 2708428d7b3dSmrggen7_render_composite_spans_done(struct sna *sna, 2709428d7b3dSmrg const struct sna_composite_spans_op *op) 2710428d7b3dSmrg{ 2711428d7b3dSmrg if (sna->render.vertex_offset) 2712428d7b3dSmrg gen4_vertex_flush(sna); 2713428d7b3dSmrg 2714428d7b3dSmrg DBG(("%s()\n", __FUNCTION__)); 2715428d7b3dSmrg 2716428d7b3dSmrg if (op->base.src.bo) 2717428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2718428d7b3dSmrg 2719428d7b3dSmrg sna_render_composite_redirect_done(sna, &op->base); 2720428d7b3dSmrg} 2721428d7b3dSmrg 2722428d7b3dSmrgstatic bool 2723428d7b3dSmrggen7_check_composite_spans(struct sna *sna, 2724428d7b3dSmrg uint8_t op, PicturePtr src, PicturePtr dst, 2725428d7b3dSmrg int16_t width, int16_t height, unsigned flags) 2726428d7b3dSmrg{ 2727428d7b3dSmrg if (op >= ARRAY_SIZE(gen7_blend_op)) 2728428d7b3dSmrg return false; 2729428d7b3dSmrg 2730428d7b3dSmrg if (gen7_composite_fallback(sna, src, NULL, dst)) 2731428d7b3dSmrg return false; 2732428d7b3dSmrg 2733428d7b3dSmrg if (need_tiling(sna, width, height) && 2734428d7b3dSmrg !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 2735428d7b3dSmrg DBG(("%s: fallback, tiled operation not on GPU\n", 2736428d7b3dSmrg __FUNCTION__)); 2737428d7b3dSmrg return false; 2738428d7b3dSmrg } 2739428d7b3dSmrg 2740428d7b3dSmrg return true; 2741428d7b3dSmrg} 2742428d7b3dSmrg 2743428d7b3dSmrgstatic bool 2744428d7b3dSmrggen7_render_composite_spans(struct sna *sna, 2745428d7b3dSmrg uint8_t op, 2746428d7b3dSmrg PicturePtr src, 2747428d7b3dSmrg PicturePtr dst, 2748428d7b3dSmrg int16_t src_x, int16_t src_y, 2749428d7b3dSmrg int16_t dst_x, int16_t dst_y, 2750428d7b3dSmrg int16_t width, int16_t height, 2751428d7b3dSmrg unsigned flags, 2752428d7b3dSmrg struct sna_composite_spans_op *tmp) 2753428d7b3dSmrg{ 2754428d7b3dSmrg DBG(("%s: %dx%d with flags=%x, current mode=%d/%d\n", __FUNCTION__, 2755428d7b3dSmrg width, height, flags, sna->kgem.mode, sna->kgem.ring)); 2756428d7b3dSmrg 2757428d7b3dSmrg assert(gen7_check_composite_spans(sna, op, src, dst, width, height, flags)); 2758428d7b3dSmrg 2759428d7b3dSmrg if (need_tiling(sna, width, height)) { 2760428d7b3dSmrg DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 2761428d7b3dSmrg __FUNCTION__, width, height)); 2762428d7b3dSmrg return sna_tiling_composite_spans(op, src, dst, 2763428d7b3dSmrg src_x, src_y, dst_x, dst_y, 2764428d7b3dSmrg width, height, flags, tmp); 2765428d7b3dSmrg } 2766428d7b3dSmrg 2767428d7b3dSmrg tmp->base.op = op; 2768428d7b3dSmrg if (!gen7_composite_set_target(sna, &tmp->base, dst, 2769428d7b3dSmrg dst_x, dst_y, width, height, true)) 2770428d7b3dSmrg return false; 2771428d7b3dSmrg 2772428d7b3dSmrg switch (gen7_composite_picture(sna, src, &tmp->base.src, 2773428d7b3dSmrg src_x, src_y, 2774428d7b3dSmrg width, height, 2775428d7b3dSmrg dst_x, dst_y, 2776428d7b3dSmrg dst->polyMode == PolyModePrecise)) { 2777428d7b3dSmrg case -1: 2778428d7b3dSmrg goto cleanup_dst; 2779428d7b3dSmrg case 0: 2780428d7b3dSmrg if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) 2781428d7b3dSmrg goto cleanup_dst; 2782428d7b3dSmrg /* fall through to fixup */ 2783428d7b3dSmrg case 1: 2784428d7b3dSmrg gen7_composite_channel_convert(&tmp->base.src); 2785428d7b3dSmrg break; 2786428d7b3dSmrg } 2787428d7b3dSmrg tmp->base.mask.bo = NULL; 2788428d7b3dSmrg 2789428d7b3dSmrg tmp->base.is_affine = tmp->base.src.is_affine; 2790428d7b3dSmrg tmp->base.need_magic_ca_pass = false; 2791428d7b3dSmrg 2792428d7b3dSmrg tmp->base.u.gen7.flags = 2793428d7b3dSmrg GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, 2794428d7b3dSmrg tmp->base.src.repeat, 2795428d7b3dSmrg SAMPLER_FILTER_NEAREST, 2796428d7b3dSmrg SAMPLER_EXTEND_PAD), 2797428d7b3dSmrg gen7_get_blend(tmp->base.op, false, tmp->base.dst.format), 2798428d7b3dSmrg GEN7_WM_KERNEL_OPACITY | !tmp->base.is_affine, 2799428d7b3dSmrg gen4_choose_spans_emitter(sna, tmp)); 2800428d7b3dSmrg 2801428d7b3dSmrg tmp->box = gen7_render_composite_spans_box; 2802428d7b3dSmrg tmp->boxes = gen7_render_composite_spans_boxes; 2803428d7b3dSmrg if (tmp->emit_boxes) 2804428d7b3dSmrg tmp->thread_boxes = gen7_render_composite_spans_boxes__thread; 2805428d7b3dSmrg tmp->done = gen7_render_composite_spans_done; 2806428d7b3dSmrg 2807428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); 2808428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2809428d7b3dSmrg tmp->base.dst.bo, tmp->base.src.bo, 2810428d7b3dSmrg NULL)) { 2811428d7b3dSmrg kgem_submit(&sna->kgem); 2812428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2813428d7b3dSmrg tmp->base.dst.bo, tmp->base.src.bo, 2814428d7b3dSmrg NULL)) 2815428d7b3dSmrg goto cleanup_src; 2816428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2817428d7b3dSmrg } 2818428d7b3dSmrg 2819428d7b3dSmrg gen7_align_vertex(sna, &tmp->base); 2820428d7b3dSmrg gen7_emit_composite_state(sna, &tmp->base); 2821428d7b3dSmrg return true; 2822428d7b3dSmrg 2823428d7b3dSmrgcleanup_src: 2824428d7b3dSmrg if (tmp->base.src.bo) 2825428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); 2826428d7b3dSmrgcleanup_dst: 2827428d7b3dSmrg if (tmp->base.redirect.real_bo) 2828428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); 2829428d7b3dSmrg return false; 2830428d7b3dSmrg} 2831428d7b3dSmrg#endif 2832428d7b3dSmrg 2833428d7b3dSmrgstatic void 2834428d7b3dSmrggen7_emit_copy_state(struct sna *sna, 2835428d7b3dSmrg const struct sna_composite_op *op) 2836428d7b3dSmrg{ 2837428d7b3dSmrg uint32_t *binding_table; 2838428d7b3dSmrg uint16_t offset, dirty; 2839428d7b3dSmrg 2840428d7b3dSmrg gen7_get_batch(sna, op); 2841428d7b3dSmrg 2842428d7b3dSmrg binding_table = gen7_composite_get_binding_table(sna, &offset); 2843428d7b3dSmrg 2844428d7b3dSmrg dirty = kgem_bo_is_dirty(op->dst.bo); 2845428d7b3dSmrg 2846428d7b3dSmrg binding_table[0] = 2847428d7b3dSmrg gen7_bind_bo(sna, 2848428d7b3dSmrg op->dst.bo, op->dst.width, op->dst.height, 2849428d7b3dSmrg gen7_get_dest_format(op->dst.format), 2850428d7b3dSmrg true); 2851428d7b3dSmrg binding_table[1] = 2852428d7b3dSmrg gen7_bind_bo(sna, 2853428d7b3dSmrg op->src.bo, op->src.width, op->src.height, 2854428d7b3dSmrg op->src.card_format, 2855428d7b3dSmrg false); 2856428d7b3dSmrg 2857428d7b3dSmrg if (sna->kgem.surface == offset && 2858428d7b3dSmrg *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table) { 2859428d7b3dSmrg sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t); 2860428d7b3dSmrg offset = sna->render_state.gen7.surface_table; 2861428d7b3dSmrg } 2862428d7b3dSmrg 2863428d7b3dSmrg if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0]) 2864428d7b3dSmrg dirty = 0; 2865428d7b3dSmrg 2866428d7b3dSmrg assert(!GEN7_READS_DST(op->u.gen7.flags)); 2867428d7b3dSmrg gen7_emit_state(sna, op, offset | dirty); 2868428d7b3dSmrg} 2869428d7b3dSmrg 2870428d7b3dSmrgstatic inline bool 2871428d7b3dSmrgprefer_blt_copy(struct sna *sna, 2872428d7b3dSmrg struct kgem_bo *src_bo, 2873428d7b3dSmrg struct kgem_bo *dst_bo, 2874428d7b3dSmrg unsigned flags) 2875428d7b3dSmrg{ 2876428d7b3dSmrg if (sna->kgem.mode == KGEM_BLT) 2877428d7b3dSmrg return true; 2878428d7b3dSmrg 2879428d7b3dSmrg assert((flags & COPY_SYNC) == 0); 2880428d7b3dSmrg 2881428d7b3dSmrg if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) 2882428d7b3dSmrg return true; 2883428d7b3dSmrg 2884428d7b3dSmrg if (untiled_tlb_miss(src_bo) || 2885428d7b3dSmrg untiled_tlb_miss(dst_bo)) 2886428d7b3dSmrg return true; 2887428d7b3dSmrg 2888428d7b3dSmrg if (force_blt_ring(sna)) 2889428d7b3dSmrg return true; 2890428d7b3dSmrg 2891428d7b3dSmrg if (kgem_bo_is_render(dst_bo) || 2892428d7b3dSmrg kgem_bo_is_render(src_bo)) 2893428d7b3dSmrg return false; 2894428d7b3dSmrg 2895428d7b3dSmrg if (prefer_render_ring(sna, dst_bo)) 2896428d7b3dSmrg return false; 2897428d7b3dSmrg 2898428d7b3dSmrg if (!prefer_blt_ring(sna, dst_bo, flags)) 2899428d7b3dSmrg return false; 2900428d7b3dSmrg 2901428d7b3dSmrg return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); 2902428d7b3dSmrg} 2903428d7b3dSmrg 2904428d7b3dSmrgstatic bool 2905428d7b3dSmrggen7_render_copy_boxes(struct sna *sna, uint8_t alu, 2906428d7b3dSmrg const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 2907428d7b3dSmrg const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 2908428d7b3dSmrg const BoxRec *box, int n, unsigned flags) 2909428d7b3dSmrg{ 2910428d7b3dSmrg struct sna_composite_op tmp; 2911428d7b3dSmrg BoxRec extents; 2912428d7b3dSmrg 2913428d7b3dSmrg DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? %d\n", 2914428d7b3dSmrg __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags, 2915428d7b3dSmrg src_bo == dst_bo, 2916428d7b3dSmrg overlaps(sna, 2917428d7b3dSmrg src_bo, src_dx, src_dy, 2918428d7b3dSmrg dst_bo, dst_dx, dst_dy, 2919428d7b3dSmrg box, n, flags, &extents))); 2920428d7b3dSmrg 2921428d7b3dSmrg if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && 2922428d7b3dSmrg sna_blt_compare_depth(src, dst) && 2923428d7b3dSmrg sna_blt_copy_boxes(sna, alu, 2924428d7b3dSmrg src_bo, src_dx, src_dy, 2925428d7b3dSmrg dst_bo, dst_dx, dst_dy, 2926428d7b3dSmrg dst->bitsPerPixel, 2927428d7b3dSmrg box, n)) 2928428d7b3dSmrg return true; 2929428d7b3dSmrg 2930428d7b3dSmrg if (!(alu == GXcopy || alu == GXclear)) { 2931428d7b3dSmrgfallback_blt: 2932428d7b3dSmrg DBG(("%s: fallback blt\n", __FUNCTION__)); 2933428d7b3dSmrg if (!sna_blt_compare_depth(src, dst)) 2934428d7b3dSmrg return false; 2935428d7b3dSmrg 2936428d7b3dSmrg return sna_blt_copy_boxes_fallback(sna, alu, 2937428d7b3dSmrg src, src_bo, src_dx, src_dy, 2938428d7b3dSmrg dst, dst_bo, dst_dx, dst_dy, 2939428d7b3dSmrg box, n); 2940428d7b3dSmrg } 2941428d7b3dSmrg 2942428d7b3dSmrg if (overlaps(sna, 2943428d7b3dSmrg src_bo, src_dx, src_dy, 2944428d7b3dSmrg dst_bo, dst_dx, dst_dy, 2945428d7b3dSmrg box, n, flags, 2946428d7b3dSmrg &extents)) { 2947428d7b3dSmrg bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); 2948428d7b3dSmrg 2949428d7b3dSmrg if ((big || can_switch_to_blt(sna, dst_bo, flags)) && 2950428d7b3dSmrg sna_blt_copy_boxes(sna, alu, 2951428d7b3dSmrg src_bo, src_dx, src_dy, 2952428d7b3dSmrg dst_bo, dst_dx, dst_dy, 2953428d7b3dSmrg dst->bitsPerPixel, 2954428d7b3dSmrg box, n)) 2955428d7b3dSmrg return true; 2956428d7b3dSmrg 2957428d7b3dSmrg if (big) 2958428d7b3dSmrg goto fallback_blt; 2959428d7b3dSmrg 2960428d7b3dSmrg assert(src_bo == dst_bo); 2961428d7b3dSmrg assert(src->depth == dst->depth); 2962428d7b3dSmrg assert(src->width == dst->width); 2963428d7b3dSmrg assert(src->height == dst->height); 2964428d7b3dSmrg return sna_render_copy_boxes__overlap(sna, alu, 2965428d7b3dSmrg src, src_bo, 2966428d7b3dSmrg src_dx, src_dy, 2967428d7b3dSmrg dst_dx, dst_dy, 2968428d7b3dSmrg box, n, &extents); 2969428d7b3dSmrg } 2970428d7b3dSmrg 2971428d7b3dSmrg if (dst->depth == src->depth) { 2972428d7b3dSmrg tmp.dst.format = sna_render_format_for_depth(dst->depth); 2973428d7b3dSmrg tmp.src.pict_format = tmp.dst.format; 2974428d7b3dSmrg } else { 2975428d7b3dSmrg tmp.dst.format = sna_format_for_depth(dst->depth); 2976428d7b3dSmrg tmp.src.pict_format = sna_format_for_depth(src->depth); 2977428d7b3dSmrg } 2978428d7b3dSmrg if (!gen7_check_format(tmp.src.pict_format)) 2979428d7b3dSmrg goto fallback_blt; 2980428d7b3dSmrg 2981428d7b3dSmrg tmp.dst.pixmap = (PixmapPtr)dst; 2982428d7b3dSmrg tmp.dst.width = dst->width; 2983428d7b3dSmrg tmp.dst.height = dst->height; 2984428d7b3dSmrg tmp.dst.bo = dst_bo; 2985428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 2986428d7b3dSmrg tmp.damage = NULL; 2987428d7b3dSmrg 2988428d7b3dSmrg sna_render_composite_redirect_init(&tmp); 2989428d7b3dSmrg if (too_large(tmp.dst.width, tmp.dst.height)) { 2990428d7b3dSmrg int i; 2991428d7b3dSmrg 2992428d7b3dSmrg extents = box[0]; 2993428d7b3dSmrg for (i = 1; i < n; i++) { 2994428d7b3dSmrg if (box[i].x1 < extents.x1) 2995428d7b3dSmrg extents.x1 = box[i].x1; 2996428d7b3dSmrg if (box[i].y1 < extents.y1) 2997428d7b3dSmrg extents.y1 = box[i].y1; 2998428d7b3dSmrg 2999428d7b3dSmrg if (box[i].x2 > extents.x2) 3000428d7b3dSmrg extents.x2 = box[i].x2; 3001428d7b3dSmrg if (box[i].y2 > extents.y2) 3002428d7b3dSmrg extents.y2 = box[i].y2; 3003428d7b3dSmrg } 3004428d7b3dSmrg 3005428d7b3dSmrg if (!sna_render_composite_redirect(sna, &tmp, 3006428d7b3dSmrg extents.x1 + dst_dx, 3007428d7b3dSmrg extents.y1 + dst_dy, 3008428d7b3dSmrg extents.x2 - extents.x1, 3009428d7b3dSmrg extents.y2 - extents.y1, 3010428d7b3dSmrg n > 1)) 3011428d7b3dSmrg goto fallback_tiled; 3012428d7b3dSmrg } 3013428d7b3dSmrg 3014428d7b3dSmrg tmp.src.card_format = gen7_get_card_format(tmp.src.pict_format); 3015428d7b3dSmrg if (too_large(src->width, src->height)) { 3016428d7b3dSmrg int i; 3017428d7b3dSmrg 3018428d7b3dSmrg extents = box[0]; 3019428d7b3dSmrg for (i = 1; i < n; i++) { 3020428d7b3dSmrg if (box[i].x1 < extents.x1) 3021428d7b3dSmrg extents.x1 = box[i].x1; 3022428d7b3dSmrg if (box[i].y1 < extents.y1) 3023428d7b3dSmrg extents.y1 = box[i].y1; 3024428d7b3dSmrg 3025428d7b3dSmrg if (box[i].x2 > extents.x2) 3026428d7b3dSmrg extents.x2 = box[i].x2; 3027428d7b3dSmrg if (box[i].y2 > extents.y2) 3028428d7b3dSmrg extents.y2 = box[i].y2; 3029428d7b3dSmrg } 3030428d7b3dSmrg 3031428d7b3dSmrg if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, 3032428d7b3dSmrg extents.x1 + src_dx, 3033428d7b3dSmrg extents.y1 + src_dy, 3034428d7b3dSmrg extents.x2 - extents.x1, 3035428d7b3dSmrg extents.y2 - extents.y1)) 3036428d7b3dSmrg goto fallback_tiled_dst; 3037428d7b3dSmrg } else { 3038428d7b3dSmrg tmp.src.bo = src_bo; 3039428d7b3dSmrg tmp.src.width = src->width; 3040428d7b3dSmrg tmp.src.height = src->height; 3041428d7b3dSmrg tmp.src.offset[0] = tmp.src.offset[1] = 0; 3042428d7b3dSmrg } 3043428d7b3dSmrg 3044428d7b3dSmrg tmp.mask.bo = NULL; 3045428d7b3dSmrg 3046428d7b3dSmrg tmp.floats_per_vertex = 2; 3047428d7b3dSmrg tmp.floats_per_rect = 6; 3048428d7b3dSmrg tmp.need_magic_ca_pass = 0; 3049428d7b3dSmrg 3050428d7b3dSmrg tmp.u.gen7.flags = COPY_FLAGS(alu); 3051428d7b3dSmrg 3052428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 3053428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 3054428d7b3dSmrg kgem_submit(&sna->kgem); 3055428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 3056428d7b3dSmrg if (tmp.src.bo != src_bo) 3057428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3058428d7b3dSmrg if (tmp.redirect.real_bo) 3059428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 3060428d7b3dSmrg goto fallback_blt; 3061428d7b3dSmrg } 3062428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3063428d7b3dSmrg } 3064428d7b3dSmrg 3065428d7b3dSmrg src_dx += tmp.src.offset[0]; 3066428d7b3dSmrg src_dy += tmp.src.offset[1]; 3067428d7b3dSmrg 3068428d7b3dSmrg dst_dx += tmp.dst.x; 3069428d7b3dSmrg dst_dy += tmp.dst.y; 3070428d7b3dSmrg 3071428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 3072428d7b3dSmrg 3073428d7b3dSmrg gen7_align_vertex(sna, &tmp); 3074428d7b3dSmrg gen7_emit_copy_state(sna, &tmp); 3075428d7b3dSmrg 3076428d7b3dSmrg do { 3077428d7b3dSmrg int16_t *v; 3078428d7b3dSmrg int n_this_time; 3079428d7b3dSmrg 3080428d7b3dSmrg n_this_time = gen7_get_rectangles(sna, &tmp, n, 3081428d7b3dSmrg gen7_emit_copy_state); 3082428d7b3dSmrg n -= n_this_time; 3083428d7b3dSmrg 3084428d7b3dSmrg v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 3085428d7b3dSmrg sna->render.vertex_used += 6 * n_this_time; 3086428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3087428d7b3dSmrg do { 3088428d7b3dSmrg 3089428d7b3dSmrg DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", 3090428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 3091428d7b3dSmrg box->x1 + dst_dx, box->y1 + dst_dy, 3092428d7b3dSmrg box->x2 - box->x1, box->y2 - box->y1)); 3093428d7b3dSmrg v[0] = box->x2 + dst_dx; 3094428d7b3dSmrg v[2] = box->x2 + src_dx; 3095428d7b3dSmrg v[1] = v[5] = box->y2 + dst_dy; 3096428d7b3dSmrg v[3] = v[7] = box->y2 + src_dy; 3097428d7b3dSmrg v[8] = v[4] = box->x1 + dst_dx; 3098428d7b3dSmrg v[10] = v[6] = box->x1 + src_dx; 3099428d7b3dSmrg v[9] = box->y1 + dst_dy; 3100428d7b3dSmrg v[11] = box->y1 + src_dy; 3101428d7b3dSmrg v += 12; box++; 3102428d7b3dSmrg } while (--n_this_time); 3103428d7b3dSmrg } while (n); 3104428d7b3dSmrg 3105428d7b3dSmrg gen4_vertex_flush(sna); 3106428d7b3dSmrg sna_render_composite_redirect_done(sna, &tmp); 3107428d7b3dSmrg if (tmp.src.bo != src_bo) 3108428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3109428d7b3dSmrg return true; 3110428d7b3dSmrg 3111428d7b3dSmrgfallback_tiled_dst: 3112428d7b3dSmrg if (tmp.redirect.real_bo) 3113428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 3114428d7b3dSmrgfallback_tiled: 3115428d7b3dSmrg DBG(("%s: fallback tiled\n", __FUNCTION__)); 3116428d7b3dSmrg if (sna_blt_compare_depth(src, dst) && 3117428d7b3dSmrg sna_blt_copy_boxes(sna, alu, 3118428d7b3dSmrg src_bo, src_dx, src_dy, 3119428d7b3dSmrg dst_bo, dst_dx, dst_dy, 3120428d7b3dSmrg dst->bitsPerPixel, 3121428d7b3dSmrg box, n)) 3122428d7b3dSmrg return true; 3123428d7b3dSmrg 3124428d7b3dSmrg return sna_tiling_copy_boxes(sna, alu, 3125428d7b3dSmrg src, src_bo, src_dx, src_dy, 3126428d7b3dSmrg dst, dst_bo, dst_dx, dst_dy, 3127428d7b3dSmrg box, n); 3128428d7b3dSmrg} 3129428d7b3dSmrg 3130428d7b3dSmrgstatic void 3131428d7b3dSmrggen7_render_copy_blt(struct sna *sna, 3132428d7b3dSmrg const struct sna_copy_op *op, 3133428d7b3dSmrg int16_t sx, int16_t sy, 3134428d7b3dSmrg int16_t w, int16_t h, 3135428d7b3dSmrg int16_t dx, int16_t dy) 3136428d7b3dSmrg{ 3137428d7b3dSmrg int16_t *v; 3138428d7b3dSmrg 3139428d7b3dSmrg gen7_get_rectangles(sna, &op->base, 1, gen7_emit_copy_state); 3140428d7b3dSmrg 3141428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3142428d7b3dSmrg sna->render.vertex_used += 6; 3143428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3144428d7b3dSmrg 3145428d7b3dSmrg v[0] = dx+w; v[1] = dy+h; 3146428d7b3dSmrg v[2] = sx+w; v[3] = sy+h; 3147428d7b3dSmrg v[4] = dx; v[5] = dy+h; 3148428d7b3dSmrg v[6] = sx; v[7] = sy+h; 3149428d7b3dSmrg v[8] = dx; v[9] = dy; 3150428d7b3dSmrg v[10] = sx; v[11] = sy; 3151428d7b3dSmrg} 3152428d7b3dSmrg 3153428d7b3dSmrgstatic void 3154428d7b3dSmrggen7_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 3155428d7b3dSmrg{ 3156428d7b3dSmrg if (sna->render.vertex_offset) 3157428d7b3dSmrg gen4_vertex_flush(sna); 3158428d7b3dSmrg} 3159428d7b3dSmrg 3160428d7b3dSmrgstatic bool 3161428d7b3dSmrggen7_render_copy(struct sna *sna, uint8_t alu, 3162428d7b3dSmrg PixmapPtr src, struct kgem_bo *src_bo, 3163428d7b3dSmrg PixmapPtr dst, struct kgem_bo *dst_bo, 3164428d7b3dSmrg struct sna_copy_op *op) 3165428d7b3dSmrg{ 3166428d7b3dSmrg DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n", 3167428d7b3dSmrg __FUNCTION__, alu, 3168428d7b3dSmrg src->drawable.width, src->drawable.height, 3169428d7b3dSmrg dst->drawable.width, dst->drawable.height)); 3170428d7b3dSmrg 3171428d7b3dSmrg if (prefer_blt_copy(sna, src_bo, dst_bo, 0) && 3172428d7b3dSmrg sna_blt_compare_depth(&src->drawable, &dst->drawable) && 3173428d7b3dSmrg sna_blt_copy(sna, alu, 3174428d7b3dSmrg src_bo, dst_bo, 3175428d7b3dSmrg dst->drawable.bitsPerPixel, 3176428d7b3dSmrg op)) 3177428d7b3dSmrg return true; 3178428d7b3dSmrg 3179428d7b3dSmrg if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || 3180428d7b3dSmrg too_large(src->drawable.width, src->drawable.height) || 3181428d7b3dSmrg too_large(dst->drawable.width, dst->drawable.height)) { 3182428d7b3dSmrgfallback: 3183428d7b3dSmrg if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3184428d7b3dSmrg return false; 3185428d7b3dSmrg 3186428d7b3dSmrg return sna_blt_copy(sna, alu, src_bo, dst_bo, 3187428d7b3dSmrg dst->drawable.bitsPerPixel, 3188428d7b3dSmrg op); 3189428d7b3dSmrg } 3190428d7b3dSmrg 3191428d7b3dSmrg if (dst->drawable.depth == src->drawable.depth) { 3192428d7b3dSmrg op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); 3193428d7b3dSmrg op->base.src.pict_format = op->base.dst.format; 3194428d7b3dSmrg } else { 3195428d7b3dSmrg op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3196428d7b3dSmrg op->base.src.pict_format = sna_format_for_depth(src->drawable.depth); 3197428d7b3dSmrg } 3198428d7b3dSmrg if (!gen7_check_format(op->base.src.pict_format)) 3199428d7b3dSmrg goto fallback; 3200428d7b3dSmrg 3201428d7b3dSmrg op->base.dst.pixmap = dst; 3202428d7b3dSmrg op->base.dst.width = dst->drawable.width; 3203428d7b3dSmrg op->base.dst.height = dst->drawable.height; 3204428d7b3dSmrg op->base.dst.bo = dst_bo; 3205428d7b3dSmrg 3206428d7b3dSmrg op->base.src.bo = src_bo; 3207428d7b3dSmrg op->base.src.card_format = 3208428d7b3dSmrg gen7_get_card_format(op->base.src.pict_format); 3209428d7b3dSmrg op->base.src.width = src->drawable.width; 3210428d7b3dSmrg op->base.src.height = src->drawable.height; 3211428d7b3dSmrg 3212428d7b3dSmrg op->base.mask.bo = NULL; 3213428d7b3dSmrg 3214428d7b3dSmrg op->base.floats_per_vertex = 2; 3215428d7b3dSmrg op->base.floats_per_rect = 6; 3216428d7b3dSmrg 3217428d7b3dSmrg op->base.u.gen7.flags = COPY_FLAGS(alu); 3218428d7b3dSmrg 3219428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3220428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 3221428d7b3dSmrg kgem_submit(&sna->kgem); 3222428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 3223428d7b3dSmrg goto fallback; 3224428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3225428d7b3dSmrg } 3226428d7b3dSmrg 3227428d7b3dSmrg gen7_align_vertex(sna, &op->base); 3228428d7b3dSmrg gen7_emit_copy_state(sna, &op->base); 3229428d7b3dSmrg 3230428d7b3dSmrg op->blt = gen7_render_copy_blt; 3231428d7b3dSmrg op->done = gen7_render_copy_done; 3232428d7b3dSmrg return true; 3233428d7b3dSmrg} 3234428d7b3dSmrg 3235428d7b3dSmrgstatic void 3236428d7b3dSmrggen7_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) 3237428d7b3dSmrg{ 3238428d7b3dSmrg uint16_t dirty; 3239428d7b3dSmrg uint32_t *binding_table; 3240428d7b3dSmrg uint16_t offset; 3241428d7b3dSmrg 3242428d7b3dSmrg /* XXX Render Target Fast Clear 3243428d7b3dSmrg * Set RTFC Enable in PS and render a rectangle. 3244428d7b3dSmrg * Limited to a clearing the full MSC surface only with a 3245428d7b3dSmrg * specific kernel. 3246428d7b3dSmrg */ 3247428d7b3dSmrg 3248428d7b3dSmrg gen7_get_batch(sna, op); 3249428d7b3dSmrg 3250428d7b3dSmrg binding_table = gen7_composite_get_binding_table(sna, &offset); 3251428d7b3dSmrg 3252428d7b3dSmrg dirty = kgem_bo_is_dirty(op->dst.bo); 3253428d7b3dSmrg 3254428d7b3dSmrg binding_table[0] = 3255428d7b3dSmrg gen7_bind_bo(sna, 3256428d7b3dSmrg op->dst.bo, op->dst.width, op->dst.height, 3257428d7b3dSmrg gen7_get_dest_format(op->dst.format), 3258428d7b3dSmrg true); 3259428d7b3dSmrg binding_table[1] = 3260428d7b3dSmrg gen7_bind_bo(sna, 3261428d7b3dSmrg op->src.bo, 1, 1, 3262428d7b3dSmrg GEN7_SURFACEFORMAT_B8G8R8A8_UNORM, 3263428d7b3dSmrg false); 3264428d7b3dSmrg 3265428d7b3dSmrg if (sna->kgem.surface == offset && 3266428d7b3dSmrg *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table) { 3267428d7b3dSmrg sna->kgem.surface += 3268428d7b3dSmrg sizeof(struct gen7_surface_state)/sizeof(uint32_t); 3269428d7b3dSmrg offset = sna->render_state.gen7.surface_table; 3270428d7b3dSmrg } 3271428d7b3dSmrg 3272428d7b3dSmrg if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0]) 3273428d7b3dSmrg dirty = 0; 3274428d7b3dSmrg 3275428d7b3dSmrg gen7_emit_state(sna, op, offset | dirty); 3276428d7b3dSmrg} 3277428d7b3dSmrg 3278428d7b3dSmrgstatic bool 3279428d7b3dSmrggen7_render_fill_boxes(struct sna *sna, 3280428d7b3dSmrg CARD8 op, 3281428d7b3dSmrg PictFormat format, 3282428d7b3dSmrg const xRenderColor *color, 3283428d7b3dSmrg const DrawableRec *dst, struct kgem_bo *dst_bo, 3284428d7b3dSmrg const BoxRec *box, int n) 3285428d7b3dSmrg{ 3286428d7b3dSmrg struct sna_composite_op tmp; 3287428d7b3dSmrg uint32_t pixel; 3288428d7b3dSmrg 3289428d7b3dSmrg DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", 3290428d7b3dSmrg __FUNCTION__, op, 3291428d7b3dSmrg color->red, color->green, color->blue, color->alpha, (int)format)); 3292428d7b3dSmrg 3293428d7b3dSmrg if (op >= ARRAY_SIZE(gen7_blend_op)) { 3294428d7b3dSmrg DBG(("%s: fallback due to unhandled blend op: %d\n", 3295428d7b3dSmrg __FUNCTION__, op)); 3296428d7b3dSmrg return false; 3297428d7b3dSmrg } 3298428d7b3dSmrg 3299428d7b3dSmrg if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) || 3300428d7b3dSmrg !gen7_check_dst_format(format)) { 3301428d7b3dSmrg uint8_t alu = GXinvalid; 3302428d7b3dSmrg 3303428d7b3dSmrg if (op <= PictOpSrc) { 3304428d7b3dSmrg pixel = 0; 3305428d7b3dSmrg if (op == PictOpClear) 3306428d7b3dSmrg alu = GXclear; 3307428d7b3dSmrg else if (sna_get_pixel_from_rgba(&pixel, 3308428d7b3dSmrg color->red, 3309428d7b3dSmrg color->green, 3310428d7b3dSmrg color->blue, 3311428d7b3dSmrg color->alpha, 3312428d7b3dSmrg format)) 3313428d7b3dSmrg alu = GXcopy; 3314428d7b3dSmrg } 3315428d7b3dSmrg 3316428d7b3dSmrg if (alu != GXinvalid && 3317428d7b3dSmrg sna_blt_fill_boxes(sna, alu, 3318428d7b3dSmrg dst_bo, dst->bitsPerPixel, 3319428d7b3dSmrg pixel, box, n)) 3320428d7b3dSmrg return true; 3321428d7b3dSmrg 3322428d7b3dSmrg if (!gen7_check_dst_format(format)) 3323428d7b3dSmrg return false; 3324428d7b3dSmrg } 3325428d7b3dSmrg 3326428d7b3dSmrg if (op == PictOpClear) { 3327428d7b3dSmrg pixel = 0; 3328428d7b3dSmrg op = PictOpSrc; 3329428d7b3dSmrg } else if (!sna_get_pixel_from_rgba(&pixel, 3330428d7b3dSmrg color->red, 3331428d7b3dSmrg color->green, 3332428d7b3dSmrg color->blue, 3333428d7b3dSmrg color->alpha, 3334428d7b3dSmrg PICT_a8r8g8b8)) 3335428d7b3dSmrg return false; 3336428d7b3dSmrg 3337428d7b3dSmrg DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", 3338428d7b3dSmrg __FUNCTION__, pixel, n, 3339428d7b3dSmrg box[0].x1, box[0].y1, box[0].x2, box[0].y2)); 3340428d7b3dSmrg 3341428d7b3dSmrg tmp.dst.pixmap = (PixmapPtr)dst; 3342428d7b3dSmrg tmp.dst.width = dst->width; 3343428d7b3dSmrg tmp.dst.height = dst->height; 3344428d7b3dSmrg tmp.dst.format = format; 3345428d7b3dSmrg tmp.dst.bo = dst_bo; 3346428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 3347428d7b3dSmrg tmp.damage = NULL; 3348428d7b3dSmrg 3349428d7b3dSmrg sna_render_composite_redirect_init(&tmp); 3350428d7b3dSmrg if (too_large(dst->width, dst->height)) { 3351428d7b3dSmrg BoxRec extents; 3352428d7b3dSmrg 3353428d7b3dSmrg boxes_extents(box, n, &extents); 3354428d7b3dSmrg if (!sna_render_composite_redirect(sna, &tmp, 3355428d7b3dSmrg extents.x1, extents.y1, 3356428d7b3dSmrg extents.x2 - extents.x1, 3357428d7b3dSmrg extents.y2 - extents.y1, 3358428d7b3dSmrg n > 1)) 3359428d7b3dSmrg return sna_tiling_fill_boxes(sna, op, format, color, 3360428d7b3dSmrg dst, dst_bo, box, n); 3361428d7b3dSmrg } 3362428d7b3dSmrg 3363428d7b3dSmrg tmp.src.bo = sna_render_get_solid(sna, pixel); 3364428d7b3dSmrg tmp.mask.bo = NULL; 3365428d7b3dSmrg 3366428d7b3dSmrg tmp.floats_per_vertex = 2; 3367428d7b3dSmrg tmp.floats_per_rect = 6; 3368428d7b3dSmrg tmp.need_magic_ca_pass = false; 3369428d7b3dSmrg 3370428d7b3dSmrg tmp.u.gen7.flags = FILL_FLAGS(op, format); 3371428d7b3dSmrg 3372428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3373428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3374428d7b3dSmrg kgem_submit(&sna->kgem); 3375428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3376428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3377428d7b3dSmrg if (tmp.redirect.real_bo) 3378428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 3379428d7b3dSmrg 3380428d7b3dSmrg return false; 3381428d7b3dSmrg } 3382428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3383428d7b3dSmrg } 3384428d7b3dSmrg 3385428d7b3dSmrg gen7_align_vertex(sna, &tmp); 3386428d7b3dSmrg gen7_emit_fill_state(sna, &tmp); 3387428d7b3dSmrg 3388428d7b3dSmrg do { 3389428d7b3dSmrg int n_this_time; 3390428d7b3dSmrg int16_t *v; 3391428d7b3dSmrg 3392428d7b3dSmrg n_this_time = gen7_get_rectangles(sna, &tmp, n, 3393428d7b3dSmrg gen7_emit_fill_state); 3394428d7b3dSmrg n -= n_this_time; 3395428d7b3dSmrg 3396428d7b3dSmrg v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 3397428d7b3dSmrg sna->render.vertex_used += 6 * n_this_time; 3398428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3399428d7b3dSmrg do { 3400428d7b3dSmrg DBG((" (%d, %d), (%d, %d)\n", 3401428d7b3dSmrg box->x1, box->y1, box->x2, box->y2)); 3402428d7b3dSmrg 3403428d7b3dSmrg v[0] = box->x2; 3404428d7b3dSmrg v[5] = v[1] = box->y2; 3405428d7b3dSmrg v[8] = v[4] = box->x1; 3406428d7b3dSmrg v[9] = box->y1; 3407428d7b3dSmrg v[2] = v[3] = v[7] = 1; 3408428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3409428d7b3dSmrg v += 12; box++; 3410428d7b3dSmrg } while (--n_this_time); 3411428d7b3dSmrg } while (n); 3412428d7b3dSmrg 3413428d7b3dSmrg gen4_vertex_flush(sna); 3414428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3415428d7b3dSmrg sna_render_composite_redirect_done(sna, &tmp); 3416428d7b3dSmrg return true; 3417428d7b3dSmrg} 3418428d7b3dSmrg 3419428d7b3dSmrgstatic void 3420428d7b3dSmrggen7_render_fill_op_blt(struct sna *sna, 3421428d7b3dSmrg const struct sna_fill_op *op, 3422428d7b3dSmrg int16_t x, int16_t y, int16_t w, int16_t h) 3423428d7b3dSmrg{ 3424428d7b3dSmrg int16_t *v; 3425428d7b3dSmrg 3426428d7b3dSmrg DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); 3427428d7b3dSmrg 3428428d7b3dSmrg gen7_get_rectangles(sna, &op->base, 1, gen7_emit_fill_state); 3429428d7b3dSmrg 3430428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3431428d7b3dSmrg sna->render.vertex_used += 6; 3432428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3433428d7b3dSmrg 3434428d7b3dSmrg v[0] = x+w; 3435428d7b3dSmrg v[4] = v[8] = x; 3436428d7b3dSmrg v[1] = v[5] = y+h; 3437428d7b3dSmrg v[9] = y; 3438428d7b3dSmrg 3439428d7b3dSmrg v[2] = v[3] = v[7] = 1; 3440428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3441428d7b3dSmrg} 3442428d7b3dSmrg 3443428d7b3dSmrgfastcall static void 3444428d7b3dSmrggen7_render_fill_op_box(struct sna *sna, 3445428d7b3dSmrg const struct sna_fill_op *op, 3446428d7b3dSmrg const BoxRec *box) 3447428d7b3dSmrg{ 3448428d7b3dSmrg int16_t *v; 3449428d7b3dSmrg 3450428d7b3dSmrg DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, 3451428d7b3dSmrg box->x1, box->y1, box->x2, box->y2)); 3452428d7b3dSmrg 3453428d7b3dSmrg gen7_get_rectangles(sna, &op->base, 1, gen7_emit_fill_state); 3454428d7b3dSmrg 3455428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3456428d7b3dSmrg sna->render.vertex_used += 6; 3457428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3458428d7b3dSmrg 3459428d7b3dSmrg v[0] = box->x2; 3460428d7b3dSmrg v[8] = v[4] = box->x1; 3461428d7b3dSmrg v[5] = v[1] = box->y2; 3462428d7b3dSmrg v[9] = box->y1; 3463428d7b3dSmrg 3464428d7b3dSmrg v[7] = v[2] = v[3] = 1; 3465428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3466428d7b3dSmrg} 3467428d7b3dSmrg 3468428d7b3dSmrgfastcall static void 3469428d7b3dSmrggen7_render_fill_op_boxes(struct sna *sna, 3470428d7b3dSmrg const struct sna_fill_op *op, 3471428d7b3dSmrg const BoxRec *box, 3472428d7b3dSmrg int nbox) 3473428d7b3dSmrg{ 3474428d7b3dSmrg DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__, 3475428d7b3dSmrg box->x1, box->y1, box->x2, box->y2, nbox)); 3476428d7b3dSmrg 3477428d7b3dSmrg do { 3478428d7b3dSmrg int nbox_this_time; 3479428d7b3dSmrg int16_t *v; 3480428d7b3dSmrg 3481428d7b3dSmrg nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox, 3482428d7b3dSmrg gen7_emit_fill_state); 3483428d7b3dSmrg nbox -= nbox_this_time; 3484428d7b3dSmrg 3485428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3486428d7b3dSmrg sna->render.vertex_used += 6 * nbox_this_time; 3487428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3488428d7b3dSmrg 3489428d7b3dSmrg do { 3490428d7b3dSmrg v[0] = box->x2; 3491428d7b3dSmrg v[8] = v[4] = box->x1; 3492428d7b3dSmrg v[5] = v[1] = box->y2; 3493428d7b3dSmrg v[9] = box->y1; 3494428d7b3dSmrg v[7] = v[2] = v[3] = 1; 3495428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3496428d7b3dSmrg box++; v += 12; 3497428d7b3dSmrg } while (--nbox_this_time); 3498428d7b3dSmrg } while (nbox); 3499428d7b3dSmrg} 3500428d7b3dSmrg 3501428d7b3dSmrgstatic void 3502428d7b3dSmrggen7_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) 3503428d7b3dSmrg{ 3504428d7b3dSmrg if (sna->render.vertex_offset) 3505428d7b3dSmrg gen4_vertex_flush(sna); 3506428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->base.src.bo); 3507428d7b3dSmrg} 3508428d7b3dSmrg 3509428d7b3dSmrgstatic bool 3510428d7b3dSmrggen7_render_fill(struct sna *sna, uint8_t alu, 3511428d7b3dSmrg PixmapPtr dst, struct kgem_bo *dst_bo, 3512428d7b3dSmrg uint32_t color, unsigned flags, 3513428d7b3dSmrg struct sna_fill_op *op) 3514428d7b3dSmrg{ 3515428d7b3dSmrg DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); 3516428d7b3dSmrg 3517428d7b3dSmrg if (prefer_blt_fill(sna, dst_bo, flags) && 3518428d7b3dSmrg sna_blt_fill(sna, alu, 3519428d7b3dSmrg dst_bo, dst->drawable.bitsPerPixel, 3520428d7b3dSmrg color, 3521428d7b3dSmrg op)) 3522428d7b3dSmrg return true; 3523428d7b3dSmrg 3524428d7b3dSmrg if (!(alu == GXcopy || alu == GXclear) || 3525428d7b3dSmrg too_large(dst->drawable.width, dst->drawable.height)) 3526428d7b3dSmrg return sna_blt_fill(sna, alu, 3527428d7b3dSmrg dst_bo, dst->drawable.bitsPerPixel, 3528428d7b3dSmrg color, 3529428d7b3dSmrg op); 3530428d7b3dSmrg 3531428d7b3dSmrg if (alu == GXclear) 3532428d7b3dSmrg color = 0; 3533428d7b3dSmrg 3534428d7b3dSmrg op->base.dst.pixmap = dst; 3535428d7b3dSmrg op->base.dst.width = dst->drawable.width; 3536428d7b3dSmrg op->base.dst.height = dst->drawable.height; 3537428d7b3dSmrg op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3538428d7b3dSmrg op->base.dst.bo = dst_bo; 3539428d7b3dSmrg op->base.dst.x = op->base.dst.y = 0; 3540428d7b3dSmrg 3541428d7b3dSmrg op->base.src.bo = 3542428d7b3dSmrg sna_render_get_solid(sna, 3543428d7b3dSmrg sna_rgba_for_color(color, 3544428d7b3dSmrg dst->drawable.depth)); 3545428d7b3dSmrg op->base.mask.bo = NULL; 3546428d7b3dSmrg 3547428d7b3dSmrg op->base.need_magic_ca_pass = false; 3548428d7b3dSmrg op->base.floats_per_vertex = 2; 3549428d7b3dSmrg op->base.floats_per_rect = 6; 3550428d7b3dSmrg 3551428d7b3dSmrg op->base.u.gen7.flags = FILL_FLAGS_NOBLEND; 3552428d7b3dSmrg 3553428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3554428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3555428d7b3dSmrg kgem_submit(&sna->kgem); 3556428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3557428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->base.src.bo); 3558428d7b3dSmrg return false; 3559428d7b3dSmrg } 3560428d7b3dSmrg 3561428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3562428d7b3dSmrg } 3563428d7b3dSmrg 3564428d7b3dSmrg gen7_align_vertex(sna, &op->base); 3565428d7b3dSmrg gen7_emit_fill_state(sna, &op->base); 3566428d7b3dSmrg 3567428d7b3dSmrg op->blt = gen7_render_fill_op_blt; 3568428d7b3dSmrg op->box = gen7_render_fill_op_box; 3569428d7b3dSmrg op->boxes = gen7_render_fill_op_boxes; 3570428d7b3dSmrg op->points = NULL; 3571428d7b3dSmrg op->done = gen7_render_fill_op_done; 3572428d7b3dSmrg return true; 3573428d7b3dSmrg} 3574428d7b3dSmrg 3575428d7b3dSmrgstatic bool 3576428d7b3dSmrggen7_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3577428d7b3dSmrg uint32_t color, 3578428d7b3dSmrg int16_t x1, int16_t y1, int16_t x2, int16_t y2, 3579428d7b3dSmrg uint8_t alu) 3580428d7b3dSmrg{ 3581428d7b3dSmrg BoxRec box; 3582428d7b3dSmrg 3583428d7b3dSmrg box.x1 = x1; 3584428d7b3dSmrg box.y1 = y1; 3585428d7b3dSmrg box.x2 = x2; 3586428d7b3dSmrg box.y2 = y2; 3587428d7b3dSmrg 3588428d7b3dSmrg return sna_blt_fill_boxes(sna, alu, 3589428d7b3dSmrg bo, dst->drawable.bitsPerPixel, 3590428d7b3dSmrg color, &box, 1); 3591428d7b3dSmrg} 3592428d7b3dSmrg 3593428d7b3dSmrgstatic bool 3594428d7b3dSmrggen7_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3595428d7b3dSmrg uint32_t color, 3596428d7b3dSmrg int16_t x1, int16_t y1, 3597428d7b3dSmrg int16_t x2, int16_t y2, 3598428d7b3dSmrg uint8_t alu) 3599428d7b3dSmrg{ 3600428d7b3dSmrg struct sna_composite_op tmp; 3601428d7b3dSmrg int16_t *v; 3602428d7b3dSmrg 3603428d7b3dSmrg /* Prefer to use the BLT if already engaged */ 3604428d7b3dSmrg if (prefer_blt_fill(sna, bo, FILL_BOXES) && 3605428d7b3dSmrg gen7_render_fill_one_try_blt(sna, dst, bo, color, 3606428d7b3dSmrg x1, y1, x2, y2, alu)) 3607428d7b3dSmrg return true; 3608428d7b3dSmrg 3609428d7b3dSmrg /* Must use the BLT if we can't RENDER... */ 3610428d7b3dSmrg if (!(alu == GXcopy || alu == GXclear) || 3611428d7b3dSmrg too_large(dst->drawable.width, dst->drawable.height)) 3612428d7b3dSmrg return gen7_render_fill_one_try_blt(sna, dst, bo, color, 3613428d7b3dSmrg x1, y1, x2, y2, alu); 3614428d7b3dSmrg 3615428d7b3dSmrg if (alu == GXclear) 3616428d7b3dSmrg color = 0; 3617428d7b3dSmrg 3618428d7b3dSmrg tmp.dst.pixmap = dst; 3619428d7b3dSmrg tmp.dst.width = dst->drawable.width; 3620428d7b3dSmrg tmp.dst.height = dst->drawable.height; 3621428d7b3dSmrg tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3622428d7b3dSmrg tmp.dst.bo = bo; 3623428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 3624428d7b3dSmrg 3625428d7b3dSmrg tmp.src.bo = 3626428d7b3dSmrg sna_render_get_solid(sna, 3627428d7b3dSmrg sna_rgba_for_color(color, 3628428d7b3dSmrg dst->drawable.depth)); 3629428d7b3dSmrg tmp.mask.bo = NULL; 3630428d7b3dSmrg 3631428d7b3dSmrg tmp.floats_per_vertex = 2; 3632428d7b3dSmrg tmp.floats_per_rect = 6; 3633428d7b3dSmrg tmp.need_magic_ca_pass = false; 3634428d7b3dSmrg 3635428d7b3dSmrg tmp.u.gen7.flags = FILL_FLAGS_NOBLEND; 3636428d7b3dSmrg 3637428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 3638428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3639428d7b3dSmrg kgem_submit(&sna->kgem); 3640428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3641428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3642428d7b3dSmrg return false; 3643428d7b3dSmrg } 3644428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3645428d7b3dSmrg } 3646428d7b3dSmrg 3647428d7b3dSmrg gen7_align_vertex(sna, &tmp); 3648428d7b3dSmrg gen7_emit_fill_state(sna, &tmp); 3649428d7b3dSmrg 3650428d7b3dSmrg gen7_get_rectangles(sna, &tmp, 1, gen7_emit_fill_state); 3651428d7b3dSmrg 3652428d7b3dSmrg DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2)); 3653428d7b3dSmrg 3654428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3655428d7b3dSmrg sna->render.vertex_used += 6; 3656428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3657428d7b3dSmrg 3658428d7b3dSmrg v[0] = x2; 3659428d7b3dSmrg v[8] = v[4] = x1; 3660428d7b3dSmrg v[5] = v[1] = y2; 3661428d7b3dSmrg v[9] = y1; 3662428d7b3dSmrg v[7] = v[2] = v[3] = 1; 3663428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3664428d7b3dSmrg 3665428d7b3dSmrg gen4_vertex_flush(sna); 3666428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3667428d7b3dSmrg 3668428d7b3dSmrg return true; 3669428d7b3dSmrg} 3670428d7b3dSmrg 3671428d7b3dSmrgstatic bool 3672428d7b3dSmrggen7_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) 3673428d7b3dSmrg{ 3674428d7b3dSmrg BoxRec box; 3675428d7b3dSmrg 3676428d7b3dSmrg box.x1 = 0; 3677428d7b3dSmrg box.y1 = 0; 3678428d7b3dSmrg box.x2 = dst->drawable.width; 3679428d7b3dSmrg box.y2 = dst->drawable.height; 3680428d7b3dSmrg 3681428d7b3dSmrg return sna_blt_fill_boxes(sna, GXclear, 3682428d7b3dSmrg bo, dst->drawable.bitsPerPixel, 3683428d7b3dSmrg 0, &box, 1); 3684428d7b3dSmrg} 3685428d7b3dSmrg 3686428d7b3dSmrgstatic bool 3687428d7b3dSmrggen7_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) 3688428d7b3dSmrg{ 3689428d7b3dSmrg struct sna_composite_op tmp; 3690428d7b3dSmrg int16_t *v; 3691428d7b3dSmrg 3692428d7b3dSmrg DBG(("%s: %dx%d\n", 3693428d7b3dSmrg __FUNCTION__, 3694428d7b3dSmrg dst->drawable.width, 3695428d7b3dSmrg dst->drawable.height)); 3696428d7b3dSmrg 3697428d7b3dSmrg /* Prefer to use the BLT if already engaged */ 3698428d7b3dSmrg if (sna->kgem.mode == KGEM_BLT && 3699428d7b3dSmrg gen7_render_clear_try_blt(sna, dst, bo)) 3700428d7b3dSmrg return true; 3701428d7b3dSmrg 3702428d7b3dSmrg /* Must use the BLT if we can't RENDER... */ 3703428d7b3dSmrg if (too_large(dst->drawable.width, dst->drawable.height)) 3704428d7b3dSmrg return gen7_render_clear_try_blt(sna, dst, bo); 3705428d7b3dSmrg 3706428d7b3dSmrg tmp.dst.pixmap = dst; 3707428d7b3dSmrg tmp.dst.width = dst->drawable.width; 3708428d7b3dSmrg tmp.dst.height = dst->drawable.height; 3709428d7b3dSmrg tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3710428d7b3dSmrg tmp.dst.bo = bo; 3711428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 3712428d7b3dSmrg 3713428d7b3dSmrg tmp.src.bo = sna_render_get_solid(sna, 0); 3714428d7b3dSmrg tmp.mask.bo = NULL; 3715428d7b3dSmrg 3716428d7b3dSmrg tmp.floats_per_vertex = 2; 3717428d7b3dSmrg tmp.floats_per_rect = 6; 3718428d7b3dSmrg tmp.need_magic_ca_pass = false; 3719428d7b3dSmrg 3720428d7b3dSmrg tmp.u.gen7.flags = FILL_FLAGS_NOBLEND; 3721428d7b3dSmrg 3722428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 3723428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3724428d7b3dSmrg kgem_submit(&sna->kgem); 3725428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3726428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3727428d7b3dSmrg return false; 3728428d7b3dSmrg } 3729428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3730428d7b3dSmrg } 3731428d7b3dSmrg 3732428d7b3dSmrg gen7_align_vertex(sna, &tmp); 3733428d7b3dSmrg gen7_emit_fill_state(sna, &tmp); 3734428d7b3dSmrg 3735428d7b3dSmrg gen7_get_rectangles(sna, &tmp, 1, gen7_emit_fill_state); 3736428d7b3dSmrg 3737428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3738428d7b3dSmrg sna->render.vertex_used += 6; 3739428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3740428d7b3dSmrg 3741428d7b3dSmrg v[0] = dst->drawable.width; 3742428d7b3dSmrg v[5] = v[1] = dst->drawable.height; 3743428d7b3dSmrg v[8] = v[4] = 0; 3744428d7b3dSmrg v[9] = 0; 3745428d7b3dSmrg 3746428d7b3dSmrg v[7] = v[2] = v[3] = 1; 3747428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3748428d7b3dSmrg 3749428d7b3dSmrg gen4_vertex_flush(sna); 3750428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3751428d7b3dSmrg 3752428d7b3dSmrg return true; 3753428d7b3dSmrg} 3754428d7b3dSmrgstatic void gen7_render_reset(struct sna *sna) 3755428d7b3dSmrg{ 3756428d7b3dSmrg sna->render_state.gen7.pipe_controls_since_stall = 0; 3757428d7b3dSmrg sna->render_state.gen7.emit_flush = false; 3758428d7b3dSmrg sna->render_state.gen7.needs_invariant = true; 3759428d7b3dSmrg sna->render_state.gen7.ve_id = 3 << 2; 3760428d7b3dSmrg sna->render_state.gen7.last_primitive = -1; 3761428d7b3dSmrg 3762428d7b3dSmrg sna->render_state.gen7.num_sf_outputs = 0; 3763428d7b3dSmrg sna->render_state.gen7.samplers = -1; 3764428d7b3dSmrg sna->render_state.gen7.blend = -1; 3765428d7b3dSmrg sna->render_state.gen7.kernel = -1; 3766428d7b3dSmrg sna->render_state.gen7.drawrect_offset = -1; 3767428d7b3dSmrg sna->render_state.gen7.drawrect_limit = -1; 3768428d7b3dSmrg sna->render_state.gen7.surface_table = 0; 3769428d7b3dSmrg 3770428d7b3dSmrg if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { 3771428d7b3dSmrg DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); 3772428d7b3dSmrg discard_vbo(sna); 3773428d7b3dSmrg } 3774428d7b3dSmrg 3775428d7b3dSmrg sna->render.vertex_offset = 0; 3776428d7b3dSmrg sna->render.nvertex_reloc = 0; 3777428d7b3dSmrg sna->render.vb_id = 0; 3778428d7b3dSmrg} 3779428d7b3dSmrg 3780428d7b3dSmrgstatic void gen7_render_fini(struct sna *sna) 3781428d7b3dSmrg{ 3782428d7b3dSmrg kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo); 3783428d7b3dSmrg} 3784428d7b3dSmrg 3785428d7b3dSmrgstatic bool is_gt3(struct sna *sna, int devid) 3786428d7b3dSmrg{ 3787428d7b3dSmrg assert(sna->kgem.gen == 075); 3788428d7b3dSmrg return devid & 0x20; 3789428d7b3dSmrg} 3790428d7b3dSmrg 3791428d7b3dSmrgstatic bool is_gt2(struct sna *sna, int devid) 3792428d7b3dSmrg{ 3793428d7b3dSmrg return devid & (is_hsw(sna)? 0x30 : 0x20); 3794428d7b3dSmrg} 3795428d7b3dSmrg 3796428d7b3dSmrgstatic bool is_mobile(struct sna *sna, int devid) 3797428d7b3dSmrg{ 3798428d7b3dSmrg return (devid & 0xf) == 0x6; 3799428d7b3dSmrg} 3800428d7b3dSmrg 3801428d7b3dSmrgstatic bool gen7_render_setup(struct sna *sna, int devid) 3802428d7b3dSmrg{ 3803428d7b3dSmrg struct gen7_render_state *state = &sna->render_state.gen7; 3804428d7b3dSmrg struct sna_static_stream general; 3805428d7b3dSmrg struct gen7_sampler_state *ss; 3806428d7b3dSmrg int i, j, k, l, m; 3807428d7b3dSmrg 3808428d7b3dSmrg if (is_ivb(sna)) { 3809428d7b3dSmrg state->info = &ivb_gt_info; 3810428d7b3dSmrg if (devid & 0xf) { 3811428d7b3dSmrg state->info = &ivb_gt1_info; 3812428d7b3dSmrg if (is_gt2(sna, devid)) 3813428d7b3dSmrg state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */ 3814428d7b3dSmrg } 3815428d7b3dSmrg } else if (is_byt(sna)) { 3816428d7b3dSmrg state->info = &byt_gt_info; 3817428d7b3dSmrg } else if (is_hsw(sna)) { 3818428d7b3dSmrg state->info = &hsw_gt_info; 3819428d7b3dSmrg if (devid & 0xf) { 3820428d7b3dSmrg if (is_gt3(sna, devid)) 3821428d7b3dSmrg state->info = &hsw_gt3_info; 3822428d7b3dSmrg else if (is_gt2(sna, devid)) 3823428d7b3dSmrg state->info = &hsw_gt2_info; 3824428d7b3dSmrg else 3825428d7b3dSmrg state->info = &hsw_gt1_info; 3826428d7b3dSmrg } 3827428d7b3dSmrg } else 3828428d7b3dSmrg return false; 3829428d7b3dSmrg 3830428d7b3dSmrg state->gt = state->info->gt; 3831428d7b3dSmrg 3832428d7b3dSmrg sna_static_stream_init(&general); 3833428d7b3dSmrg 3834428d7b3dSmrg /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer 3835428d7b3dSmrg * dumps, you know it points to zero. 3836428d7b3dSmrg */ 3837428d7b3dSmrg null_create(&general); 3838428d7b3dSmrg 3839428d7b3dSmrg for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++) { 3840428d7b3dSmrg if (wm_kernels[m].size) { 3841428d7b3dSmrg state->wm_kernel[m][1] = 3842428d7b3dSmrg sna_static_stream_add(&general, 3843428d7b3dSmrg wm_kernels[m].data, 3844428d7b3dSmrg wm_kernels[m].size, 3845428d7b3dSmrg 64); 3846428d7b3dSmrg } else { 3847428d7b3dSmrg if (USE_8_PIXEL_DISPATCH) { 3848428d7b3dSmrg state->wm_kernel[m][0] = 3849428d7b3dSmrg sna_static_stream_compile_wm(sna, &general, 3850428d7b3dSmrg wm_kernels[m].data, 8); 3851428d7b3dSmrg } 3852428d7b3dSmrg 3853428d7b3dSmrg if (USE_16_PIXEL_DISPATCH) { 3854428d7b3dSmrg state->wm_kernel[m][1] = 3855428d7b3dSmrg sna_static_stream_compile_wm(sna, &general, 3856428d7b3dSmrg wm_kernels[m].data, 16); 3857428d7b3dSmrg } 3858428d7b3dSmrg 3859428d7b3dSmrg if (USE_32_PIXEL_DISPATCH) { 3860428d7b3dSmrg state->wm_kernel[m][2] = 3861428d7b3dSmrg sna_static_stream_compile_wm(sna, &general, 3862428d7b3dSmrg wm_kernels[m].data, 32); 3863428d7b3dSmrg } 3864428d7b3dSmrg } 3865428d7b3dSmrg assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]); 3866428d7b3dSmrg } 3867428d7b3dSmrg 3868428d7b3dSmrg ss = sna_static_stream_map(&general, 3869428d7b3dSmrg 2 * sizeof(*ss) * 3870428d7b3dSmrg (2 + 3871428d7b3dSmrg FILTER_COUNT * EXTEND_COUNT * 3872428d7b3dSmrg FILTER_COUNT * EXTEND_COUNT), 3873428d7b3dSmrg 32); 3874428d7b3dSmrg state->wm_state = sna_static_stream_offsetof(&general, ss); 3875428d7b3dSmrg sampler_copy_init(ss); ss += 2; 3876428d7b3dSmrg sampler_fill_init(ss); ss += 2; 3877428d7b3dSmrg for (i = 0; i < FILTER_COUNT; i++) { 3878428d7b3dSmrg for (j = 0; j < EXTEND_COUNT; j++) { 3879428d7b3dSmrg for (k = 0; k < FILTER_COUNT; k++) { 3880428d7b3dSmrg for (l = 0; l < EXTEND_COUNT; l++) { 3881428d7b3dSmrg sampler_state_init(ss++, i, j); 3882428d7b3dSmrg sampler_state_init(ss++, k, l); 3883428d7b3dSmrg } 3884428d7b3dSmrg } 3885428d7b3dSmrg } 3886428d7b3dSmrg } 3887428d7b3dSmrg 3888428d7b3dSmrg state->cc_blend = gen7_composite_create_blend_state(&general); 3889428d7b3dSmrg 3890428d7b3dSmrg state->general_bo = sna_static_stream_fini(sna, &general); 3891428d7b3dSmrg return state->general_bo != NULL; 3892428d7b3dSmrg} 3893428d7b3dSmrg 3894428d7b3dSmrgconst char *gen7_render_init(struct sna *sna, const char *backend) 3895428d7b3dSmrg{ 3896428d7b3dSmrg int devid = intel_get_device_id(sna->dev); 3897428d7b3dSmrg 3898428d7b3dSmrg if (!gen7_render_setup(sna, devid)) 3899428d7b3dSmrg return backend; 3900428d7b3dSmrg 3901428d7b3dSmrg sna->kgem.context_switch = gen6_render_context_switch; 3902428d7b3dSmrg sna->kgem.retire = gen6_render_retire; 3903428d7b3dSmrg sna->kgem.expire = gen4_render_expire; 3904428d7b3dSmrg 3905428d7b3dSmrg#if !NO_COMPOSITE 3906428d7b3dSmrg sna->render.composite = gen7_render_composite; 3907428d7b3dSmrg sna->render.prefer_gpu |= PREFER_GPU_RENDER; 3908428d7b3dSmrg#endif 3909428d7b3dSmrg#if !NO_COMPOSITE_SPANS 3910428d7b3dSmrg sna->render.check_composite_spans = gen7_check_composite_spans; 3911428d7b3dSmrg sna->render.composite_spans = gen7_render_composite_spans; 3912428d7b3dSmrg if (is_mobile(sna, devid) || is_gt2(sna, devid) || is_byt(sna)) 3913428d7b3dSmrg sna->render.prefer_gpu |= PREFER_GPU_SPANS; 3914428d7b3dSmrg#endif 3915428d7b3dSmrg sna->render.video = gen7_render_video; 3916428d7b3dSmrg 3917428d7b3dSmrg#if !NO_COPY_BOXES 3918428d7b3dSmrg sna->render.copy_boxes = gen7_render_copy_boxes; 3919428d7b3dSmrg#endif 3920428d7b3dSmrg#if !NO_COPY 3921428d7b3dSmrg sna->render.copy = gen7_render_copy; 3922428d7b3dSmrg#endif 3923428d7b3dSmrg 3924428d7b3dSmrg#if !NO_FILL_BOXES 3925428d7b3dSmrg sna->render.fill_boxes = gen7_render_fill_boxes; 3926428d7b3dSmrg#endif 3927428d7b3dSmrg#if !NO_FILL 3928428d7b3dSmrg sna->render.fill = gen7_render_fill; 3929428d7b3dSmrg#endif 3930428d7b3dSmrg#if !NO_FILL_ONE 3931428d7b3dSmrg sna->render.fill_one = gen7_render_fill_one; 3932428d7b3dSmrg#endif 3933428d7b3dSmrg#if !NO_FILL_CLEAR 3934428d7b3dSmrg sna->render.clear = gen7_render_clear; 3935428d7b3dSmrg#endif 3936428d7b3dSmrg 3937428d7b3dSmrg sna->render.flush = gen4_render_flush; 3938428d7b3dSmrg sna->render.reset = gen7_render_reset; 3939428d7b3dSmrg sna->render.fini = gen7_render_fini; 3940428d7b3dSmrg 3941428d7b3dSmrg sna->render.max_3d_size = GEN7_MAX_SIZE; 3942428d7b3dSmrg sna->render.max_3d_pitch = 1 << 18; 3943428d7b3dSmrg return sna->render_state.gen7.info->name; 3944428d7b3dSmrg} 3945