1428d7b3dSmrg/* 2428d7b3dSmrg * Copyright © 2006,2008,2011 Intel Corporation 3428d7b3dSmrg * Copyright © 2007 Red Hat, Inc. 4428d7b3dSmrg * 5428d7b3dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 6428d7b3dSmrg * copy of this software and associated documentation files (the "Software"), 7428d7b3dSmrg * to deal in the Software without restriction, including without limitation 8428d7b3dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9428d7b3dSmrg * and/or sell copies of the Software, and to permit persons to whom the 10428d7b3dSmrg * Software is furnished to do so, subject to the following conditions: 11428d7b3dSmrg * 12428d7b3dSmrg * The above copyright notice and this permission notice (including the next 13428d7b3dSmrg * paragraph) shall be included in all copies or substantial portions of the 14428d7b3dSmrg * Software. 15428d7b3dSmrg * 16428d7b3dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17428d7b3dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18428d7b3dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19428d7b3dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20428d7b3dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21428d7b3dSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22428d7b3dSmrg * SOFTWARE. 23428d7b3dSmrg * 24428d7b3dSmrg * Authors: 25428d7b3dSmrg * Wang Zhenyu <zhenyu.z.wang@sna.com> 26428d7b3dSmrg * Eric Anholt <eric@anholt.net> 27428d7b3dSmrg * Carl Worth <cworth@redhat.com> 28428d7b3dSmrg * Keith Packard <keithp@keithp.com> 29428d7b3dSmrg * Chris Wilson <chris@chris-wilson.co.uk> 30428d7b3dSmrg * 31428d7b3dSmrg */ 32428d7b3dSmrg 33428d7b3dSmrg#ifdef HAVE_CONFIG_H 34428d7b3dSmrg#include "config.h" 35428d7b3dSmrg#endif 36428d7b3dSmrg 37428d7b3dSmrg#include "sna.h" 38428d7b3dSmrg#include "sna_reg.h" 39428d7b3dSmrg#include "sna_render.h" 40428d7b3dSmrg#include "sna_render_inline.h" 41428d7b3dSmrg#include "sna_video.h" 42428d7b3dSmrg 43428d7b3dSmrg#include "brw/brw.h" 44428d7b3dSmrg#include "gen6_render.h" 45428d7b3dSmrg#include "gen6_common.h" 46428d7b3dSmrg#include "gen4_common.h" 47428d7b3dSmrg#include "gen4_source.h" 48428d7b3dSmrg#include "gen4_vertex.h" 49428d7b3dSmrg 50428d7b3dSmrg#define ALWAYS_INVALIDATE 0 51428d7b3dSmrg#define ALWAYS_FLUSH 0 52428d7b3dSmrg#define ALWAYS_STALL 0 53428d7b3dSmrg 54428d7b3dSmrg#define NO_COMPOSITE 0 55428d7b3dSmrg#define NO_COMPOSITE_SPANS 0 56428d7b3dSmrg#define NO_COPY 0 57428d7b3dSmrg#define NO_COPY_BOXES 0 58428d7b3dSmrg#define NO_FILL 0 59428d7b3dSmrg#define NO_FILL_BOXES 0 60428d7b3dSmrg#define NO_FILL_ONE 0 61428d7b3dSmrg#define NO_FILL_CLEAR 0 62428d7b3dSmrg 63428d7b3dSmrg#define USE_8_PIXEL_DISPATCH 1 64428d7b3dSmrg#define USE_16_PIXEL_DISPATCH 1 65428d7b3dSmrg#define USE_32_PIXEL_DISPATCH 0 66428d7b3dSmrg 67428d7b3dSmrg#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH 68428d7b3dSmrg#error "Must select at least 8, 16 or 32 pixel dispatch" 69428d7b3dSmrg#endif 70428d7b3dSmrg 71428d7b3dSmrg#define GEN6_MAX_SIZE 8192 72428d7b3dSmrg 73428d7b3dSmrgstruct gt_info { 74428d7b3dSmrg const char *name; 75428d7b3dSmrg int max_vs_threads; 76428d7b3dSmrg int max_gs_threads; 77428d7b3dSmrg int max_wm_threads; 78428d7b3dSmrg struct { 79428d7b3dSmrg int size; 80428d7b3dSmrg int max_vs_entries; 81428d7b3dSmrg int max_gs_entries; 82428d7b3dSmrg } urb; 83428d7b3dSmrg int gt; 84428d7b3dSmrg}; 85428d7b3dSmrg 86428d7b3dSmrgstatic const struct gt_info gt1_info = { 87428d7b3dSmrg .name = "Sandybridge (gen6, gt1)", 88428d7b3dSmrg .max_vs_threads = 24, 89428d7b3dSmrg .max_gs_threads = 21, 90428d7b3dSmrg .max_wm_threads = 40, 91428d7b3dSmrg .urb = { 32, 256, 256 }, 92428d7b3dSmrg .gt = 1, 93428d7b3dSmrg}; 94428d7b3dSmrg 95428d7b3dSmrgstatic const struct gt_info gt2_info = { 96428d7b3dSmrg .name = "Sandybridge (gen6, gt2)", 97428d7b3dSmrg .max_vs_threads = 60, 98428d7b3dSmrg .max_gs_threads = 60, 99428d7b3dSmrg .max_wm_threads = 80, 100428d7b3dSmrg .urb = { 64, 256, 256 }, 101428d7b3dSmrg .gt = 2, 102428d7b3dSmrg}; 103428d7b3dSmrg 104428d7b3dSmrgstatic const uint32_t ps_kernel_packed[][4] = { 105428d7b3dSmrg#include "exa_wm_src_affine.g6b" 106428d7b3dSmrg#include "exa_wm_src_sample_argb.g6b" 107428d7b3dSmrg#include "exa_wm_yuv_rgb.g6b" 108428d7b3dSmrg#include "exa_wm_write.g6b" 109428d7b3dSmrg}; 110428d7b3dSmrg 111428d7b3dSmrgstatic const uint32_t ps_kernel_planar[][4] = { 112428d7b3dSmrg#include "exa_wm_src_affine.g6b" 113428d7b3dSmrg#include "exa_wm_src_sample_planar.g6b" 114428d7b3dSmrg#include "exa_wm_yuv_rgb.g6b" 115428d7b3dSmrg#include "exa_wm_write.g6b" 116428d7b3dSmrg}; 117428d7b3dSmrg 118428d7b3dSmrg#define NOKERNEL(kernel_enum, func, ns) \ 119428d7b3dSmrg [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns} 120428d7b3dSmrg#define KERNEL(kernel_enum, kernel, ns) \ 121428d7b3dSmrg [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns} 122428d7b3dSmrg 123428d7b3dSmrgstatic const struct wm_kernel_info { 124428d7b3dSmrg const char *name; 125428d7b3dSmrg const void *data; 126428d7b3dSmrg unsigned int size; 127428d7b3dSmrg unsigned int num_surfaces; 128428d7b3dSmrg} wm_kernels[] = { 129428d7b3dSmrg NOKERNEL(NOMASK, brw_wm_kernel__affine, 2), 130428d7b3dSmrg NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2), 131428d7b3dSmrg 132428d7b3dSmrg NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3), 133428d7b3dSmrg NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3), 134428d7b3dSmrg 135428d7b3dSmrg NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3), 136428d7b3dSmrg NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3), 137428d7b3dSmrg 138428d7b3dSmrg NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3), 139428d7b3dSmrg NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3), 140428d7b3dSmrg 141428d7b3dSmrg NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2), 142428d7b3dSmrg NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2), 143428d7b3dSmrg 144428d7b3dSmrg KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), 145428d7b3dSmrg KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), 146428d7b3dSmrg}; 147428d7b3dSmrg#undef KERNEL 148428d7b3dSmrg 149428d7b3dSmrgstatic const struct blendinfo { 150428d7b3dSmrg bool src_alpha; 151428d7b3dSmrg uint32_t src_blend; 152428d7b3dSmrg uint32_t dst_blend; 153428d7b3dSmrg} gen6_blend_op[] = { 154428d7b3dSmrg /* Clear */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO}, 155428d7b3dSmrg /* Src */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO}, 156428d7b3dSmrg /* Dst */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE}, 157428d7b3dSmrg /* Over */ {1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, 158428d7b3dSmrg /* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE}, 159428d7b3dSmrg /* In */ {0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO}, 160428d7b3dSmrg /* InReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA}, 161428d7b3dSmrg /* Out */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO}, 162428d7b3dSmrg /* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, 163428d7b3dSmrg /* Atop */ {1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, 164428d7b3dSmrg /* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA}, 165428d7b3dSmrg /* Xor */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, 166428d7b3dSmrg /* Add */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE}, 167428d7b3dSmrg}; 168428d7b3dSmrg 169428d7b3dSmrg/** 170428d7b3dSmrg * Highest-valued BLENDFACTOR used in gen6_blend_op. 171428d7b3dSmrg * 172428d7b3dSmrg * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR, 173428d7b3dSmrg * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, 174428d7b3dSmrg * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} 175428d7b3dSmrg */ 176428d7b3dSmrg#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1) 177428d7b3dSmrg 178428d7b3dSmrg#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64) 179428d7b3dSmrg 180428d7b3dSmrg#define BLEND_OFFSET(s, d) \ 181428d7b3dSmrg (((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE) 182428d7b3dSmrg 183428d7b3dSmrg#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO) 184428d7b3dSmrg#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO) 185428d7b3dSmrg 186428d7b3dSmrg#define SAMPLER_OFFSET(sf, se, mf, me) \ 187428d7b3dSmrg (((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state)) 188428d7b3dSmrg 189428d7b3dSmrg#define VERTEX_2s2s 0 190428d7b3dSmrg 191428d7b3dSmrg#define COPY_SAMPLER 0 192428d7b3dSmrg#define COPY_VERTEX VERTEX_2s2s 193428d7b3dSmrg#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX) 194428d7b3dSmrg 195428d7b3dSmrg#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state)) 196428d7b3dSmrg#define FILL_VERTEX VERTEX_2s2s 197428d7b3dSmrg#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX) 198428d7b3dSmrg#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX) 199428d7b3dSmrg 200428d7b3dSmrg#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0) 201428d7b3dSmrg#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0) 202428d7b3dSmrg#define GEN6_KERNEL(f) (((f) >> 16) & 0xf) 203428d7b3dSmrg#define GEN6_VERTEX(f) (((f) >> 0) & 0xf) 204428d7b3dSmrg#define GEN6_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V))) 205428d7b3dSmrg 206428d7b3dSmrg#define OUT_BATCH(v) batch_emit(sna, v) 207428d7b3dSmrg#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) 208428d7b3dSmrg#define OUT_VERTEX_F(v) vertex_emit(sna, v) 209428d7b3dSmrg 210428d7b3dSmrgstatic inline bool too_large(int width, int height) 211428d7b3dSmrg{ 212428d7b3dSmrg return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE; 213428d7b3dSmrg} 214428d7b3dSmrg 215428d7b3dSmrgstatic uint32_t gen6_get_blend(int op, 216428d7b3dSmrg bool has_component_alpha, 217428d7b3dSmrg uint32_t dst_format) 218428d7b3dSmrg{ 219428d7b3dSmrg uint32_t src, dst; 220428d7b3dSmrg 221428d7b3dSmrg src = gen6_blend_op[op].src_blend; 222428d7b3dSmrg dst = gen6_blend_op[op].dst_blend; 223428d7b3dSmrg 224428d7b3dSmrg /* If there's no dst alpha channel, adjust the blend op so that 225428d7b3dSmrg * we'll treat it always as 1. 226428d7b3dSmrg */ 227428d7b3dSmrg if (PICT_FORMAT_A(dst_format) == 0) { 228428d7b3dSmrg if (src == GEN6_BLENDFACTOR_DST_ALPHA) 229428d7b3dSmrg src = GEN6_BLENDFACTOR_ONE; 230428d7b3dSmrg else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA) 231428d7b3dSmrg src = GEN6_BLENDFACTOR_ZERO; 232428d7b3dSmrg } 233428d7b3dSmrg 234428d7b3dSmrg /* If the source alpha is being used, then we should only be in a 235428d7b3dSmrg * case where the source blend factor is 0, and the source blend 236428d7b3dSmrg * value is the mask channels multiplied by the source picture's alpha. 237428d7b3dSmrg */ 238428d7b3dSmrg if (has_component_alpha && gen6_blend_op[op].src_alpha) { 239428d7b3dSmrg if (dst == GEN6_BLENDFACTOR_SRC_ALPHA) 240428d7b3dSmrg dst = GEN6_BLENDFACTOR_SRC_COLOR; 241428d7b3dSmrg else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA) 242428d7b3dSmrg dst = GEN6_BLENDFACTOR_INV_SRC_COLOR; 243428d7b3dSmrg } 244428d7b3dSmrg 245428d7b3dSmrg DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", 246428d7b3dSmrg op, dst_format, PICT_FORMAT_A(dst_format), 247428d7b3dSmrg src, dst, (int)BLEND_OFFSET(src, dst))); 248428d7b3dSmrg return BLEND_OFFSET(src, dst); 249428d7b3dSmrg} 250428d7b3dSmrg 251428d7b3dSmrgstatic uint32_t gen6_get_card_format(PictFormat format) 252428d7b3dSmrg{ 253428d7b3dSmrg switch (format) { 254428d7b3dSmrg default: 255428d7b3dSmrg return -1; 256428d7b3dSmrg case PICT_a8r8g8b8: 257428d7b3dSmrg return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; 258428d7b3dSmrg case PICT_x8r8g8b8: 259428d7b3dSmrg return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM; 260428d7b3dSmrg case PICT_a8b8g8r8: 261428d7b3dSmrg return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM; 262428d7b3dSmrg case PICT_x8b8g8r8: 263428d7b3dSmrg return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM; 264428d7b3dSmrg#ifdef PICT_a2r10g10b10 265428d7b3dSmrg case PICT_a2r10g10b10: 266428d7b3dSmrg return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM; 267428d7b3dSmrg case PICT_x2r10g10b10: 268428d7b3dSmrg return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM; 269428d7b3dSmrg#endif 270428d7b3dSmrg case PICT_r8g8b8: 271428d7b3dSmrg return GEN6_SURFACEFORMAT_R8G8B8_UNORM; 272428d7b3dSmrg case PICT_r5g6b5: 273428d7b3dSmrg return GEN6_SURFACEFORMAT_B5G6R5_UNORM; 274428d7b3dSmrg case PICT_a1r5g5b5: 275428d7b3dSmrg return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM; 276428d7b3dSmrg case PICT_a8: 277428d7b3dSmrg return GEN6_SURFACEFORMAT_A8_UNORM; 278428d7b3dSmrg case PICT_a4r4g4b4: 279428d7b3dSmrg return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM; 280428d7b3dSmrg } 281428d7b3dSmrg} 282428d7b3dSmrg 283428d7b3dSmrgstatic uint32_t gen6_get_dest_format(PictFormat format) 284428d7b3dSmrg{ 285428d7b3dSmrg switch (format) { 286428d7b3dSmrg default: 287428d7b3dSmrg return -1; 288428d7b3dSmrg case PICT_a8r8g8b8: 289428d7b3dSmrg case PICT_x8r8g8b8: 290428d7b3dSmrg return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; 291428d7b3dSmrg case PICT_a8b8g8r8: 292428d7b3dSmrg case PICT_x8b8g8r8: 293428d7b3dSmrg return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM; 294428d7b3dSmrg#ifdef PICT_a2r10g10b10 295428d7b3dSmrg case PICT_a2r10g10b10: 296428d7b3dSmrg case PICT_x2r10g10b10: 297428d7b3dSmrg return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM; 298428d7b3dSmrg#endif 299428d7b3dSmrg case PICT_r5g6b5: 300428d7b3dSmrg return GEN6_SURFACEFORMAT_B5G6R5_UNORM; 301428d7b3dSmrg case PICT_x1r5g5b5: 302428d7b3dSmrg case PICT_a1r5g5b5: 303428d7b3dSmrg return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM; 304428d7b3dSmrg case PICT_a8: 305428d7b3dSmrg return GEN6_SURFACEFORMAT_A8_UNORM; 306428d7b3dSmrg case PICT_a4r4g4b4: 307428d7b3dSmrg case PICT_x4r4g4b4: 308428d7b3dSmrg return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM; 309428d7b3dSmrg } 310428d7b3dSmrg} 311428d7b3dSmrg 312428d7b3dSmrgstatic bool gen6_check_dst_format(PictFormat format) 313428d7b3dSmrg{ 314428d7b3dSmrg if (gen6_get_dest_format(format) != -1) 315428d7b3dSmrg return true; 316428d7b3dSmrg 317428d7b3dSmrg DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 318428d7b3dSmrg return false; 319428d7b3dSmrg} 320428d7b3dSmrg 321428d7b3dSmrgstatic bool gen6_check_format(uint32_t format) 322428d7b3dSmrg{ 323428d7b3dSmrg if (gen6_get_card_format(format) != -1) 324428d7b3dSmrg return true; 325428d7b3dSmrg 326428d7b3dSmrg DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 327428d7b3dSmrg return false; 328428d7b3dSmrg} 329428d7b3dSmrg 330428d7b3dSmrgstatic uint32_t gen6_filter(uint32_t filter) 331428d7b3dSmrg{ 332428d7b3dSmrg switch (filter) { 333428d7b3dSmrg default: 334428d7b3dSmrg assert(0); 335428d7b3dSmrg case PictFilterNearest: 336428d7b3dSmrg return SAMPLER_FILTER_NEAREST; 337428d7b3dSmrg case PictFilterBilinear: 338428d7b3dSmrg return SAMPLER_FILTER_BILINEAR; 339428d7b3dSmrg } 340428d7b3dSmrg} 341428d7b3dSmrg 342428d7b3dSmrgstatic uint32_t gen6_check_filter(PicturePtr picture) 343428d7b3dSmrg{ 344428d7b3dSmrg switch (picture->filter) { 345428d7b3dSmrg case PictFilterNearest: 346428d7b3dSmrg case PictFilterBilinear: 347428d7b3dSmrg return true; 348428d7b3dSmrg default: 349428d7b3dSmrg return false; 350428d7b3dSmrg } 351428d7b3dSmrg} 352428d7b3dSmrg 353428d7b3dSmrgstatic uint32_t gen6_repeat(uint32_t repeat) 354428d7b3dSmrg{ 355428d7b3dSmrg switch (repeat) { 356428d7b3dSmrg default: 357428d7b3dSmrg assert(0); 358428d7b3dSmrg case RepeatNone: 359428d7b3dSmrg return SAMPLER_EXTEND_NONE; 360428d7b3dSmrg case RepeatNormal: 361428d7b3dSmrg return SAMPLER_EXTEND_REPEAT; 362428d7b3dSmrg case RepeatPad: 363428d7b3dSmrg return SAMPLER_EXTEND_PAD; 364428d7b3dSmrg case RepeatReflect: 365428d7b3dSmrg return SAMPLER_EXTEND_REFLECT; 366428d7b3dSmrg } 367428d7b3dSmrg} 368428d7b3dSmrg 369428d7b3dSmrgstatic bool gen6_check_repeat(PicturePtr picture) 370428d7b3dSmrg{ 371428d7b3dSmrg if (!picture->repeat) 372428d7b3dSmrg return true; 373428d7b3dSmrg 374428d7b3dSmrg switch (picture->repeatType) { 375428d7b3dSmrg case RepeatNone: 376428d7b3dSmrg case RepeatNormal: 377428d7b3dSmrg case RepeatPad: 378428d7b3dSmrg case RepeatReflect: 379428d7b3dSmrg return true; 380428d7b3dSmrg default: 381428d7b3dSmrg return false; 382428d7b3dSmrg } 383428d7b3dSmrg} 384428d7b3dSmrg 385428d7b3dSmrgstatic int 386428d7b3dSmrggen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) 387428d7b3dSmrg{ 388428d7b3dSmrg int base; 389428d7b3dSmrg 390428d7b3dSmrg if (has_mask) { 391428d7b3dSmrg if (is_ca) { 392428d7b3dSmrg if (gen6_blend_op[op].src_alpha) 393428d7b3dSmrg base = GEN6_WM_KERNEL_MASKSA; 394428d7b3dSmrg else 395428d7b3dSmrg base = GEN6_WM_KERNEL_MASKCA; 396428d7b3dSmrg } else 397428d7b3dSmrg base = GEN6_WM_KERNEL_MASK; 398428d7b3dSmrg } else 399428d7b3dSmrg base = GEN6_WM_KERNEL_NOMASK; 400428d7b3dSmrg 401428d7b3dSmrg return base + !is_affine; 402428d7b3dSmrg} 403428d7b3dSmrg 404428d7b3dSmrginline static void 405428d7b3dSmrggen6_emit_pipe_invalidate(struct sna *sna) 406428d7b3dSmrg{ 407428d7b3dSmrg OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); 408428d7b3dSmrg OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH | 409428d7b3dSmrg GEN6_PIPE_CONTROL_TC_FLUSH | 410428d7b3dSmrg GEN6_PIPE_CONTROL_CS_STALL); 411428d7b3dSmrg OUT_BATCH(0); 412428d7b3dSmrg OUT_BATCH(0); 413428d7b3dSmrg} 414428d7b3dSmrg 415428d7b3dSmrginline static void 416428d7b3dSmrggen6_emit_pipe_flush(struct sna *sna, bool need_stall) 417428d7b3dSmrg{ 418428d7b3dSmrg unsigned stall; 419428d7b3dSmrg 420428d7b3dSmrg stall = 0; 421428d7b3dSmrg if (need_stall) 422428d7b3dSmrg stall = GEN6_PIPE_CONTROL_CS_STALL; 423428d7b3dSmrg 424428d7b3dSmrg OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); 425428d7b3dSmrg OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH | stall); 426428d7b3dSmrg OUT_BATCH(0); 427428d7b3dSmrg OUT_BATCH(0); 428428d7b3dSmrg} 429428d7b3dSmrg 430428d7b3dSmrginline static void 431428d7b3dSmrggen6_emit_pipe_stall(struct sna *sna) 432428d7b3dSmrg{ 433428d7b3dSmrg OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); 434428d7b3dSmrg OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL | 435428d7b3dSmrg GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD); 436428d7b3dSmrg OUT_BATCH(0); 437428d7b3dSmrg OUT_BATCH(0); 438428d7b3dSmrg} 439428d7b3dSmrg 440428d7b3dSmrgstatic void 441428d7b3dSmrggen6_emit_urb(struct sna *sna) 442428d7b3dSmrg{ 443428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2)); 444428d7b3dSmrg OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) | 445428d7b3dSmrg (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */ 446428d7b3dSmrg OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) | 447428d7b3dSmrg (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */ 448428d7b3dSmrg} 449428d7b3dSmrg 450428d7b3dSmrgstatic void 451428d7b3dSmrggen6_emit_state_base_address(struct sna *sna) 452428d7b3dSmrg{ 453428d7b3dSmrg OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2)); 454428d7b3dSmrg OUT_BATCH(0); /* general */ 455428d7b3dSmrg OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */ 456428d7b3dSmrg sna->kgem.nbatch, 457428d7b3dSmrg NULL, 458428d7b3dSmrg I915_GEM_DOMAIN_INSTRUCTION << 16, 459428d7b3dSmrg BASE_ADDRESS_MODIFY)); 460428d7b3dSmrg OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */ 461428d7b3dSmrg sna->kgem.nbatch, 462428d7b3dSmrg sna->render_state.gen6.general_bo, 463428d7b3dSmrg I915_GEM_DOMAIN_INSTRUCTION << 16, 464428d7b3dSmrg BASE_ADDRESS_MODIFY)); 465428d7b3dSmrg OUT_BATCH(0); /* indirect */ 466428d7b3dSmrg OUT_BATCH(kgem_add_reloc(&sna->kgem, 467428d7b3dSmrg sna->kgem.nbatch, 468428d7b3dSmrg sna->render_state.gen6.general_bo, 469428d7b3dSmrg I915_GEM_DOMAIN_INSTRUCTION << 16, 470428d7b3dSmrg BASE_ADDRESS_MODIFY)); 471428d7b3dSmrg 472428d7b3dSmrg /* upper bounds, disable */ 473428d7b3dSmrg OUT_BATCH(0); 474428d7b3dSmrg OUT_BATCH(BASE_ADDRESS_MODIFY); 475428d7b3dSmrg OUT_BATCH(0); 476428d7b3dSmrg OUT_BATCH(BASE_ADDRESS_MODIFY); 477428d7b3dSmrg} 478428d7b3dSmrg 479428d7b3dSmrgstatic void 480428d7b3dSmrggen6_emit_viewports(struct sna *sna) 481428d7b3dSmrg{ 482428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS | 483428d7b3dSmrg GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC | 484428d7b3dSmrg (4 - 2)); 485428d7b3dSmrg OUT_BATCH(0); 486428d7b3dSmrg OUT_BATCH(0); 487428d7b3dSmrg OUT_BATCH(0); 488428d7b3dSmrg} 489428d7b3dSmrg 490428d7b3dSmrgstatic void 491428d7b3dSmrggen6_emit_vs(struct sna *sna) 492428d7b3dSmrg{ 493428d7b3dSmrg /* disable VS constant buffer */ 494428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2)); 495428d7b3dSmrg OUT_BATCH(0); 496428d7b3dSmrg OUT_BATCH(0); 497428d7b3dSmrg OUT_BATCH(0); 498428d7b3dSmrg OUT_BATCH(0); 499428d7b3dSmrg 500428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2)); 501428d7b3dSmrg OUT_BATCH(0); /* no VS kernel */ 502428d7b3dSmrg OUT_BATCH(0); 503428d7b3dSmrg OUT_BATCH(0); 504428d7b3dSmrg OUT_BATCH(0); 505428d7b3dSmrg OUT_BATCH(0); /* pass-through */ 506428d7b3dSmrg} 507428d7b3dSmrg 508428d7b3dSmrgstatic void 509428d7b3dSmrggen6_emit_gs(struct sna *sna) 510428d7b3dSmrg{ 511428d7b3dSmrg /* disable GS constant buffer */ 512428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2)); 513428d7b3dSmrg OUT_BATCH(0); 514428d7b3dSmrg OUT_BATCH(0); 515428d7b3dSmrg OUT_BATCH(0); 516428d7b3dSmrg OUT_BATCH(0); 517428d7b3dSmrg 518428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2)); 519428d7b3dSmrg OUT_BATCH(0); /* no GS kernel */ 520428d7b3dSmrg OUT_BATCH(0); 521428d7b3dSmrg OUT_BATCH(0); 522428d7b3dSmrg OUT_BATCH(0); 523428d7b3dSmrg OUT_BATCH(0); 524428d7b3dSmrg OUT_BATCH(0); /* pass-through */ 525428d7b3dSmrg} 526428d7b3dSmrg 527428d7b3dSmrgstatic void 528428d7b3dSmrggen6_emit_clip(struct sna *sna) 529428d7b3dSmrg{ 530428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2)); 531428d7b3dSmrg OUT_BATCH(0); 532428d7b3dSmrg OUT_BATCH(0); /* pass-through */ 533428d7b3dSmrg OUT_BATCH(0); 534428d7b3dSmrg} 535428d7b3dSmrg 536428d7b3dSmrgstatic void 537428d7b3dSmrggen6_emit_wm_constants(struct sna *sna) 538428d7b3dSmrg{ 539428d7b3dSmrg /* disable WM constant buffer */ 540428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2)); 541428d7b3dSmrg OUT_BATCH(0); 542428d7b3dSmrg OUT_BATCH(0); 543428d7b3dSmrg OUT_BATCH(0); 544428d7b3dSmrg OUT_BATCH(0); 545428d7b3dSmrg} 546428d7b3dSmrg 547428d7b3dSmrgstatic void 548428d7b3dSmrggen6_emit_null_depth_buffer(struct sna *sna) 549428d7b3dSmrg{ 550428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2)); 551428d7b3dSmrg OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT | 552428d7b3dSmrg GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT); 553428d7b3dSmrg OUT_BATCH(0); 554428d7b3dSmrg OUT_BATCH(0); 555428d7b3dSmrg OUT_BATCH(0); 556428d7b3dSmrg OUT_BATCH(0); 557428d7b3dSmrg OUT_BATCH(0); 558428d7b3dSmrg 559428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2)); 560428d7b3dSmrg OUT_BATCH(0); 561428d7b3dSmrg} 562428d7b3dSmrg 563428d7b3dSmrgstatic void 564428d7b3dSmrggen6_emit_invariant(struct sna *sna) 565428d7b3dSmrg{ 566428d7b3dSmrg OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D); 567428d7b3dSmrg 568428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2)); 569428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | 570428d7b3dSmrg GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ 571428d7b3dSmrg OUT_BATCH(0); 572428d7b3dSmrg 573428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); 574428d7b3dSmrg OUT_BATCH(1); 575428d7b3dSmrg 576428d7b3dSmrg gen6_emit_urb(sna); 577428d7b3dSmrg 578428d7b3dSmrg gen6_emit_state_base_address(sna); 579428d7b3dSmrg 580428d7b3dSmrg gen6_emit_viewports(sna); 581428d7b3dSmrg gen6_emit_vs(sna); 582428d7b3dSmrg gen6_emit_gs(sna); 583428d7b3dSmrg gen6_emit_clip(sna); 584428d7b3dSmrg gen6_emit_wm_constants(sna); 585428d7b3dSmrg gen6_emit_null_depth_buffer(sna); 586428d7b3dSmrg 587428d7b3dSmrg sna->render_state.gen6.needs_invariant = false; 588428d7b3dSmrg} 589428d7b3dSmrg 590428d7b3dSmrgstatic void 591428d7b3dSmrggen6_emit_cc(struct sna *sna, int blend) 592428d7b3dSmrg{ 593428d7b3dSmrg struct gen6_render_state *render = &sna->render_state.gen6; 594428d7b3dSmrg 595428d7b3dSmrg if (render->blend == blend) 596428d7b3dSmrg return; 597428d7b3dSmrg 598428d7b3dSmrg DBG(("%s: blend = %x\n", __FUNCTION__, blend)); 599428d7b3dSmrg 600428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2)); 601428d7b3dSmrg OUT_BATCH((render->cc_blend + blend) | 1); 602428d7b3dSmrg if (render->blend == (unsigned)-1) { 603428d7b3dSmrg OUT_BATCH(1); 604428d7b3dSmrg OUT_BATCH(1); 605428d7b3dSmrg } else { 606428d7b3dSmrg OUT_BATCH(0); 607428d7b3dSmrg OUT_BATCH(0); 608428d7b3dSmrg } 609428d7b3dSmrg 610428d7b3dSmrg render->blend = blend; 611428d7b3dSmrg} 612428d7b3dSmrg 613428d7b3dSmrgstatic void 614428d7b3dSmrggen6_emit_sampler(struct sna *sna, uint32_t state) 615428d7b3dSmrg{ 616428d7b3dSmrg if (sna->render_state.gen6.samplers == state) 617428d7b3dSmrg return; 618428d7b3dSmrg 619428d7b3dSmrg sna->render_state.gen6.samplers = state; 620428d7b3dSmrg 621428d7b3dSmrg DBG(("%s: sampler = %x\n", __FUNCTION__, state)); 622428d7b3dSmrg 623428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS | 624428d7b3dSmrg GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS | 625428d7b3dSmrg (4 - 2)); 626428d7b3dSmrg OUT_BATCH(0); /* VS */ 627428d7b3dSmrg OUT_BATCH(0); /* GS */ 628428d7b3dSmrg OUT_BATCH(sna->render_state.gen6.wm_state + state); 629428d7b3dSmrg} 630428d7b3dSmrg 631428d7b3dSmrgstatic void 632428d7b3dSmrggen6_emit_sf(struct sna *sna, bool has_mask) 633428d7b3dSmrg{ 634428d7b3dSmrg int num_sf_outputs = has_mask ? 2 : 1; 635428d7b3dSmrg 636428d7b3dSmrg if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs) 637428d7b3dSmrg return; 638428d7b3dSmrg 639428d7b3dSmrg DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n", 640428d7b3dSmrg __FUNCTION__, num_sf_outputs, 1, 0)); 641428d7b3dSmrg 642428d7b3dSmrg sna->render_state.gen6.num_sf_outputs = num_sf_outputs; 643428d7b3dSmrg 644428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2)); 645428d7b3dSmrg OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT | 646428d7b3dSmrg 1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT | 647428d7b3dSmrg 1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT); 648428d7b3dSmrg OUT_BATCH(0); 649428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE); 650428d7b3dSmrg OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */ 651428d7b3dSmrg OUT_BATCH(0); 652428d7b3dSmrg OUT_BATCH(0); 653428d7b3dSmrg OUT_BATCH(0); 654428d7b3dSmrg OUT_BATCH(0); 655428d7b3dSmrg OUT_BATCH(0); /* DW9 */ 656428d7b3dSmrg OUT_BATCH(0); 657428d7b3dSmrg OUT_BATCH(0); 658428d7b3dSmrg OUT_BATCH(0); 659428d7b3dSmrg OUT_BATCH(0); 660428d7b3dSmrg OUT_BATCH(0); /* DW14 */ 661428d7b3dSmrg OUT_BATCH(0); 662428d7b3dSmrg OUT_BATCH(0); 663428d7b3dSmrg OUT_BATCH(0); 664428d7b3dSmrg OUT_BATCH(0); 665428d7b3dSmrg OUT_BATCH(0); /* DW19 */ 666428d7b3dSmrg} 667428d7b3dSmrg 668428d7b3dSmrgstatic void 669428d7b3dSmrggen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask) 670428d7b3dSmrg{ 671428d7b3dSmrg const uint32_t *kernels; 672428d7b3dSmrg 673428d7b3dSmrg if (sna->render_state.gen6.kernel == kernel) 674428d7b3dSmrg return; 675428d7b3dSmrg 676428d7b3dSmrg sna->render_state.gen6.kernel = kernel; 677428d7b3dSmrg kernels = sna->render_state.gen6.wm_kernel[kernel]; 678428d7b3dSmrg 679428d7b3dSmrg DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n", 680428d7b3dSmrg __FUNCTION__, 681428d7b3dSmrg wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces, 682428d7b3dSmrg kernels[0], kernels[1], kernels[2])); 683428d7b3dSmrg 684428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); 685428d7b3dSmrg OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]); 686428d7b3dSmrg OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT | 687428d7b3dSmrg wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT); 688428d7b3dSmrg OUT_BATCH(0); /* scratch space */ 689428d7b3dSmrg OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT | 690428d7b3dSmrg 8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT | 691428d7b3dSmrg 6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT); 692428d7b3dSmrg OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT | 693428d7b3dSmrg (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) | 694428d7b3dSmrg (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) | 695428d7b3dSmrg (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) | 696428d7b3dSmrg GEN6_3DSTATE_WM_DISPATCH_ENABLE); 697428d7b3dSmrg OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT | 698428d7b3dSmrg GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 699428d7b3dSmrg OUT_BATCH(kernels[2]); 700428d7b3dSmrg OUT_BATCH(kernels[1]); 701428d7b3dSmrg} 702428d7b3dSmrg 703428d7b3dSmrgstatic bool 704428d7b3dSmrggen6_emit_binding_table(struct sna *sna, uint16_t offset) 705428d7b3dSmrg{ 706428d7b3dSmrg if (sna->render_state.gen6.surface_table == offset) 707428d7b3dSmrg return false; 708428d7b3dSmrg 709428d7b3dSmrg /* Binding table pointers */ 710428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS | 711428d7b3dSmrg GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS | 712428d7b3dSmrg (4 - 2)); 713428d7b3dSmrg OUT_BATCH(0); /* vs */ 714428d7b3dSmrg OUT_BATCH(0); /* gs */ 715428d7b3dSmrg /* Only the PS uses the binding table */ 716428d7b3dSmrg OUT_BATCH(offset*4); 717428d7b3dSmrg 718428d7b3dSmrg sna->render_state.gen6.surface_table = offset; 719428d7b3dSmrg return true; 720428d7b3dSmrg} 721428d7b3dSmrg 722428d7b3dSmrgstatic bool 723428d7b3dSmrggen6_emit_drawing_rectangle(struct sna *sna, 724428d7b3dSmrg const struct sna_composite_op *op) 725428d7b3dSmrg{ 726428d7b3dSmrg uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); 727428d7b3dSmrg uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; 728428d7b3dSmrg 729428d7b3dSmrg assert(!too_large(abs(op->dst.x), abs(op->dst.y))); 730428d7b3dSmrg assert(!too_large(op->dst.width, op->dst.height)); 731428d7b3dSmrg 732428d7b3dSmrg if (sna->render_state.gen6.drawrect_limit == limit && 733428d7b3dSmrg sna->render_state.gen6.drawrect_offset == offset) 734428d7b3dSmrg return true; 735428d7b3dSmrg 736428d7b3dSmrg /* [DevSNB-C+{W/A}] Before any depth stall flush (including those 737428d7b3dSmrg * produced by non-pipelined state commands), software needs to first 738428d7b3dSmrg * send a PIPE_CONTROL with no bits set except Post-Sync Operation != 739428d7b3dSmrg * 0. 740428d7b3dSmrg * 741428d7b3dSmrg * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent 742428d7b3dSmrg * BEFORE the pipe-control with a post-sync op and no write-cache 743428d7b3dSmrg * flushes. 744428d7b3dSmrg */ 745428d7b3dSmrg if (!sna->render_state.gen6.first_state_packet) 746428d7b3dSmrg gen6_emit_pipe_stall(sna); 747428d7b3dSmrg 748428d7b3dSmrg OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); 749428d7b3dSmrg OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME); 750428d7b3dSmrg OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, 751428d7b3dSmrg sna->render_state.gen6.general_bo, 752428d7b3dSmrg I915_GEM_DOMAIN_INSTRUCTION << 16 | 753428d7b3dSmrg I915_GEM_DOMAIN_INSTRUCTION, 754428d7b3dSmrg 64)); 755428d7b3dSmrg OUT_BATCH(0); 756428d7b3dSmrg 757428d7b3dSmrg DBG(("%s: offset=(%d, %d), limit=(%d, %d)\n", 758428d7b3dSmrg __FUNCTION__, op->dst.x, op->dst.y, op->dst.width, op->dst.height)); 759428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); 760428d7b3dSmrg OUT_BATCH(0); 761428d7b3dSmrg OUT_BATCH(limit); 762428d7b3dSmrg OUT_BATCH(offset); 763428d7b3dSmrg 764428d7b3dSmrg sna->render_state.gen6.drawrect_offset = offset; 765428d7b3dSmrg sna->render_state.gen6.drawrect_limit = limit; 766428d7b3dSmrg return false; 767428d7b3dSmrg} 768428d7b3dSmrg 769428d7b3dSmrgstatic void 770428d7b3dSmrggen6_emit_vertex_elements(struct sna *sna, 771428d7b3dSmrg const struct sna_composite_op *op) 772428d7b3dSmrg{ 773428d7b3dSmrg /* 774428d7b3dSmrg * vertex data in vertex buffer 775428d7b3dSmrg * position: (x, y) 776428d7b3dSmrg * texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0) 777428d7b3dSmrg * texture coordinate 1 if (has_mask is true): same as above 778428d7b3dSmrg */ 779428d7b3dSmrg struct gen6_render_state *render = &sna->render_state.gen6; 780428d7b3dSmrg uint32_t src_format, dw; 781428d7b3dSmrg int id = GEN6_VERTEX(op->u.gen6.flags); 782428d7b3dSmrg bool has_mask; 783428d7b3dSmrg 784428d7b3dSmrg DBG(("%s: setup id=%d\n", __FUNCTION__, id)); 785428d7b3dSmrg 786428d7b3dSmrg if (render->ve_id == id) 787428d7b3dSmrg return; 788428d7b3dSmrg render->ve_id = id; 789428d7b3dSmrg 790428d7b3dSmrg /* The VUE layout 791428d7b3dSmrg * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) 792428d7b3dSmrg * dword 4-7: position (x, y, 1.0, 1.0), 793428d7b3dSmrg * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) 794428d7b3dSmrg * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) 795428d7b3dSmrg * 796428d7b3dSmrg * dword 4-15 are fetched from vertex buffer 797428d7b3dSmrg */ 798428d7b3dSmrg has_mask = (id >> 2) != 0; 799428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS | 800428d7b3dSmrg ((2 * (3 + has_mask)) + 1 - 2)); 801428d7b3dSmrg 802428d7b3dSmrg OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 803428d7b3dSmrg GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT | 804428d7b3dSmrg 0 << VE0_OFFSET_SHIFT); 805428d7b3dSmrg OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT | 806428d7b3dSmrg GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT | 807428d7b3dSmrg GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | 808428d7b3dSmrg GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT); 809428d7b3dSmrg 810428d7b3dSmrg /* x,y */ 811428d7b3dSmrg OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 812428d7b3dSmrg GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | 813428d7b3dSmrg 0 << VE0_OFFSET_SHIFT); 814428d7b3dSmrg OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | 815428d7b3dSmrg GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | 816428d7b3dSmrg GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | 817428d7b3dSmrg GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); 818428d7b3dSmrg 819428d7b3dSmrg /* u0, v0, w0 */ 820428d7b3dSmrg DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3)); 821428d7b3dSmrg dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; 822428d7b3dSmrg switch (id & 3) { 823428d7b3dSmrg default: 824428d7b3dSmrg assert(0); 825428d7b3dSmrg case 0: 826428d7b3dSmrg src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED; 827428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 828428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 829428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; 830428d7b3dSmrg break; 831428d7b3dSmrg case 1: 832428d7b3dSmrg src_format = GEN6_SURFACEFORMAT_R32_FLOAT; 833428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 834428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; 835428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; 836428d7b3dSmrg break; 837428d7b3dSmrg case 2: 838428d7b3dSmrg src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; 839428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 840428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 841428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; 842428d7b3dSmrg break; 843428d7b3dSmrg case 3: 844428d7b3dSmrg src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT; 845428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 846428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 847428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; 848428d7b3dSmrg break; 849428d7b3dSmrg } 850428d7b3dSmrg OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 851428d7b3dSmrg src_format << VE0_FORMAT_SHIFT | 852428d7b3dSmrg 4 << VE0_OFFSET_SHIFT); 853428d7b3dSmrg OUT_BATCH(dw); 854428d7b3dSmrg 855428d7b3dSmrg /* u1, v1, w1 */ 856428d7b3dSmrg if (has_mask) { 857428d7b3dSmrg unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); 858428d7b3dSmrg DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset)); 859428d7b3dSmrg dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; 860428d7b3dSmrg switch (id >> 2) { 861428d7b3dSmrg case 1: 862428d7b3dSmrg src_format = GEN6_SURFACEFORMAT_R32_FLOAT; 863428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 864428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; 865428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; 866428d7b3dSmrg break; 867428d7b3dSmrg default: 868428d7b3dSmrg assert(0); 869428d7b3dSmrg case 2: 870428d7b3dSmrg src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; 871428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 872428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 873428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; 874428d7b3dSmrg break; 875428d7b3dSmrg case 3: 876428d7b3dSmrg src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT; 877428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 878428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 879428d7b3dSmrg dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; 880428d7b3dSmrg break; 881428d7b3dSmrg } 882428d7b3dSmrg OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 883428d7b3dSmrg src_format << VE0_FORMAT_SHIFT | 884428d7b3dSmrg offset << VE0_OFFSET_SHIFT); 885428d7b3dSmrg OUT_BATCH(dw); 886428d7b3dSmrg } 887428d7b3dSmrg} 888428d7b3dSmrg 889428d7b3dSmrgstatic void 890428d7b3dSmrggen6_emit_state(struct sna *sna, 891428d7b3dSmrg const struct sna_composite_op *op, 892428d7b3dSmrg uint16_t wm_binding_table) 893428d7b3dSmrg{ 894428d7b3dSmrg bool need_invalidate; 895428d7b3dSmrg bool need_flush; 896428d7b3dSmrg bool need_stall; 897428d7b3dSmrg 898428d7b3dSmrg assert(op->dst.bo->exec); 899428d7b3dSmrg 900428d7b3dSmrg need_flush = wm_binding_table & 1; 901428d7b3dSmrg if (ALWAYS_FLUSH) 902428d7b3dSmrg need_flush = true; 903428d7b3dSmrg 904428d7b3dSmrg wm_binding_table &= ~1; 905428d7b3dSmrg need_stall = sna->render_state.gen6.surface_table != wm_binding_table; 906428d7b3dSmrg if (ALWAYS_STALL) 907428d7b3dSmrg need_stall = true; 908428d7b3dSmrg 909428d7b3dSmrg need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo); 910428d7b3dSmrg if (ALWAYS_INVALIDATE) 911428d7b3dSmrg need_invalidate = true; 912428d7b3dSmrg 913428d7b3dSmrg if (need_invalidate) { 914428d7b3dSmrg gen6_emit_pipe_invalidate(sna); 915428d7b3dSmrg kgem_clear_dirty(&sna->kgem); 916428d7b3dSmrg assert(op->dst.bo->exec); 917428d7b3dSmrg kgem_bo_mark_dirty(op->dst.bo); 918428d7b3dSmrg 919428d7b3dSmrg need_flush = false; 920428d7b3dSmrg need_stall = false; 921428d7b3dSmrg sna->render_state.gen6.first_state_packet = true; 922428d7b3dSmrg } 923428d7b3dSmrg if (need_flush) { 924428d7b3dSmrg gen6_emit_pipe_flush(sna, need_stall); 925428d7b3dSmrg need_stall = false; 926428d7b3dSmrg sna->render_state.gen6.first_state_packet = true; 927428d7b3dSmrg } 928428d7b3dSmrg 929428d7b3dSmrg need_stall &= gen6_emit_drawing_rectangle(sna, op); 930428d7b3dSmrg if (need_stall) 931428d7b3dSmrg gen6_emit_pipe_stall(sna); 932428d7b3dSmrg 933428d7b3dSmrg gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)); 934428d7b3dSmrg gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags)); 935428d7b3dSmrg gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2); 936428d7b3dSmrg gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2); 937428d7b3dSmrg gen6_emit_vertex_elements(sna, op); 938428d7b3dSmrg gen6_emit_binding_table(sna, wm_binding_table); 939428d7b3dSmrg 940428d7b3dSmrg sna->render_state.gen6.first_state_packet = false; 941428d7b3dSmrg} 942428d7b3dSmrg 943428d7b3dSmrgstatic bool gen6_magic_ca_pass(struct sna *sna, 944428d7b3dSmrg const struct sna_composite_op *op) 945428d7b3dSmrg{ 946428d7b3dSmrg struct gen6_render_state *state = &sna->render_state.gen6; 947428d7b3dSmrg 948428d7b3dSmrg if (!op->need_magic_ca_pass) 949428d7b3dSmrg return false; 950428d7b3dSmrg 951428d7b3dSmrg DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, 952428d7b3dSmrg sna->render.vertex_start, sna->render.vertex_index)); 953428d7b3dSmrg 954428d7b3dSmrg gen6_emit_pipe_stall(sna); 955428d7b3dSmrg 956428d7b3dSmrg gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format)); 957428d7b3dSmrg gen6_emit_wm(sna, 958428d7b3dSmrg gen6_choose_composite_kernel(PictOpAdd, 959428d7b3dSmrg true, true, 960428d7b3dSmrg op->is_affine), 961428d7b3dSmrg true); 962428d7b3dSmrg 963428d7b3dSmrg OUT_BATCH(GEN6_3DPRIMITIVE | 964428d7b3dSmrg GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL | 965428d7b3dSmrg _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT | 966428d7b3dSmrg 0 << 9 | 967428d7b3dSmrg 4); 968428d7b3dSmrg OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); 969428d7b3dSmrg OUT_BATCH(sna->render.vertex_start); 970428d7b3dSmrg OUT_BATCH(1); /* single instance */ 971428d7b3dSmrg OUT_BATCH(0); /* start instance location */ 972428d7b3dSmrg OUT_BATCH(0); /* index buffer offset, ignored */ 973428d7b3dSmrg 974428d7b3dSmrg state->last_primitive = sna->kgem.nbatch; 975428d7b3dSmrg return true; 976428d7b3dSmrg} 977428d7b3dSmrg 978428d7b3dSmrgtypedef struct gen6_surface_state_padded { 979428d7b3dSmrg struct gen6_surface_state state; 980428d7b3dSmrg char pad[32 - sizeof(struct gen6_surface_state)]; 981428d7b3dSmrg} gen6_surface_state_padded; 982428d7b3dSmrg 983428d7b3dSmrgstatic void null_create(struct sna_static_stream *stream) 984428d7b3dSmrg{ 985428d7b3dSmrg /* A bunch of zeros useful for legacy border color and depth-stencil */ 986428d7b3dSmrg sna_static_stream_map(stream, 64, 64); 987428d7b3dSmrg} 988428d7b3dSmrg 989428d7b3dSmrgstatic void scratch_create(struct sna_static_stream *stream) 990428d7b3dSmrg{ 991428d7b3dSmrg /* 64 bytes of scratch space for random writes, such as 992428d7b3dSmrg * the pipe-control w/a. 993428d7b3dSmrg */ 994428d7b3dSmrg sna_static_stream_map(stream, 64, 64); 995428d7b3dSmrg} 996428d7b3dSmrg 997428d7b3dSmrgstatic void 998428d7b3dSmrgsampler_state_init(struct gen6_sampler_state *sampler_state, 999428d7b3dSmrg sampler_filter_t filter, 1000428d7b3dSmrg sampler_extend_t extend) 1001428d7b3dSmrg{ 1002428d7b3dSmrg sampler_state->ss0.lod_preclamp = 1; /* GL mode */ 1003428d7b3dSmrg 1004428d7b3dSmrg /* We use the legacy mode to get the semantics specified by 1005428d7b3dSmrg * the Render extension. */ 1006428d7b3dSmrg sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY; 1007428d7b3dSmrg 1008428d7b3dSmrg switch (filter) { 1009428d7b3dSmrg default: 1010428d7b3dSmrg case SAMPLER_FILTER_NEAREST: 1011428d7b3dSmrg sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST; 1012428d7b3dSmrg sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST; 1013428d7b3dSmrg break; 1014428d7b3dSmrg case SAMPLER_FILTER_BILINEAR: 1015428d7b3dSmrg sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR; 1016428d7b3dSmrg sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR; 1017428d7b3dSmrg break; 1018428d7b3dSmrg } 1019428d7b3dSmrg 1020428d7b3dSmrg switch (extend) { 1021428d7b3dSmrg default: 1022428d7b3dSmrg case SAMPLER_EXTEND_NONE: 1023428d7b3dSmrg sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; 1024428d7b3dSmrg sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; 1025428d7b3dSmrg sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; 1026428d7b3dSmrg break; 1027428d7b3dSmrg case SAMPLER_EXTEND_REPEAT: 1028428d7b3dSmrg sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP; 1029428d7b3dSmrg sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP; 1030428d7b3dSmrg sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP; 1031428d7b3dSmrg break; 1032428d7b3dSmrg case SAMPLER_EXTEND_PAD: 1033428d7b3dSmrg sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; 1034428d7b3dSmrg sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; 1035428d7b3dSmrg sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; 1036428d7b3dSmrg break; 1037428d7b3dSmrg case SAMPLER_EXTEND_REFLECT: 1038428d7b3dSmrg sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; 1039428d7b3dSmrg sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; 1040428d7b3dSmrg sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; 1041428d7b3dSmrg break; 1042428d7b3dSmrg } 1043428d7b3dSmrg} 1044428d7b3dSmrg 1045428d7b3dSmrgstatic void 1046428d7b3dSmrgsampler_copy_init(struct gen6_sampler_state *ss) 1047428d7b3dSmrg{ 1048428d7b3dSmrg sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1049428d7b3dSmrg ss->ss3.non_normalized_coord = 1; 1050428d7b3dSmrg 1051428d7b3dSmrg sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1052428d7b3dSmrg} 1053428d7b3dSmrg 1054428d7b3dSmrgstatic void 1055428d7b3dSmrgsampler_fill_init(struct gen6_sampler_state *ss) 1056428d7b3dSmrg{ 1057428d7b3dSmrg sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT); 1058428d7b3dSmrg ss->ss3.non_normalized_coord = 1; 1059428d7b3dSmrg 1060428d7b3dSmrg sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1061428d7b3dSmrg} 1062428d7b3dSmrg 1063428d7b3dSmrgstatic uint32_t 1064428d7b3dSmrggen6_tiling_bits(uint32_t tiling) 1065428d7b3dSmrg{ 1066428d7b3dSmrg switch (tiling) { 1067428d7b3dSmrg default: assert(0); 1068428d7b3dSmrg case I915_TILING_NONE: return 0; 1069428d7b3dSmrg case I915_TILING_X: return GEN6_SURFACE_TILED; 1070428d7b3dSmrg case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y; 1071428d7b3dSmrg } 1072428d7b3dSmrg} 1073428d7b3dSmrg 1074428d7b3dSmrg/** 1075428d7b3dSmrg * Sets up the common fields for a surface state buffer for the given 1076428d7b3dSmrg * picture in the given surface state buffer. 1077428d7b3dSmrg */ 1078428d7b3dSmrgstatic int 1079428d7b3dSmrggen6_bind_bo(struct sna *sna, 1080428d7b3dSmrg struct kgem_bo *bo, 1081428d7b3dSmrg uint32_t width, 1082428d7b3dSmrg uint32_t height, 1083428d7b3dSmrg uint32_t format, 1084428d7b3dSmrg bool is_dst) 1085428d7b3dSmrg{ 1086428d7b3dSmrg uint32_t *ss; 1087428d7b3dSmrg uint32_t domains; 1088428d7b3dSmrg uint16_t offset; 1089428d7b3dSmrg uint32_t is_scanout = is_dst && bo->scanout; 1090428d7b3dSmrg 1091428d7b3dSmrg /* After the first bind, we manage the cache domains within the batch */ 1092428d7b3dSmrg offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31); 1093428d7b3dSmrg if (offset) { 1094428d7b3dSmrg DBG(("[%x] bo(handle=%d), format=%d, reuse %s binding\n", 1095428d7b3dSmrg offset, bo->handle, format, 1096428d7b3dSmrg is_dst ? "render" : "sampler")); 1097428d7b3dSmrg assert(offset >= sna->kgem.surface); 1098428d7b3dSmrg if (is_dst) 1099428d7b3dSmrg kgem_bo_mark_dirty(bo); 1100428d7b3dSmrg return offset * sizeof(uint32_t); 1101428d7b3dSmrg } 1102428d7b3dSmrg 1103428d7b3dSmrg offset = sna->kgem.surface -= 1104428d7b3dSmrg sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); 1105428d7b3dSmrg ss = sna->kgem.batch + offset; 1106428d7b3dSmrg ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT | 1107428d7b3dSmrg GEN6_SURFACE_BLEND_ENABLED | 1108428d7b3dSmrg format << GEN6_SURFACE_FORMAT_SHIFT); 1109428d7b3dSmrg if (is_dst) { 1110428d7b3dSmrg ss[0] |= GEN6_SURFACE_RC_READ_WRITE; 1111428d7b3dSmrg domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER; 1112428d7b3dSmrg } else 1113428d7b3dSmrg domains = I915_GEM_DOMAIN_SAMPLER << 16; 1114428d7b3dSmrg ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); 1115428d7b3dSmrg ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT | 1116428d7b3dSmrg (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT); 1117428d7b3dSmrg assert(bo->pitch <= (1 << 18)); 1118428d7b3dSmrg ss[3] = (gen6_tiling_bits(bo->tiling) | 1119428d7b3dSmrg (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT); 1120428d7b3dSmrg ss[4] = 0; 1121428d7b3dSmrg ss[5] = (is_scanout || bo->io) ? 0 : 3 << 16; 1122428d7b3dSmrg 1123428d7b3dSmrg kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset); 1124428d7b3dSmrg 1125428d7b3dSmrg DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", 1126428d7b3dSmrg offset, bo->handle, ss[1], 1127428d7b3dSmrg format, width, height, bo->pitch, bo->tiling, 1128428d7b3dSmrg domains & 0xffff ? "render" : "sampler")); 1129428d7b3dSmrg 1130428d7b3dSmrg return offset * sizeof(uint32_t); 1131428d7b3dSmrg} 1132428d7b3dSmrg 1133428d7b3dSmrgstatic void gen6_emit_vertex_buffer(struct sna *sna, 1134428d7b3dSmrg const struct sna_composite_op *op) 1135428d7b3dSmrg{ 1136428d7b3dSmrg int id = GEN6_VERTEX(op->u.gen6.flags); 1137428d7b3dSmrg 1138428d7b3dSmrg OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3); 1139428d7b3dSmrg OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA | 1140428d7b3dSmrg 4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT); 1141428d7b3dSmrg sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; 1142428d7b3dSmrg OUT_BATCH(0); 1143428d7b3dSmrg OUT_BATCH(~0); /* max address: disabled */ 1144428d7b3dSmrg OUT_BATCH(0); 1145428d7b3dSmrg 1146428d7b3dSmrg sna->render.vb_id |= 1 << id; 1147428d7b3dSmrg} 1148428d7b3dSmrg 1149428d7b3dSmrgstatic void gen6_emit_primitive(struct sna *sna) 1150428d7b3dSmrg{ 1151428d7b3dSmrg if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) { 1152428d7b3dSmrg DBG(("%s: continuing previous primitive, start=%d, index=%d\n", 1153428d7b3dSmrg __FUNCTION__, 1154428d7b3dSmrg sna->render.vertex_start, 1155428d7b3dSmrg sna->render.vertex_index)); 1156428d7b3dSmrg sna->render.vertex_offset = sna->kgem.nbatch - 5; 1157428d7b3dSmrg return; 1158428d7b3dSmrg } 1159428d7b3dSmrg 1160428d7b3dSmrg OUT_BATCH(GEN6_3DPRIMITIVE | 1161428d7b3dSmrg GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL | 1162428d7b3dSmrg _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT | 1163428d7b3dSmrg 0 << 9 | 1164428d7b3dSmrg 4); 1165428d7b3dSmrg sna->render.vertex_offset = sna->kgem.nbatch; 1166428d7b3dSmrg OUT_BATCH(0); /* vertex count, to be filled in later */ 1167428d7b3dSmrg OUT_BATCH(sna->render.vertex_index); 1168428d7b3dSmrg OUT_BATCH(1); /* single instance */ 1169428d7b3dSmrg OUT_BATCH(0); /* start instance location */ 1170428d7b3dSmrg OUT_BATCH(0); /* index buffer offset, ignored */ 1171428d7b3dSmrg sna->render.vertex_start = sna->render.vertex_index; 1172428d7b3dSmrg DBG(("%s: started new primitive: index=%d\n", 1173428d7b3dSmrg __FUNCTION__, sna->render.vertex_start)); 1174428d7b3dSmrg 1175428d7b3dSmrg sna->render_state.gen6.last_primitive = sna->kgem.nbatch; 1176428d7b3dSmrg} 1177428d7b3dSmrg 1178428d7b3dSmrgstatic bool gen6_rectangle_begin(struct sna *sna, 1179428d7b3dSmrg const struct sna_composite_op *op) 1180428d7b3dSmrg{ 1181428d7b3dSmrg int id = 1 << GEN6_VERTEX(op->u.gen6.flags); 1182428d7b3dSmrg int ndwords; 1183428d7b3dSmrg 1184428d7b3dSmrg if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) 1185428d7b3dSmrg return true; 1186428d7b3dSmrg 1187428d7b3dSmrg ndwords = op->need_magic_ca_pass ? 60 : 6; 1188428d7b3dSmrg if ((sna->render.vb_id & id) == 0) 1189428d7b3dSmrg ndwords += 5; 1190428d7b3dSmrg if (!kgem_check_batch(&sna->kgem, ndwords)) 1191428d7b3dSmrg return false; 1192428d7b3dSmrg 1193428d7b3dSmrg if ((sna->render.vb_id & id) == 0) 1194428d7b3dSmrg gen6_emit_vertex_buffer(sna, op); 1195428d7b3dSmrg 1196428d7b3dSmrg gen6_emit_primitive(sna); 1197428d7b3dSmrg return true; 1198428d7b3dSmrg} 1199428d7b3dSmrg 1200428d7b3dSmrgstatic int gen6_get_rectangles__flush(struct sna *sna, 1201428d7b3dSmrg const struct sna_composite_op *op) 1202428d7b3dSmrg{ 1203428d7b3dSmrg /* Preventing discarding new vbo after lock contention */ 1204428d7b3dSmrg if (sna_vertex_wait__locked(&sna->render)) { 1205428d7b3dSmrg int rem = vertex_space(sna); 1206428d7b3dSmrg if (rem > op->floats_per_rect) 1207428d7b3dSmrg return rem; 1208428d7b3dSmrg } 1209428d7b3dSmrg 1210428d7b3dSmrg if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5)) 1211428d7b3dSmrg return 0; 1212428d7b3dSmrg if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) 1213428d7b3dSmrg return 0; 1214428d7b3dSmrg 1215428d7b3dSmrg if (sna->render.vertex_offset) { 1216428d7b3dSmrg gen4_vertex_flush(sna); 1217428d7b3dSmrg if (gen6_magic_ca_pass(sna, op)) { 1218428d7b3dSmrg gen6_emit_pipe_stall(sna); 1219428d7b3dSmrg gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)); 1220428d7b3dSmrg gen6_emit_wm(sna, 1221428d7b3dSmrg GEN6_KERNEL(op->u.gen6.flags), 1222428d7b3dSmrg GEN6_VERTEX(op->u.gen6.flags) >> 2); 1223428d7b3dSmrg } 1224428d7b3dSmrg } 1225428d7b3dSmrg 1226428d7b3dSmrg return gen4_vertex_finish(sna); 1227428d7b3dSmrg} 1228428d7b3dSmrg 1229428d7b3dSmrginline static int gen6_get_rectangles(struct sna *sna, 1230428d7b3dSmrg const struct sna_composite_op *op, 1231428d7b3dSmrg int want, 1232428d7b3dSmrg void (*emit_state)(struct sna *, const struct sna_composite_op *op)) 1233428d7b3dSmrg{ 1234428d7b3dSmrg int rem; 1235428d7b3dSmrg 1236428d7b3dSmrg assert(want); 1237428d7b3dSmrg 1238428d7b3dSmrgstart: 1239428d7b3dSmrg rem = vertex_space(sna); 1240428d7b3dSmrg if (unlikely(rem < op->floats_per_rect)) { 1241428d7b3dSmrg DBG(("flushing vbo for %s: %d < %d\n", 1242428d7b3dSmrg __FUNCTION__, rem, op->floats_per_rect)); 1243428d7b3dSmrg rem = gen6_get_rectangles__flush(sna, op); 1244428d7b3dSmrg if (unlikely(rem == 0)) 1245428d7b3dSmrg goto flush; 1246428d7b3dSmrg } 1247428d7b3dSmrg 1248428d7b3dSmrg if (unlikely(sna->render.vertex_offset == 0)) { 1249428d7b3dSmrg if (!gen6_rectangle_begin(sna, op)) 1250428d7b3dSmrg goto flush; 1251428d7b3dSmrg else 1252428d7b3dSmrg goto start; 1253428d7b3dSmrg } 1254428d7b3dSmrg 1255428d7b3dSmrg assert(rem <= vertex_space(sna)); 1256428d7b3dSmrg assert(op->floats_per_rect <= rem); 1257428d7b3dSmrg if (want > 1 && want * op->floats_per_rect > rem) 1258428d7b3dSmrg want = rem / op->floats_per_rect; 1259428d7b3dSmrg 1260428d7b3dSmrg assert(want > 0); 1261428d7b3dSmrg sna->render.vertex_index += 3*want; 1262428d7b3dSmrg return want; 1263428d7b3dSmrg 1264428d7b3dSmrgflush: 1265428d7b3dSmrg if (sna->render.vertex_offset) { 1266428d7b3dSmrg gen4_vertex_flush(sna); 1267428d7b3dSmrg gen6_magic_ca_pass(sna, op); 1268428d7b3dSmrg } 1269428d7b3dSmrg sna_vertex_wait__locked(&sna->render); 1270428d7b3dSmrg _kgem_submit(&sna->kgem); 1271428d7b3dSmrg emit_state(sna, op); 1272428d7b3dSmrg goto start; 1273428d7b3dSmrg} 1274428d7b3dSmrg 1275428d7b3dSmrginline static uint32_t *gen6_composite_get_binding_table(struct sna *sna, 1276428d7b3dSmrg uint16_t *offset) 1277428d7b3dSmrg{ 1278428d7b3dSmrg uint32_t *table; 1279428d7b3dSmrg 1280428d7b3dSmrg sna->kgem.surface -= 1281428d7b3dSmrg sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); 1282428d7b3dSmrg /* Clear all surplus entries to zero in case of prefetch */ 1283428d7b3dSmrg table = memset(sna->kgem.batch + sna->kgem.surface, 1284428d7b3dSmrg 0, sizeof(struct gen6_surface_state_padded)); 1285428d7b3dSmrg 1286428d7b3dSmrg DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); 1287428d7b3dSmrg 1288428d7b3dSmrg *offset = sna->kgem.surface; 1289428d7b3dSmrg return table; 1290428d7b3dSmrg} 1291428d7b3dSmrg 1292428d7b3dSmrgstatic bool 1293428d7b3dSmrggen6_get_batch(struct sna *sna, const struct sna_composite_op *op) 1294428d7b3dSmrg{ 1295428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); 1296428d7b3dSmrg 1297428d7b3dSmrg if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) { 1298428d7b3dSmrg DBG(("%s: flushing batch: %d < %d+%d\n", 1299428d7b3dSmrg __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, 1300428d7b3dSmrg 150, 4*8)); 1301428d7b3dSmrg kgem_submit(&sna->kgem); 1302428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1303428d7b3dSmrg } 1304428d7b3dSmrg 1305428d7b3dSmrg if (sna->render_state.gen6.needs_invariant) 1306428d7b3dSmrg gen6_emit_invariant(sna); 1307428d7b3dSmrg 1308428d7b3dSmrg return kgem_bo_is_dirty(op->dst.bo); 1309428d7b3dSmrg} 1310428d7b3dSmrg 1311428d7b3dSmrgstatic void gen6_emit_composite_state(struct sna *sna, 1312428d7b3dSmrg const struct sna_composite_op *op) 1313428d7b3dSmrg{ 1314428d7b3dSmrg uint32_t *binding_table; 1315428d7b3dSmrg uint16_t offset; 1316428d7b3dSmrg bool dirty; 1317428d7b3dSmrg 1318428d7b3dSmrg dirty = gen6_get_batch(sna, op); 1319428d7b3dSmrg 1320428d7b3dSmrg binding_table = gen6_composite_get_binding_table(sna, &offset); 1321428d7b3dSmrg 1322428d7b3dSmrg binding_table[0] = 1323428d7b3dSmrg gen6_bind_bo(sna, 1324428d7b3dSmrg op->dst.bo, op->dst.width, op->dst.height, 1325428d7b3dSmrg gen6_get_dest_format(op->dst.format), 1326428d7b3dSmrg true); 1327428d7b3dSmrg binding_table[1] = 1328428d7b3dSmrg gen6_bind_bo(sna, 1329428d7b3dSmrg op->src.bo, op->src.width, op->src.height, 1330428d7b3dSmrg op->src.card_format, 1331428d7b3dSmrg false); 1332428d7b3dSmrg if (op->mask.bo) { 1333428d7b3dSmrg binding_table[2] = 1334428d7b3dSmrg gen6_bind_bo(sna, 1335428d7b3dSmrg op->mask.bo, 1336428d7b3dSmrg op->mask.width, 1337428d7b3dSmrg op->mask.height, 1338428d7b3dSmrg op->mask.card_format, 1339428d7b3dSmrg false); 1340428d7b3dSmrg } 1341428d7b3dSmrg 1342428d7b3dSmrg if (sna->kgem.surface == offset && 1343428d7b3dSmrg *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table && 1344428d7b3dSmrg (op->mask.bo == NULL || 1345428d7b3dSmrg sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) { 1346428d7b3dSmrg sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); 1347428d7b3dSmrg offset = sna->render_state.gen6.surface_table; 1348428d7b3dSmrg } 1349428d7b3dSmrg 1350428d7b3dSmrg gen6_emit_state(sna, op, offset | dirty); 1351428d7b3dSmrg} 1352428d7b3dSmrg 1353428d7b3dSmrgstatic void 1354428d7b3dSmrggen6_align_vertex(struct sna *sna, const struct sna_composite_op *op) 1355428d7b3dSmrg{ 1356428d7b3dSmrg assert (sna->render.vertex_offset == 0); 1357428d7b3dSmrg if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) { 1358428d7b3dSmrg DBG(("aligning vertex: was %d, now %d floats per vertex\n", 1359428d7b3dSmrg sna->render_state.gen6.floats_per_vertex, 1360428d7b3dSmrg op->floats_per_vertex)); 1361428d7b3dSmrg gen4_vertex_align(sna, op); 1362428d7b3dSmrg sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex; 1363428d7b3dSmrg } 1364428d7b3dSmrg assert((sna->render.vertex_used % op->floats_per_vertex) == 0); 1365428d7b3dSmrg} 1366428d7b3dSmrg 1367428d7b3dSmrgfastcall static void 1368428d7b3dSmrggen6_render_composite_blt(struct sna *sna, 1369428d7b3dSmrg const struct sna_composite_op *op, 1370428d7b3dSmrg const struct sna_composite_rectangles *r) 1371428d7b3dSmrg{ 1372428d7b3dSmrg gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state); 1373428d7b3dSmrg op->prim_emit(sna, op, r); 1374428d7b3dSmrg} 1375428d7b3dSmrg 1376428d7b3dSmrgfastcall static void 1377428d7b3dSmrggen6_render_composite_box(struct sna *sna, 1378428d7b3dSmrg const struct sna_composite_op *op, 1379428d7b3dSmrg const BoxRec *box) 1380428d7b3dSmrg{ 1381428d7b3dSmrg struct sna_composite_rectangles r; 1382428d7b3dSmrg 1383428d7b3dSmrg gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state); 1384428d7b3dSmrg 1385428d7b3dSmrg DBG((" %s: (%d, %d), (%d, %d)\n", 1386428d7b3dSmrg __FUNCTION__, 1387428d7b3dSmrg box->x1, box->y1, box->x2, box->y2)); 1388428d7b3dSmrg 1389428d7b3dSmrg r.dst.x = box->x1; 1390428d7b3dSmrg r.dst.y = box->y1; 1391428d7b3dSmrg r.width = box->x2 - box->x1; 1392428d7b3dSmrg r.height = box->y2 - box->y1; 1393428d7b3dSmrg r.src = r.mask = r.dst; 1394428d7b3dSmrg 1395428d7b3dSmrg op->prim_emit(sna, op, &r); 1396428d7b3dSmrg} 1397428d7b3dSmrg 1398428d7b3dSmrgstatic void 1399428d7b3dSmrggen6_render_composite_boxes__blt(struct sna *sna, 1400428d7b3dSmrg const struct sna_composite_op *op, 1401428d7b3dSmrg const BoxRec *box, int nbox) 1402428d7b3dSmrg{ 1403428d7b3dSmrg DBG(("composite_boxes(%d)\n", nbox)); 1404428d7b3dSmrg 1405428d7b3dSmrg do { 1406428d7b3dSmrg int nbox_this_time; 1407428d7b3dSmrg 1408428d7b3dSmrg nbox_this_time = gen6_get_rectangles(sna, op, nbox, 1409428d7b3dSmrg gen6_emit_composite_state); 1410428d7b3dSmrg nbox -= nbox_this_time; 1411428d7b3dSmrg 1412428d7b3dSmrg do { 1413428d7b3dSmrg struct sna_composite_rectangles r; 1414428d7b3dSmrg 1415428d7b3dSmrg DBG((" %s: (%d, %d), (%d, %d)\n", 1416428d7b3dSmrg __FUNCTION__, 1417428d7b3dSmrg box->x1, box->y1, box->x2, box->y2)); 1418428d7b3dSmrg 1419428d7b3dSmrg r.dst.x = box->x1; 1420428d7b3dSmrg r.dst.y = box->y1; 1421428d7b3dSmrg r.width = box->x2 - box->x1; 1422428d7b3dSmrg r.height = box->y2 - box->y1; 1423428d7b3dSmrg r.src = r.mask = r.dst; 1424428d7b3dSmrg 1425428d7b3dSmrg op->prim_emit(sna, op, &r); 1426428d7b3dSmrg box++; 1427428d7b3dSmrg } while (--nbox_this_time); 1428428d7b3dSmrg } while (nbox); 1429428d7b3dSmrg} 1430428d7b3dSmrg 1431428d7b3dSmrgstatic void 1432428d7b3dSmrggen6_render_composite_boxes(struct sna *sna, 1433428d7b3dSmrg const struct sna_composite_op *op, 1434428d7b3dSmrg const BoxRec *box, int nbox) 1435428d7b3dSmrg{ 1436428d7b3dSmrg DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1437428d7b3dSmrg 1438428d7b3dSmrg do { 1439428d7b3dSmrg int nbox_this_time; 1440428d7b3dSmrg float *v; 1441428d7b3dSmrg 1442428d7b3dSmrg nbox_this_time = gen6_get_rectangles(sna, op, nbox, 1443428d7b3dSmrg gen6_emit_composite_state); 1444428d7b3dSmrg assert(nbox_this_time); 1445428d7b3dSmrg nbox -= nbox_this_time; 1446428d7b3dSmrg 1447428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1448428d7b3dSmrg sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1449428d7b3dSmrg 1450428d7b3dSmrg op->emit_boxes(op, box, nbox_this_time, v); 1451428d7b3dSmrg box += nbox_this_time; 1452428d7b3dSmrg } while (nbox); 1453428d7b3dSmrg} 1454428d7b3dSmrg 1455428d7b3dSmrgstatic void 1456428d7b3dSmrggen6_render_composite_boxes__thread(struct sna *sna, 1457428d7b3dSmrg const struct sna_composite_op *op, 1458428d7b3dSmrg const BoxRec *box, int nbox) 1459428d7b3dSmrg{ 1460428d7b3dSmrg DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1461428d7b3dSmrg 1462428d7b3dSmrg sna_vertex_lock(&sna->render); 1463428d7b3dSmrg do { 1464428d7b3dSmrg int nbox_this_time; 1465428d7b3dSmrg float *v; 1466428d7b3dSmrg 1467428d7b3dSmrg nbox_this_time = gen6_get_rectangles(sna, op, nbox, 1468428d7b3dSmrg gen6_emit_composite_state); 1469428d7b3dSmrg assert(nbox_this_time); 1470428d7b3dSmrg nbox -= nbox_this_time; 1471428d7b3dSmrg 1472428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1473428d7b3dSmrg sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1474428d7b3dSmrg 1475428d7b3dSmrg sna_vertex_acquire__locked(&sna->render); 1476428d7b3dSmrg sna_vertex_unlock(&sna->render); 1477428d7b3dSmrg 1478428d7b3dSmrg op->emit_boxes(op, box, nbox_this_time, v); 1479428d7b3dSmrg box += nbox_this_time; 1480428d7b3dSmrg 1481428d7b3dSmrg sna_vertex_lock(&sna->render); 1482428d7b3dSmrg sna_vertex_release__locked(&sna->render); 1483428d7b3dSmrg } while (nbox); 1484428d7b3dSmrg sna_vertex_unlock(&sna->render); 1485428d7b3dSmrg} 1486428d7b3dSmrg 1487428d7b3dSmrg#ifndef MAX 1488428d7b3dSmrg#define MAX(a,b) ((a) > (b) ? (a) : (b)) 1489428d7b3dSmrg#endif 1490428d7b3dSmrg 1491428d7b3dSmrgstatic uint32_t 1492428d7b3dSmrggen6_composite_create_blend_state(struct sna_static_stream *stream) 1493428d7b3dSmrg{ 1494428d7b3dSmrg char *base, *ptr; 1495428d7b3dSmrg int src, dst; 1496428d7b3dSmrg 1497428d7b3dSmrg base = sna_static_stream_map(stream, 1498428d7b3dSmrg GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE, 1499428d7b3dSmrg 64); 1500428d7b3dSmrg 1501428d7b3dSmrg ptr = base; 1502428d7b3dSmrg for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) { 1503428d7b3dSmrg for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) { 1504428d7b3dSmrg struct gen6_blend_state *blend = 1505428d7b3dSmrg (struct gen6_blend_state *)ptr; 1506428d7b3dSmrg 1507428d7b3dSmrg blend->blend0.dest_blend_factor = dst; 1508428d7b3dSmrg blend->blend0.source_blend_factor = src; 1509428d7b3dSmrg blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD; 1510428d7b3dSmrg blend->blend0.blend_enable = 1511428d7b3dSmrg !(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE); 1512428d7b3dSmrg 1513428d7b3dSmrg blend->blend1.post_blend_clamp_enable = 1; 1514428d7b3dSmrg blend->blend1.pre_blend_clamp_enable = 1; 1515428d7b3dSmrg 1516428d7b3dSmrg ptr += GEN6_BLEND_STATE_PADDED_SIZE; 1517428d7b3dSmrg } 1518428d7b3dSmrg } 1519428d7b3dSmrg 1520428d7b3dSmrg return sna_static_stream_offsetof(stream, base); 1521428d7b3dSmrg} 1522428d7b3dSmrg 1523428d7b3dSmrgstatic uint32_t gen6_bind_video_source(struct sna *sna, 1524428d7b3dSmrg struct kgem_bo *src_bo, 1525428d7b3dSmrg uint32_t src_offset, 1526428d7b3dSmrg int src_width, 1527428d7b3dSmrg int src_height, 1528428d7b3dSmrg int src_pitch, 1529428d7b3dSmrg uint32_t src_surf_format) 1530428d7b3dSmrg{ 1531428d7b3dSmrg struct gen6_surface_state *ss; 1532428d7b3dSmrg 1533428d7b3dSmrg sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); 1534428d7b3dSmrg 1535428d7b3dSmrg ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss)); 1536428d7b3dSmrg ss->ss0.surface_type = GEN6_SURFACE_2D; 1537428d7b3dSmrg ss->ss0.surface_format = src_surf_format; 1538428d7b3dSmrg 1539428d7b3dSmrg ss->ss1.base_addr = 1540428d7b3dSmrg kgem_add_reloc(&sna->kgem, 1541428d7b3dSmrg sna->kgem.surface + 1, 1542428d7b3dSmrg src_bo, 1543428d7b3dSmrg I915_GEM_DOMAIN_SAMPLER << 16, 1544428d7b3dSmrg src_offset); 1545428d7b3dSmrg 1546428d7b3dSmrg ss->ss2.width = src_width - 1; 1547428d7b3dSmrg ss->ss2.height = src_height - 1; 1548428d7b3dSmrg ss->ss3.pitch = src_pitch - 1; 1549428d7b3dSmrg 1550428d7b3dSmrg return sna->kgem.surface * sizeof(uint32_t); 1551428d7b3dSmrg} 1552428d7b3dSmrg 1553428d7b3dSmrgstatic void gen6_emit_video_state(struct sna *sna, 1554428d7b3dSmrg const struct sna_composite_op *op) 1555428d7b3dSmrg{ 1556428d7b3dSmrg struct sna_video_frame *frame = op->priv; 1557428d7b3dSmrg uint32_t src_surf_format; 1558428d7b3dSmrg uint32_t src_surf_base[6]; 1559428d7b3dSmrg int src_width[6]; 1560428d7b3dSmrg int src_height[6]; 1561428d7b3dSmrg int src_pitch[6]; 1562428d7b3dSmrg uint32_t *binding_table; 1563428d7b3dSmrg uint16_t offset; 1564428d7b3dSmrg bool dirty; 1565428d7b3dSmrg int n_src, n; 1566428d7b3dSmrg 1567428d7b3dSmrg dirty = gen6_get_batch(sna, op); 1568428d7b3dSmrg 1569428d7b3dSmrg src_surf_base[0] = 0; 1570428d7b3dSmrg src_surf_base[1] = 0; 1571428d7b3dSmrg src_surf_base[2] = frame->VBufOffset; 1572428d7b3dSmrg src_surf_base[3] = frame->VBufOffset; 1573428d7b3dSmrg src_surf_base[4] = frame->UBufOffset; 1574428d7b3dSmrg src_surf_base[5] = frame->UBufOffset; 1575428d7b3dSmrg 1576428d7b3dSmrg if (is_planar_fourcc(frame->id)) { 1577428d7b3dSmrg src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM; 1578428d7b3dSmrg src_width[1] = src_width[0] = frame->width; 1579428d7b3dSmrg src_height[1] = src_height[0] = frame->height; 1580428d7b3dSmrg src_pitch[1] = src_pitch[0] = frame->pitch[1]; 1581428d7b3dSmrg src_width[4] = src_width[5] = src_width[2] = src_width[3] = 1582428d7b3dSmrg frame->width / 2; 1583428d7b3dSmrg src_height[4] = src_height[5] = src_height[2] = src_height[3] = 1584428d7b3dSmrg frame->height / 2; 1585428d7b3dSmrg src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = 1586428d7b3dSmrg frame->pitch[0]; 1587428d7b3dSmrg n_src = 6; 1588428d7b3dSmrg } else { 1589428d7b3dSmrg if (frame->id == FOURCC_UYVY) 1590428d7b3dSmrg src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY; 1591428d7b3dSmrg else 1592428d7b3dSmrg src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL; 1593428d7b3dSmrg 1594428d7b3dSmrg src_width[0] = frame->width; 1595428d7b3dSmrg src_height[0] = frame->height; 1596428d7b3dSmrg src_pitch[0] = frame->pitch[0]; 1597428d7b3dSmrg n_src = 1; 1598428d7b3dSmrg } 1599428d7b3dSmrg 1600428d7b3dSmrg binding_table = gen6_composite_get_binding_table(sna, &offset); 1601428d7b3dSmrg 1602428d7b3dSmrg binding_table[0] = 1603428d7b3dSmrg gen6_bind_bo(sna, 1604428d7b3dSmrg op->dst.bo, op->dst.width, op->dst.height, 1605428d7b3dSmrg gen6_get_dest_format(op->dst.format), 1606428d7b3dSmrg true); 1607428d7b3dSmrg for (n = 0; n < n_src; n++) { 1608428d7b3dSmrg binding_table[1+n] = 1609428d7b3dSmrg gen6_bind_video_source(sna, 1610428d7b3dSmrg frame->bo, 1611428d7b3dSmrg src_surf_base[n], 1612428d7b3dSmrg src_width[n], 1613428d7b3dSmrg src_height[n], 1614428d7b3dSmrg src_pitch[n], 1615428d7b3dSmrg src_surf_format); 1616428d7b3dSmrg } 1617428d7b3dSmrg 1618428d7b3dSmrg gen6_emit_state(sna, op, offset | dirty); 1619428d7b3dSmrg} 1620428d7b3dSmrg 1621428d7b3dSmrgstatic bool 1622428d7b3dSmrggen6_render_video(struct sna *sna, 1623428d7b3dSmrg struct sna_video *video, 1624428d7b3dSmrg struct sna_video_frame *frame, 1625428d7b3dSmrg RegionPtr dstRegion, 1626428d7b3dSmrg PixmapPtr pixmap) 1627428d7b3dSmrg{ 1628428d7b3dSmrg struct sna_composite_op tmp; 1629428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(pixmap); 1630428d7b3dSmrg int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; 1631428d7b3dSmrg int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; 1632428d7b3dSmrg int src_width = frame->src.x2 - frame->src.x1; 1633428d7b3dSmrg int src_height = frame->src.y2 - frame->src.y1; 1634428d7b3dSmrg float src_offset_x, src_offset_y; 1635428d7b3dSmrg float src_scale_x, src_scale_y; 1636428d7b3dSmrg int nbox, pix_xoff, pix_yoff; 1637428d7b3dSmrg unsigned filter; 1638428d7b3dSmrg const BoxRec *box; 1639428d7b3dSmrg 1640428d7b3dSmrg DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", 1641428d7b3dSmrg __FUNCTION__, 1642428d7b3dSmrg src_width, src_height, dst_width, dst_height, 1643428d7b3dSmrg region_num_rects(dstRegion), 1644428d7b3dSmrg REGION_EXTENTS(NULL, dstRegion)->x1, 1645428d7b3dSmrg REGION_EXTENTS(NULL, dstRegion)->y1, 1646428d7b3dSmrg REGION_EXTENTS(NULL, dstRegion)->x2, 1647428d7b3dSmrg REGION_EXTENTS(NULL, dstRegion)->y2)); 1648428d7b3dSmrg 1649428d7b3dSmrg assert(priv->gpu_bo); 1650428d7b3dSmrg memset(&tmp, 0, sizeof(tmp)); 1651428d7b3dSmrg 1652428d7b3dSmrg tmp.dst.pixmap = pixmap; 1653428d7b3dSmrg tmp.dst.width = pixmap->drawable.width; 1654428d7b3dSmrg tmp.dst.height = pixmap->drawable.height; 1655428d7b3dSmrg tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth); 1656428d7b3dSmrg tmp.dst.bo = priv->gpu_bo; 1657428d7b3dSmrg 1658428d7b3dSmrg tmp.src.bo = frame->bo; 1659428d7b3dSmrg tmp.mask.bo = NULL; 1660428d7b3dSmrg 1661428d7b3dSmrg tmp.floats_per_vertex = 3; 1662428d7b3dSmrg tmp.floats_per_rect = 9; 1663428d7b3dSmrg 1664428d7b3dSmrg if (src_width == dst_width && src_height == dst_height) 1665428d7b3dSmrg filter = SAMPLER_FILTER_NEAREST; 1666428d7b3dSmrg else 1667428d7b3dSmrg filter = SAMPLER_FILTER_BILINEAR; 1668428d7b3dSmrg 1669428d7b3dSmrg tmp.u.gen6.flags = 1670428d7b3dSmrg GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, 1671428d7b3dSmrg SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), 1672428d7b3dSmrg NO_BLEND, 1673428d7b3dSmrg is_planar_fourcc(frame->id) ? 1674428d7b3dSmrg GEN6_WM_KERNEL_VIDEO_PLANAR : 1675428d7b3dSmrg GEN6_WM_KERNEL_VIDEO_PACKED, 1676428d7b3dSmrg 2); 1677428d7b3dSmrg tmp.priv = frame; 1678428d7b3dSmrg 1679428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 1680428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { 1681428d7b3dSmrg kgem_submit(&sna->kgem); 1682428d7b3dSmrg assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)); 1683428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1684428d7b3dSmrg } 1685428d7b3dSmrg 1686428d7b3dSmrg gen6_align_vertex(sna, &tmp); 1687428d7b3dSmrg gen6_emit_video_state(sna, &tmp); 1688428d7b3dSmrg 1689428d7b3dSmrg /* Set up the offset for translating from the given region (in screen 1690428d7b3dSmrg * coordinates) to the backing pixmap. 1691428d7b3dSmrg */ 1692428d7b3dSmrg#ifdef COMPOSITE 1693428d7b3dSmrg pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 1694428d7b3dSmrg pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 1695428d7b3dSmrg#else 1696428d7b3dSmrg pix_xoff = 0; 1697428d7b3dSmrg pix_yoff = 0; 1698428d7b3dSmrg#endif 1699428d7b3dSmrg 1700428d7b3dSmrg src_scale_x = (float)src_width / dst_width / frame->width; 1701428d7b3dSmrg src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; 1702428d7b3dSmrg 1703428d7b3dSmrg src_scale_y = (float)src_height / dst_height / frame->height; 1704428d7b3dSmrg src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; 1705428d7b3dSmrg 1706428d7b3dSmrg box = region_rects(dstRegion); 1707428d7b3dSmrg nbox = region_num_rects(dstRegion); 1708428d7b3dSmrg while (nbox--) { 1709428d7b3dSmrg BoxRec r; 1710428d7b3dSmrg 1711428d7b3dSmrg r.x1 = box->x1 + pix_xoff; 1712428d7b3dSmrg r.x2 = box->x2 + pix_xoff; 1713428d7b3dSmrg r.y1 = box->y1 + pix_yoff; 1714428d7b3dSmrg r.y2 = box->y2 + pix_yoff; 1715428d7b3dSmrg 1716428d7b3dSmrg gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state); 1717428d7b3dSmrg 1718428d7b3dSmrg OUT_VERTEX(r.x2, r.y2); 1719428d7b3dSmrg OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); 1720428d7b3dSmrg OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 1721428d7b3dSmrg 1722428d7b3dSmrg OUT_VERTEX(r.x1, r.y2); 1723428d7b3dSmrg OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 1724428d7b3dSmrg OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 1725428d7b3dSmrg 1726428d7b3dSmrg OUT_VERTEX(r.x1, r.y1); 1727428d7b3dSmrg OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 1728428d7b3dSmrg OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); 1729428d7b3dSmrg 1730428d7b3dSmrg if (!DAMAGE_IS_ALL(priv->gpu_damage)) { 1731428d7b3dSmrg sna_damage_add_box(&priv->gpu_damage, &r); 1732428d7b3dSmrg sna_damage_subtract_box(&priv->cpu_damage, &r); 1733428d7b3dSmrg } 1734428d7b3dSmrg box++; 1735428d7b3dSmrg } 1736428d7b3dSmrg 1737428d7b3dSmrg gen4_vertex_flush(sna); 1738428d7b3dSmrg return true; 1739428d7b3dSmrg} 1740428d7b3dSmrg 1741428d7b3dSmrgstatic int 1742428d7b3dSmrggen6_composite_picture(struct sna *sna, 1743428d7b3dSmrg PicturePtr picture, 1744428d7b3dSmrg struct sna_composite_channel *channel, 1745428d7b3dSmrg int x, int y, 1746428d7b3dSmrg int w, int h, 1747428d7b3dSmrg int dst_x, int dst_y, 1748428d7b3dSmrg bool precise) 1749428d7b3dSmrg{ 1750428d7b3dSmrg PixmapPtr pixmap; 1751428d7b3dSmrg uint32_t color; 1752428d7b3dSmrg int16_t dx, dy; 1753428d7b3dSmrg 1754428d7b3dSmrg DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d), precise=%d\n", 1755428d7b3dSmrg __FUNCTION__, x, y, w, h, dst_x, dst_y, precise)); 1756428d7b3dSmrg 1757428d7b3dSmrg channel->is_solid = false; 1758428d7b3dSmrg channel->card_format = -1; 1759428d7b3dSmrg 1760428d7b3dSmrg if (sna_picture_is_solid(picture, &color)) 1761428d7b3dSmrg return gen4_channel_init_solid(sna, channel, color); 1762428d7b3dSmrg 1763428d7b3dSmrg if (picture->pDrawable == NULL) { 1764428d7b3dSmrg int ret; 1765428d7b3dSmrg 1766428d7b3dSmrg if (picture->pSourcePict->type == SourcePictTypeLinear) 1767428d7b3dSmrg return gen4_channel_init_linear(sna, picture, channel, 1768428d7b3dSmrg x, y, 1769428d7b3dSmrg w, h, 1770428d7b3dSmrg dst_x, dst_y); 1771428d7b3dSmrg 1772428d7b3dSmrg DBG(("%s -- fixup, gradient\n", __FUNCTION__)); 1773428d7b3dSmrg ret = -1; 1774428d7b3dSmrg if (!precise) 1775428d7b3dSmrg ret = sna_render_picture_approximate_gradient(sna, picture, channel, 1776428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1777428d7b3dSmrg if (ret == -1) 1778428d7b3dSmrg ret = sna_render_picture_fixup(sna, picture, channel, 1779428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1780428d7b3dSmrg return ret; 1781428d7b3dSmrg } 1782428d7b3dSmrg 1783428d7b3dSmrg if (picture->alphaMap) { 1784428d7b3dSmrg DBG(("%s -- fixup, alphamap\n", __FUNCTION__)); 1785428d7b3dSmrg return sna_render_picture_fixup(sna, picture, channel, 1786428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1787428d7b3dSmrg } 1788428d7b3dSmrg 1789428d7b3dSmrg if (!gen6_check_repeat(picture)) 1790428d7b3dSmrg return sna_render_picture_fixup(sna, picture, channel, 1791428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1792428d7b3dSmrg 1793428d7b3dSmrg if (!gen6_check_filter(picture)) 1794428d7b3dSmrg return sna_render_picture_fixup(sna, picture, channel, 1795428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1796428d7b3dSmrg 1797428d7b3dSmrg channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; 1798428d7b3dSmrg channel->filter = picture->filter; 1799428d7b3dSmrg 1800428d7b3dSmrg pixmap = get_drawable_pixmap(picture->pDrawable); 1801428d7b3dSmrg get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); 1802428d7b3dSmrg 1803428d7b3dSmrg x += dx + picture->pDrawable->x; 1804428d7b3dSmrg y += dy + picture->pDrawable->y; 1805428d7b3dSmrg 1806428d7b3dSmrg channel->is_affine = sna_transform_is_affine(picture->transform); 1807428d7b3dSmrg if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) { 1808428d7b3dSmrg DBG(("%s: integer translation (%d, %d), removing\n", 1809428d7b3dSmrg __FUNCTION__, dx, dy)); 1810428d7b3dSmrg x += dx; 1811428d7b3dSmrg y += dy; 1812428d7b3dSmrg channel->transform = NULL; 1813428d7b3dSmrg channel->filter = PictFilterNearest; 1814428d7b3dSmrg 1815428d7b3dSmrg if (channel->repeat && 1816428d7b3dSmrg (x >= 0 && 1817428d7b3dSmrg y >= 0 && 1818428d7b3dSmrg x + w < pixmap->drawable.width && 1819428d7b3dSmrg y + h < pixmap->drawable.height)) { 1820428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(pixmap); 1821428d7b3dSmrg if (priv && priv->clear) { 1822428d7b3dSmrg DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); 1823428d7b3dSmrg return gen4_channel_init_solid(sna, channel, priv->clear_color); 1824428d7b3dSmrg } 1825428d7b3dSmrg } 1826428d7b3dSmrg } else 1827428d7b3dSmrg channel->transform = picture->transform; 1828428d7b3dSmrg 1829428d7b3dSmrg channel->pict_format = picture->format; 1830428d7b3dSmrg channel->card_format = gen6_get_card_format(picture->format); 1831428d7b3dSmrg if (channel->card_format == (unsigned)-1) 1832428d7b3dSmrg return sna_render_picture_convert(sna, picture, channel, pixmap, 1833428d7b3dSmrg x, y, w, h, dst_x, dst_y, 1834428d7b3dSmrg false); 1835428d7b3dSmrg 1836428d7b3dSmrg if (too_large(pixmap->drawable.width, pixmap->drawable.height)) { 1837428d7b3dSmrg DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__, 1838428d7b3dSmrg pixmap->drawable.width, pixmap->drawable.height)); 1839428d7b3dSmrg return sna_render_picture_extract(sna, picture, channel, 1840428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1841428d7b3dSmrg } 1842428d7b3dSmrg 1843428d7b3dSmrg DBG(("%s: pixmap, repeat=%d, filter=%d, transform?=%d [affine? %d], format=%08x\n", 1844428d7b3dSmrg __FUNCTION__, 1845428d7b3dSmrg channel->repeat, channel->filter, 1846428d7b3dSmrg channel->transform != NULL, channel->is_affine, 1847428d7b3dSmrg channel->pict_format)); 1848428d7b3dSmrg if (channel->transform) { 1849428d7b3dSmrg#define f2d(x) (((double)(x))/65536.) 1850428d7b3dSmrg DBG(("%s: transform=[%f %f %f, %f %f %f, %f %f %f] (raw [%x %x %x, %x %x %x, %x %x %x])\n", 1851428d7b3dSmrg __FUNCTION__, 1852428d7b3dSmrg f2d(channel->transform->matrix[0][0]), 1853428d7b3dSmrg f2d(channel->transform->matrix[0][1]), 1854428d7b3dSmrg f2d(channel->transform->matrix[0][2]), 1855428d7b3dSmrg f2d(channel->transform->matrix[1][0]), 1856428d7b3dSmrg f2d(channel->transform->matrix[1][1]), 1857428d7b3dSmrg f2d(channel->transform->matrix[1][2]), 1858428d7b3dSmrg f2d(channel->transform->matrix[2][0]), 1859428d7b3dSmrg f2d(channel->transform->matrix[2][1]), 1860428d7b3dSmrg f2d(channel->transform->matrix[2][2]), 1861428d7b3dSmrg channel->transform->matrix[0][0], 1862428d7b3dSmrg channel->transform->matrix[0][1], 1863428d7b3dSmrg channel->transform->matrix[0][2], 1864428d7b3dSmrg channel->transform->matrix[1][0], 1865428d7b3dSmrg channel->transform->matrix[1][1], 1866428d7b3dSmrg channel->transform->matrix[1][2], 1867428d7b3dSmrg channel->transform->matrix[2][0], 1868428d7b3dSmrg channel->transform->matrix[2][1], 1869428d7b3dSmrg channel->transform->matrix[2][2])); 1870428d7b3dSmrg#undef f2d 1871428d7b3dSmrg } 1872428d7b3dSmrg 1873428d7b3dSmrg return sna_render_pixmap_bo(sna, channel, pixmap, 1874428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1875428d7b3dSmrg} 1876428d7b3dSmrg 1877428d7b3dSmrginline static void gen6_composite_channel_convert(struct sna_composite_channel *channel) 1878428d7b3dSmrg{ 1879428d7b3dSmrg channel->repeat = gen6_repeat(channel->repeat); 1880428d7b3dSmrg channel->filter = gen6_filter(channel->filter); 1881428d7b3dSmrg if (channel->card_format == (unsigned)-1) 1882428d7b3dSmrg channel->card_format = gen6_get_card_format(channel->pict_format); 1883428d7b3dSmrg assert(channel->card_format != (unsigned)-1); 1884428d7b3dSmrg} 1885428d7b3dSmrg 1886428d7b3dSmrgstatic void gen6_render_composite_done(struct sna *sna, 1887428d7b3dSmrg const struct sna_composite_op *op) 1888428d7b3dSmrg{ 1889428d7b3dSmrg DBG(("%s\n", __FUNCTION__)); 1890428d7b3dSmrg 1891428d7b3dSmrg assert(!sna->render.active); 1892428d7b3dSmrg if (sna->render.vertex_offset) { 1893428d7b3dSmrg gen4_vertex_flush(sna); 1894428d7b3dSmrg gen6_magic_ca_pass(sna, op); 1895428d7b3dSmrg } 1896428d7b3dSmrg 1897428d7b3dSmrg if (op->mask.bo) 1898428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->mask.bo); 1899428d7b3dSmrg if (op->src.bo) 1900428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->src.bo); 1901428d7b3dSmrg 1902428d7b3dSmrg sna_render_composite_redirect_done(sna, op); 1903428d7b3dSmrg} 1904428d7b3dSmrg 1905428d7b3dSmrginline static bool 1906428d7b3dSmrggen6_composite_set_target(struct sna *sna, 1907428d7b3dSmrg struct sna_composite_op *op, 1908428d7b3dSmrg PicturePtr dst, 1909428d7b3dSmrg int x, int y, int w, int h, 1910428d7b3dSmrg bool partial) 1911428d7b3dSmrg{ 1912428d7b3dSmrg BoxRec box; 1913428d7b3dSmrg unsigned int hint; 1914428d7b3dSmrg 1915428d7b3dSmrg DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial)); 1916428d7b3dSmrg 1917428d7b3dSmrg op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); 1918428d7b3dSmrg op->dst.format = dst->format; 1919428d7b3dSmrg op->dst.width = op->dst.pixmap->drawable.width; 1920428d7b3dSmrg op->dst.height = op->dst.pixmap->drawable.height; 1921428d7b3dSmrg 1922428d7b3dSmrg if (w && h) { 1923428d7b3dSmrg box.x1 = x; 1924428d7b3dSmrg box.y1 = y; 1925428d7b3dSmrg box.x2 = x + w; 1926428d7b3dSmrg box.y2 = y + h; 1927428d7b3dSmrg } else 1928428d7b3dSmrg sna_render_picture_extents(dst, &box); 1929428d7b3dSmrg 1930428d7b3dSmrg hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; 1931428d7b3dSmrg if (!partial) { 1932428d7b3dSmrg hint |= IGNORE_DAMAGE; 1933428d7b3dSmrg if (w == op->dst.width && h == op->dst.height) 1934428d7b3dSmrg hint |= REPLACES; 1935428d7b3dSmrg } 1936428d7b3dSmrg 1937428d7b3dSmrg op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 1938428d7b3dSmrg if (op->dst.bo == NULL) 1939428d7b3dSmrg return false; 1940428d7b3dSmrg 1941428d7b3dSmrg if (hint & REPLACES) { 1942428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 1943428d7b3dSmrg kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 1944428d7b3dSmrg } 1945428d7b3dSmrg 1946428d7b3dSmrg get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 1947428d7b3dSmrg &op->dst.x, &op->dst.y); 1948428d7b3dSmrg 1949428d7b3dSmrg DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 1950428d7b3dSmrg __FUNCTION__, 1951428d7b3dSmrg op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 1952428d7b3dSmrg op->dst.width, op->dst.height, 1953428d7b3dSmrg op->dst.bo->pitch, 1954428d7b3dSmrg op->dst.x, op->dst.y, 1955428d7b3dSmrg op->damage ? *op->damage : (void *)-1)); 1956428d7b3dSmrg 1957428d7b3dSmrg assert(op->dst.bo->proxy == NULL); 1958428d7b3dSmrg 1959428d7b3dSmrg if (too_large(op->dst.width, op->dst.height) && 1960428d7b3dSmrg !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 1961428d7b3dSmrg return false; 1962428d7b3dSmrg 1963428d7b3dSmrg return true; 1964428d7b3dSmrg} 1965428d7b3dSmrg 1966428d7b3dSmrgstatic bool 1967428d7b3dSmrgtry_blt(struct sna *sna, 1968428d7b3dSmrg PicturePtr dst, PicturePtr src, 1969428d7b3dSmrg int width, int height) 1970428d7b3dSmrg{ 1971428d7b3dSmrg struct kgem_bo *bo; 1972428d7b3dSmrg 1973428d7b3dSmrg if (sna->kgem.mode == KGEM_BLT) { 1974428d7b3dSmrg DBG(("%s: already performing BLT\n", __FUNCTION__)); 1975428d7b3dSmrg return true; 1976428d7b3dSmrg } 1977428d7b3dSmrg 1978428d7b3dSmrg if (too_large(width, height)) { 1979428d7b3dSmrg DBG(("%s: operation too large for 3D pipe (%d, %d)\n", 1980428d7b3dSmrg __FUNCTION__, width, height)); 1981428d7b3dSmrg return true; 1982428d7b3dSmrg } 1983428d7b3dSmrg 1984428d7b3dSmrg bo = __sna_drawable_peek_bo(dst->pDrawable); 1985428d7b3dSmrg if (bo == NULL) 1986428d7b3dSmrg return true; 1987428d7b3dSmrg if (bo->rq) 1988428d7b3dSmrg return RQ_IS_BLT(bo->rq); 1989428d7b3dSmrg 1990428d7b3dSmrg if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) 1991428d7b3dSmrg return true; 1992428d7b3dSmrg 1993428d7b3dSmrg if (src->pDrawable) { 1994428d7b3dSmrg bo = __sna_drawable_peek_bo(src->pDrawable); 1995428d7b3dSmrg if (bo == NULL) 1996428d7b3dSmrg return true; 1997428d7b3dSmrg 1998428d7b3dSmrg if (prefer_blt_bo(sna, bo)) 1999428d7b3dSmrg return true; 2000428d7b3dSmrg } 2001428d7b3dSmrg 2002428d7b3dSmrg if (sna->kgem.ring == KGEM_BLT) { 2003428d7b3dSmrg DBG(("%s: already performing BLT\n", __FUNCTION__)); 2004428d7b3dSmrg return true; 2005428d7b3dSmrg } 2006428d7b3dSmrg 2007428d7b3dSmrg return false; 2008428d7b3dSmrg} 2009428d7b3dSmrg 2010428d7b3dSmrgstatic bool 2011428d7b3dSmrgcheck_gradient(PicturePtr picture, bool precise) 2012428d7b3dSmrg{ 2013428d7b3dSmrg if (picture->pDrawable) 2014428d7b3dSmrg return false; 2015428d7b3dSmrg 2016428d7b3dSmrg switch (picture->pSourcePict->type) { 2017428d7b3dSmrg case SourcePictTypeSolidFill: 2018428d7b3dSmrg case SourcePictTypeLinear: 2019428d7b3dSmrg return false; 2020428d7b3dSmrg default: 2021428d7b3dSmrg return precise; 2022428d7b3dSmrg } 2023428d7b3dSmrg} 2024428d7b3dSmrg 2025428d7b3dSmrgstatic bool 2026428d7b3dSmrghas_alphamap(PicturePtr p) 2027428d7b3dSmrg{ 2028428d7b3dSmrg return p->alphaMap != NULL; 2029428d7b3dSmrg} 2030428d7b3dSmrg 2031428d7b3dSmrgstatic bool 2032428d7b3dSmrgneed_upload(PicturePtr p) 2033428d7b3dSmrg{ 2034428d7b3dSmrg return p->pDrawable && unattached(p->pDrawable) && untransformed(p); 2035428d7b3dSmrg} 2036428d7b3dSmrg 2037428d7b3dSmrgstatic bool 2038428d7b3dSmrgsource_is_busy(PixmapPtr pixmap) 2039428d7b3dSmrg{ 2040428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(pixmap); 2041428d7b3dSmrg if (priv == NULL || priv->clear) 2042428d7b3dSmrg return false; 2043428d7b3dSmrg 2044428d7b3dSmrg if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 2045428d7b3dSmrg return true; 2046428d7b3dSmrg 2047428d7b3dSmrg if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 2048428d7b3dSmrg return true; 2049428d7b3dSmrg 2050428d7b3dSmrg return priv->gpu_damage && !priv->cpu_damage; 2051428d7b3dSmrg} 2052428d7b3dSmrg 2053428d7b3dSmrgstatic bool 2054428d7b3dSmrgsource_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) 2055428d7b3dSmrg{ 2056428d7b3dSmrg if (sna_picture_is_solid(p, NULL)) 2057428d7b3dSmrg return false; 2058428d7b3dSmrg 2059428d7b3dSmrg if (p->pSourcePict) 2060428d7b3dSmrg return check_gradient(p, precise); 2061428d7b3dSmrg 2062428d7b3dSmrg if (!gen6_check_repeat(p) || !gen6_check_format(p->format)) 2063428d7b3dSmrg return true; 2064428d7b3dSmrg 2065428d7b3dSmrg if (pixmap && source_is_busy(pixmap)) 2066428d7b3dSmrg return false; 2067428d7b3dSmrg 2068428d7b3dSmrg return has_alphamap(p) || !gen6_check_filter(p) || need_upload(p); 2069428d7b3dSmrg} 2070428d7b3dSmrg 2071428d7b3dSmrgstatic bool 2072428d7b3dSmrggen6_composite_fallback(struct sna *sna, 2073428d7b3dSmrg PicturePtr src, 2074428d7b3dSmrg PicturePtr mask, 2075428d7b3dSmrg PicturePtr dst) 2076428d7b3dSmrg{ 2077428d7b3dSmrg PixmapPtr src_pixmap; 2078428d7b3dSmrg PixmapPtr mask_pixmap; 2079428d7b3dSmrg PixmapPtr dst_pixmap; 2080428d7b3dSmrg bool src_fallback, mask_fallback; 2081428d7b3dSmrg 2082428d7b3dSmrg if (!gen6_check_dst_format(dst->format)) { 2083428d7b3dSmrg DBG(("%s: unknown destination format: %d\n", 2084428d7b3dSmrg __FUNCTION__, dst->format)); 2085428d7b3dSmrg return true; 2086428d7b3dSmrg } 2087428d7b3dSmrg 2088428d7b3dSmrg dst_pixmap = get_drawable_pixmap(dst->pDrawable); 2089428d7b3dSmrg 2090428d7b3dSmrg src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; 2091428d7b3dSmrg src_fallback = source_fallback(src, src_pixmap, 2092428d7b3dSmrg dst->polyMode == PolyModePrecise); 2093428d7b3dSmrg 2094428d7b3dSmrg if (mask) { 2095428d7b3dSmrg mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; 2096428d7b3dSmrg mask_fallback = source_fallback(mask, mask_pixmap, 2097428d7b3dSmrg dst->polyMode == PolyModePrecise); 2098428d7b3dSmrg } else { 2099428d7b3dSmrg mask_pixmap = NULL; 2100428d7b3dSmrg mask_fallback = false; 2101428d7b3dSmrg } 2102428d7b3dSmrg 2103428d7b3dSmrg /* If we are using the destination as a source and need to 2104428d7b3dSmrg * readback in order to upload the source, do it all 2105428d7b3dSmrg * on the cpu. 2106428d7b3dSmrg */ 2107428d7b3dSmrg if (src_pixmap == dst_pixmap && src_fallback) { 2108428d7b3dSmrg DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); 2109428d7b3dSmrg return true; 2110428d7b3dSmrg } 2111428d7b3dSmrg if (mask_pixmap == dst_pixmap && mask_fallback) { 2112428d7b3dSmrg DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); 2113428d7b3dSmrg return true; 2114428d7b3dSmrg } 2115428d7b3dSmrg 2116428d7b3dSmrg /* If anything is on the GPU, push everything out to the GPU */ 2117428d7b3dSmrg if (dst_use_gpu(dst_pixmap)) { 2118428d7b3dSmrg DBG(("%s: dst is already on the GPU, try to use GPU\n", 2119428d7b3dSmrg __FUNCTION__)); 2120428d7b3dSmrg return false; 2121428d7b3dSmrg } 2122428d7b3dSmrg 2123428d7b3dSmrg if (src_pixmap && !src_fallback) { 2124428d7b3dSmrg DBG(("%s: src is already on the GPU, try to use GPU\n", 2125428d7b3dSmrg __FUNCTION__)); 2126428d7b3dSmrg return false; 2127428d7b3dSmrg } 2128428d7b3dSmrg if (mask_pixmap && !mask_fallback) { 2129428d7b3dSmrg DBG(("%s: mask is already on the GPU, try to use GPU\n", 2130428d7b3dSmrg __FUNCTION__)); 2131428d7b3dSmrg return false; 2132428d7b3dSmrg } 2133428d7b3dSmrg 2134428d7b3dSmrg /* However if the dst is not on the GPU and we need to 2135428d7b3dSmrg * render one of the sources using the CPU, we may 2136428d7b3dSmrg * as well do the entire operation in place onthe CPU. 2137428d7b3dSmrg */ 2138428d7b3dSmrg if (src_fallback) { 2139428d7b3dSmrg DBG(("%s: dst is on the CPU and src will fallback\n", 2140428d7b3dSmrg __FUNCTION__)); 2141428d7b3dSmrg return true; 2142428d7b3dSmrg } 2143428d7b3dSmrg 2144428d7b3dSmrg if (mask && mask_fallback) { 2145428d7b3dSmrg DBG(("%s: dst is on the CPU and mask will fallback\n", 2146428d7b3dSmrg __FUNCTION__)); 2147428d7b3dSmrg return true; 2148428d7b3dSmrg } 2149428d7b3dSmrg 2150428d7b3dSmrg if (too_large(dst_pixmap->drawable.width, 2151428d7b3dSmrg dst_pixmap->drawable.height) && 2152428d7b3dSmrg dst_is_cpu(dst_pixmap)) { 2153428d7b3dSmrg DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 2154428d7b3dSmrg return true; 2155428d7b3dSmrg } 2156428d7b3dSmrg 2157428d7b3dSmrg DBG(("%s: dst is not on the GPU and the operation should not fallback\n", 2158428d7b3dSmrg __FUNCTION__)); 2159428d7b3dSmrg return dst_use_cpu(dst_pixmap); 2160428d7b3dSmrg} 2161428d7b3dSmrg 2162428d7b3dSmrgstatic int 2163428d7b3dSmrgreuse_source(struct sna *sna, 2164428d7b3dSmrg PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 2165428d7b3dSmrg PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 2166428d7b3dSmrg{ 2167428d7b3dSmrg uint32_t color; 2168428d7b3dSmrg 2169428d7b3dSmrg if (src_x != msk_x || src_y != msk_y) 2170428d7b3dSmrg return false; 2171428d7b3dSmrg 2172428d7b3dSmrg if (src == mask) { 2173428d7b3dSmrg DBG(("%s: mask is source\n", __FUNCTION__)); 2174428d7b3dSmrg *mc = *sc; 2175428d7b3dSmrg mc->bo = kgem_bo_reference(mc->bo); 2176428d7b3dSmrg return true; 2177428d7b3dSmrg } 2178428d7b3dSmrg 2179428d7b3dSmrg if (sna_picture_is_solid(mask, &color)) 2180428d7b3dSmrg return gen4_channel_init_solid(sna, mc, color); 2181428d7b3dSmrg 2182428d7b3dSmrg if (sc->is_solid) 2183428d7b3dSmrg return false; 2184428d7b3dSmrg 2185428d7b3dSmrg if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) 2186428d7b3dSmrg return false; 2187428d7b3dSmrg 2188428d7b3dSmrg DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 2189428d7b3dSmrg 2190428d7b3dSmrg if (!sna_transform_equal(src->transform, mask->transform)) 2191428d7b3dSmrg return false; 2192428d7b3dSmrg 2193428d7b3dSmrg if (!sna_picture_alphamap_equal(src, mask)) 2194428d7b3dSmrg return false; 2195428d7b3dSmrg 2196428d7b3dSmrg if (!gen6_check_repeat(mask)) 2197428d7b3dSmrg return false; 2198428d7b3dSmrg 2199428d7b3dSmrg if (!gen6_check_filter(mask)) 2200428d7b3dSmrg return false; 2201428d7b3dSmrg 2202428d7b3dSmrg if (!gen6_check_format(mask->format)) 2203428d7b3dSmrg return false; 2204428d7b3dSmrg 2205428d7b3dSmrg DBG(("%s: reusing source channel for mask with a twist\n", 2206428d7b3dSmrg __FUNCTION__)); 2207428d7b3dSmrg 2208428d7b3dSmrg *mc = *sc; 2209428d7b3dSmrg mc->repeat = gen6_repeat(mask->repeat ? mask->repeatType : RepeatNone); 2210428d7b3dSmrg mc->filter = gen6_filter(mask->filter); 2211428d7b3dSmrg mc->pict_format = mask->format; 2212428d7b3dSmrg mc->card_format = gen6_get_card_format(mask->format); 2213428d7b3dSmrg mc->bo = kgem_bo_reference(mc->bo); 2214428d7b3dSmrg return true; 2215428d7b3dSmrg} 2216428d7b3dSmrg 2217428d7b3dSmrgstatic bool 2218428d7b3dSmrggen6_render_composite(struct sna *sna, 2219428d7b3dSmrg uint8_t op, 2220428d7b3dSmrg PicturePtr src, 2221428d7b3dSmrg PicturePtr mask, 2222428d7b3dSmrg PicturePtr dst, 2223428d7b3dSmrg int16_t src_x, int16_t src_y, 2224428d7b3dSmrg int16_t msk_x, int16_t msk_y, 2225428d7b3dSmrg int16_t dst_x, int16_t dst_y, 2226428d7b3dSmrg int16_t width, int16_t height, 2227428d7b3dSmrg unsigned flags, 2228428d7b3dSmrg struct sna_composite_op *tmp) 2229428d7b3dSmrg{ 2230428d7b3dSmrg if (op >= ARRAY_SIZE(gen6_blend_op)) 2231428d7b3dSmrg return false; 2232428d7b3dSmrg 2233428d7b3dSmrg DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, 2234428d7b3dSmrg width, height, sna->kgem.ring)); 2235428d7b3dSmrg 2236428d7b3dSmrg if (mask == NULL && 2237428d7b3dSmrg try_blt(sna, dst, src, width, height) && 2238428d7b3dSmrg sna_blt_composite(sna, op, 2239428d7b3dSmrg src, dst, 2240428d7b3dSmrg src_x, src_y, 2241428d7b3dSmrg dst_x, dst_y, 2242428d7b3dSmrg width, height, 2243428d7b3dSmrg flags, tmp)) 2244428d7b3dSmrg return true; 2245428d7b3dSmrg 2246428d7b3dSmrg if (gen6_composite_fallback(sna, src, mask, dst)) 2247428d7b3dSmrg goto fallback; 2248428d7b3dSmrg 2249428d7b3dSmrg if (need_tiling(sna, width, height)) 2250428d7b3dSmrg return sna_tiling_composite(op, src, mask, dst, 2251428d7b3dSmrg src_x, src_y, 2252428d7b3dSmrg msk_x, msk_y, 2253428d7b3dSmrg dst_x, dst_y, 2254428d7b3dSmrg width, height, 2255428d7b3dSmrg tmp); 2256428d7b3dSmrg 2257428d7b3dSmrg if (op == PictOpClear && src == sna->clear) 2258428d7b3dSmrg op = PictOpSrc; 2259428d7b3dSmrg tmp->op = op; 2260428d7b3dSmrg if (!gen6_composite_set_target(sna, tmp, dst, 2261428d7b3dSmrg dst_x, dst_y, width, height, 2262428d7b3dSmrg flags & COMPOSITE_PARTIAL || op > PictOpSrc)) 2263428d7b3dSmrg goto fallback; 2264428d7b3dSmrg 2265428d7b3dSmrg switch (gen6_composite_picture(sna, src, &tmp->src, 2266428d7b3dSmrg src_x, src_y, 2267428d7b3dSmrg width, height, 2268428d7b3dSmrg dst_x, dst_y, 2269428d7b3dSmrg dst->polyMode == PolyModePrecise)) { 2270428d7b3dSmrg case -1: 2271428d7b3dSmrg goto cleanup_dst; 2272428d7b3dSmrg case 0: 2273428d7b3dSmrg if (!gen4_channel_init_solid(sna, &tmp->src, 0)) 2274428d7b3dSmrg goto cleanup_dst; 2275428d7b3dSmrg /* fall through to fixup */ 2276428d7b3dSmrg case 1: 2277428d7b3dSmrg /* Did we just switch rings to prepare the source? */ 2278428d7b3dSmrg if (mask == NULL && 2279428d7b3dSmrg prefer_blt_composite(sna, tmp) && 2280428d7b3dSmrg sna_blt_composite__convert(sna, 2281428d7b3dSmrg dst_x, dst_y, width, height, 2282428d7b3dSmrg tmp)) 2283428d7b3dSmrg return true; 2284428d7b3dSmrg 2285428d7b3dSmrg gen6_composite_channel_convert(&tmp->src); 2286428d7b3dSmrg break; 2287428d7b3dSmrg } 2288428d7b3dSmrg 2289428d7b3dSmrg tmp->is_affine = tmp->src.is_affine; 2290428d7b3dSmrg tmp->has_component_alpha = false; 2291428d7b3dSmrg tmp->need_magic_ca_pass = false; 2292428d7b3dSmrg 2293428d7b3dSmrg tmp->mask.bo = NULL; 2294428d7b3dSmrg tmp->mask.filter = SAMPLER_FILTER_NEAREST; 2295428d7b3dSmrg tmp->mask.repeat = SAMPLER_EXTEND_NONE; 2296428d7b3dSmrg 2297428d7b3dSmrg if (mask) { 2298428d7b3dSmrg if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { 2299428d7b3dSmrg tmp->has_component_alpha = true; 2300428d7b3dSmrg 2301428d7b3dSmrg /* Check if it's component alpha that relies on a source alpha and on 2302428d7b3dSmrg * the source value. We can only get one of those into the single 2303428d7b3dSmrg * source value that we get to blend with. 2304428d7b3dSmrg */ 2305428d7b3dSmrg if (gen6_blend_op[op].src_alpha && 2306428d7b3dSmrg (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) { 2307428d7b3dSmrg if (op != PictOpOver) 2308428d7b3dSmrg goto cleanup_src; 2309428d7b3dSmrg 2310428d7b3dSmrg tmp->need_magic_ca_pass = true; 2311428d7b3dSmrg tmp->op = PictOpOutReverse; 2312428d7b3dSmrg } 2313428d7b3dSmrg } 2314428d7b3dSmrg 2315428d7b3dSmrg if (!reuse_source(sna, 2316428d7b3dSmrg src, &tmp->src, src_x, src_y, 2317428d7b3dSmrg mask, &tmp->mask, msk_x, msk_y)) { 2318428d7b3dSmrg switch (gen6_composite_picture(sna, mask, &tmp->mask, 2319428d7b3dSmrg msk_x, msk_y, 2320428d7b3dSmrg width, height, 2321428d7b3dSmrg dst_x, dst_y, 2322428d7b3dSmrg dst->polyMode == PolyModePrecise)) { 2323428d7b3dSmrg case -1: 2324428d7b3dSmrg goto cleanup_src; 2325428d7b3dSmrg case 0: 2326428d7b3dSmrg if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) 2327428d7b3dSmrg goto cleanup_src; 2328428d7b3dSmrg /* fall through to fixup */ 2329428d7b3dSmrg case 1: 2330428d7b3dSmrg gen6_composite_channel_convert(&tmp->mask); 2331428d7b3dSmrg break; 2332428d7b3dSmrg } 2333428d7b3dSmrg } 2334428d7b3dSmrg 2335428d7b3dSmrg tmp->is_affine &= tmp->mask.is_affine; 2336428d7b3dSmrg } 2337428d7b3dSmrg 2338428d7b3dSmrg tmp->u.gen6.flags = 2339428d7b3dSmrg GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, 2340428d7b3dSmrg tmp->src.repeat, 2341428d7b3dSmrg tmp->mask.filter, 2342428d7b3dSmrg tmp->mask.repeat), 2343428d7b3dSmrg gen6_get_blend(tmp->op, 2344428d7b3dSmrg tmp->has_component_alpha, 2345428d7b3dSmrg tmp->dst.format), 2346428d7b3dSmrg gen6_choose_composite_kernel(tmp->op, 2347428d7b3dSmrg tmp->mask.bo != NULL, 2348428d7b3dSmrg tmp->has_component_alpha, 2349428d7b3dSmrg tmp->is_affine), 2350428d7b3dSmrg gen4_choose_composite_emitter(sna, tmp)); 2351428d7b3dSmrg 2352428d7b3dSmrg tmp->blt = gen6_render_composite_blt; 2353428d7b3dSmrg tmp->box = gen6_render_composite_box; 2354428d7b3dSmrg tmp->boxes = gen6_render_composite_boxes__blt; 2355428d7b3dSmrg if (tmp->emit_boxes) { 2356428d7b3dSmrg tmp->boxes = gen6_render_composite_boxes; 2357428d7b3dSmrg tmp->thread_boxes = gen6_render_composite_boxes__thread; 2358428d7b3dSmrg } 2359428d7b3dSmrg tmp->done = gen6_render_composite_done; 2360428d7b3dSmrg 2361428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); 2362428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2363428d7b3dSmrg tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2364428d7b3dSmrg NULL)) { 2365428d7b3dSmrg kgem_submit(&sna->kgem); 2366428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2367428d7b3dSmrg tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2368428d7b3dSmrg NULL)) 2369428d7b3dSmrg goto cleanup_mask; 2370428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2371428d7b3dSmrg } 2372428d7b3dSmrg 2373428d7b3dSmrg gen6_align_vertex(sna, tmp); 2374428d7b3dSmrg gen6_emit_composite_state(sna, tmp); 2375428d7b3dSmrg return true; 2376428d7b3dSmrg 2377428d7b3dSmrgcleanup_mask: 2378428d7b3dSmrg if (tmp->mask.bo) { 2379428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 2380428d7b3dSmrg tmp->mask.bo = NULL; 2381428d7b3dSmrg } 2382428d7b3dSmrgcleanup_src: 2383428d7b3dSmrg if (tmp->src.bo) { 2384428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->src.bo); 2385428d7b3dSmrg tmp->src.bo = NULL; 2386428d7b3dSmrg } 2387428d7b3dSmrgcleanup_dst: 2388428d7b3dSmrg if (tmp->redirect.real_bo) { 2389428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 2390428d7b3dSmrg tmp->redirect.real_bo = NULL; 2391428d7b3dSmrg } 2392428d7b3dSmrgfallback: 2393428d7b3dSmrg return (mask == NULL && 2394428d7b3dSmrg sna_blt_composite(sna, op, 2395428d7b3dSmrg src, dst, 2396428d7b3dSmrg src_x, src_y, 2397428d7b3dSmrg dst_x, dst_y, 2398428d7b3dSmrg width, height, 2399428d7b3dSmrg flags | COMPOSITE_FALLBACK, tmp)); 2400428d7b3dSmrg} 2401428d7b3dSmrg 2402428d7b3dSmrg#if !NO_COMPOSITE_SPANS 2403428d7b3dSmrgfastcall static void 2404428d7b3dSmrggen6_render_composite_spans_box(struct sna *sna, 2405428d7b3dSmrg const struct sna_composite_spans_op *op, 2406428d7b3dSmrg const BoxRec *box, float opacity) 2407428d7b3dSmrg{ 2408428d7b3dSmrg DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2409428d7b3dSmrg __FUNCTION__, 2410428d7b3dSmrg op->base.src.offset[0], op->base.src.offset[1], 2411428d7b3dSmrg opacity, 2412428d7b3dSmrg op->base.dst.x, op->base.dst.y, 2413428d7b3dSmrg box->x1, box->y1, 2414428d7b3dSmrg box->x2 - box->x1, 2415428d7b3dSmrg box->y2 - box->y1)); 2416428d7b3dSmrg 2417428d7b3dSmrg gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state); 2418428d7b3dSmrg op->prim_emit(sna, op, box, opacity); 2419428d7b3dSmrg} 2420428d7b3dSmrg 2421428d7b3dSmrgstatic void 2422428d7b3dSmrggen6_render_composite_spans_boxes(struct sna *sna, 2423428d7b3dSmrg const struct sna_composite_spans_op *op, 2424428d7b3dSmrg const BoxRec *box, int nbox, 2425428d7b3dSmrg float opacity) 2426428d7b3dSmrg{ 2427428d7b3dSmrg DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 2428428d7b3dSmrg __FUNCTION__, nbox, 2429428d7b3dSmrg op->base.src.offset[0], op->base.src.offset[1], 2430428d7b3dSmrg opacity, 2431428d7b3dSmrg op->base.dst.x, op->base.dst.y)); 2432428d7b3dSmrg 2433428d7b3dSmrg do { 2434428d7b3dSmrg int nbox_this_time; 2435428d7b3dSmrg 2436428d7b3dSmrg nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, 2437428d7b3dSmrg gen6_emit_composite_state); 2438428d7b3dSmrg nbox -= nbox_this_time; 2439428d7b3dSmrg 2440428d7b3dSmrg do { 2441428d7b3dSmrg DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2442428d7b3dSmrg box->x1, box->y1, 2443428d7b3dSmrg box->x2 - box->x1, 2444428d7b3dSmrg box->y2 - box->y1)); 2445428d7b3dSmrg 2446428d7b3dSmrg op->prim_emit(sna, op, box++, opacity); 2447428d7b3dSmrg } while (--nbox_this_time); 2448428d7b3dSmrg } while (nbox); 2449428d7b3dSmrg} 2450428d7b3dSmrg 2451428d7b3dSmrgfastcall static void 2452428d7b3dSmrggen6_render_composite_spans_boxes__thread(struct sna *sna, 2453428d7b3dSmrg const struct sna_composite_spans_op *op, 2454428d7b3dSmrg const struct sna_opacity_box *box, 2455428d7b3dSmrg int nbox) 2456428d7b3dSmrg{ 2457428d7b3dSmrg DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", 2458428d7b3dSmrg __FUNCTION__, nbox, 2459428d7b3dSmrg op->base.src.offset[0], op->base.src.offset[1], 2460428d7b3dSmrg op->base.dst.x, op->base.dst.y)); 2461428d7b3dSmrg 2462428d7b3dSmrg sna_vertex_lock(&sna->render); 2463428d7b3dSmrg do { 2464428d7b3dSmrg int nbox_this_time; 2465428d7b3dSmrg float *v; 2466428d7b3dSmrg 2467428d7b3dSmrg nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, 2468428d7b3dSmrg gen6_emit_composite_state); 2469428d7b3dSmrg assert(nbox_this_time); 2470428d7b3dSmrg nbox -= nbox_this_time; 2471428d7b3dSmrg 2472428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 2473428d7b3dSmrg sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; 2474428d7b3dSmrg 2475428d7b3dSmrg sna_vertex_acquire__locked(&sna->render); 2476428d7b3dSmrg sna_vertex_unlock(&sna->render); 2477428d7b3dSmrg 2478428d7b3dSmrg op->emit_boxes(op, box, nbox_this_time, v); 2479428d7b3dSmrg box += nbox_this_time; 2480428d7b3dSmrg 2481428d7b3dSmrg sna_vertex_lock(&sna->render); 2482428d7b3dSmrg sna_vertex_release__locked(&sna->render); 2483428d7b3dSmrg } while (nbox); 2484428d7b3dSmrg sna_vertex_unlock(&sna->render); 2485428d7b3dSmrg} 2486428d7b3dSmrg 2487428d7b3dSmrgfastcall static void 2488428d7b3dSmrggen6_render_composite_spans_done(struct sna *sna, 2489428d7b3dSmrg const struct sna_composite_spans_op *op) 2490428d7b3dSmrg{ 2491428d7b3dSmrg DBG(("%s()\n", __FUNCTION__)); 2492428d7b3dSmrg assert(!sna->render.active); 2493428d7b3dSmrg 2494428d7b3dSmrg if (sna->render.vertex_offset) 2495428d7b3dSmrg gen4_vertex_flush(sna); 2496428d7b3dSmrg 2497428d7b3dSmrg if (op->base.src.bo) 2498428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2499428d7b3dSmrg 2500428d7b3dSmrg sna_render_composite_redirect_done(sna, &op->base); 2501428d7b3dSmrg} 2502428d7b3dSmrg 2503428d7b3dSmrgstatic bool 2504428d7b3dSmrggen6_check_composite_spans(struct sna *sna, 2505428d7b3dSmrg uint8_t op, PicturePtr src, PicturePtr dst, 2506428d7b3dSmrg int16_t width, int16_t height, 2507428d7b3dSmrg unsigned flags) 2508428d7b3dSmrg{ 2509428d7b3dSmrg DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n", 2510428d7b3dSmrg __FUNCTION__, op, width, height, flags)); 2511428d7b3dSmrg 2512428d7b3dSmrg if (op >= ARRAY_SIZE(gen6_blend_op)) 2513428d7b3dSmrg return false; 2514428d7b3dSmrg 2515428d7b3dSmrg if (gen6_composite_fallback(sna, src, NULL, dst)) { 2516428d7b3dSmrg DBG(("%s: operation would fallback\n", __FUNCTION__)); 2517428d7b3dSmrg return false; 2518428d7b3dSmrg } 2519428d7b3dSmrg 2520428d7b3dSmrg if (need_tiling(sna, width, height) && 2521428d7b3dSmrg !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 2522428d7b3dSmrg DBG(("%s: fallback, tiled operation not on GPU\n", 2523428d7b3dSmrg __FUNCTION__)); 2524428d7b3dSmrg return false; 2525428d7b3dSmrg } 2526428d7b3dSmrg 2527428d7b3dSmrg if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) { 2528428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable); 2529428d7b3dSmrg assert(priv); 2530428d7b3dSmrg 2531428d7b3dSmrg if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 2532428d7b3dSmrg return true; 2533428d7b3dSmrg 2534428d7b3dSmrg if (flags & COMPOSITE_SPANS_INPLACE_HINT) 2535428d7b3dSmrg return false; 2536428d7b3dSmrg 2537428d7b3dSmrg return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo); 2538428d7b3dSmrg } 2539428d7b3dSmrg 2540428d7b3dSmrg return true; 2541428d7b3dSmrg} 2542428d7b3dSmrg 2543428d7b3dSmrgstatic bool 2544428d7b3dSmrggen6_render_composite_spans(struct sna *sna, 2545428d7b3dSmrg uint8_t op, 2546428d7b3dSmrg PicturePtr src, 2547428d7b3dSmrg PicturePtr dst, 2548428d7b3dSmrg int16_t src_x, int16_t src_y, 2549428d7b3dSmrg int16_t dst_x, int16_t dst_y, 2550428d7b3dSmrg int16_t width, int16_t height, 2551428d7b3dSmrg unsigned flags, 2552428d7b3dSmrg struct sna_composite_spans_op *tmp) 2553428d7b3dSmrg{ 2554428d7b3dSmrg DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, 2555428d7b3dSmrg width, height, flags, sna->kgem.ring)); 2556428d7b3dSmrg 2557428d7b3dSmrg assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags)); 2558428d7b3dSmrg 2559428d7b3dSmrg if (need_tiling(sna, width, height)) { 2560428d7b3dSmrg DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 2561428d7b3dSmrg __FUNCTION__, width, height)); 2562428d7b3dSmrg return sna_tiling_composite_spans(op, src, dst, 2563428d7b3dSmrg src_x, src_y, dst_x, dst_y, 2564428d7b3dSmrg width, height, flags, tmp); 2565428d7b3dSmrg } 2566428d7b3dSmrg 2567428d7b3dSmrg tmp->base.op = op; 2568428d7b3dSmrg if (!gen6_composite_set_target(sna, &tmp->base, dst, 2569428d7b3dSmrg dst_x, dst_y, width, height, true)) 2570428d7b3dSmrg return false; 2571428d7b3dSmrg 2572428d7b3dSmrg switch (gen6_composite_picture(sna, src, &tmp->base.src, 2573428d7b3dSmrg src_x, src_y, 2574428d7b3dSmrg width, height, 2575428d7b3dSmrg dst_x, dst_y, 2576428d7b3dSmrg dst->polyMode == PolyModePrecise)) { 2577428d7b3dSmrg case -1: 2578428d7b3dSmrg goto cleanup_dst; 2579428d7b3dSmrg case 0: 2580428d7b3dSmrg if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) 2581428d7b3dSmrg goto cleanup_dst; 2582428d7b3dSmrg /* fall through to fixup */ 2583428d7b3dSmrg case 1: 2584428d7b3dSmrg gen6_composite_channel_convert(&tmp->base.src); 2585428d7b3dSmrg break; 2586428d7b3dSmrg } 2587428d7b3dSmrg tmp->base.mask.bo = NULL; 2588428d7b3dSmrg 2589428d7b3dSmrg tmp->base.is_affine = tmp->base.src.is_affine; 2590428d7b3dSmrg tmp->base.need_magic_ca_pass = false; 2591428d7b3dSmrg 2592428d7b3dSmrg tmp->base.u.gen6.flags = 2593428d7b3dSmrg GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, 2594428d7b3dSmrg tmp->base.src.repeat, 2595428d7b3dSmrg SAMPLER_FILTER_NEAREST, 2596428d7b3dSmrg SAMPLER_EXTEND_PAD), 2597428d7b3dSmrg gen6_get_blend(tmp->base.op, false, tmp->base.dst.format), 2598428d7b3dSmrg GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine, 2599428d7b3dSmrg gen4_choose_spans_emitter(sna, tmp)); 2600428d7b3dSmrg 2601428d7b3dSmrg tmp->box = gen6_render_composite_spans_box; 2602428d7b3dSmrg tmp->boxes = gen6_render_composite_spans_boxes; 2603428d7b3dSmrg if (tmp->emit_boxes) 2604428d7b3dSmrg tmp->thread_boxes = gen6_render_composite_spans_boxes__thread; 2605428d7b3dSmrg tmp->done = gen6_render_composite_spans_done; 2606428d7b3dSmrg 2607428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); 2608428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2609428d7b3dSmrg tmp->base.dst.bo, tmp->base.src.bo, 2610428d7b3dSmrg NULL)) { 2611428d7b3dSmrg kgem_submit(&sna->kgem); 2612428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2613428d7b3dSmrg tmp->base.dst.bo, tmp->base.src.bo, 2614428d7b3dSmrg NULL)) 2615428d7b3dSmrg goto cleanup_src; 2616428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2617428d7b3dSmrg } 2618428d7b3dSmrg 2619428d7b3dSmrg gen6_align_vertex(sna, &tmp->base); 2620428d7b3dSmrg gen6_emit_composite_state(sna, &tmp->base); 2621428d7b3dSmrg return true; 2622428d7b3dSmrg 2623428d7b3dSmrgcleanup_src: 2624428d7b3dSmrg if (tmp->base.src.bo) 2625428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); 2626428d7b3dSmrgcleanup_dst: 2627428d7b3dSmrg if (tmp->base.redirect.real_bo) 2628428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); 2629428d7b3dSmrg return false; 2630428d7b3dSmrg} 2631428d7b3dSmrg#endif 2632428d7b3dSmrg 2633428d7b3dSmrgstatic void 2634428d7b3dSmrggen6_emit_copy_state(struct sna *sna, 2635428d7b3dSmrg const struct sna_composite_op *op) 2636428d7b3dSmrg{ 2637428d7b3dSmrg uint32_t *binding_table; 2638428d7b3dSmrg uint16_t offset; 2639428d7b3dSmrg bool dirty; 2640428d7b3dSmrg 2641428d7b3dSmrg dirty = gen6_get_batch(sna, op); 2642428d7b3dSmrg 2643428d7b3dSmrg binding_table = gen6_composite_get_binding_table(sna, &offset); 2644428d7b3dSmrg 2645428d7b3dSmrg binding_table[0] = 2646428d7b3dSmrg gen6_bind_bo(sna, 2647428d7b3dSmrg op->dst.bo, op->dst.width, op->dst.height, 2648428d7b3dSmrg gen6_get_dest_format(op->dst.format), 2649428d7b3dSmrg true); 2650428d7b3dSmrg binding_table[1] = 2651428d7b3dSmrg gen6_bind_bo(sna, 2652428d7b3dSmrg op->src.bo, op->src.width, op->src.height, 2653428d7b3dSmrg op->src.card_format, 2654428d7b3dSmrg false); 2655428d7b3dSmrg 2656428d7b3dSmrg if (sna->kgem.surface == offset && 2657428d7b3dSmrg *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) { 2658428d7b3dSmrg sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); 2659428d7b3dSmrg offset = sna->render_state.gen6.surface_table; 2660428d7b3dSmrg } 2661428d7b3dSmrg 2662428d7b3dSmrg gen6_emit_state(sna, op, offset | dirty); 2663428d7b3dSmrg} 2664428d7b3dSmrg 2665428d7b3dSmrgstatic inline bool prefer_blt_copy(struct sna *sna, 2666428d7b3dSmrg struct kgem_bo *src_bo, 2667428d7b3dSmrg struct kgem_bo *dst_bo, 2668428d7b3dSmrg unsigned flags) 2669428d7b3dSmrg{ 2670428d7b3dSmrg if (flags & COPY_SYNC) 2671428d7b3dSmrg return false; 2672428d7b3dSmrg 2673428d7b3dSmrg if (PREFER_RENDER) 2674428d7b3dSmrg return PREFER_RENDER > 0; 2675428d7b3dSmrg 2676428d7b3dSmrg if (sna->kgem.ring == KGEM_BLT) 2677428d7b3dSmrg return true; 2678428d7b3dSmrg 2679428d7b3dSmrg if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) 2680428d7b3dSmrg return true; 2681428d7b3dSmrg 2682428d7b3dSmrg if (untiled_tlb_miss(src_bo) || 2683428d7b3dSmrg untiled_tlb_miss(dst_bo)) 2684428d7b3dSmrg return true; 2685428d7b3dSmrg 2686428d7b3dSmrg if (force_blt_ring(sna)) 2687428d7b3dSmrg return true; 2688428d7b3dSmrg 2689428d7b3dSmrg if (kgem_bo_is_render(dst_bo) || 2690428d7b3dSmrg kgem_bo_is_render(src_bo)) 2691428d7b3dSmrg return false; 2692428d7b3dSmrg 2693428d7b3dSmrg if (prefer_render_ring(sna, dst_bo)) 2694428d7b3dSmrg return false; 2695428d7b3dSmrg 2696428d7b3dSmrg if (!prefer_blt_ring(sna, dst_bo, flags)) 2697428d7b3dSmrg return false; 2698428d7b3dSmrg 2699428d7b3dSmrg return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); 2700428d7b3dSmrg} 2701428d7b3dSmrg 2702428d7b3dSmrgstatic bool 2703428d7b3dSmrggen6_render_copy_boxes(struct sna *sna, uint8_t alu, 2704428d7b3dSmrg const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 2705428d7b3dSmrg const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 2706428d7b3dSmrg const BoxRec *box, int n, unsigned flags) 2707428d7b3dSmrg{ 2708428d7b3dSmrg struct sna_composite_op tmp; 2709428d7b3dSmrg BoxRec extents; 2710428d7b3dSmrg 2711428d7b3dSmrg DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n", 2712428d7b3dSmrg __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, 2713428d7b3dSmrg src_bo == dst_bo, 2714428d7b3dSmrg overlaps(sna, 2715428d7b3dSmrg src_bo, src_dx, src_dy, 2716428d7b3dSmrg dst_bo, dst_dx, dst_dy, 2717428d7b3dSmrg box, n, flags, &extents))); 2718428d7b3dSmrg 2719428d7b3dSmrg if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && 2720428d7b3dSmrg sna_blt_compare_depth(src, dst) && 2721428d7b3dSmrg sna_blt_copy_boxes(sna, alu, 2722428d7b3dSmrg src_bo, src_dx, src_dy, 2723428d7b3dSmrg dst_bo, dst_dx, dst_dy, 2724428d7b3dSmrg dst->bitsPerPixel, 2725428d7b3dSmrg box, n)) 2726428d7b3dSmrg return true; 2727428d7b3dSmrg 2728428d7b3dSmrg if (!(alu == GXcopy || alu == GXclear)) { 2729428d7b3dSmrgfallback_blt: 2730428d7b3dSmrg if (!sna_blt_compare_depth(src, dst)) 2731428d7b3dSmrg return false; 2732428d7b3dSmrg 2733428d7b3dSmrg return sna_blt_copy_boxes_fallback(sna, alu, 2734428d7b3dSmrg src, src_bo, src_dx, src_dy, 2735428d7b3dSmrg dst, dst_bo, dst_dx, dst_dy, 2736428d7b3dSmrg box, n); 2737428d7b3dSmrg } 2738428d7b3dSmrg 2739428d7b3dSmrg if (overlaps(sna, 2740428d7b3dSmrg src_bo, src_dx, src_dy, 2741428d7b3dSmrg dst_bo, dst_dx, dst_dy, 2742428d7b3dSmrg box, n, flags, 2743428d7b3dSmrg &extents)) { 2744428d7b3dSmrg bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); 2745428d7b3dSmrg 2746428d7b3dSmrg if ((big || can_switch_to_blt(sna, dst_bo, flags)) && 2747428d7b3dSmrg sna_blt_copy_boxes(sna, alu, 2748428d7b3dSmrg src_bo, src_dx, src_dy, 2749428d7b3dSmrg dst_bo, dst_dx, dst_dy, 2750428d7b3dSmrg dst->bitsPerPixel, 2751428d7b3dSmrg box, n)) 2752428d7b3dSmrg return true; 2753428d7b3dSmrg 2754428d7b3dSmrg if (big) 2755428d7b3dSmrg goto fallback_blt; 2756428d7b3dSmrg 2757428d7b3dSmrg assert(src_bo == dst_bo); 2758428d7b3dSmrg assert(src->depth == dst->depth); 2759428d7b3dSmrg assert(src->width == dst->width); 2760428d7b3dSmrg assert(src->height == dst->height); 2761428d7b3dSmrg return sna_render_copy_boxes__overlap(sna, alu, 2762428d7b3dSmrg src, src_bo, 2763428d7b3dSmrg src_dx, src_dy, 2764428d7b3dSmrg dst_dx, dst_dy, 2765428d7b3dSmrg box, n, &extents); 2766428d7b3dSmrg } 2767428d7b3dSmrg 2768428d7b3dSmrg if (dst->depth == src->depth) { 2769428d7b3dSmrg tmp.dst.format = sna_render_format_for_depth(dst->depth); 2770428d7b3dSmrg tmp.src.pict_format = tmp.dst.format; 2771428d7b3dSmrg } else { 2772428d7b3dSmrg tmp.dst.format = sna_format_for_depth(dst->depth); 2773428d7b3dSmrg tmp.src.pict_format = sna_format_for_depth(src->depth); 2774428d7b3dSmrg } 2775428d7b3dSmrg if (!gen6_check_format(tmp.src.pict_format)) 2776428d7b3dSmrg goto fallback_blt; 2777428d7b3dSmrg 2778428d7b3dSmrg tmp.dst.pixmap = (PixmapPtr)dst; 2779428d7b3dSmrg tmp.dst.width = dst->width; 2780428d7b3dSmrg tmp.dst.height = dst->height; 2781428d7b3dSmrg tmp.dst.bo = dst_bo; 2782428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 2783428d7b3dSmrg tmp.damage = NULL; 2784428d7b3dSmrg 2785428d7b3dSmrg sna_render_composite_redirect_init(&tmp); 2786428d7b3dSmrg if (too_large(tmp.dst.width, tmp.dst.height)) { 2787428d7b3dSmrg int i; 2788428d7b3dSmrg 2789428d7b3dSmrg extents = box[0]; 2790428d7b3dSmrg for (i = 1; i < n; i++) { 2791428d7b3dSmrg if (box[i].x1 < extents.x1) 2792428d7b3dSmrg extents.x1 = box[i].x1; 2793428d7b3dSmrg if (box[i].y1 < extents.y1) 2794428d7b3dSmrg extents.y1 = box[i].y1; 2795428d7b3dSmrg 2796428d7b3dSmrg if (box[i].x2 > extents.x2) 2797428d7b3dSmrg extents.x2 = box[i].x2; 2798428d7b3dSmrg if (box[i].y2 > extents.y2) 2799428d7b3dSmrg extents.y2 = box[i].y2; 2800428d7b3dSmrg } 2801428d7b3dSmrg 2802428d7b3dSmrg if (!sna_render_composite_redirect(sna, &tmp, 2803428d7b3dSmrg extents.x1 + dst_dx, 2804428d7b3dSmrg extents.y1 + dst_dy, 2805428d7b3dSmrg extents.x2 - extents.x1, 2806428d7b3dSmrg extents.y2 - extents.y1, 2807428d7b3dSmrg n > 1)) 2808428d7b3dSmrg goto fallback_tiled; 2809428d7b3dSmrg } 2810428d7b3dSmrg 2811428d7b3dSmrg tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format); 2812428d7b3dSmrg if (too_large(src->width, src->height)) { 2813428d7b3dSmrg int i; 2814428d7b3dSmrg 2815428d7b3dSmrg extents = box[0]; 2816428d7b3dSmrg for (i = 1; i < n; i++) { 2817428d7b3dSmrg if (box[i].x1 < extents.x1) 2818428d7b3dSmrg extents.x1 = box[i].x1; 2819428d7b3dSmrg if (box[i].y1 < extents.y1) 2820428d7b3dSmrg extents.y1 = box[i].y1; 2821428d7b3dSmrg 2822428d7b3dSmrg if (box[i].x2 > extents.x2) 2823428d7b3dSmrg extents.x2 = box[i].x2; 2824428d7b3dSmrg if (box[i].y2 > extents.y2) 2825428d7b3dSmrg extents.y2 = box[i].y2; 2826428d7b3dSmrg } 2827428d7b3dSmrg 2828428d7b3dSmrg if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, 2829428d7b3dSmrg extents.x1 + src_dx, 2830428d7b3dSmrg extents.y1 + src_dy, 2831428d7b3dSmrg extents.x2 - extents.x1, 2832428d7b3dSmrg extents.y2 - extents.y1)) { 2833428d7b3dSmrg DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__)); 2834428d7b3dSmrg goto fallback_tiled_dst; 2835428d7b3dSmrg } 2836428d7b3dSmrg } else { 2837428d7b3dSmrg tmp.src.bo = src_bo; 2838428d7b3dSmrg tmp.src.width = src->width; 2839428d7b3dSmrg tmp.src.height = src->height; 2840428d7b3dSmrg tmp.src.offset[0] = tmp.src.offset[1] = 0; 2841428d7b3dSmrg } 2842428d7b3dSmrg 2843428d7b3dSmrg tmp.mask.bo = NULL; 2844428d7b3dSmrg 2845428d7b3dSmrg tmp.floats_per_vertex = 2; 2846428d7b3dSmrg tmp.floats_per_rect = 6; 2847428d7b3dSmrg tmp.need_magic_ca_pass = 0; 2848428d7b3dSmrg 2849428d7b3dSmrg tmp.u.gen6.flags = COPY_FLAGS(alu); 2850428d7b3dSmrg assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 2851428d7b3dSmrg assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER); 2852428d7b3dSmrg assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX); 2853428d7b3dSmrg 2854428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 2855428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 2856428d7b3dSmrg kgem_submit(&sna->kgem); 2857428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 2858428d7b3dSmrg DBG(("%s: too large for a single operation\n", 2859428d7b3dSmrg __FUNCTION__)); 2860428d7b3dSmrg if (tmp.src.bo != src_bo) 2861428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2862428d7b3dSmrg if (tmp.redirect.real_bo) 2863428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2864428d7b3dSmrg goto fallback_blt; 2865428d7b3dSmrg } 2866428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2867428d7b3dSmrg } 2868428d7b3dSmrg 2869428d7b3dSmrg src_dx += tmp.src.offset[0]; 2870428d7b3dSmrg src_dy += tmp.src.offset[1]; 2871428d7b3dSmrg 2872428d7b3dSmrg dst_dx += tmp.dst.x; 2873428d7b3dSmrg dst_dy += tmp.dst.y; 2874428d7b3dSmrg 2875428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 2876428d7b3dSmrg 2877428d7b3dSmrg gen6_align_vertex(sna, &tmp); 2878428d7b3dSmrg gen6_emit_copy_state(sna, &tmp); 2879428d7b3dSmrg 2880428d7b3dSmrg do { 2881428d7b3dSmrg int16_t *v; 2882428d7b3dSmrg int n_this_time; 2883428d7b3dSmrg 2884428d7b3dSmrg n_this_time = gen6_get_rectangles(sna, &tmp, n, 2885428d7b3dSmrg gen6_emit_copy_state); 2886428d7b3dSmrg n -= n_this_time; 2887428d7b3dSmrg 2888428d7b3dSmrg v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 2889428d7b3dSmrg sna->render.vertex_used += 6 * n_this_time; 2890428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 2891428d7b3dSmrg do { 2892428d7b3dSmrg 2893428d7b3dSmrg DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", 2894428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 2895428d7b3dSmrg box->x1 + dst_dx, box->y1 + dst_dy, 2896428d7b3dSmrg box->x2 - box->x1, box->y2 - box->y1)); 2897428d7b3dSmrg v[0] = box->x2 + dst_dx; 2898428d7b3dSmrg v[2] = box->x2 + src_dx; 2899428d7b3dSmrg v[1] = v[5] = box->y2 + dst_dy; 2900428d7b3dSmrg v[3] = v[7] = box->y2 + src_dy; 2901428d7b3dSmrg v[8] = v[4] = box->x1 + dst_dx; 2902428d7b3dSmrg v[10] = v[6] = box->x1 + src_dx; 2903428d7b3dSmrg v[9] = box->y1 + dst_dy; 2904428d7b3dSmrg v[11] = box->y1 + src_dy; 2905428d7b3dSmrg v += 12; box++; 2906428d7b3dSmrg } while (--n_this_time); 2907428d7b3dSmrg } while (n); 2908428d7b3dSmrg 2909428d7b3dSmrg gen4_vertex_flush(sna); 2910428d7b3dSmrg sna_render_composite_redirect_done(sna, &tmp); 2911428d7b3dSmrg if (tmp.src.bo != src_bo) 2912428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2913428d7b3dSmrg return true; 2914428d7b3dSmrg 2915428d7b3dSmrgfallback_tiled_dst: 2916428d7b3dSmrg if (tmp.redirect.real_bo) 2917428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2918428d7b3dSmrgfallback_tiled: 2919428d7b3dSmrg if (sna_blt_compare_depth(src, dst) && 2920428d7b3dSmrg sna_blt_copy_boxes(sna, alu, 2921428d7b3dSmrg src_bo, src_dx, src_dy, 2922428d7b3dSmrg dst_bo, dst_dx, dst_dy, 2923428d7b3dSmrg dst->bitsPerPixel, 2924428d7b3dSmrg box, n)) 2925428d7b3dSmrg return true; 2926428d7b3dSmrg 2927428d7b3dSmrg return sna_tiling_copy_boxes(sna, alu, 2928428d7b3dSmrg src, src_bo, src_dx, src_dy, 2929428d7b3dSmrg dst, dst_bo, dst_dx, dst_dy, 2930428d7b3dSmrg box, n); 2931428d7b3dSmrg} 2932428d7b3dSmrg 2933428d7b3dSmrgstatic void 2934428d7b3dSmrggen6_render_copy_blt(struct sna *sna, 2935428d7b3dSmrg const struct sna_copy_op *op, 2936428d7b3dSmrg int16_t sx, int16_t sy, 2937428d7b3dSmrg int16_t w, int16_t h, 2938428d7b3dSmrg int16_t dx, int16_t dy) 2939428d7b3dSmrg{ 2940428d7b3dSmrg int16_t *v; 2941428d7b3dSmrg 2942428d7b3dSmrg gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state); 2943428d7b3dSmrg 2944428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 2945428d7b3dSmrg sna->render.vertex_used += 6; 2946428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 2947428d7b3dSmrg 2948428d7b3dSmrg v[0] = dx+w; v[1] = dy+h; 2949428d7b3dSmrg v[2] = sx+w; v[3] = sy+h; 2950428d7b3dSmrg v[4] = dx; v[5] = dy+h; 2951428d7b3dSmrg v[6] = sx; v[7] = sy+h; 2952428d7b3dSmrg v[8] = dx; v[9] = dy; 2953428d7b3dSmrg v[10] = sx; v[11] = sy; 2954428d7b3dSmrg} 2955428d7b3dSmrg 2956428d7b3dSmrgstatic void 2957428d7b3dSmrggen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 2958428d7b3dSmrg{ 2959428d7b3dSmrg DBG(("%s()\n", __FUNCTION__)); 2960428d7b3dSmrg 2961428d7b3dSmrg assert(!sna->render.active); 2962428d7b3dSmrg if (sna->render.vertex_offset) 2963428d7b3dSmrg gen4_vertex_flush(sna); 2964428d7b3dSmrg} 2965428d7b3dSmrg 2966428d7b3dSmrgstatic bool 2967428d7b3dSmrggen6_render_copy(struct sna *sna, uint8_t alu, 2968428d7b3dSmrg PixmapPtr src, struct kgem_bo *src_bo, 2969428d7b3dSmrg PixmapPtr dst, struct kgem_bo *dst_bo, 2970428d7b3dSmrg struct sna_copy_op *op) 2971428d7b3dSmrg{ 2972428d7b3dSmrg DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n", 2973428d7b3dSmrg __FUNCTION__, alu, 2974428d7b3dSmrg src->drawable.width, src->drawable.height, 2975428d7b3dSmrg dst->drawable.width, dst->drawable.height)); 2976428d7b3dSmrg 2977428d7b3dSmrg if (prefer_blt_copy(sna, src_bo, dst_bo, 0) && 2978428d7b3dSmrg sna_blt_compare_depth(&src->drawable, &dst->drawable) && 2979428d7b3dSmrg sna_blt_copy(sna, alu, 2980428d7b3dSmrg src_bo, dst_bo, 2981428d7b3dSmrg dst->drawable.bitsPerPixel, 2982428d7b3dSmrg op)) 2983428d7b3dSmrg return true; 2984428d7b3dSmrg 2985428d7b3dSmrg if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || 2986428d7b3dSmrg too_large(src->drawable.width, src->drawable.height) || 2987428d7b3dSmrg too_large(dst->drawable.width, dst->drawable.height)) { 2988428d7b3dSmrgfallback: 2989428d7b3dSmrg if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 2990428d7b3dSmrg return false; 2991428d7b3dSmrg 2992428d7b3dSmrg return sna_blt_copy(sna, alu, src_bo, dst_bo, 2993428d7b3dSmrg dst->drawable.bitsPerPixel, 2994428d7b3dSmrg op); 2995428d7b3dSmrg } 2996428d7b3dSmrg 2997428d7b3dSmrg if (dst->drawable.depth == src->drawable.depth) { 2998428d7b3dSmrg op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); 2999428d7b3dSmrg op->base.src.pict_format = op->base.dst.format; 3000428d7b3dSmrg } else { 3001428d7b3dSmrg op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3002428d7b3dSmrg op->base.src.pict_format = sna_format_for_depth(src->drawable.depth); 3003428d7b3dSmrg } 3004428d7b3dSmrg if (!gen6_check_format(op->base.src.pict_format)) 3005428d7b3dSmrg goto fallback; 3006428d7b3dSmrg 3007428d7b3dSmrg op->base.dst.pixmap = dst; 3008428d7b3dSmrg op->base.dst.width = dst->drawable.width; 3009428d7b3dSmrg op->base.dst.height = dst->drawable.height; 3010428d7b3dSmrg op->base.dst.bo = dst_bo; 3011428d7b3dSmrg 3012428d7b3dSmrg op->base.src.bo = src_bo; 3013428d7b3dSmrg op->base.src.card_format = 3014428d7b3dSmrg gen6_get_card_format(op->base.src.pict_format); 3015428d7b3dSmrg op->base.src.width = src->drawable.width; 3016428d7b3dSmrg op->base.src.height = src->drawable.height; 3017428d7b3dSmrg 3018428d7b3dSmrg op->base.mask.bo = NULL; 3019428d7b3dSmrg 3020428d7b3dSmrg op->base.floats_per_vertex = 2; 3021428d7b3dSmrg op->base.floats_per_rect = 6; 3022428d7b3dSmrg 3023428d7b3dSmrg op->base.u.gen6.flags = COPY_FLAGS(alu); 3024428d7b3dSmrg assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 3025428d7b3dSmrg assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER); 3026428d7b3dSmrg assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX); 3027428d7b3dSmrg 3028428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3029428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 3030428d7b3dSmrg kgem_submit(&sna->kgem); 3031428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 3032428d7b3dSmrg goto fallback; 3033428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3034428d7b3dSmrg } 3035428d7b3dSmrg 3036428d7b3dSmrg gen6_align_vertex(sna, &op->base); 3037428d7b3dSmrg gen6_emit_copy_state(sna, &op->base); 3038428d7b3dSmrg 3039428d7b3dSmrg op->blt = gen6_render_copy_blt; 3040428d7b3dSmrg op->done = gen6_render_copy_done; 3041428d7b3dSmrg return true; 3042428d7b3dSmrg} 3043428d7b3dSmrg 3044428d7b3dSmrgstatic void 3045428d7b3dSmrggen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) 3046428d7b3dSmrg{ 3047428d7b3dSmrg uint32_t *binding_table; 3048428d7b3dSmrg uint16_t offset; 3049428d7b3dSmrg bool dirty; 3050428d7b3dSmrg 3051428d7b3dSmrg dirty = gen6_get_batch(sna, op); 3052428d7b3dSmrg 3053428d7b3dSmrg binding_table = gen6_composite_get_binding_table(sna, &offset); 3054428d7b3dSmrg 3055428d7b3dSmrg binding_table[0] = 3056428d7b3dSmrg gen6_bind_bo(sna, 3057428d7b3dSmrg op->dst.bo, op->dst.width, op->dst.height, 3058428d7b3dSmrg gen6_get_dest_format(op->dst.format), 3059428d7b3dSmrg true); 3060428d7b3dSmrg binding_table[1] = 3061428d7b3dSmrg gen6_bind_bo(sna, 3062428d7b3dSmrg op->src.bo, 1, 1, 3063428d7b3dSmrg GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 3064428d7b3dSmrg false); 3065428d7b3dSmrg 3066428d7b3dSmrg if (sna->kgem.surface == offset && 3067428d7b3dSmrg *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) { 3068428d7b3dSmrg sna->kgem.surface += 3069428d7b3dSmrg sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t); 3070428d7b3dSmrg offset = sna->render_state.gen6.surface_table; 3071428d7b3dSmrg } 3072428d7b3dSmrg 3073428d7b3dSmrg gen6_emit_state(sna, op, offset | dirty); 3074428d7b3dSmrg} 3075428d7b3dSmrg 3076428d7b3dSmrgstatic bool 3077428d7b3dSmrggen6_render_fill_boxes(struct sna *sna, 3078428d7b3dSmrg CARD8 op, 3079428d7b3dSmrg PictFormat format, 3080428d7b3dSmrg const xRenderColor *color, 3081428d7b3dSmrg const DrawableRec *dst, struct kgem_bo *dst_bo, 3082428d7b3dSmrg const BoxRec *box, int n) 3083428d7b3dSmrg{ 3084428d7b3dSmrg struct sna_composite_op tmp; 3085428d7b3dSmrg uint32_t pixel; 3086428d7b3dSmrg 3087428d7b3dSmrg DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", 3088428d7b3dSmrg __FUNCTION__, op, 3089428d7b3dSmrg color->red, color->green, color->blue, color->alpha, (int)format)); 3090428d7b3dSmrg 3091428d7b3dSmrg if (op >= ARRAY_SIZE(gen6_blend_op)) { 3092428d7b3dSmrg DBG(("%s: fallback due to unhandled blend op: %d\n", 3093428d7b3dSmrg __FUNCTION__, op)); 3094428d7b3dSmrg return false; 3095428d7b3dSmrg } 3096428d7b3dSmrg 3097428d7b3dSmrg if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) || 3098428d7b3dSmrg !gen6_check_dst_format(format)) { 3099428d7b3dSmrg uint8_t alu = GXinvalid; 3100428d7b3dSmrg 3101428d7b3dSmrg if (op <= PictOpSrc) { 3102428d7b3dSmrg pixel = 0; 3103428d7b3dSmrg if (op == PictOpClear) 3104428d7b3dSmrg alu = GXclear; 3105428d7b3dSmrg else if (sna_get_pixel_from_rgba(&pixel, 3106428d7b3dSmrg color->red, 3107428d7b3dSmrg color->green, 3108428d7b3dSmrg color->blue, 3109428d7b3dSmrg color->alpha, 3110428d7b3dSmrg format)) 3111428d7b3dSmrg alu = GXcopy; 3112428d7b3dSmrg } 3113428d7b3dSmrg 3114428d7b3dSmrg if (alu != GXinvalid && 3115428d7b3dSmrg sna_blt_fill_boxes(sna, alu, 3116428d7b3dSmrg dst_bo, dst->bitsPerPixel, 3117428d7b3dSmrg pixel, box, n)) 3118428d7b3dSmrg return true; 3119428d7b3dSmrg 3120428d7b3dSmrg if (!gen6_check_dst_format(format)) 3121428d7b3dSmrg return false; 3122428d7b3dSmrg } 3123428d7b3dSmrg 3124428d7b3dSmrg if (op == PictOpClear) { 3125428d7b3dSmrg pixel = 0; 3126428d7b3dSmrg op = PictOpSrc; 3127428d7b3dSmrg } else if (!sna_get_pixel_from_rgba(&pixel, 3128428d7b3dSmrg color->red, 3129428d7b3dSmrg color->green, 3130428d7b3dSmrg color->blue, 3131428d7b3dSmrg color->alpha, 3132428d7b3dSmrg PICT_a8r8g8b8)) 3133428d7b3dSmrg return false; 3134428d7b3dSmrg 3135428d7b3dSmrg DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", 3136428d7b3dSmrg __FUNCTION__, pixel, n, 3137428d7b3dSmrg box[0].x1, box[0].y1, box[0].x2, box[0].y2)); 3138428d7b3dSmrg 3139428d7b3dSmrg tmp.dst.pixmap = (PixmapPtr)dst; 3140428d7b3dSmrg tmp.dst.width = dst->width; 3141428d7b3dSmrg tmp.dst.height = dst->height; 3142428d7b3dSmrg tmp.dst.format = format; 3143428d7b3dSmrg tmp.dst.bo = dst_bo; 3144428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 3145428d7b3dSmrg tmp.damage = NULL; 3146428d7b3dSmrg 3147428d7b3dSmrg sna_render_composite_redirect_init(&tmp); 3148428d7b3dSmrg if (too_large(dst->width, dst->height)) { 3149428d7b3dSmrg BoxRec extents; 3150428d7b3dSmrg 3151428d7b3dSmrg boxes_extents(box, n, &extents); 3152428d7b3dSmrg if (!sna_render_composite_redirect(sna, &tmp, 3153428d7b3dSmrg extents.x1, extents.y1, 3154428d7b3dSmrg extents.x2 - extents.x1, 3155428d7b3dSmrg extents.y2 - extents.y1, 3156428d7b3dSmrg n > 1)) 3157428d7b3dSmrg return sna_tiling_fill_boxes(sna, op, format, color, 3158428d7b3dSmrg dst, dst_bo, box, n); 3159428d7b3dSmrg } 3160428d7b3dSmrg 3161428d7b3dSmrg tmp.src.bo = sna_render_get_solid(sna, pixel); 3162428d7b3dSmrg tmp.mask.bo = NULL; 3163428d7b3dSmrg 3164428d7b3dSmrg tmp.floats_per_vertex = 2; 3165428d7b3dSmrg tmp.floats_per_rect = 6; 3166428d7b3dSmrg tmp.need_magic_ca_pass = false; 3167428d7b3dSmrg 3168428d7b3dSmrg tmp.u.gen6.flags = FILL_FLAGS(op, format); 3169428d7b3dSmrg assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 3170428d7b3dSmrg assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); 3171428d7b3dSmrg assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); 3172428d7b3dSmrg 3173428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3174428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3175428d7b3dSmrg kgem_submit(&sna->kgem); 3176428d7b3dSmrg assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); 3177428d7b3dSmrg } 3178428d7b3dSmrg 3179428d7b3dSmrg gen6_align_vertex(sna, &tmp); 3180428d7b3dSmrg gen6_emit_fill_state(sna, &tmp); 3181428d7b3dSmrg 3182428d7b3dSmrg do { 3183428d7b3dSmrg int n_this_time; 3184428d7b3dSmrg int16_t *v; 3185428d7b3dSmrg 3186428d7b3dSmrg n_this_time = gen6_get_rectangles(sna, &tmp, n, 3187428d7b3dSmrg gen6_emit_fill_state); 3188428d7b3dSmrg n -= n_this_time; 3189428d7b3dSmrg 3190428d7b3dSmrg v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 3191428d7b3dSmrg sna->render.vertex_used += 6 * n_this_time; 3192428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3193428d7b3dSmrg do { 3194428d7b3dSmrg DBG((" (%d, %d), (%d, %d)\n", 3195428d7b3dSmrg box->x1, box->y1, box->x2, box->y2)); 3196428d7b3dSmrg 3197428d7b3dSmrg v[0] = box->x2; 3198428d7b3dSmrg v[5] = v[1] = box->y2; 3199428d7b3dSmrg v[8] = v[4] = box->x1; 3200428d7b3dSmrg v[9] = box->y1; 3201428d7b3dSmrg v[2] = v[3] = v[7] = 1; 3202428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3203428d7b3dSmrg v += 12; box++; 3204428d7b3dSmrg } while (--n_this_time); 3205428d7b3dSmrg } while (n); 3206428d7b3dSmrg 3207428d7b3dSmrg gen4_vertex_flush(sna); 3208428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3209428d7b3dSmrg sna_render_composite_redirect_done(sna, &tmp); 3210428d7b3dSmrg return true; 3211428d7b3dSmrg} 3212428d7b3dSmrg 3213428d7b3dSmrgstatic void 3214428d7b3dSmrggen6_render_op_fill_blt(struct sna *sna, 3215428d7b3dSmrg const struct sna_fill_op *op, 3216428d7b3dSmrg int16_t x, int16_t y, int16_t w, int16_t h) 3217428d7b3dSmrg{ 3218428d7b3dSmrg int16_t *v; 3219428d7b3dSmrg 3220428d7b3dSmrg DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); 3221428d7b3dSmrg 3222428d7b3dSmrg gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state); 3223428d7b3dSmrg 3224428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3225428d7b3dSmrg sna->render.vertex_used += 6; 3226428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3227428d7b3dSmrg 3228428d7b3dSmrg v[0] = x+w; 3229428d7b3dSmrg v[4] = v[8] = x; 3230428d7b3dSmrg v[1] = v[5] = y+h; 3231428d7b3dSmrg v[9] = y; 3232428d7b3dSmrg 3233428d7b3dSmrg v[2] = v[3] = v[7] = 1; 3234428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3235428d7b3dSmrg} 3236428d7b3dSmrg 3237428d7b3dSmrgfastcall static void 3238428d7b3dSmrggen6_render_op_fill_box(struct sna *sna, 3239428d7b3dSmrg const struct sna_fill_op *op, 3240428d7b3dSmrg const BoxRec *box) 3241428d7b3dSmrg{ 3242428d7b3dSmrg int16_t *v; 3243428d7b3dSmrg 3244428d7b3dSmrg DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, 3245428d7b3dSmrg box->x1, box->y1, box->x2, box->y2)); 3246428d7b3dSmrg 3247428d7b3dSmrg gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state); 3248428d7b3dSmrg 3249428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3250428d7b3dSmrg sna->render.vertex_used += 6; 3251428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3252428d7b3dSmrg 3253428d7b3dSmrg v[0] = box->x2; 3254428d7b3dSmrg v[8] = v[4] = box->x1; 3255428d7b3dSmrg v[5] = v[1] = box->y2; 3256428d7b3dSmrg v[9] = box->y1; 3257428d7b3dSmrg 3258428d7b3dSmrg v[7] = v[2] = v[3] = 1; 3259428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3260428d7b3dSmrg} 3261428d7b3dSmrg 3262428d7b3dSmrgfastcall static void 3263428d7b3dSmrggen6_render_op_fill_boxes(struct sna *sna, 3264428d7b3dSmrg const struct sna_fill_op *op, 3265428d7b3dSmrg const BoxRec *box, 3266428d7b3dSmrg int nbox) 3267428d7b3dSmrg{ 3268428d7b3dSmrg DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__, 3269428d7b3dSmrg box->x1, box->y1, box->x2, box->y2, nbox)); 3270428d7b3dSmrg 3271428d7b3dSmrg do { 3272428d7b3dSmrg int nbox_this_time; 3273428d7b3dSmrg int16_t *v; 3274428d7b3dSmrg 3275428d7b3dSmrg nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, 3276428d7b3dSmrg gen6_emit_fill_state); 3277428d7b3dSmrg nbox -= nbox_this_time; 3278428d7b3dSmrg 3279428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3280428d7b3dSmrg sna->render.vertex_used += 6 * nbox_this_time; 3281428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3282428d7b3dSmrg 3283428d7b3dSmrg do { 3284428d7b3dSmrg v[0] = box->x2; 3285428d7b3dSmrg v[8] = v[4] = box->x1; 3286428d7b3dSmrg v[5] = v[1] = box->y2; 3287428d7b3dSmrg v[9] = box->y1; 3288428d7b3dSmrg v[7] = v[2] = v[3] = 1; 3289428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3290428d7b3dSmrg box++; v += 12; 3291428d7b3dSmrg } while (--nbox_this_time); 3292428d7b3dSmrg } while (nbox); 3293428d7b3dSmrg} 3294428d7b3dSmrg 3295428d7b3dSmrgstatic void 3296428d7b3dSmrggen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op) 3297428d7b3dSmrg{ 3298428d7b3dSmrg DBG(("%s()\n", __FUNCTION__)); 3299428d7b3dSmrg 3300428d7b3dSmrg assert(!sna->render.active); 3301428d7b3dSmrg if (sna->render.vertex_offset) 3302428d7b3dSmrg gen4_vertex_flush(sna); 3303428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->base.src.bo); 3304428d7b3dSmrg} 3305428d7b3dSmrg 3306428d7b3dSmrgstatic bool 3307428d7b3dSmrggen6_render_fill(struct sna *sna, uint8_t alu, 3308428d7b3dSmrg PixmapPtr dst, struct kgem_bo *dst_bo, 3309428d7b3dSmrg uint32_t color, unsigned flags, 3310428d7b3dSmrg struct sna_fill_op *op) 3311428d7b3dSmrg{ 3312428d7b3dSmrg DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); 3313428d7b3dSmrg 3314428d7b3dSmrg if (prefer_blt_fill(sna, dst_bo, flags) && 3315428d7b3dSmrg sna_blt_fill(sna, alu, 3316428d7b3dSmrg dst_bo, dst->drawable.bitsPerPixel, 3317428d7b3dSmrg color, 3318428d7b3dSmrg op)) 3319428d7b3dSmrg return true; 3320428d7b3dSmrg 3321428d7b3dSmrg if (!(alu == GXcopy || alu == GXclear) || 3322428d7b3dSmrg too_large(dst->drawable.width, dst->drawable.height)) 3323428d7b3dSmrg return sna_blt_fill(sna, alu, 3324428d7b3dSmrg dst_bo, dst->drawable.bitsPerPixel, 3325428d7b3dSmrg color, 3326428d7b3dSmrg op); 3327428d7b3dSmrg 3328428d7b3dSmrg if (alu == GXclear) 3329428d7b3dSmrg color = 0; 3330428d7b3dSmrg 3331428d7b3dSmrg op->base.dst.pixmap = dst; 3332428d7b3dSmrg op->base.dst.width = dst->drawable.width; 3333428d7b3dSmrg op->base.dst.height = dst->drawable.height; 3334428d7b3dSmrg op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3335428d7b3dSmrg op->base.dst.bo = dst_bo; 3336428d7b3dSmrg op->base.dst.x = op->base.dst.y = 0; 3337428d7b3dSmrg 3338428d7b3dSmrg op->base.src.bo = 3339428d7b3dSmrg sna_render_get_solid(sna, 3340428d7b3dSmrg sna_rgba_for_color(color, 3341428d7b3dSmrg dst->drawable.depth)); 3342428d7b3dSmrg op->base.mask.bo = NULL; 3343428d7b3dSmrg 3344428d7b3dSmrg op->base.need_magic_ca_pass = false; 3345428d7b3dSmrg op->base.floats_per_vertex = 2; 3346428d7b3dSmrg op->base.floats_per_rect = 6; 3347428d7b3dSmrg 3348428d7b3dSmrg op->base.u.gen6.flags = FILL_FLAGS_NOBLEND; 3349428d7b3dSmrg assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 3350428d7b3dSmrg assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER); 3351428d7b3dSmrg assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX); 3352428d7b3dSmrg 3353428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3354428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3355428d7b3dSmrg kgem_submit(&sna->kgem); 3356428d7b3dSmrg assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); 3357428d7b3dSmrg } 3358428d7b3dSmrg 3359428d7b3dSmrg gen6_align_vertex(sna, &op->base); 3360428d7b3dSmrg gen6_emit_fill_state(sna, &op->base); 3361428d7b3dSmrg 3362428d7b3dSmrg op->blt = gen6_render_op_fill_blt; 3363428d7b3dSmrg op->box = gen6_render_op_fill_box; 3364428d7b3dSmrg op->boxes = gen6_render_op_fill_boxes; 3365428d7b3dSmrg op->points = NULL; 3366428d7b3dSmrg op->done = gen6_render_op_fill_done; 3367428d7b3dSmrg return true; 3368428d7b3dSmrg} 3369428d7b3dSmrg 3370428d7b3dSmrgstatic bool 3371428d7b3dSmrggen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3372428d7b3dSmrg uint32_t color, 3373428d7b3dSmrg int16_t x1, int16_t y1, int16_t x2, int16_t y2, 3374428d7b3dSmrg uint8_t alu) 3375428d7b3dSmrg{ 3376428d7b3dSmrg BoxRec box; 3377428d7b3dSmrg 3378428d7b3dSmrg box.x1 = x1; 3379428d7b3dSmrg box.y1 = y1; 3380428d7b3dSmrg box.x2 = x2; 3381428d7b3dSmrg box.y2 = y2; 3382428d7b3dSmrg 3383428d7b3dSmrg return sna_blt_fill_boxes(sna, alu, 3384428d7b3dSmrg bo, dst->drawable.bitsPerPixel, 3385428d7b3dSmrg color, &box, 1); 3386428d7b3dSmrg} 3387428d7b3dSmrg 3388428d7b3dSmrgstatic bool 3389428d7b3dSmrggen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3390428d7b3dSmrg uint32_t color, 3391428d7b3dSmrg int16_t x1, int16_t y1, 3392428d7b3dSmrg int16_t x2, int16_t y2, 3393428d7b3dSmrg uint8_t alu) 3394428d7b3dSmrg{ 3395428d7b3dSmrg struct sna_composite_op tmp; 3396428d7b3dSmrg int16_t *v; 3397428d7b3dSmrg 3398428d7b3dSmrg /* Prefer to use the BLT if already engaged */ 3399428d7b3dSmrg if (prefer_blt_fill(sna, bo, FILL_BOXES) && 3400428d7b3dSmrg gen6_render_fill_one_try_blt(sna, dst, bo, color, 3401428d7b3dSmrg x1, y1, x2, y2, alu)) 3402428d7b3dSmrg return true; 3403428d7b3dSmrg 3404428d7b3dSmrg /* Must use the BLT if we can't RENDER... */ 3405428d7b3dSmrg if (!(alu == GXcopy || alu == GXclear) || 3406428d7b3dSmrg too_large(dst->drawable.width, dst->drawable.height)) 3407428d7b3dSmrg return gen6_render_fill_one_try_blt(sna, dst, bo, color, 3408428d7b3dSmrg x1, y1, x2, y2, alu); 3409428d7b3dSmrg 3410428d7b3dSmrg if (alu == GXclear) 3411428d7b3dSmrg color = 0; 3412428d7b3dSmrg 3413428d7b3dSmrg tmp.dst.pixmap = dst; 3414428d7b3dSmrg tmp.dst.width = dst->drawable.width; 3415428d7b3dSmrg tmp.dst.height = dst->drawable.height; 3416428d7b3dSmrg tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3417428d7b3dSmrg tmp.dst.bo = bo; 3418428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 3419428d7b3dSmrg 3420428d7b3dSmrg tmp.src.bo = 3421428d7b3dSmrg sna_render_get_solid(sna, 3422428d7b3dSmrg sna_rgba_for_color(color, 3423428d7b3dSmrg dst->drawable.depth)); 3424428d7b3dSmrg tmp.mask.bo = NULL; 3425428d7b3dSmrg 3426428d7b3dSmrg tmp.floats_per_vertex = 2; 3427428d7b3dSmrg tmp.floats_per_rect = 6; 3428428d7b3dSmrg tmp.need_magic_ca_pass = false; 3429428d7b3dSmrg 3430428d7b3dSmrg tmp.u.gen6.flags = FILL_FLAGS_NOBLEND; 3431428d7b3dSmrg assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 3432428d7b3dSmrg assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); 3433428d7b3dSmrg assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); 3434428d7b3dSmrg 3435428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 3436428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3437428d7b3dSmrg kgem_submit(&sna->kgem); 3438428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3439428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3440428d7b3dSmrg return false; 3441428d7b3dSmrg } 3442428d7b3dSmrg } 3443428d7b3dSmrg 3444428d7b3dSmrg gen6_align_vertex(sna, &tmp); 3445428d7b3dSmrg gen6_emit_fill_state(sna, &tmp); 3446428d7b3dSmrg 3447428d7b3dSmrg gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state); 3448428d7b3dSmrg 3449428d7b3dSmrg DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2)); 3450428d7b3dSmrg 3451428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3452428d7b3dSmrg sna->render.vertex_used += 6; 3453428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3454428d7b3dSmrg 3455428d7b3dSmrg v[0] = x2; 3456428d7b3dSmrg v[8] = v[4] = x1; 3457428d7b3dSmrg v[5] = v[1] = y2; 3458428d7b3dSmrg v[9] = y1; 3459428d7b3dSmrg v[7] = v[2] = v[3] = 1; 3460428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3461428d7b3dSmrg 3462428d7b3dSmrg gen4_vertex_flush(sna); 3463428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3464428d7b3dSmrg 3465428d7b3dSmrg return true; 3466428d7b3dSmrg} 3467428d7b3dSmrg 3468428d7b3dSmrgstatic bool 3469428d7b3dSmrggen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) 3470428d7b3dSmrg{ 3471428d7b3dSmrg BoxRec box; 3472428d7b3dSmrg 3473428d7b3dSmrg box.x1 = 0; 3474428d7b3dSmrg box.y1 = 0; 3475428d7b3dSmrg box.x2 = dst->drawable.width; 3476428d7b3dSmrg box.y2 = dst->drawable.height; 3477428d7b3dSmrg 3478428d7b3dSmrg return sna_blt_fill_boxes(sna, GXclear, 3479428d7b3dSmrg bo, dst->drawable.bitsPerPixel, 3480428d7b3dSmrg 0, &box, 1); 3481428d7b3dSmrg} 3482428d7b3dSmrg 3483428d7b3dSmrgstatic bool 3484428d7b3dSmrggen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) 3485428d7b3dSmrg{ 3486428d7b3dSmrg struct sna_composite_op tmp; 3487428d7b3dSmrg int16_t *v; 3488428d7b3dSmrg 3489428d7b3dSmrg DBG(("%s: %dx%d\n", 3490428d7b3dSmrg __FUNCTION__, 3491428d7b3dSmrg dst->drawable.width, 3492428d7b3dSmrg dst->drawable.height)); 3493428d7b3dSmrg 3494428d7b3dSmrg /* Prefer to use the BLT if, and only if, already engaged */ 3495428d7b3dSmrg if (sna->kgem.ring == KGEM_BLT && 3496428d7b3dSmrg gen6_render_clear_try_blt(sna, dst, bo)) 3497428d7b3dSmrg return true; 3498428d7b3dSmrg 3499428d7b3dSmrg /* Must use the BLT if we can't RENDER... */ 3500428d7b3dSmrg if (too_large(dst->drawable.width, dst->drawable.height)) 3501428d7b3dSmrg return gen6_render_clear_try_blt(sna, dst, bo); 3502428d7b3dSmrg 3503428d7b3dSmrg tmp.dst.pixmap = dst; 3504428d7b3dSmrg tmp.dst.width = dst->drawable.width; 3505428d7b3dSmrg tmp.dst.height = dst->drawable.height; 3506428d7b3dSmrg tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3507428d7b3dSmrg tmp.dst.bo = bo; 3508428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 3509428d7b3dSmrg 3510428d7b3dSmrg tmp.src.bo = sna_render_get_solid(sna, 0); 3511428d7b3dSmrg tmp.mask.bo = NULL; 3512428d7b3dSmrg 3513428d7b3dSmrg tmp.floats_per_vertex = 2; 3514428d7b3dSmrg tmp.floats_per_rect = 6; 3515428d7b3dSmrg tmp.need_magic_ca_pass = false; 3516428d7b3dSmrg 3517428d7b3dSmrg tmp.u.gen6.flags = FILL_FLAGS_NOBLEND; 3518428d7b3dSmrg assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); 3519428d7b3dSmrg assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); 3520428d7b3dSmrg assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); 3521428d7b3dSmrg 3522428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 3523428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3524428d7b3dSmrg kgem_submit(&sna->kgem); 3525428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3526428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3527428d7b3dSmrg return false; 3528428d7b3dSmrg } 3529428d7b3dSmrg } 3530428d7b3dSmrg 3531428d7b3dSmrg gen6_align_vertex(sna, &tmp); 3532428d7b3dSmrg gen6_emit_fill_state(sna, &tmp); 3533428d7b3dSmrg 3534428d7b3dSmrg gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state); 3535428d7b3dSmrg 3536428d7b3dSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3537428d7b3dSmrg sna->render.vertex_used += 6; 3538428d7b3dSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3539428d7b3dSmrg 3540428d7b3dSmrg v[0] = dst->drawable.width; 3541428d7b3dSmrg v[5] = v[1] = dst->drawable.height; 3542428d7b3dSmrg v[8] = v[4] = 0; 3543428d7b3dSmrg v[9] = 0; 3544428d7b3dSmrg 3545428d7b3dSmrg v[7] = v[2] = v[3] = 1; 3546428d7b3dSmrg v[6] = v[10] = v[11] = 0; 3547428d7b3dSmrg 3548428d7b3dSmrg gen4_vertex_flush(sna); 3549428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3550428d7b3dSmrg 3551428d7b3dSmrg return true; 3552428d7b3dSmrg} 3553428d7b3dSmrg 3554428d7b3dSmrgstatic void gen6_render_reset(struct sna *sna) 3555428d7b3dSmrg{ 3556428d7b3dSmrg sna->render_state.gen6.needs_invariant = true; 3557428d7b3dSmrg sna->render_state.gen6.first_state_packet = true; 3558428d7b3dSmrg sna->render_state.gen6.ve_id = 3 << 2; 3559428d7b3dSmrg sna->render_state.gen6.last_primitive = -1; 3560428d7b3dSmrg 3561428d7b3dSmrg sna->render_state.gen6.num_sf_outputs = 0; 3562428d7b3dSmrg sna->render_state.gen6.samplers = -1; 3563428d7b3dSmrg sna->render_state.gen6.blend = -1; 3564428d7b3dSmrg sna->render_state.gen6.kernel = -1; 3565428d7b3dSmrg sna->render_state.gen6.drawrect_offset = -1; 3566428d7b3dSmrg sna->render_state.gen6.drawrect_limit = -1; 3567428d7b3dSmrg sna->render_state.gen6.surface_table = -1; 3568428d7b3dSmrg 3569428d7b3dSmrg if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { 3570428d7b3dSmrg DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); 3571428d7b3dSmrg discard_vbo(sna); 3572428d7b3dSmrg } 3573428d7b3dSmrg 3574428d7b3dSmrg sna->render.vertex_offset = 0; 3575428d7b3dSmrg sna->render.nvertex_reloc = 0; 3576428d7b3dSmrg sna->render.vb_id = 0; 3577428d7b3dSmrg} 3578428d7b3dSmrg 3579428d7b3dSmrgstatic void gen6_render_fini(struct sna *sna) 3580428d7b3dSmrg{ 3581428d7b3dSmrg kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo); 3582428d7b3dSmrg} 3583428d7b3dSmrg 3584428d7b3dSmrgstatic bool is_gt2(struct sna *sna, int devid) 3585428d7b3dSmrg{ 3586428d7b3dSmrg return devid & 0x30; 3587428d7b3dSmrg} 3588428d7b3dSmrg 3589428d7b3dSmrgstatic bool is_mobile(struct sna *sna, int devid) 3590428d7b3dSmrg{ 3591428d7b3dSmrg return (devid & 0xf) == 0x6; 3592428d7b3dSmrg} 3593428d7b3dSmrg 3594428d7b3dSmrgstatic bool gen6_render_setup(struct sna *sna, int devid) 3595428d7b3dSmrg{ 3596428d7b3dSmrg struct gen6_render_state *state = &sna->render_state.gen6; 3597428d7b3dSmrg struct sna_static_stream general; 3598428d7b3dSmrg struct gen6_sampler_state *ss; 3599428d7b3dSmrg int i, j, k, l, m; 3600428d7b3dSmrg 3601428d7b3dSmrg state->info = >1_info; 3602428d7b3dSmrg if (is_gt2(sna, devid)) 3603428d7b3dSmrg state->info = >2_info; /* XXX requires GT_MODE WiZ disabled */ 3604428d7b3dSmrg state->gt = state->info->gt; 3605428d7b3dSmrg 3606428d7b3dSmrg sna_static_stream_init(&general); 3607428d7b3dSmrg 3608428d7b3dSmrg /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer 3609428d7b3dSmrg * dumps, you know it points to zero. 3610428d7b3dSmrg */ 3611428d7b3dSmrg null_create(&general); 3612428d7b3dSmrg scratch_create(&general); 3613428d7b3dSmrg 3614428d7b3dSmrg for (m = 0; m < GEN6_KERNEL_COUNT; m++) { 3615428d7b3dSmrg if (wm_kernels[m].size) { 3616428d7b3dSmrg state->wm_kernel[m][1] = 3617428d7b3dSmrg sna_static_stream_add(&general, 3618428d7b3dSmrg wm_kernels[m].data, 3619428d7b3dSmrg wm_kernels[m].size, 3620428d7b3dSmrg 64); 3621428d7b3dSmrg } else { 3622428d7b3dSmrg if (USE_8_PIXEL_DISPATCH) { 3623428d7b3dSmrg state->wm_kernel[m][0] = 3624428d7b3dSmrg sna_static_stream_compile_wm(sna, &general, 3625428d7b3dSmrg wm_kernels[m].data, 8); 3626428d7b3dSmrg } 3627428d7b3dSmrg 3628428d7b3dSmrg if (USE_16_PIXEL_DISPATCH) { 3629428d7b3dSmrg state->wm_kernel[m][1] = 3630428d7b3dSmrg sna_static_stream_compile_wm(sna, &general, 3631428d7b3dSmrg wm_kernels[m].data, 16); 3632428d7b3dSmrg } 3633428d7b3dSmrg 3634428d7b3dSmrg if (USE_32_PIXEL_DISPATCH) { 3635428d7b3dSmrg state->wm_kernel[m][2] = 3636428d7b3dSmrg sna_static_stream_compile_wm(sna, &general, 3637428d7b3dSmrg wm_kernels[m].data, 32); 3638428d7b3dSmrg } 3639428d7b3dSmrg } 3640428d7b3dSmrg if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) { 3641428d7b3dSmrg state->wm_kernel[m][1] = 3642428d7b3dSmrg sna_static_stream_compile_wm(sna, &general, 3643428d7b3dSmrg wm_kernels[m].data, 16); 3644428d7b3dSmrg } 3645428d7b3dSmrg } 3646428d7b3dSmrg 3647428d7b3dSmrg ss = sna_static_stream_map(&general, 3648428d7b3dSmrg 2 * sizeof(*ss) * 3649428d7b3dSmrg (2 + 3650428d7b3dSmrg FILTER_COUNT * EXTEND_COUNT * 3651428d7b3dSmrg FILTER_COUNT * EXTEND_COUNT), 3652428d7b3dSmrg 32); 3653428d7b3dSmrg state->wm_state = sna_static_stream_offsetof(&general, ss); 3654428d7b3dSmrg sampler_copy_init(ss); ss += 2; 3655428d7b3dSmrg sampler_fill_init(ss); ss += 2; 3656428d7b3dSmrg for (i = 0; i < FILTER_COUNT; i++) { 3657428d7b3dSmrg for (j = 0; j < EXTEND_COUNT; j++) { 3658428d7b3dSmrg for (k = 0; k < FILTER_COUNT; k++) { 3659428d7b3dSmrg for (l = 0; l < EXTEND_COUNT; l++) { 3660428d7b3dSmrg sampler_state_init(ss++, i, j); 3661428d7b3dSmrg sampler_state_init(ss++, k, l); 3662428d7b3dSmrg } 3663428d7b3dSmrg } 3664428d7b3dSmrg } 3665428d7b3dSmrg } 3666428d7b3dSmrg 3667428d7b3dSmrg state->cc_blend = gen6_composite_create_blend_state(&general); 3668428d7b3dSmrg 3669428d7b3dSmrg state->general_bo = sna_static_stream_fini(sna, &general); 3670428d7b3dSmrg return state->general_bo != NULL; 3671428d7b3dSmrg} 3672428d7b3dSmrg 3673428d7b3dSmrgconst char *gen6_render_init(struct sna *sna, const char *backend) 3674428d7b3dSmrg{ 3675428d7b3dSmrg int devid = intel_get_device_id(sna->dev); 3676428d7b3dSmrg 3677428d7b3dSmrg if (!gen6_render_setup(sna, devid)) 3678428d7b3dSmrg return backend; 3679428d7b3dSmrg 3680428d7b3dSmrg sna->kgem.context_switch = gen6_render_context_switch; 3681428d7b3dSmrg sna->kgem.retire = gen6_render_retire; 3682428d7b3dSmrg sna->kgem.expire = gen4_render_expire; 3683428d7b3dSmrg 3684428d7b3dSmrg#if !NO_COMPOSITE 3685428d7b3dSmrg sna->render.composite = gen6_render_composite; 3686428d7b3dSmrg sna->render.prefer_gpu |= PREFER_GPU_RENDER; 3687428d7b3dSmrg#endif 3688428d7b3dSmrg 3689428d7b3dSmrg#if !NO_COMPOSITE_SPANS 3690428d7b3dSmrg sna->render.check_composite_spans = gen6_check_composite_spans; 3691428d7b3dSmrg sna->render.composite_spans = gen6_render_composite_spans; 3692428d7b3dSmrg if (is_mobile(sna, devid)) 3693428d7b3dSmrg sna->render.prefer_gpu |= PREFER_GPU_SPANS; 3694428d7b3dSmrg#endif 3695428d7b3dSmrg sna->render.video = gen6_render_video; 3696428d7b3dSmrg 3697428d7b3dSmrg#if !NO_COPY_BOXES 3698428d7b3dSmrg sna->render.copy_boxes = gen6_render_copy_boxes; 3699428d7b3dSmrg#endif 3700428d7b3dSmrg#if !NO_COPY 3701428d7b3dSmrg sna->render.copy = gen6_render_copy; 3702428d7b3dSmrg#endif 3703428d7b3dSmrg 3704428d7b3dSmrg#if !NO_FILL_BOXES 3705428d7b3dSmrg sna->render.fill_boxes = gen6_render_fill_boxes; 3706428d7b3dSmrg#endif 3707428d7b3dSmrg#if !NO_FILL 3708428d7b3dSmrg sna->render.fill = gen6_render_fill; 3709428d7b3dSmrg#endif 3710428d7b3dSmrg#if !NO_FILL_ONE 3711428d7b3dSmrg sna->render.fill_one = gen6_render_fill_one; 3712428d7b3dSmrg#endif 3713428d7b3dSmrg#if !NO_FILL_CLEAR 3714428d7b3dSmrg sna->render.clear = gen6_render_clear; 3715428d7b3dSmrg#endif 3716428d7b3dSmrg 3717428d7b3dSmrg sna->render.flush = gen4_render_flush; 3718428d7b3dSmrg sna->render.reset = gen6_render_reset; 3719428d7b3dSmrg sna->render.fini = gen6_render_fini; 3720428d7b3dSmrg 3721428d7b3dSmrg sna->render.max_3d_size = GEN6_MAX_SIZE; 3722428d7b3dSmrg sna->render.max_3d_pitch = 1 << 18; 3723428d7b3dSmrg return sna->render_state.gen6.info->name; 3724428d7b3dSmrg} 3725