1428d7b3dSmrg/* 2428d7b3dSmrg * Copyright © 2006,2008,2011 Intel Corporation 3428d7b3dSmrg * Copyright © 2007 Red Hat, Inc. 4428d7b3dSmrg * 5428d7b3dSmrg * Permission is hereby granted, free of charge, to any person obtaining a 6428d7b3dSmrg * copy of this software and associated documentation files (the "Software"), 7428d7b3dSmrg * to deal in the Software without restriction, including without limitation 8428d7b3dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9428d7b3dSmrg * and/or sell copies of the Software, and to permit persons to whom the 10428d7b3dSmrg * Software is furnished to do so, subject to the following conditions: 11428d7b3dSmrg * 12428d7b3dSmrg * The above copyright notice and this permission notice (including the next 13428d7b3dSmrg * paragraph) shall be included in all copies or substantial portions of the 14428d7b3dSmrg * Software. 15428d7b3dSmrg * 16428d7b3dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17428d7b3dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18428d7b3dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19428d7b3dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20428d7b3dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21428d7b3dSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22428d7b3dSmrg * SOFTWARE. 23428d7b3dSmrg * 24428d7b3dSmrg * Authors: 25428d7b3dSmrg * Wang Zhenyu <zhenyu.z.wang@sna.com> 26428d7b3dSmrg * Eric Anholt <eric@anholt.net> 27428d7b3dSmrg * Carl Worth <cworth@redhat.com> 28428d7b3dSmrg * Keith Packard <keithp@keithp.com> 29428d7b3dSmrg * Chris Wilson <chris@chris-wilson.co.uk> 30428d7b3dSmrg * 31428d7b3dSmrg */ 32428d7b3dSmrg 33428d7b3dSmrg#ifdef HAVE_CONFIG_H 34428d7b3dSmrg#include "config.h" 35428d7b3dSmrg#endif 36428d7b3dSmrg 37428d7b3dSmrg#include "sna.h" 38428d7b3dSmrg#include "sna_reg.h" 39428d7b3dSmrg#include "sna_render.h" 40428d7b3dSmrg#include "sna_render_inline.h" 41428d7b3dSmrg#include "sna_video.h" 42428d7b3dSmrg 43428d7b3dSmrg#include "brw/brw.h" 44428d7b3dSmrg#include "gen4_common.h" 45428d7b3dSmrg#include "gen4_render.h" 46428d7b3dSmrg#include "gen4_source.h" 47428d7b3dSmrg#include "gen4_vertex.h" 48428d7b3dSmrg 49428d7b3dSmrg/* gen4 has a serious issue with its shaders that we need to flush 50428d7b3dSmrg * after every rectangle... So until that is resolved, prefer 51428d7b3dSmrg * the BLT engine. 52428d7b3dSmrg */ 53428d7b3dSmrg#define FORCE_SPANS 0 54428d7b3dSmrg#define FORCE_NONRECTILINEAR_SPANS -1 55428d7b3dSmrg#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */ 56428d7b3dSmrg 57428d7b3dSmrg#define ALWAYS_FLUSH 1 58428d7b3dSmrg 59428d7b3dSmrg#define NO_COMPOSITE 0 60428d7b3dSmrg#define NO_COMPOSITE_SPANS 0 61428d7b3dSmrg#define NO_COPY 0 62428d7b3dSmrg#define NO_COPY_BOXES 0 63428d7b3dSmrg#define NO_FILL 0 64428d7b3dSmrg#define NO_FILL_ONE 0 65428d7b3dSmrg#define NO_FILL_BOXES 0 66428d7b3dSmrg#define NO_VIDEO 0 67428d7b3dSmrg 68428d7b3dSmrg#define MAX_FLUSH_VERTICES 1 /* was 6, https://bugs.freedesktop.org/show_bug.cgi?id=55500 */ 69428d7b3dSmrg 70428d7b3dSmrg#define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) 71428d7b3dSmrg 72428d7b3dSmrg/* Set up a default static partitioning of the URB, which is supposed to 73428d7b3dSmrg * allow anything we would want to do, at potentially lower performance. 74428d7b3dSmrg */ 75428d7b3dSmrg#define URB_CS_ENTRY_SIZE 1 76428d7b3dSmrg#define URB_CS_ENTRIES 0 77428d7b3dSmrg 78428d7b3dSmrg#define URB_VS_ENTRY_SIZE 1 79428d7b3dSmrg#define URB_VS_ENTRIES 32 80428d7b3dSmrg 81428d7b3dSmrg#define URB_GS_ENTRY_SIZE 0 82428d7b3dSmrg#define URB_GS_ENTRIES 0 83428d7b3dSmrg 84428d7b3dSmrg#define URB_CL_ENTRY_SIZE 0 85428d7b3dSmrg#define URB_CL_ENTRIES 0 86428d7b3dSmrg 87428d7b3dSmrg#define URB_SF_ENTRY_SIZE 2 88428d7b3dSmrg#define URB_SF_ENTRIES 64 89428d7b3dSmrg 90428d7b3dSmrg/* 91428d7b3dSmrg * this program computes dA/dx and dA/dy for the texture coordinates along 92428d7b3dSmrg * with the base texture coordinate. It was extracted from the Mesa driver 93428d7b3dSmrg */ 94428d7b3dSmrg 95428d7b3dSmrg#define SF_KERNEL_NUM_GRF 16 96428d7b3dSmrg#define PS_KERNEL_NUM_GRF 32 97428d7b3dSmrg 98428d7b3dSmrg#define GEN4_MAX_SF_THREADS 24 99428d7b3dSmrg#define GEN4_MAX_WM_THREADS 32 100428d7b3dSmrg#define G4X_MAX_WM_THREADS 50 101428d7b3dSmrg 102428d7b3dSmrgstatic const uint32_t ps_kernel_packed_static[][4] = { 103428d7b3dSmrg#include "exa_wm_xy.g4b" 104428d7b3dSmrg#include "exa_wm_src_affine.g4b" 105428d7b3dSmrg#include "exa_wm_src_sample_argb.g4b" 106428d7b3dSmrg#include "exa_wm_yuv_rgb.g4b" 107428d7b3dSmrg#include "exa_wm_write.g4b" 108428d7b3dSmrg}; 109428d7b3dSmrg 110428d7b3dSmrgstatic const uint32_t ps_kernel_planar_static[][4] = { 111428d7b3dSmrg#include "exa_wm_xy.g4b" 112428d7b3dSmrg#include "exa_wm_src_affine.g4b" 113428d7b3dSmrg#include "exa_wm_src_sample_planar.g4b" 114428d7b3dSmrg#include "exa_wm_yuv_rgb.g4b" 115428d7b3dSmrg#include "exa_wm_write.g4b" 116428d7b3dSmrg}; 117428d7b3dSmrg 118428d7b3dSmrg#define NOKERNEL(kernel_enum, func, masked) \ 119428d7b3dSmrg [kernel_enum] = {func, 0, masked} 120428d7b3dSmrg#define KERNEL(kernel_enum, kernel, masked) \ 121428d7b3dSmrg [kernel_enum] = {&kernel, sizeof(kernel), masked} 122428d7b3dSmrgstatic const struct wm_kernel_info { 123428d7b3dSmrg const void *data; 124428d7b3dSmrg unsigned int size; 125428d7b3dSmrg bool has_mask; 126428d7b3dSmrg} wm_kernels[] = { 127428d7b3dSmrg NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false), 128428d7b3dSmrg NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false), 129428d7b3dSmrg 130428d7b3dSmrg NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true), 131428d7b3dSmrg NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true), 132428d7b3dSmrg 133428d7b3dSmrg NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true), 134428d7b3dSmrg NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true), 135428d7b3dSmrg 136428d7b3dSmrg NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true), 137428d7b3dSmrg NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true), 138428d7b3dSmrg 139428d7b3dSmrg NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true), 140428d7b3dSmrg NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true), 141428d7b3dSmrg 142428d7b3dSmrg KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false), 143428d7b3dSmrg KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false), 144428d7b3dSmrg}; 145428d7b3dSmrg#undef KERNEL 146428d7b3dSmrg 147428d7b3dSmrgstatic const struct blendinfo { 148428d7b3dSmrg bool src_alpha; 149428d7b3dSmrg uint32_t src_blend; 150428d7b3dSmrg uint32_t dst_blend; 151428d7b3dSmrg} gen4_blend_op[] = { 152428d7b3dSmrg /* Clear */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO}, 153428d7b3dSmrg /* Src */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO}, 154428d7b3dSmrg /* Dst */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE}, 155428d7b3dSmrg /* Over */ {1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA}, 156428d7b3dSmrg /* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE}, 157428d7b3dSmrg /* In */ {0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO}, 158428d7b3dSmrg /* InReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA}, 159428d7b3dSmrg /* Out */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO}, 160428d7b3dSmrg /* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA}, 161428d7b3dSmrg /* Atop */ {1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA}, 162428d7b3dSmrg /* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA}, 163428d7b3dSmrg /* Xor */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA}, 164428d7b3dSmrg /* Add */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE}, 165428d7b3dSmrg}; 166428d7b3dSmrg 167428d7b3dSmrg/** 168428d7b3dSmrg * Highest-valued BLENDFACTOR used in gen4_blend_op. 169428d7b3dSmrg * 170428d7b3dSmrg * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR, 171428d7b3dSmrg * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, 172428d7b3dSmrg * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} 173428d7b3dSmrg */ 174428d7b3dSmrg#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1) 175428d7b3dSmrg 176428d7b3dSmrg#define BLEND_OFFSET(s, d) \ 177428d7b3dSmrg (((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64) 178428d7b3dSmrg 179428d7b3dSmrg#define SAMPLER_OFFSET(sf, se, mf, me, k) \ 180428d7b3dSmrg ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64) 181428d7b3dSmrg 182428d7b3dSmrgstatic void 183428d7b3dSmrggen4_emit_pipelined_pointers(struct sna *sna, 184428d7b3dSmrg const struct sna_composite_op *op, 185428d7b3dSmrg int blend, int kernel); 186428d7b3dSmrg 187428d7b3dSmrg#define OUT_BATCH(v) batch_emit(sna, v) 188428d7b3dSmrg#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) 189428d7b3dSmrg#define OUT_VERTEX_F(v) vertex_emit(sna, v) 190428d7b3dSmrg 191428d7b3dSmrg#define GEN4_MAX_3D_SIZE 8192 192428d7b3dSmrg 193428d7b3dSmrgstatic inline bool too_large(int width, int height) 194428d7b3dSmrg{ 195428d7b3dSmrg return width > GEN4_MAX_3D_SIZE || height > GEN4_MAX_3D_SIZE; 196428d7b3dSmrg} 197428d7b3dSmrg 198428d7b3dSmrgstatic int 199428d7b3dSmrggen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) 200428d7b3dSmrg{ 201428d7b3dSmrg int base; 202428d7b3dSmrg 203428d7b3dSmrg if (has_mask) { 204428d7b3dSmrg if (is_ca) { 205428d7b3dSmrg if (gen4_blend_op[op].src_alpha) 206428d7b3dSmrg base = WM_KERNEL_MASKSA; 207428d7b3dSmrg else 208428d7b3dSmrg base = WM_KERNEL_MASKCA; 209428d7b3dSmrg } else 210428d7b3dSmrg base = WM_KERNEL_MASK; 211428d7b3dSmrg } else 212428d7b3dSmrg base = WM_KERNEL; 213428d7b3dSmrg 214428d7b3dSmrg return base + !is_affine; 215428d7b3dSmrg} 216428d7b3dSmrg 217428d7b3dSmrgstatic bool gen4_magic_ca_pass(struct sna *sna, 218428d7b3dSmrg const struct sna_composite_op *op) 219428d7b3dSmrg{ 220428d7b3dSmrg struct gen4_render_state *state = &sna->render_state.gen4; 221428d7b3dSmrg 222428d7b3dSmrg if (!op->need_magic_ca_pass) 223428d7b3dSmrg return false; 224428d7b3dSmrg 225428d7b3dSmrg assert(sna->render.vertex_index > sna->render.vertex_start); 226428d7b3dSmrg 227428d7b3dSmrg DBG(("%s: CA fixup\n", __FUNCTION__)); 228428d7b3dSmrg assert(op->mask.bo != NULL); 229428d7b3dSmrg assert(op->has_component_alpha); 230428d7b3dSmrg 231428d7b3dSmrg gen4_emit_pipelined_pointers(sna, op, PictOpAdd, 232428d7b3dSmrg gen4_choose_composite_kernel(PictOpAdd, 233428d7b3dSmrg true, true, op->is_affine)); 234428d7b3dSmrg 235428d7b3dSmrg OUT_BATCH(GEN4_3DPRIMITIVE | 236428d7b3dSmrg GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL | 237428d7b3dSmrg (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) | 238428d7b3dSmrg (0 << 9) | 239428d7b3dSmrg 4); 240428d7b3dSmrg OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); 241428d7b3dSmrg OUT_BATCH(sna->render.vertex_start); 242428d7b3dSmrg OUT_BATCH(1); /* single instance */ 243428d7b3dSmrg OUT_BATCH(0); /* start instance location */ 244428d7b3dSmrg OUT_BATCH(0); /* index buffer offset, ignored */ 245428d7b3dSmrg 246428d7b3dSmrg state->last_primitive = sna->kgem.nbatch; 247428d7b3dSmrg return true; 248428d7b3dSmrg} 249428d7b3dSmrg 250428d7b3dSmrgstatic uint32_t gen4_get_blend(int op, 251428d7b3dSmrg bool has_component_alpha, 252428d7b3dSmrg uint32_t dst_format) 253428d7b3dSmrg{ 254428d7b3dSmrg uint32_t src, dst; 255428d7b3dSmrg 256428d7b3dSmrg src = gen4_blend_op[op].src_blend; 257428d7b3dSmrg dst = gen4_blend_op[op].dst_blend; 258428d7b3dSmrg 259428d7b3dSmrg /* If there's no dst alpha channel, adjust the blend op so that we'll treat 260428d7b3dSmrg * it as always 1. 261428d7b3dSmrg */ 262428d7b3dSmrg if (PICT_FORMAT_A(dst_format) == 0) { 263428d7b3dSmrg if (src == GEN4_BLENDFACTOR_DST_ALPHA) 264428d7b3dSmrg src = GEN4_BLENDFACTOR_ONE; 265428d7b3dSmrg else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA) 266428d7b3dSmrg src = GEN4_BLENDFACTOR_ZERO; 267428d7b3dSmrg } 268428d7b3dSmrg 269428d7b3dSmrg /* If the source alpha is being used, then we should only be in a 270428d7b3dSmrg * case where the source blend factor is 0, and the source blend 271428d7b3dSmrg * value is the mask channels multiplied by the source picture's alpha. 272428d7b3dSmrg */ 273428d7b3dSmrg if (has_component_alpha && gen4_blend_op[op].src_alpha) { 274428d7b3dSmrg if (dst == GEN4_BLENDFACTOR_SRC_ALPHA) 275428d7b3dSmrg dst = GEN4_BLENDFACTOR_SRC_COLOR; 276428d7b3dSmrg else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA) 277428d7b3dSmrg dst = GEN4_BLENDFACTOR_INV_SRC_COLOR; 278428d7b3dSmrg } 279428d7b3dSmrg 280428d7b3dSmrg DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", 281428d7b3dSmrg op, dst_format, PICT_FORMAT_A(dst_format), 282428d7b3dSmrg src, dst, BLEND_OFFSET(src, dst))); 283428d7b3dSmrg return BLEND_OFFSET(src, dst); 284428d7b3dSmrg} 285428d7b3dSmrg 286428d7b3dSmrgstatic uint32_t gen4_get_card_format(PictFormat format) 287428d7b3dSmrg{ 288428d7b3dSmrg switch (format) { 289428d7b3dSmrg default: 290428d7b3dSmrg return -1; 291428d7b3dSmrg case PICT_a8r8g8b8: 292428d7b3dSmrg return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM; 293428d7b3dSmrg case PICT_x8r8g8b8: 294428d7b3dSmrg return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM; 295428d7b3dSmrg case PICT_a8b8g8r8: 296428d7b3dSmrg return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM; 297428d7b3dSmrg case PICT_x8b8g8r8: 298428d7b3dSmrg return GEN4_SURFACEFORMAT_R8G8B8X8_UNORM; 299428d7b3dSmrg#ifdef PICT_a2r10g10b10 300428d7b3dSmrg case PICT_a2r10g10b10: 301428d7b3dSmrg return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM; 302428d7b3dSmrg case PICT_x2r10g10b10: 303428d7b3dSmrg return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM; 304428d7b3dSmrg#endif 305428d7b3dSmrg case PICT_r8g8b8: 306428d7b3dSmrg return GEN4_SURFACEFORMAT_R8G8B8_UNORM; 307428d7b3dSmrg case PICT_r5g6b5: 308428d7b3dSmrg return GEN4_SURFACEFORMAT_B5G6R5_UNORM; 309428d7b3dSmrg case PICT_a1r5g5b5: 310428d7b3dSmrg return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM; 311428d7b3dSmrg case PICT_a8: 312428d7b3dSmrg return GEN4_SURFACEFORMAT_A8_UNORM; 313428d7b3dSmrg case PICT_a4r4g4b4: 314428d7b3dSmrg return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM; 315428d7b3dSmrg } 316428d7b3dSmrg} 317428d7b3dSmrg 318428d7b3dSmrgstatic uint32_t gen4_get_dest_format(PictFormat format) 319428d7b3dSmrg{ 320428d7b3dSmrg switch (format) { 321428d7b3dSmrg default: 322428d7b3dSmrg return -1; 323428d7b3dSmrg case PICT_a8r8g8b8: 324428d7b3dSmrg case PICT_x8r8g8b8: 325428d7b3dSmrg return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM; 326428d7b3dSmrg case PICT_a8b8g8r8: 327428d7b3dSmrg case PICT_x8b8g8r8: 328428d7b3dSmrg return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM; 329428d7b3dSmrg#ifdef PICT_a2r10g10b10 330428d7b3dSmrg case PICT_a2r10g10b10: 331428d7b3dSmrg case PICT_x2r10g10b10: 332428d7b3dSmrg return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM; 333428d7b3dSmrg#endif 334428d7b3dSmrg case PICT_r5g6b5: 335428d7b3dSmrg return GEN4_SURFACEFORMAT_B5G6R5_UNORM; 336428d7b3dSmrg case PICT_x1r5g5b5: 337428d7b3dSmrg case PICT_a1r5g5b5: 338428d7b3dSmrg return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM; 339428d7b3dSmrg case PICT_a8: 340428d7b3dSmrg return GEN4_SURFACEFORMAT_A8_UNORM; 341428d7b3dSmrg case PICT_a4r4g4b4: 342428d7b3dSmrg case PICT_x4r4g4b4: 343428d7b3dSmrg return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM; 344428d7b3dSmrg } 345428d7b3dSmrg} 346428d7b3dSmrg 347428d7b3dSmrgstatic bool gen4_check_dst_format(PictFormat format) 348428d7b3dSmrg{ 349428d7b3dSmrg if (gen4_get_dest_format(format) != -1) 350428d7b3dSmrg return true; 351428d7b3dSmrg 352428d7b3dSmrg DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 353428d7b3dSmrg return false; 354428d7b3dSmrg} 355428d7b3dSmrg 356428d7b3dSmrgstatic bool gen4_check_format(uint32_t format) 357428d7b3dSmrg{ 358428d7b3dSmrg if (gen4_get_card_format(format) != -1) 359428d7b3dSmrg return true; 360428d7b3dSmrg 361428d7b3dSmrg DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 362428d7b3dSmrg return false; 363428d7b3dSmrg} 364428d7b3dSmrg 365428d7b3dSmrgtypedef struct gen4_surface_state_padded { 366428d7b3dSmrg struct gen4_surface_state state; 367428d7b3dSmrg char pad[32 - sizeof(struct gen4_surface_state)]; 368428d7b3dSmrg} gen4_surface_state_padded; 369428d7b3dSmrg 370428d7b3dSmrgstatic void null_create(struct sna_static_stream *stream) 371428d7b3dSmrg{ 372428d7b3dSmrg /* A bunch of zeros useful for legacy border color and depth-stencil */ 373428d7b3dSmrg sna_static_stream_map(stream, 64, 64); 374428d7b3dSmrg} 375428d7b3dSmrg 376428d7b3dSmrgstatic void 377428d7b3dSmrgsampler_state_init(struct gen4_sampler_state *sampler_state, 378428d7b3dSmrg sampler_filter_t filter, 379428d7b3dSmrg sampler_extend_t extend) 380428d7b3dSmrg{ 381428d7b3dSmrg sampler_state->ss0.lod_preclamp = 1; /* GL mode */ 382428d7b3dSmrg 383428d7b3dSmrg /* We use the legacy mode to get the semantics specified by 384428d7b3dSmrg * the Render extension. */ 385428d7b3dSmrg sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY; 386428d7b3dSmrg 387428d7b3dSmrg switch (filter) { 388428d7b3dSmrg default: 389428d7b3dSmrg case SAMPLER_FILTER_NEAREST: 390428d7b3dSmrg sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST; 391428d7b3dSmrg sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST; 392428d7b3dSmrg break; 393428d7b3dSmrg case SAMPLER_FILTER_BILINEAR: 394428d7b3dSmrg sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR; 395428d7b3dSmrg sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR; 396428d7b3dSmrg break; 397428d7b3dSmrg } 398428d7b3dSmrg 399428d7b3dSmrg switch (extend) { 400428d7b3dSmrg default: 401428d7b3dSmrg case SAMPLER_EXTEND_NONE: 402428d7b3dSmrg sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER; 403428d7b3dSmrg sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER; 404428d7b3dSmrg sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER; 405428d7b3dSmrg break; 406428d7b3dSmrg case SAMPLER_EXTEND_REPEAT: 407428d7b3dSmrg sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP; 408428d7b3dSmrg sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP; 409428d7b3dSmrg sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP; 410428d7b3dSmrg break; 411428d7b3dSmrg case SAMPLER_EXTEND_PAD: 412428d7b3dSmrg sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP; 413428d7b3dSmrg sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP; 414428d7b3dSmrg sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP; 415428d7b3dSmrg break; 416428d7b3dSmrg case SAMPLER_EXTEND_REFLECT: 417428d7b3dSmrg sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR; 418428d7b3dSmrg sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR; 419428d7b3dSmrg sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR; 420428d7b3dSmrg break; 421428d7b3dSmrg } 422428d7b3dSmrg} 423428d7b3dSmrg 424428d7b3dSmrgstatic uint32_t gen4_filter(uint32_t filter) 425428d7b3dSmrg{ 426428d7b3dSmrg switch (filter) { 427428d7b3dSmrg default: 428428d7b3dSmrg assert(0); 429428d7b3dSmrg case PictFilterNearest: 430428d7b3dSmrg return SAMPLER_FILTER_NEAREST; 431428d7b3dSmrg case PictFilterBilinear: 432428d7b3dSmrg return SAMPLER_FILTER_BILINEAR; 433428d7b3dSmrg } 434428d7b3dSmrg} 435428d7b3dSmrg 436428d7b3dSmrgstatic uint32_t gen4_check_filter(PicturePtr picture) 437428d7b3dSmrg{ 438428d7b3dSmrg switch (picture->filter) { 439428d7b3dSmrg case PictFilterNearest: 440428d7b3dSmrg case PictFilterBilinear: 441428d7b3dSmrg return true; 442428d7b3dSmrg default: 443428d7b3dSmrg DBG(("%s: unknown filter: %s [%d]\n", 444428d7b3dSmrg __FUNCTION__, 445428d7b3dSmrg PictureGetFilterName(picture->filter), 446428d7b3dSmrg picture->filter)); 447428d7b3dSmrg return false; 448428d7b3dSmrg } 449428d7b3dSmrg} 450428d7b3dSmrg 451428d7b3dSmrgstatic uint32_t gen4_repeat(uint32_t repeat) 452428d7b3dSmrg{ 453428d7b3dSmrg switch (repeat) { 454428d7b3dSmrg default: 455428d7b3dSmrg assert(0); 456428d7b3dSmrg case RepeatNone: 457428d7b3dSmrg return SAMPLER_EXTEND_NONE; 458428d7b3dSmrg case RepeatNormal: 459428d7b3dSmrg return SAMPLER_EXTEND_REPEAT; 460428d7b3dSmrg case RepeatPad: 461428d7b3dSmrg return SAMPLER_EXTEND_PAD; 462428d7b3dSmrg case RepeatReflect: 463428d7b3dSmrg return SAMPLER_EXTEND_REFLECT; 464428d7b3dSmrg } 465428d7b3dSmrg} 466428d7b3dSmrg 467428d7b3dSmrgstatic bool gen4_check_repeat(PicturePtr picture) 468428d7b3dSmrg{ 469428d7b3dSmrg if (!picture->repeat) 470428d7b3dSmrg return true; 471428d7b3dSmrg 472428d7b3dSmrg switch (picture->repeatType) { 473428d7b3dSmrg case RepeatNone: 474428d7b3dSmrg case RepeatNormal: 475428d7b3dSmrg case RepeatPad: 476428d7b3dSmrg case RepeatReflect: 477428d7b3dSmrg return true; 478428d7b3dSmrg default: 479428d7b3dSmrg DBG(("%s: unknown repeat: %d\n", 480428d7b3dSmrg __FUNCTION__, picture->repeatType)); 481428d7b3dSmrg return false; 482428d7b3dSmrg } 483428d7b3dSmrg} 484428d7b3dSmrg 485428d7b3dSmrgstatic uint32_t 486428d7b3dSmrggen4_tiling_bits(uint32_t tiling) 487428d7b3dSmrg{ 488428d7b3dSmrg switch (tiling) { 489428d7b3dSmrg default: assert(0); 490428d7b3dSmrg case I915_TILING_NONE: return 0; 491428d7b3dSmrg case I915_TILING_X: return GEN4_SURFACE_TILED; 492428d7b3dSmrg case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y; 493428d7b3dSmrg } 494428d7b3dSmrg} 495428d7b3dSmrg 496428d7b3dSmrg/** 497428d7b3dSmrg * Sets up the common fields for a surface state buffer for the given 498428d7b3dSmrg * picture in the given surface state buffer. 499428d7b3dSmrg */ 500428d7b3dSmrgstatic uint32_t 501428d7b3dSmrggen4_bind_bo(struct sna *sna, 502428d7b3dSmrg struct kgem_bo *bo, 503428d7b3dSmrg uint32_t width, 504428d7b3dSmrg uint32_t height, 505428d7b3dSmrg uint32_t format, 506428d7b3dSmrg bool is_dst) 507428d7b3dSmrg{ 508428d7b3dSmrg uint32_t domains; 509428d7b3dSmrg uint16_t offset; 510428d7b3dSmrg uint32_t *ss; 511428d7b3dSmrg 512428d7b3dSmrg assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo)); 513428d7b3dSmrg 514428d7b3dSmrg /* After the first bind, we manage the cache domains within the batch */ 515428d7b3dSmrg offset = kgem_bo_get_binding(bo, format | is_dst << 31); 516428d7b3dSmrg if (offset) { 517428d7b3dSmrg assert(offset >= sna->kgem.surface); 518428d7b3dSmrg if (is_dst) 519428d7b3dSmrg kgem_bo_mark_dirty(bo); 520428d7b3dSmrg return offset * sizeof(uint32_t); 521428d7b3dSmrg } 522428d7b3dSmrg 523428d7b3dSmrg offset = sna->kgem.surface -= 524428d7b3dSmrg sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); 525428d7b3dSmrg ss = sna->kgem.batch + offset; 526428d7b3dSmrg 527428d7b3dSmrg ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT | 528428d7b3dSmrg GEN4_SURFACE_BLEND_ENABLED | 529428d7b3dSmrg format << GEN4_SURFACE_FORMAT_SHIFT); 530428d7b3dSmrg 531428d7b3dSmrg if (is_dst) { 532428d7b3dSmrg ss[0] |= GEN4_SURFACE_RC_READ_WRITE; 533428d7b3dSmrg domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER; 534428d7b3dSmrg } else 535428d7b3dSmrg domains = I915_GEM_DOMAIN_SAMPLER << 16; 536428d7b3dSmrg ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); 537428d7b3dSmrg 538428d7b3dSmrg ss[2] = ((width - 1) << GEN4_SURFACE_WIDTH_SHIFT | 539428d7b3dSmrg (height - 1) << GEN4_SURFACE_HEIGHT_SHIFT); 540428d7b3dSmrg ss[3] = (gen4_tiling_bits(bo->tiling) | 541428d7b3dSmrg (bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT); 542428d7b3dSmrg ss[4] = 0; 543428d7b3dSmrg ss[5] = 0; 544428d7b3dSmrg 545428d7b3dSmrg kgem_bo_set_binding(bo, format | is_dst << 31, offset); 546428d7b3dSmrg 547428d7b3dSmrg DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", 548428d7b3dSmrg offset, bo->handle, ss[1], 549428d7b3dSmrg format, width, height, bo->pitch, bo->tiling, 550428d7b3dSmrg domains & 0xffff ? "render" : "sampler")); 551428d7b3dSmrg 552428d7b3dSmrg return offset * sizeof(uint32_t); 553428d7b3dSmrg} 554428d7b3dSmrg 555428d7b3dSmrgstatic void gen4_emit_vertex_buffer(struct sna *sna, 556428d7b3dSmrg const struct sna_composite_op *op) 557428d7b3dSmrg{ 558428d7b3dSmrg int id = op->u.gen4.ve_id; 559428d7b3dSmrg 560428d7b3dSmrg assert((sna->render.vb_id & (1 << id)) == 0); 561428d7b3dSmrg 562428d7b3dSmrg OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3); 563428d7b3dSmrg OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA | 564428d7b3dSmrg (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); 565428d7b3dSmrg assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc)); 566428d7b3dSmrg sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; 567428d7b3dSmrg OUT_BATCH(0); 568428d7b3dSmrg OUT_BATCH(0); 569428d7b3dSmrg OUT_BATCH(0); 570428d7b3dSmrg 571428d7b3dSmrg sna->render.vb_id |= 1 << id; 572428d7b3dSmrg} 573428d7b3dSmrg 574428d7b3dSmrginline static void 575428d7b3dSmrggen4_emit_pipe_flush(struct sna *sna) 576428d7b3dSmrg{ 577428d7b3dSmrg#if 1 578428d7b3dSmrg OUT_BATCH(GEN4_PIPE_CONTROL | 579428d7b3dSmrg GEN4_PIPE_CONTROL_WC_FLUSH | 580428d7b3dSmrg (4 - 2)); 581428d7b3dSmrg OUT_BATCH(0); 582428d7b3dSmrg OUT_BATCH(0); 583428d7b3dSmrg OUT_BATCH(0); 584428d7b3dSmrg#else 585428d7b3dSmrg OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); 586428d7b3dSmrg#endif 587428d7b3dSmrg} 588428d7b3dSmrg 589428d7b3dSmrginline static void 590428d7b3dSmrggen4_emit_pipe_break(struct sna *sna) 591428d7b3dSmrg{ 592428d7b3dSmrg#if !ALWAYS_FLUSH 593428d7b3dSmrg OUT_BATCH(GEN4_PIPE_CONTROL | (4 - 2)); 594428d7b3dSmrg OUT_BATCH(0); 595428d7b3dSmrg OUT_BATCH(0); 596428d7b3dSmrg OUT_BATCH(0); 597428d7b3dSmrg#else 598428d7b3dSmrg OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); 599428d7b3dSmrg#endif 600428d7b3dSmrg} 601428d7b3dSmrg 602428d7b3dSmrginline static void 603428d7b3dSmrggen4_emit_pipe_invalidate(struct sna *sna) 604428d7b3dSmrg{ 605428d7b3dSmrg#if 0 606428d7b3dSmrg OUT_BATCH(GEN4_PIPE_CONTROL | 607428d7b3dSmrg GEN4_PIPE_CONTROL_WC_FLUSH | 608428d7b3dSmrg (sna->kgem.gen >= 045 ? GEN4_PIPE_CONTROL_TC_FLUSH : 0) | 609428d7b3dSmrg (4 - 2)); 610428d7b3dSmrg OUT_BATCH(0); 611428d7b3dSmrg OUT_BATCH(0); 612428d7b3dSmrg OUT_BATCH(0); 613428d7b3dSmrg#else 614428d7b3dSmrg OUT_BATCH(MI_FLUSH); 615428d7b3dSmrg#endif 616428d7b3dSmrg} 617428d7b3dSmrg 618428d7b3dSmrgstatic void gen4_emit_primitive(struct sna *sna) 619428d7b3dSmrg{ 620428d7b3dSmrg if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) { 621428d7b3dSmrg sna->render.vertex_offset = sna->kgem.nbatch - 5; 622428d7b3dSmrg return; 623428d7b3dSmrg } 624428d7b3dSmrg 625428d7b3dSmrg OUT_BATCH(GEN4_3DPRIMITIVE | 626428d7b3dSmrg GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL | 627428d7b3dSmrg (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) | 628428d7b3dSmrg (0 << 9) | 629428d7b3dSmrg 4); 630428d7b3dSmrg sna->render.vertex_offset = sna->kgem.nbatch; 631428d7b3dSmrg OUT_BATCH(0); /* vertex count, to be filled in later */ 632428d7b3dSmrg OUT_BATCH(sna->render.vertex_index); 633428d7b3dSmrg OUT_BATCH(1); /* single instance */ 634428d7b3dSmrg OUT_BATCH(0); /* start instance location */ 635428d7b3dSmrg OUT_BATCH(0); /* index buffer offset, ignored */ 636428d7b3dSmrg sna->render.vertex_start = sna->render.vertex_index; 637428d7b3dSmrg 638428d7b3dSmrg sna->render_state.gen4.last_primitive = sna->kgem.nbatch; 639428d7b3dSmrg} 640428d7b3dSmrg 641428d7b3dSmrgstatic bool gen4_rectangle_begin(struct sna *sna, 642428d7b3dSmrg const struct sna_composite_op *op) 643428d7b3dSmrg{ 644428d7b3dSmrg unsigned int id = 1 << op->u.gen4.ve_id; 645428d7b3dSmrg int ndwords; 646428d7b3dSmrg 647428d7b3dSmrg if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) 648428d7b3dSmrg return true; 649428d7b3dSmrg 650428d7b3dSmrg /* 7xpipelined pointers + 6xprimitive + 1xflush */ 651428d7b3dSmrg ndwords = op->need_magic_ca_pass? 19 : 6; 652428d7b3dSmrg if ((sna->render.vb_id & id) == 0) 653428d7b3dSmrg ndwords += 5; 654428d7b3dSmrg ndwords += 8*FORCE_FLUSH; 655428d7b3dSmrg 656428d7b3dSmrg if (!kgem_check_batch(&sna->kgem, ndwords)) 657428d7b3dSmrg return false; 658428d7b3dSmrg 659428d7b3dSmrg if ((sna->render.vb_id & id) == 0) 660428d7b3dSmrg gen4_emit_vertex_buffer(sna, op); 661428d7b3dSmrg if (sna->render.vertex_offset == 0) 662428d7b3dSmrg gen4_emit_primitive(sna); 663428d7b3dSmrg 664428d7b3dSmrg return true; 665428d7b3dSmrg} 666428d7b3dSmrg 667428d7b3dSmrgstatic int gen4_get_rectangles__flush(struct sna *sna, 668428d7b3dSmrg const struct sna_composite_op *op) 669428d7b3dSmrg{ 670428d7b3dSmrg /* Preventing discarding new vbo after lock contention */ 671428d7b3dSmrg if (sna_vertex_wait__locked(&sna->render)) { 672428d7b3dSmrg int rem = vertex_space(sna); 673428d7b3dSmrg if (rem > op->floats_per_rect) 674428d7b3dSmrg return rem; 675428d7b3dSmrg } 676428d7b3dSmrg 677428d7b3dSmrg if (!kgem_check_batch(&sna->kgem, 678428d7b3dSmrg 8*FORCE_FLUSH + (op->need_magic_ca_pass ? 2*19+6 : 6))) 679428d7b3dSmrg return 0; 680428d7b3dSmrg if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) 681428d7b3dSmrg return 0; 682428d7b3dSmrg 683428d7b3dSmrg if (sna->render.vertex_offset) { 684428d7b3dSmrg gen4_vertex_flush(sna); 685428d7b3dSmrg if (gen4_magic_ca_pass(sna, op)) 686428d7b3dSmrg gen4_emit_pipelined_pointers(sna, op, op->op, 687428d7b3dSmrg op->u.gen4.wm_kernel); 688428d7b3dSmrg } 689428d7b3dSmrg 690428d7b3dSmrg return gen4_vertex_finish(sna); 691428d7b3dSmrg} 692428d7b3dSmrg 693428d7b3dSmrginline static int gen4_get_rectangles(struct sna *sna, 694428d7b3dSmrg const struct sna_composite_op *op, 695428d7b3dSmrg int want, 696428d7b3dSmrg void (*emit_state)(struct sna *sna, const struct sna_composite_op *op)) 697428d7b3dSmrg{ 698428d7b3dSmrg int rem; 699428d7b3dSmrg 700428d7b3dSmrg assert(want); 701428d7b3dSmrg#if FORCE_FLUSH 702428d7b3dSmrg rem = sna->render.vertex_offset; 703428d7b3dSmrg if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) 704428d7b3dSmrg rem = sna->kgem.nbatch - 5; 705428d7b3dSmrg if (rem) { 706428d7b3dSmrg rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3; 707428d7b3dSmrg if (rem <= 0) { 708428d7b3dSmrg if (sna->render.vertex_offset) { 709428d7b3dSmrg gen4_vertex_flush(sna); 710428d7b3dSmrg if (gen4_magic_ca_pass(sna, op)) { 711428d7b3dSmrg if (kgem_check_batch(&sna->kgem, 19+6)) 712428d7b3dSmrg gen4_emit_pipelined_pointers(sna, op, op->op, 713428d7b3dSmrg op->u.gen4.wm_kernel); 714428d7b3dSmrg } 715428d7b3dSmrg } 716428d7b3dSmrg gen4_emit_pipe_break(sna); 717428d7b3dSmrg rem = MAX_FLUSH_VERTICES; 718428d7b3dSmrg } 719428d7b3dSmrg } else 720428d7b3dSmrg rem = MAX_FLUSH_VERTICES; 721428d7b3dSmrg if (want > rem) 722428d7b3dSmrg want = rem; 723428d7b3dSmrg#endif 724428d7b3dSmrg 725428d7b3dSmrgstart: 726428d7b3dSmrg rem = vertex_space(sna); 727428d7b3dSmrg if (unlikely(rem < op->floats_per_rect)) { 728428d7b3dSmrg DBG(("flushing vbo for %s: %d < %d\n", 729428d7b3dSmrg __FUNCTION__, rem, op->floats_per_rect)); 730428d7b3dSmrg rem = gen4_get_rectangles__flush(sna, op); 731428d7b3dSmrg if (unlikely(rem == 0)) 732428d7b3dSmrg goto flush; 733428d7b3dSmrg } 734428d7b3dSmrg 735428d7b3dSmrg if (unlikely(sna->render.vertex_offset == 0)) { 736428d7b3dSmrg if (!gen4_rectangle_begin(sna, op)) 737428d7b3dSmrg goto flush; 738428d7b3dSmrg else 739428d7b3dSmrg goto start; 740428d7b3dSmrg } 741428d7b3dSmrg 742428d7b3dSmrg assert(rem <= vertex_space(sna)); 743428d7b3dSmrg assert(op->floats_per_rect <= rem); 744428d7b3dSmrg if (want > 1 && want * op->floats_per_rect > rem) 745428d7b3dSmrg want = rem / op->floats_per_rect; 746428d7b3dSmrg 747428d7b3dSmrg sna->render.vertex_index += 3*want; 748428d7b3dSmrg return want; 749428d7b3dSmrg 750428d7b3dSmrgflush: 751428d7b3dSmrg if (sna->render.vertex_offset) { 752428d7b3dSmrg gen4_vertex_flush(sna); 753428d7b3dSmrg gen4_magic_ca_pass(sna, op); 754428d7b3dSmrg } 755428d7b3dSmrg sna_vertex_wait__locked(&sna->render); 756428d7b3dSmrg _kgem_submit(&sna->kgem); 757428d7b3dSmrg emit_state(sna, op); 758428d7b3dSmrg goto start; 759428d7b3dSmrg} 760428d7b3dSmrg 761428d7b3dSmrgstatic uint32_t * 762428d7b3dSmrggen4_composite_get_binding_table(struct sna *sna, uint16_t *offset) 763428d7b3dSmrg{ 764428d7b3dSmrg sna->kgem.surface -= 765428d7b3dSmrg sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); 766428d7b3dSmrg 767428d7b3dSmrg DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); 768428d7b3dSmrg 769428d7b3dSmrg /* Clear all surplus entries to zero in case of prefetch */ 770428d7b3dSmrg *offset = sna->kgem.surface; 771428d7b3dSmrg return memset(sna->kgem.batch + sna->kgem.surface, 772428d7b3dSmrg 0, sizeof(struct gen4_surface_state_padded)); 773428d7b3dSmrg} 774428d7b3dSmrg 775428d7b3dSmrgstatic void 776428d7b3dSmrggen4_emit_urb(struct sna *sna) 777428d7b3dSmrg{ 778428d7b3dSmrg int urb_vs_end; 779428d7b3dSmrg int urb_gs_end; 780428d7b3dSmrg int urb_cl_end; 781428d7b3dSmrg int urb_sf_end; 782428d7b3dSmrg int urb_cs_end; 783428d7b3dSmrg 784428d7b3dSmrg if (!sna->render_state.gen4.needs_urb) 785428d7b3dSmrg return; 786428d7b3dSmrg 787428d7b3dSmrg urb_vs_end = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; 788428d7b3dSmrg urb_gs_end = urb_vs_end + URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; 789428d7b3dSmrg urb_cl_end = urb_gs_end + URB_CL_ENTRIES * URB_CL_ENTRY_SIZE; 790428d7b3dSmrg urb_sf_end = urb_cl_end + URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; 791428d7b3dSmrg urb_cs_end = urb_sf_end + URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; 792428d7b3dSmrg assert(urb_cs_end <= 256); 793428d7b3dSmrg 794428d7b3dSmrg while ((sna->kgem.nbatch & 15) > 12) 795428d7b3dSmrg OUT_BATCH(MI_NOOP); 796428d7b3dSmrg 797428d7b3dSmrg OUT_BATCH(GEN4_URB_FENCE | 798428d7b3dSmrg UF0_CS_REALLOC | 799428d7b3dSmrg UF0_SF_REALLOC | 800428d7b3dSmrg UF0_CLIP_REALLOC | 801428d7b3dSmrg UF0_GS_REALLOC | 802428d7b3dSmrg UF0_VS_REALLOC | 803428d7b3dSmrg 1); 804428d7b3dSmrg OUT_BATCH(urb_cl_end << UF1_CLIP_FENCE_SHIFT | 805428d7b3dSmrg urb_gs_end << UF1_GS_FENCE_SHIFT | 806428d7b3dSmrg urb_vs_end << UF1_VS_FENCE_SHIFT); 807428d7b3dSmrg OUT_BATCH(urb_cs_end << UF2_CS_FENCE_SHIFT | 808428d7b3dSmrg urb_sf_end << UF2_SF_FENCE_SHIFT); 809428d7b3dSmrg 810428d7b3dSmrg /* Constant buffer state */ 811428d7b3dSmrg OUT_BATCH(GEN4_CS_URB_STATE | 0); 812428d7b3dSmrg OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0); 813428d7b3dSmrg 814428d7b3dSmrg sna->render_state.gen4.needs_urb = false; 815428d7b3dSmrg} 816428d7b3dSmrg 817428d7b3dSmrgstatic void 818428d7b3dSmrggen4_emit_state_base_address(struct sna *sna) 819428d7b3dSmrg{ 820428d7b3dSmrg assert(sna->render_state.gen4.general_bo->proxy == NULL); 821428d7b3dSmrg OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4); 822428d7b3dSmrg OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */ 823428d7b3dSmrg sna->kgem.nbatch, 824428d7b3dSmrg sna->render_state.gen4.general_bo, 825428d7b3dSmrg I915_GEM_DOMAIN_INSTRUCTION << 16, 826428d7b3dSmrg BASE_ADDRESS_MODIFY)); 827428d7b3dSmrg OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */ 828428d7b3dSmrg sna->kgem.nbatch, 829428d7b3dSmrg NULL, 830428d7b3dSmrg I915_GEM_DOMAIN_INSTRUCTION << 16, 831428d7b3dSmrg BASE_ADDRESS_MODIFY)); 832428d7b3dSmrg OUT_BATCH(0); /* media */ 833428d7b3dSmrg 834428d7b3dSmrg /* upper bounds, all disabled */ 835428d7b3dSmrg OUT_BATCH(BASE_ADDRESS_MODIFY); 836428d7b3dSmrg OUT_BATCH(0); 837428d7b3dSmrg} 838428d7b3dSmrg 839428d7b3dSmrgstatic void 840428d7b3dSmrggen4_emit_invariant(struct sna *sna) 841428d7b3dSmrg{ 842428d7b3dSmrg assert(sna->kgem.surface == sna->kgem.batch_size); 843428d7b3dSmrg 844428d7b3dSmrg if (sna->kgem.gen >= 045) 845428d7b3dSmrg OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); 846428d7b3dSmrg else 847428d7b3dSmrg OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D); 848428d7b3dSmrg 849428d7b3dSmrg gen4_emit_state_base_address(sna); 850428d7b3dSmrg 851428d7b3dSmrg sna->render_state.gen4.needs_invariant = false; 852428d7b3dSmrg} 853428d7b3dSmrg 854428d7b3dSmrgstatic void 855428d7b3dSmrggen4_get_batch(struct sna *sna, const struct sna_composite_op *op) 856428d7b3dSmrg{ 857428d7b3dSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); 858428d7b3dSmrg 859428d7b3dSmrg if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) { 860428d7b3dSmrg DBG(("%s: flushing batch: %d < %d+%d\n", 861428d7b3dSmrg __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, 862428d7b3dSmrg 150, 4*8)); 863428d7b3dSmrg kgem_submit(&sna->kgem); 864428d7b3dSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 865428d7b3dSmrg } 866428d7b3dSmrg 867428d7b3dSmrg if (sna->render_state.gen4.needs_invariant) 868428d7b3dSmrg gen4_emit_invariant(sna); 869428d7b3dSmrg} 870428d7b3dSmrg 871428d7b3dSmrgstatic void 872428d7b3dSmrggen4_align_vertex(struct sna *sna, const struct sna_composite_op *op) 873428d7b3dSmrg{ 874428d7b3dSmrg assert(op->floats_per_rect == 3*op->floats_per_vertex); 875428d7b3dSmrg if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) { 876428d7b3dSmrg DBG(("aligning vertex: was %d, now %d floats per vertex\n", 877428d7b3dSmrg sna->render_state.gen4.floats_per_vertex, 878428d7b3dSmrg op->floats_per_vertex)); 879428d7b3dSmrg gen4_vertex_align(sna, op); 880428d7b3dSmrg sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex; 881428d7b3dSmrg } 882428d7b3dSmrg} 883428d7b3dSmrg 884428d7b3dSmrgstatic void 885428d7b3dSmrggen4_emit_binding_table(struct sna *sna, uint16_t offset) 886428d7b3dSmrg{ 887428d7b3dSmrg if (sna->render_state.gen4.surface_table == offset) 888428d7b3dSmrg return; 889428d7b3dSmrg 890428d7b3dSmrg sna->render_state.gen4.surface_table = offset; 891428d7b3dSmrg 892428d7b3dSmrg /* Binding table pointers */ 893428d7b3dSmrg OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4); 894428d7b3dSmrg OUT_BATCH(0); /* vs */ 895428d7b3dSmrg OUT_BATCH(0); /* gs */ 896428d7b3dSmrg OUT_BATCH(0); /* clip */ 897428d7b3dSmrg OUT_BATCH(0); /* sf */ 898428d7b3dSmrg /* Only the PS uses the binding table */ 899428d7b3dSmrg OUT_BATCH(offset*4); 900428d7b3dSmrg} 901428d7b3dSmrg 902428d7b3dSmrgstatic void 903428d7b3dSmrggen4_emit_pipelined_pointers(struct sna *sna, 904428d7b3dSmrg const struct sna_composite_op *op, 905428d7b3dSmrg int blend, int kernel) 906428d7b3dSmrg{ 907428d7b3dSmrg uint16_t sp, bp; 908428d7b3dSmrg uint32_t key; 909428d7b3dSmrg 910428d7b3dSmrg DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n", 911428d7b3dSmrg __FUNCTION__, op->u.gen4.ve_id & 2, 912428d7b3dSmrg op->src.filter, op->src.repeat, 913428d7b3dSmrg op->mask.filter, op->mask.repeat, 914428d7b3dSmrg kernel, blend, op->has_component_alpha, (int)op->dst.format)); 915428d7b3dSmrg 916428d7b3dSmrg sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat, 917428d7b3dSmrg op->mask.filter, op->mask.repeat, 918428d7b3dSmrg kernel); 919428d7b3dSmrg bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format); 920428d7b3dSmrg 921428d7b3dSmrg DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp)); 922428d7b3dSmrg key = sp | (uint32_t)bp << 16; 923428d7b3dSmrg if (key == sna->render_state.gen4.last_pipelined_pointers) 924428d7b3dSmrg return; 925428d7b3dSmrg 926428d7b3dSmrg OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5); 927428d7b3dSmrg OUT_BATCH(sna->render_state.gen4.vs); 928428d7b3dSmrg OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */ 929428d7b3dSmrg OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */ 930428d7b3dSmrg OUT_BATCH(sna->render_state.gen4.sf); 931428d7b3dSmrg OUT_BATCH(sna->render_state.gen4.wm + sp); 932428d7b3dSmrg OUT_BATCH(sna->render_state.gen4.cc + bp); 933428d7b3dSmrg 934428d7b3dSmrg sna->render_state.gen4.last_pipelined_pointers = key; 935428d7b3dSmrg gen4_emit_urb(sna); 936428d7b3dSmrg} 937428d7b3dSmrg 938428d7b3dSmrgstatic bool 939428d7b3dSmrggen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op) 940428d7b3dSmrg{ 941428d7b3dSmrg uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); 942428d7b3dSmrg uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; 943428d7b3dSmrg 944428d7b3dSmrg assert(!too_large(abs(op->dst.x), abs(op->dst.y))); 945428d7b3dSmrg assert(!too_large(op->dst.width, op->dst.height)); 946428d7b3dSmrg 947428d7b3dSmrg if (sna->render_state.gen4.drawrect_limit == limit && 948428d7b3dSmrg sna->render_state.gen4.drawrect_offset == offset) 949428d7b3dSmrg return true; 950428d7b3dSmrg 951428d7b3dSmrg sna->render_state.gen4.drawrect_offset = offset; 952428d7b3dSmrg sna->render_state.gen4.drawrect_limit = limit; 953428d7b3dSmrg 954428d7b3dSmrg OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); 955428d7b3dSmrg OUT_BATCH(0); 956428d7b3dSmrg OUT_BATCH(limit); 957428d7b3dSmrg OUT_BATCH(offset); 958428d7b3dSmrg return false; 959428d7b3dSmrg} 960428d7b3dSmrg 961428d7b3dSmrgstatic void 962428d7b3dSmrggen4_emit_vertex_elements(struct sna *sna, 963428d7b3dSmrg const struct sna_composite_op *op) 964428d7b3dSmrg{ 965428d7b3dSmrg /* 966428d7b3dSmrg * vertex data in vertex buffer 967428d7b3dSmrg * position: (x, y) 968428d7b3dSmrg * texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0) 969428d7b3dSmrg * texture coordinate 1 if (has_mask is true): same as above 970428d7b3dSmrg */ 971428d7b3dSmrg struct gen4_render_state *render = &sna->render_state.gen4; 972428d7b3dSmrg uint32_t src_format, dw; 973428d7b3dSmrg int id = op->u.gen4.ve_id; 974428d7b3dSmrg 975428d7b3dSmrg if (render->ve_id == id) 976428d7b3dSmrg return; 977428d7b3dSmrg render->ve_id = id; 978428d7b3dSmrg 979428d7b3dSmrg /* The VUE layout 980428d7b3dSmrg * dword 0-3: position (x, y, 1.0, 1.0), 981428d7b3dSmrg * dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0) 982428d7b3dSmrg * [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0) 983428d7b3dSmrg */ 984428d7b3dSmrg OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1)); 985428d7b3dSmrg 986428d7b3dSmrg /* x,y */ 987428d7b3dSmrg OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 988428d7b3dSmrg GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | 989428d7b3dSmrg 0 << VE0_OFFSET_SHIFT); 990428d7b3dSmrg OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | 991428d7b3dSmrg VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | 992428d7b3dSmrg VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT | 993428d7b3dSmrg VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT | 994428d7b3dSmrg (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); 995428d7b3dSmrg 996428d7b3dSmrg /* u0, v0, w0 */ 997428d7b3dSmrg /* u0, v0, w0 */ 998428d7b3dSmrg DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3)); 999428d7b3dSmrg dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; 1000428d7b3dSmrg switch (id & 3) { 1001428d7b3dSmrg default: 1002428d7b3dSmrg assert(0); 1003428d7b3dSmrg case 0: 1004428d7b3dSmrg src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED; 1005428d7b3dSmrg dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 1006428d7b3dSmrg dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 1007428d7b3dSmrg dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; 1008428d7b3dSmrg break; 1009428d7b3dSmrg case 1: 1010428d7b3dSmrg src_format = GEN4_SURFACEFORMAT_R32_FLOAT; 1011428d7b3dSmrg dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 1012428d7b3dSmrg dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; 1013428d7b3dSmrg dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; 1014428d7b3dSmrg break; 1015428d7b3dSmrg case 2: 1016428d7b3dSmrg src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT; 1017428d7b3dSmrg dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 1018428d7b3dSmrg dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 1019428d7b3dSmrg dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; 1020428d7b3dSmrg break; 1021428d7b3dSmrg case 3: 1022428d7b3dSmrg src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT; 1023428d7b3dSmrg dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 1024428d7b3dSmrg dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 1025428d7b3dSmrg dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; 1026428d7b3dSmrg break; 1027428d7b3dSmrg } 1028428d7b3dSmrg OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 1029428d7b3dSmrg src_format << VE0_FORMAT_SHIFT | 1030428d7b3dSmrg 4 << VE0_OFFSET_SHIFT); 1031428d7b3dSmrg OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); 1032428d7b3dSmrg 1033428d7b3dSmrg /* u1, v1, w1 */ 1034428d7b3dSmrg if (id >> 2) { 1035428d7b3dSmrg unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float); 1036428d7b3dSmrg DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, 1037428d7b3dSmrg id >> 2, src_offset)); 1038428d7b3dSmrg dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; 1039428d7b3dSmrg switch (id >> 2) { 1040428d7b3dSmrg case 1: 1041428d7b3dSmrg src_format = GEN4_SURFACEFORMAT_R32_FLOAT; 1042428d7b3dSmrg dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 1043428d7b3dSmrg dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; 1044428d7b3dSmrg dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; 1045428d7b3dSmrg break; 1046428d7b3dSmrg default: 1047428d7b3dSmrg assert(0); 1048428d7b3dSmrg case 2: 1049428d7b3dSmrg src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT; 1050428d7b3dSmrg dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 1051428d7b3dSmrg dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 1052428d7b3dSmrg dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT; 1053428d7b3dSmrg break; 1054428d7b3dSmrg case 3: 1055428d7b3dSmrg src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT; 1056428d7b3dSmrg dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; 1057428d7b3dSmrg dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; 1058428d7b3dSmrg dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; 1059428d7b3dSmrg break; 1060428d7b3dSmrg } 1061428d7b3dSmrg OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 1062428d7b3dSmrg src_format << VE0_FORMAT_SHIFT | 1063428d7b3dSmrg src_offset << VE0_OFFSET_SHIFT); 1064428d7b3dSmrg OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); 1065428d7b3dSmrg } else { 1066428d7b3dSmrg OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | 1067428d7b3dSmrg GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | 1068428d7b3dSmrg 0 << VE0_OFFSET_SHIFT); 1069428d7b3dSmrg OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT | 1070428d7b3dSmrg VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT | 1071428d7b3dSmrg VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | 1072428d7b3dSmrg VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT | 1073428d7b3dSmrg 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); 1074428d7b3dSmrg } 1075428d7b3dSmrg} 1076428d7b3dSmrg 1077428d7b3dSmrgstatic void 1078428d7b3dSmrggen4_emit_state(struct sna *sna, 1079428d7b3dSmrg const struct sna_composite_op *op, 1080428d7b3dSmrg uint16_t wm_binding_table) 1081428d7b3dSmrg{ 1082428d7b3dSmrg bool flush; 1083428d7b3dSmrg 1084428d7b3dSmrg assert(op->dst.bo->exec); 1085428d7b3dSmrg 1086428d7b3dSmrg flush = wm_binding_table & 1; 1087428d7b3dSmrg wm_binding_table &= ~1; 1088428d7b3dSmrg 1089428d7b3dSmrg if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { 1090428d7b3dSmrg DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__, 1091428d7b3dSmrg kgem_bo_is_dirty(op->src.bo), 1092428d7b3dSmrg kgem_bo_is_dirty(op->mask.bo), 1093428d7b3dSmrg flush)); 1094428d7b3dSmrg gen4_emit_pipe_invalidate(sna); 1095428d7b3dSmrg kgem_clear_dirty(&sna->kgem); 1096428d7b3dSmrg kgem_bo_mark_dirty(op->dst.bo); 1097428d7b3dSmrg flush = false; 1098428d7b3dSmrg } 1099428d7b3dSmrg flush &= gen4_emit_drawing_rectangle(sna, op); 1100428d7b3dSmrg if (flush && op->op > PictOpSrc) 1101428d7b3dSmrg gen4_emit_pipe_flush(sna); 1102428d7b3dSmrg 1103428d7b3dSmrg gen4_emit_binding_table(sna, wm_binding_table); 1104428d7b3dSmrg gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel); 1105428d7b3dSmrg gen4_emit_vertex_elements(sna, op); 1106428d7b3dSmrg} 1107428d7b3dSmrg 1108428d7b3dSmrgstatic void 1109428d7b3dSmrggen4_bind_surfaces(struct sna *sna, 1110428d7b3dSmrg const struct sna_composite_op *op) 1111428d7b3dSmrg{ 1112428d7b3dSmrg uint32_t *binding_table; 1113428d7b3dSmrg uint16_t offset, dirty; 1114428d7b3dSmrg 1115428d7b3dSmrg gen4_get_batch(sna, op); 1116428d7b3dSmrg dirty = kgem_bo_is_dirty(op->dst.bo); 1117428d7b3dSmrg 1118428d7b3dSmrg binding_table = gen4_composite_get_binding_table(sna, &offset); 1119428d7b3dSmrg 1120428d7b3dSmrg binding_table[0] = 1121428d7b3dSmrg gen4_bind_bo(sna, 1122428d7b3dSmrg op->dst.bo, op->dst.width, op->dst.height, 1123428d7b3dSmrg gen4_get_dest_format(op->dst.format), 1124428d7b3dSmrg true); 1125428d7b3dSmrg binding_table[1] = 1126428d7b3dSmrg gen4_bind_bo(sna, 1127428d7b3dSmrg op->src.bo, op->src.width, op->src.height, 1128428d7b3dSmrg op->src.card_format, 1129428d7b3dSmrg false); 1130428d7b3dSmrg if (op->mask.bo) { 1131428d7b3dSmrg assert(op->u.gen4.ve_id >> 2); 1132428d7b3dSmrg binding_table[2] = 1133428d7b3dSmrg gen4_bind_bo(sna, 1134428d7b3dSmrg op->mask.bo, 1135428d7b3dSmrg op->mask.width, 1136428d7b3dSmrg op->mask.height, 1137428d7b3dSmrg op->mask.card_format, 1138428d7b3dSmrg false); 1139428d7b3dSmrg } 1140428d7b3dSmrg 1141428d7b3dSmrg if (sna->kgem.surface == offset && 1142428d7b3dSmrg *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table && 1143428d7b3dSmrg (op->mask.bo == NULL || 1144428d7b3dSmrg sna->kgem.batch[sna->render_state.gen4.surface_table+2] == binding_table[2])) { 1145428d7b3dSmrg sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); 1146428d7b3dSmrg offset = sna->render_state.gen4.surface_table; 1147428d7b3dSmrg } 1148428d7b3dSmrg 1149428d7b3dSmrg if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0]) 1150428d7b3dSmrg dirty = 0; 1151428d7b3dSmrg 1152428d7b3dSmrg gen4_emit_state(sna, op, offset | dirty); 1153428d7b3dSmrg} 1154428d7b3dSmrg 1155428d7b3dSmrgfastcall static void 1156428d7b3dSmrggen4_render_composite_blt(struct sna *sna, 1157428d7b3dSmrg const struct sna_composite_op *op, 1158428d7b3dSmrg const struct sna_composite_rectangles *r) 1159428d7b3dSmrg{ 1160428d7b3dSmrg DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", 1161428d7b3dSmrg __FUNCTION__, 1162428d7b3dSmrg r->src.x, r->src.y, op->src.offset[0], op->src.offset[1], 1163428d7b3dSmrg r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1], 1164428d7b3dSmrg r->dst.x, r->dst.y, op->dst.x, op->dst.y, 1165428d7b3dSmrg r->width, r->height)); 1166428d7b3dSmrg 1167428d7b3dSmrg gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces); 1168428d7b3dSmrg op->prim_emit(sna, op, r); 1169428d7b3dSmrg} 1170428d7b3dSmrg 1171428d7b3dSmrgfastcall static void 1172428d7b3dSmrggen4_render_composite_box(struct sna *sna, 1173428d7b3dSmrg const struct sna_composite_op *op, 1174428d7b3dSmrg const BoxRec *box) 1175428d7b3dSmrg{ 1176428d7b3dSmrg struct sna_composite_rectangles r; 1177428d7b3dSmrg 1178428d7b3dSmrg DBG((" %s: (%d, %d), (%d, %d)\n", 1179428d7b3dSmrg __FUNCTION__, 1180428d7b3dSmrg box->x1, box->y1, box->x2, box->y2)); 1181428d7b3dSmrg 1182428d7b3dSmrg gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces); 1183428d7b3dSmrg 1184428d7b3dSmrg r.dst.x = box->x1; 1185428d7b3dSmrg r.dst.y = box->y1; 1186428d7b3dSmrg r.width = box->x2 - box->x1; 1187428d7b3dSmrg r.height = box->y2 - box->y1; 1188428d7b3dSmrg r.mask = r.src = r.dst; 1189428d7b3dSmrg 1190428d7b3dSmrg op->prim_emit(sna, op, &r); 1191428d7b3dSmrg} 1192428d7b3dSmrg 1193428d7b3dSmrgstatic void 1194428d7b3dSmrggen4_render_composite_boxes__blt(struct sna *sna, 1195428d7b3dSmrg const struct sna_composite_op *op, 1196428d7b3dSmrg const BoxRec *box, int nbox) 1197428d7b3dSmrg{ 1198428d7b3dSmrg DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n", 1199428d7b3dSmrg __FUNCTION__, nbox, op->dst.x, op->dst.y, 1200428d7b3dSmrg op->src.offset[0], op->src.offset[1], 1201428d7b3dSmrg op->src.width, op->src.height, 1202428d7b3dSmrg op->mask.offset[0], op->mask.offset[1], 1203428d7b3dSmrg op->mask.width, op->mask.height)); 1204428d7b3dSmrg 1205428d7b3dSmrg do { 1206428d7b3dSmrg int nbox_this_time; 1207428d7b3dSmrg 1208428d7b3dSmrg nbox_this_time = gen4_get_rectangles(sna, op, nbox, 1209428d7b3dSmrg gen4_bind_surfaces); 1210428d7b3dSmrg nbox -= nbox_this_time; 1211428d7b3dSmrg 1212428d7b3dSmrg do { 1213428d7b3dSmrg struct sna_composite_rectangles r; 1214428d7b3dSmrg 1215428d7b3dSmrg DBG((" %s: (%d, %d), (%d, %d)\n", 1216428d7b3dSmrg __FUNCTION__, 1217428d7b3dSmrg box->x1, box->y1, box->x2, box->y2)); 1218428d7b3dSmrg 1219428d7b3dSmrg r.dst.x = box->x1; 1220428d7b3dSmrg r.dst.y = box->y1; 1221428d7b3dSmrg r.width = box->x2 - box->x1; 1222428d7b3dSmrg r.height = box->y2 - box->y1; 1223428d7b3dSmrg r.mask = r.src = r.dst; 1224428d7b3dSmrg op->prim_emit(sna, op, &r); 1225428d7b3dSmrg box++; 1226428d7b3dSmrg } while (--nbox_this_time); 1227428d7b3dSmrg } while (nbox); 1228428d7b3dSmrg} 1229428d7b3dSmrg 1230428d7b3dSmrgstatic void 1231428d7b3dSmrggen4_render_composite_boxes(struct sna *sna, 1232428d7b3dSmrg const struct sna_composite_op *op, 1233428d7b3dSmrg const BoxRec *box, int nbox) 1234428d7b3dSmrg{ 1235428d7b3dSmrg DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1236428d7b3dSmrg 1237428d7b3dSmrg do { 1238428d7b3dSmrg int nbox_this_time; 1239428d7b3dSmrg float *v; 1240428d7b3dSmrg 1241428d7b3dSmrg nbox_this_time = gen4_get_rectangles(sna, op, nbox, 1242428d7b3dSmrg gen4_bind_surfaces); 1243428d7b3dSmrg assert(nbox_this_time); 1244428d7b3dSmrg nbox -= nbox_this_time; 1245428d7b3dSmrg 1246428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1247428d7b3dSmrg sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1248428d7b3dSmrg 1249428d7b3dSmrg op->emit_boxes(op, box, nbox_this_time, v); 1250428d7b3dSmrg box += nbox_this_time; 1251428d7b3dSmrg } while (nbox); 1252428d7b3dSmrg} 1253428d7b3dSmrg 1254428d7b3dSmrg#if !FORCE_FLUSH 1255428d7b3dSmrgstatic void 1256428d7b3dSmrggen4_render_composite_boxes__thread(struct sna *sna, 1257428d7b3dSmrg const struct sna_composite_op *op, 1258428d7b3dSmrg const BoxRec *box, int nbox) 1259428d7b3dSmrg{ 1260428d7b3dSmrg DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1261428d7b3dSmrg 1262428d7b3dSmrg sna_vertex_lock(&sna->render); 1263428d7b3dSmrg do { 1264428d7b3dSmrg int nbox_this_time; 1265428d7b3dSmrg float *v; 1266428d7b3dSmrg 1267428d7b3dSmrg nbox_this_time = gen4_get_rectangles(sna, op, nbox, 1268428d7b3dSmrg gen4_bind_surfaces); 1269428d7b3dSmrg assert(nbox_this_time); 1270428d7b3dSmrg nbox -= nbox_this_time; 1271428d7b3dSmrg 1272428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 1273428d7b3dSmrg sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1274428d7b3dSmrg 1275428d7b3dSmrg sna_vertex_acquire__locked(&sna->render); 1276428d7b3dSmrg sna_vertex_unlock(&sna->render); 1277428d7b3dSmrg 1278428d7b3dSmrg op->emit_boxes(op, box, nbox_this_time, v); 1279428d7b3dSmrg box += nbox_this_time; 1280428d7b3dSmrg 1281428d7b3dSmrg sna_vertex_lock(&sna->render); 1282428d7b3dSmrg sna_vertex_release__locked(&sna->render); 1283428d7b3dSmrg } while (nbox); 1284428d7b3dSmrg sna_vertex_unlock(&sna->render); 1285428d7b3dSmrg} 1286428d7b3dSmrg#endif 1287428d7b3dSmrg 1288428d7b3dSmrg#ifndef MAX 1289428d7b3dSmrg#define MAX(a,b) ((a) > (b) ? (a) : (b)) 1290428d7b3dSmrg#endif 1291428d7b3dSmrg 1292428d7b3dSmrgstatic uint32_t gen4_bind_video_source(struct sna *sna, 1293428d7b3dSmrg struct kgem_bo *src_bo, 1294428d7b3dSmrg uint32_t src_offset, 1295428d7b3dSmrg int src_width, 1296428d7b3dSmrg int src_height, 1297428d7b3dSmrg int src_pitch, 1298428d7b3dSmrg uint32_t src_surf_format) 1299428d7b3dSmrg{ 1300428d7b3dSmrg struct gen4_surface_state *ss; 1301428d7b3dSmrg 1302428d7b3dSmrg sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); 1303428d7b3dSmrg 1304428d7b3dSmrg ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss)); 1305428d7b3dSmrg ss->ss0.surface_type = GEN4_SURFACE_2D; 1306428d7b3dSmrg ss->ss0.surface_format = src_surf_format; 1307428d7b3dSmrg ss->ss0.color_blend = 1; 1308428d7b3dSmrg 1309428d7b3dSmrg ss->ss1.base_addr = 1310428d7b3dSmrg kgem_add_reloc(&sna->kgem, 1311428d7b3dSmrg sna->kgem.surface + 1, 1312428d7b3dSmrg src_bo, 1313428d7b3dSmrg I915_GEM_DOMAIN_SAMPLER << 16, 1314428d7b3dSmrg src_offset); 1315428d7b3dSmrg 1316428d7b3dSmrg ss->ss2.width = src_width - 1; 1317428d7b3dSmrg ss->ss2.height = src_height - 1; 1318428d7b3dSmrg ss->ss3.pitch = src_pitch - 1; 1319428d7b3dSmrg 1320428d7b3dSmrg return sna->kgem.surface * sizeof(uint32_t); 1321428d7b3dSmrg} 1322428d7b3dSmrg 1323428d7b3dSmrgstatic void gen4_video_bind_surfaces(struct sna *sna, 1324428d7b3dSmrg const struct sna_composite_op *op) 1325428d7b3dSmrg{ 1326428d7b3dSmrg struct sna_video_frame *frame = op->priv; 1327428d7b3dSmrg uint32_t src_surf_format; 1328428d7b3dSmrg uint32_t src_surf_base[6]; 1329428d7b3dSmrg int src_width[6]; 1330428d7b3dSmrg int src_height[6]; 1331428d7b3dSmrg int src_pitch[6]; 1332428d7b3dSmrg uint32_t *binding_table; 1333428d7b3dSmrg uint16_t offset, dirty; 1334428d7b3dSmrg int n_src, n; 1335428d7b3dSmrg 1336428d7b3dSmrg src_surf_base[0] = 0; 1337428d7b3dSmrg src_surf_base[1] = 0; 1338428d7b3dSmrg src_surf_base[2] = frame->VBufOffset; 1339428d7b3dSmrg src_surf_base[3] = frame->VBufOffset; 1340428d7b3dSmrg src_surf_base[4] = frame->UBufOffset; 1341428d7b3dSmrg src_surf_base[5] = frame->UBufOffset; 1342428d7b3dSmrg 1343428d7b3dSmrg if (is_planar_fourcc(frame->id)) { 1344428d7b3dSmrg src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM; 1345428d7b3dSmrg src_width[1] = src_width[0] = frame->width; 1346428d7b3dSmrg src_height[1] = src_height[0] = frame->height; 1347428d7b3dSmrg src_pitch[1] = src_pitch[0] = frame->pitch[1]; 1348428d7b3dSmrg src_width[4] = src_width[5] = src_width[2] = src_width[3] = 1349428d7b3dSmrg frame->width / 2; 1350428d7b3dSmrg src_height[4] = src_height[5] = src_height[2] = src_height[3] = 1351428d7b3dSmrg frame->height / 2; 1352428d7b3dSmrg src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = 1353428d7b3dSmrg frame->pitch[0]; 1354428d7b3dSmrg n_src = 6; 1355428d7b3dSmrg } else { 1356428d7b3dSmrg if (frame->id == FOURCC_UYVY) 1357428d7b3dSmrg src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY; 1358428d7b3dSmrg else 1359428d7b3dSmrg src_surf_format = GEN4_SURFACEFORMAT_YCRCB_NORMAL; 1360428d7b3dSmrg 1361428d7b3dSmrg src_width[0] = frame->width; 1362428d7b3dSmrg src_height[0] = frame->height; 1363428d7b3dSmrg src_pitch[0] = frame->pitch[0]; 1364428d7b3dSmrg n_src = 1; 1365428d7b3dSmrg } 1366428d7b3dSmrg 1367428d7b3dSmrg gen4_get_batch(sna, op); 1368428d7b3dSmrg dirty = kgem_bo_is_dirty(op->dst.bo); 1369428d7b3dSmrg 1370428d7b3dSmrg binding_table = gen4_composite_get_binding_table(sna, &offset); 1371428d7b3dSmrg binding_table[0] = 1372428d7b3dSmrg gen4_bind_bo(sna, 1373428d7b3dSmrg op->dst.bo, op->dst.width, op->dst.height, 1374428d7b3dSmrg gen4_get_dest_format(op->dst.format), 1375428d7b3dSmrg true); 1376428d7b3dSmrg for (n = 0; n < n_src; n++) { 1377428d7b3dSmrg binding_table[1+n] = 1378428d7b3dSmrg gen4_bind_video_source(sna, 1379428d7b3dSmrg frame->bo, 1380428d7b3dSmrg src_surf_base[n], 1381428d7b3dSmrg src_width[n], 1382428d7b3dSmrg src_height[n], 1383428d7b3dSmrg src_pitch[n], 1384428d7b3dSmrg src_surf_format); 1385428d7b3dSmrg } 1386428d7b3dSmrg 1387428d7b3dSmrg if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0]) 1388428d7b3dSmrg dirty = 0; 1389428d7b3dSmrg 1390428d7b3dSmrg gen4_emit_state(sna, op, offset | dirty); 1391428d7b3dSmrg} 1392428d7b3dSmrg 1393428d7b3dSmrgstatic bool 1394428d7b3dSmrggen4_render_video(struct sna *sna, 1395428d7b3dSmrg struct sna_video *video, 1396428d7b3dSmrg struct sna_video_frame *frame, 1397428d7b3dSmrg RegionPtr dstRegion, 1398428d7b3dSmrg PixmapPtr pixmap) 1399428d7b3dSmrg{ 1400428d7b3dSmrg struct sna_composite_op tmp; 1401428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(pixmap); 1402428d7b3dSmrg int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; 1403428d7b3dSmrg int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; 1404428d7b3dSmrg int src_width = frame->src.x2 - frame->src.x1; 1405428d7b3dSmrg int src_height = frame->src.y2 - frame->src.y1; 1406428d7b3dSmrg float src_offset_x, src_offset_y; 1407428d7b3dSmrg float src_scale_x, src_scale_y; 1408428d7b3dSmrg int nbox, pix_xoff, pix_yoff; 1409428d7b3dSmrg const BoxRec *box; 1410428d7b3dSmrg 1411428d7b3dSmrg DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, 1412428d7b3dSmrg src_width, src_height, dst_width, dst_height)); 1413428d7b3dSmrg 1414428d7b3dSmrg assert(priv->gpu_bo); 1415428d7b3dSmrg memset(&tmp, 0, sizeof(tmp)); 1416428d7b3dSmrg 1417428d7b3dSmrg tmp.op = PictOpSrc; 1418428d7b3dSmrg tmp.dst.pixmap = pixmap; 1419428d7b3dSmrg tmp.dst.width = pixmap->drawable.width; 1420428d7b3dSmrg tmp.dst.height = pixmap->drawable.height; 1421428d7b3dSmrg tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth); 1422428d7b3dSmrg tmp.dst.bo = priv->gpu_bo; 1423428d7b3dSmrg 1424428d7b3dSmrg if (src_width == dst_width && src_height == dst_height) 1425428d7b3dSmrg tmp.src.filter = SAMPLER_FILTER_NEAREST; 1426428d7b3dSmrg else 1427428d7b3dSmrg tmp.src.filter = SAMPLER_FILTER_BILINEAR; 1428428d7b3dSmrg tmp.src.repeat = SAMPLER_EXTEND_PAD; 1429428d7b3dSmrg tmp.src.bo = frame->bo; 1430428d7b3dSmrg tmp.mask.bo = NULL; 1431428d7b3dSmrg tmp.u.gen4.wm_kernel = 1432428d7b3dSmrg is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED; 1433428d7b3dSmrg tmp.u.gen4.ve_id = 2; 1434428d7b3dSmrg tmp.is_affine = true; 1435428d7b3dSmrg tmp.floats_per_vertex = 3; 1436428d7b3dSmrg tmp.floats_per_rect = 9; 1437428d7b3dSmrg tmp.priv = frame; 1438428d7b3dSmrg 1439428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { 1440428d7b3dSmrg kgem_submit(&sna->kgem); 1441428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) 1442428d7b3dSmrg return false; 1443428d7b3dSmrg } 1444428d7b3dSmrg 1445428d7b3dSmrg gen4_align_vertex(sna, &tmp); 1446428d7b3dSmrg gen4_video_bind_surfaces(sna, &tmp); 1447428d7b3dSmrg 1448428d7b3dSmrg /* Set up the offset for translating from the given region (in screen 1449428d7b3dSmrg * coordinates) to the backing pixmap. 1450428d7b3dSmrg */ 1451428d7b3dSmrg#ifdef COMPOSITE 1452428d7b3dSmrg pix_xoff = -pixmap->screen_x + pixmap->drawable.x; 1453428d7b3dSmrg pix_yoff = -pixmap->screen_y + pixmap->drawable.y; 1454428d7b3dSmrg#else 1455428d7b3dSmrg pix_xoff = 0; 1456428d7b3dSmrg pix_yoff = 0; 1457428d7b3dSmrg#endif 1458428d7b3dSmrg 1459428d7b3dSmrg src_scale_x = (float)src_width / dst_width / frame->width; 1460428d7b3dSmrg src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; 1461428d7b3dSmrg 1462428d7b3dSmrg src_scale_y = (float)src_height / dst_height / frame->height; 1463428d7b3dSmrg src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; 1464428d7b3dSmrg 1465428d7b3dSmrg box = region_rects(dstRegion); 1466428d7b3dSmrg nbox = region_num_rects(dstRegion); 1467428d7b3dSmrg do { 1468428d7b3dSmrg int n; 1469428d7b3dSmrg 1470428d7b3dSmrg n = gen4_get_rectangles(sna, &tmp, nbox, 1471428d7b3dSmrg gen4_video_bind_surfaces); 1472428d7b3dSmrg assert(n); 1473428d7b3dSmrg nbox -= n; 1474428d7b3dSmrg 1475428d7b3dSmrg do { 1476428d7b3dSmrg BoxRec r; 1477428d7b3dSmrg 1478428d7b3dSmrg r.x1 = box->x1 + pix_xoff; 1479428d7b3dSmrg r.x2 = box->x2 + pix_xoff; 1480428d7b3dSmrg r.y1 = box->y1 + pix_yoff; 1481428d7b3dSmrg r.y2 = box->y2 + pix_yoff; 1482428d7b3dSmrg 1483428d7b3dSmrg OUT_VERTEX(r.x2, r.y2); 1484428d7b3dSmrg OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); 1485428d7b3dSmrg OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 1486428d7b3dSmrg 1487428d7b3dSmrg OUT_VERTEX(r.x1, r.y2); 1488428d7b3dSmrg OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 1489428d7b3dSmrg OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 1490428d7b3dSmrg 1491428d7b3dSmrg OUT_VERTEX(r.x1, r.y1); 1492428d7b3dSmrg OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 1493428d7b3dSmrg OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); 1494428d7b3dSmrg 1495428d7b3dSmrg if (!DAMAGE_IS_ALL(priv->gpu_damage)) { 1496428d7b3dSmrg sna_damage_add_box(&priv->gpu_damage, &r); 1497428d7b3dSmrg sna_damage_subtract_box(&priv->cpu_damage, &r); 1498428d7b3dSmrg } 1499428d7b3dSmrg box++; 1500428d7b3dSmrg } while (--n); 1501428d7b3dSmrg } while (nbox); 1502428d7b3dSmrg gen4_vertex_flush(sna); 1503428d7b3dSmrg 1504428d7b3dSmrg return true; 1505428d7b3dSmrg} 1506428d7b3dSmrg 1507428d7b3dSmrgstatic int 1508428d7b3dSmrggen4_composite_picture(struct sna *sna, 1509428d7b3dSmrg PicturePtr picture, 1510428d7b3dSmrg struct sna_composite_channel *channel, 1511428d7b3dSmrg int x, int y, 1512428d7b3dSmrg int w, int h, 1513428d7b3dSmrg int dst_x, int dst_y, 1514428d7b3dSmrg bool precise) 1515428d7b3dSmrg{ 1516428d7b3dSmrg PixmapPtr pixmap; 1517428d7b3dSmrg uint32_t color; 1518428d7b3dSmrg int16_t dx, dy; 1519428d7b3dSmrg 1520428d7b3dSmrg DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", 1521428d7b3dSmrg __FUNCTION__, x, y, w, h, dst_x, dst_y)); 1522428d7b3dSmrg 1523428d7b3dSmrg channel->is_solid = false; 1524428d7b3dSmrg channel->card_format = -1; 1525428d7b3dSmrg 1526428d7b3dSmrg if (sna_picture_is_solid(picture, &color)) 1527428d7b3dSmrg return gen4_channel_init_solid(sna, channel, color); 1528428d7b3dSmrg 1529428d7b3dSmrg if (picture->pDrawable == NULL) { 1530428d7b3dSmrg int ret; 1531428d7b3dSmrg 1532428d7b3dSmrg if (picture->pSourcePict->type == SourcePictTypeLinear) 1533428d7b3dSmrg return gen4_channel_init_linear(sna, picture, channel, 1534428d7b3dSmrg x, y, 1535428d7b3dSmrg w, h, 1536428d7b3dSmrg dst_x, dst_y); 1537428d7b3dSmrg 1538428d7b3dSmrg DBG(("%s -- fixup, gradient\n", __FUNCTION__)); 1539428d7b3dSmrg ret = -1; 1540428d7b3dSmrg if (!precise) 1541428d7b3dSmrg ret = sna_render_picture_approximate_gradient(sna, picture, channel, 1542428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1543428d7b3dSmrg if (ret == -1) 1544428d7b3dSmrg ret = sna_render_picture_fixup(sna, picture, channel, 1545428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1546428d7b3dSmrg return ret; 1547428d7b3dSmrg } 1548428d7b3dSmrg 1549428d7b3dSmrg if (picture->alphaMap) { 1550428d7b3dSmrg DBG(("%s -- fallback, alphamap\n", __FUNCTION__)); 1551428d7b3dSmrg return sna_render_picture_fixup(sna, picture, channel, 1552428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1553428d7b3dSmrg } 1554428d7b3dSmrg 1555428d7b3dSmrg if (!gen4_check_repeat(picture)) { 1556428d7b3dSmrg DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__)); 1557428d7b3dSmrg return sna_render_picture_fixup(sna, picture, channel, 1558428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1559428d7b3dSmrg } 1560428d7b3dSmrg 1561428d7b3dSmrg if (!gen4_check_filter(picture)) { 1562428d7b3dSmrg DBG(("%s: unhandled filter fixup\n", __FUNCTION__)); 1563428d7b3dSmrg return sna_render_picture_fixup(sna, picture, channel, 1564428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1565428d7b3dSmrg } 1566428d7b3dSmrg 1567428d7b3dSmrg channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; 1568428d7b3dSmrg channel->filter = picture->filter; 1569428d7b3dSmrg 1570428d7b3dSmrg pixmap = get_drawable_pixmap(picture->pDrawable); 1571428d7b3dSmrg get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); 1572428d7b3dSmrg 1573428d7b3dSmrg x += dx + picture->pDrawable->x; 1574428d7b3dSmrg y += dy + picture->pDrawable->y; 1575428d7b3dSmrg 1576428d7b3dSmrg channel->is_affine = sna_transform_is_affine(picture->transform); 1577428d7b3dSmrg if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) { 1578428d7b3dSmrg DBG(("%s: integer translation (%d, %d), removing\n", 1579428d7b3dSmrg __FUNCTION__, dx, dy)); 1580428d7b3dSmrg x += dx; 1581428d7b3dSmrg y += dy; 1582428d7b3dSmrg channel->transform = NULL; 1583428d7b3dSmrg channel->filter = PictFilterNearest; 1584428d7b3dSmrg 1585428d7b3dSmrg if (channel->repeat && 1586428d7b3dSmrg (x >= 0 && 1587428d7b3dSmrg y >= 0 && 1588428d7b3dSmrg x + w < pixmap->drawable.width && 1589428d7b3dSmrg y + h < pixmap->drawable.height)) { 1590428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(pixmap); 1591428d7b3dSmrg if (priv && priv->clear) { 1592428d7b3dSmrg DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); 1593428d7b3dSmrg return gen4_channel_init_solid(sna, channel, priv->clear_color); 1594428d7b3dSmrg } 1595428d7b3dSmrg } 1596428d7b3dSmrg } else 1597428d7b3dSmrg channel->transform = picture->transform; 1598428d7b3dSmrg 1599428d7b3dSmrg channel->pict_format = picture->format; 1600428d7b3dSmrg channel->card_format = gen4_get_card_format(picture->format); 1601428d7b3dSmrg if (channel->card_format == -1) 1602428d7b3dSmrg return sna_render_picture_convert(sna, picture, channel, pixmap, 1603428d7b3dSmrg x, y, w, h, dst_x, dst_y, 1604428d7b3dSmrg false); 1605428d7b3dSmrg 1606428d7b3dSmrg if (too_large(pixmap->drawable.width, pixmap->drawable.height)) 1607428d7b3dSmrg return sna_render_picture_extract(sna, picture, channel, 1608428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1609428d7b3dSmrg 1610428d7b3dSmrg return sna_render_pixmap_bo(sna, channel, pixmap, 1611428d7b3dSmrg x, y, w, h, dst_x, dst_y); 1612428d7b3dSmrg} 1613428d7b3dSmrg 1614428d7b3dSmrgstatic void gen4_composite_channel_convert(struct sna_composite_channel *channel) 1615428d7b3dSmrg{ 1616428d7b3dSmrg DBG(("%s: repeat %d -> %d, filter %d -> %d\n", 1617428d7b3dSmrg __FUNCTION__, 1618428d7b3dSmrg channel->repeat, gen4_repeat(channel->repeat), 1619428d7b3dSmrg channel->filter, gen4_repeat(channel->filter))); 1620428d7b3dSmrg channel->repeat = gen4_repeat(channel->repeat); 1621428d7b3dSmrg channel->filter = gen4_filter(channel->filter); 1622428d7b3dSmrg if (channel->card_format == (unsigned)-1) 1623428d7b3dSmrg channel->card_format = gen4_get_card_format(channel->pict_format); 1624428d7b3dSmrg} 1625428d7b3dSmrg 1626428d7b3dSmrgstatic void 1627428d7b3dSmrggen4_render_composite_done(struct sna *sna, 1628428d7b3dSmrg const struct sna_composite_op *op) 1629428d7b3dSmrg{ 1630428d7b3dSmrg DBG(("%s()\n", __FUNCTION__)); 1631428d7b3dSmrg 1632428d7b3dSmrg if (sna->render.vertex_offset) { 1633428d7b3dSmrg gen4_vertex_flush(sna); 1634428d7b3dSmrg gen4_magic_ca_pass(sna, op); 1635428d7b3dSmrg } 1636428d7b3dSmrg 1637428d7b3dSmrg if (op->mask.bo) 1638428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->mask.bo); 1639428d7b3dSmrg if (op->src.bo) 1640428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->src.bo); 1641428d7b3dSmrg 1642428d7b3dSmrg sna_render_composite_redirect_done(sna, op); 1643428d7b3dSmrg} 1644428d7b3dSmrg 1645428d7b3dSmrgstatic bool 1646428d7b3dSmrggen4_composite_set_target(struct sna *sna, 1647428d7b3dSmrg struct sna_composite_op *op, 1648428d7b3dSmrg PicturePtr dst, 1649428d7b3dSmrg int x, int y, int w, int h, 1650428d7b3dSmrg bool partial) 1651428d7b3dSmrg{ 1652428d7b3dSmrg BoxRec box; 1653428d7b3dSmrg unsigned hint; 1654428d7b3dSmrg 1655428d7b3dSmrg op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); 1656428d7b3dSmrg op->dst.width = op->dst.pixmap->drawable.width; 1657428d7b3dSmrg op->dst.height = op->dst.pixmap->drawable.height; 1658428d7b3dSmrg op->dst.format = dst->format; 1659428d7b3dSmrg if (w && h) { 1660428d7b3dSmrg box.x1 = x; 1661428d7b3dSmrg box.y1 = y; 1662428d7b3dSmrg box.x2 = x + w; 1663428d7b3dSmrg box.y2 = y + h; 1664428d7b3dSmrg } else 1665428d7b3dSmrg sna_render_picture_extents(dst, &box); 1666428d7b3dSmrg 1667428d7b3dSmrg hint = PREFER_GPU | FORCE_GPU | RENDER_GPU; 1668428d7b3dSmrg if (!partial) { 1669428d7b3dSmrg hint |= IGNORE_DAMAGE; 1670428d7b3dSmrg if (w == op->dst.width && h == op->dst.height) 1671428d7b3dSmrg hint |= REPLACES; 1672428d7b3dSmrg } 1673428d7b3dSmrg 1674428d7b3dSmrg op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 1675428d7b3dSmrg if (op->dst.bo == NULL) 1676428d7b3dSmrg return false; 1677428d7b3dSmrg 1678428d7b3dSmrg if (hint & REPLACES) { 1679428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 1680428d7b3dSmrg kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 1681428d7b3dSmrg } 1682428d7b3dSmrg 1683428d7b3dSmrg get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 1684428d7b3dSmrg &op->dst.x, &op->dst.y); 1685428d7b3dSmrg 1686428d7b3dSmrg DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 1687428d7b3dSmrg __FUNCTION__, 1688428d7b3dSmrg op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 1689428d7b3dSmrg op->dst.width, op->dst.height, 1690428d7b3dSmrg op->dst.bo->pitch, 1691428d7b3dSmrg op->dst.x, op->dst.y, 1692428d7b3dSmrg op->damage ? *op->damage : (void *)-1)); 1693428d7b3dSmrg 1694428d7b3dSmrg assert(op->dst.bo->proxy == NULL); 1695428d7b3dSmrg 1696428d7b3dSmrg if (too_large(op->dst.width, op->dst.height) && 1697428d7b3dSmrg !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 1698428d7b3dSmrg return false; 1699428d7b3dSmrg 1700428d7b3dSmrg return true; 1701428d7b3dSmrg} 1702428d7b3dSmrg 1703428d7b3dSmrgstatic bool 1704428d7b3dSmrgcheck_gradient(PicturePtr picture, bool precise) 1705428d7b3dSmrg{ 1706428d7b3dSmrg switch (picture->pSourcePict->type) { 1707428d7b3dSmrg case SourcePictTypeSolidFill: 1708428d7b3dSmrg case SourcePictTypeLinear: 1709428d7b3dSmrg return false; 1710428d7b3dSmrg default: 1711428d7b3dSmrg return precise; 1712428d7b3dSmrg } 1713428d7b3dSmrg} 1714428d7b3dSmrg 1715428d7b3dSmrgstatic bool 1716428d7b3dSmrghas_alphamap(PicturePtr p) 1717428d7b3dSmrg{ 1718428d7b3dSmrg return p->alphaMap != NULL; 1719428d7b3dSmrg} 1720428d7b3dSmrg 1721428d7b3dSmrgstatic bool 1722428d7b3dSmrgneed_upload(struct sna *sna, PicturePtr p) 1723428d7b3dSmrg{ 1724428d7b3dSmrg return p->pDrawable && untransformed(p) && 1725428d7b3dSmrg !is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER); 1726428d7b3dSmrg} 1727428d7b3dSmrg 1728428d7b3dSmrgstatic bool 1729428d7b3dSmrgsource_is_busy(PixmapPtr pixmap) 1730428d7b3dSmrg{ 1731428d7b3dSmrg struct sna_pixmap *priv = sna_pixmap(pixmap); 1732428d7b3dSmrg if (priv == NULL) 1733428d7b3dSmrg return false; 1734428d7b3dSmrg 1735428d7b3dSmrg if (priv->clear) 1736428d7b3dSmrg return false; 1737428d7b3dSmrg 1738428d7b3dSmrg if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 1739428d7b3dSmrg return true; 1740428d7b3dSmrg 1741428d7b3dSmrg if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 1742428d7b3dSmrg return true; 1743428d7b3dSmrg 1744428d7b3dSmrg return priv->gpu_damage && !priv->cpu_damage; 1745428d7b3dSmrg} 1746428d7b3dSmrg 1747428d7b3dSmrgstatic bool 1748428d7b3dSmrgsource_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise) 1749428d7b3dSmrg{ 1750428d7b3dSmrg if (sna_picture_is_solid(p, NULL)) 1751428d7b3dSmrg return false; 1752428d7b3dSmrg 1753428d7b3dSmrg if (p->pSourcePict) 1754428d7b3dSmrg return check_gradient(p, precise); 1755428d7b3dSmrg 1756428d7b3dSmrg if (!gen4_check_repeat(p) || !gen4_check_format(p->format)) 1757428d7b3dSmrg return true; 1758428d7b3dSmrg 1759428d7b3dSmrg /* soft errors: perfer to upload/compute rather than readback */ 1760428d7b3dSmrg if (pixmap && source_is_busy(pixmap)) 1761428d7b3dSmrg return false; 1762428d7b3dSmrg 1763428d7b3dSmrg return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p); 1764428d7b3dSmrg} 1765428d7b3dSmrg 1766428d7b3dSmrgstatic bool 1767428d7b3dSmrggen4_composite_fallback(struct sna *sna, 1768428d7b3dSmrg PicturePtr src, 1769428d7b3dSmrg PicturePtr mask, 1770428d7b3dSmrg PicturePtr dst) 1771428d7b3dSmrg{ 1772428d7b3dSmrg PixmapPtr src_pixmap; 1773428d7b3dSmrg PixmapPtr mask_pixmap; 1774428d7b3dSmrg PixmapPtr dst_pixmap; 1775428d7b3dSmrg bool src_fallback, mask_fallback; 1776428d7b3dSmrg 1777428d7b3dSmrg if (!gen4_check_dst_format(dst->format)) { 1778428d7b3dSmrg DBG(("%s: unknown destination format: %d\n", 1779428d7b3dSmrg __FUNCTION__, dst->format)); 1780428d7b3dSmrg return true; 1781428d7b3dSmrg } 1782428d7b3dSmrg 1783428d7b3dSmrg dst_pixmap = get_drawable_pixmap(dst->pDrawable); 1784428d7b3dSmrg 1785428d7b3dSmrg src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; 1786428d7b3dSmrg src_fallback = source_fallback(sna, src, src_pixmap, 1787428d7b3dSmrg dst->polyMode == PolyModePrecise); 1788428d7b3dSmrg 1789428d7b3dSmrg if (mask) { 1790428d7b3dSmrg mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; 1791428d7b3dSmrg mask_fallback = source_fallback(sna, mask, mask_pixmap, 1792428d7b3dSmrg dst->polyMode == PolyModePrecise); 1793428d7b3dSmrg } else { 1794428d7b3dSmrg mask_pixmap = NULL; 1795428d7b3dSmrg mask_fallback = false; 1796428d7b3dSmrg } 1797428d7b3dSmrg 1798428d7b3dSmrg /* If we are using the destination as a source and need to 1799428d7b3dSmrg * readback in order to upload the source, do it all 1800428d7b3dSmrg * on the cpu. 1801428d7b3dSmrg */ 1802428d7b3dSmrg if (src_pixmap == dst_pixmap && src_fallback) { 1803428d7b3dSmrg DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); 1804428d7b3dSmrg return true; 1805428d7b3dSmrg } 1806428d7b3dSmrg if (mask_pixmap == dst_pixmap && mask_fallback) { 1807428d7b3dSmrg DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); 1808428d7b3dSmrg return true; 1809428d7b3dSmrg } 1810428d7b3dSmrg 1811428d7b3dSmrg /* If anything is on the GPU, push everything out to the GPU */ 1812428d7b3dSmrg if (dst_use_gpu(dst_pixmap)) { 1813428d7b3dSmrg DBG(("%s: dst is already on the GPU, try to use GPU\n", 1814428d7b3dSmrg __FUNCTION__)); 1815428d7b3dSmrg return false; 1816428d7b3dSmrg } 1817428d7b3dSmrg 1818428d7b3dSmrg if (src_pixmap && !src_fallback) { 1819428d7b3dSmrg DBG(("%s: src is already on the GPU, try to use GPU\n", 1820428d7b3dSmrg __FUNCTION__)); 1821428d7b3dSmrg return false; 1822428d7b3dSmrg } 1823428d7b3dSmrg if (mask_pixmap && !mask_fallback) { 1824428d7b3dSmrg DBG(("%s: mask is already on the GPU, try to use GPU\n", 1825428d7b3dSmrg __FUNCTION__)); 1826428d7b3dSmrg return false; 1827428d7b3dSmrg } 1828428d7b3dSmrg 1829428d7b3dSmrg /* However if the dst is not on the GPU and we need to 1830428d7b3dSmrg * render one of the sources using the CPU, we may 1831428d7b3dSmrg * as well do the entire operation in place onthe CPU. 1832428d7b3dSmrg */ 1833428d7b3dSmrg if (src_fallback) { 1834428d7b3dSmrg DBG(("%s: dst is on the CPU and src will fallback\n", 1835428d7b3dSmrg __FUNCTION__)); 1836428d7b3dSmrg return true; 1837428d7b3dSmrg } 1838428d7b3dSmrg 1839428d7b3dSmrg if (mask_fallback) { 1840428d7b3dSmrg DBG(("%s: dst is on the CPU and mask will fallback\n", 1841428d7b3dSmrg __FUNCTION__)); 1842428d7b3dSmrg return true; 1843428d7b3dSmrg } 1844428d7b3dSmrg 1845428d7b3dSmrg if (too_large(dst_pixmap->drawable.width, 1846428d7b3dSmrg dst_pixmap->drawable.height) && 1847428d7b3dSmrg dst_is_cpu(dst_pixmap)) { 1848428d7b3dSmrg DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 1849428d7b3dSmrg return true; 1850428d7b3dSmrg } 1851428d7b3dSmrg 1852428d7b3dSmrg DBG(("%s: dst is not on the GPU and the operation should not fallback\n", 1853428d7b3dSmrg __FUNCTION__)); 1854428d7b3dSmrg return dst_use_cpu(dst_pixmap); 1855428d7b3dSmrg} 1856428d7b3dSmrg 1857428d7b3dSmrgstatic int 1858428d7b3dSmrgreuse_source(struct sna *sna, 1859428d7b3dSmrg PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 1860428d7b3dSmrg PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 1861428d7b3dSmrg{ 1862428d7b3dSmrg uint32_t color; 1863428d7b3dSmrg 1864428d7b3dSmrg if (src_x != msk_x || src_y != msk_y) 1865428d7b3dSmrg return false; 1866428d7b3dSmrg 1867428d7b3dSmrg if (src == mask) { 1868428d7b3dSmrg DBG(("%s: mask is source\n", __FUNCTION__)); 1869428d7b3dSmrg *mc = *sc; 1870428d7b3dSmrg mc->bo = kgem_bo_reference(mc->bo); 1871428d7b3dSmrg return true; 1872428d7b3dSmrg } 1873428d7b3dSmrg 1874428d7b3dSmrg if (sna_picture_is_solid(mask, &color)) 1875428d7b3dSmrg return gen4_channel_init_solid(sna, mc, color); 1876428d7b3dSmrg 1877428d7b3dSmrg if (sc->is_solid) 1878428d7b3dSmrg return false; 1879428d7b3dSmrg 1880428d7b3dSmrg if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) 1881428d7b3dSmrg return false; 1882428d7b3dSmrg 1883428d7b3dSmrg DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 1884428d7b3dSmrg 1885428d7b3dSmrg if (!sna_transform_equal(src->transform, mask->transform)) 1886428d7b3dSmrg return false; 1887428d7b3dSmrg 1888428d7b3dSmrg if (!sna_picture_alphamap_equal(src, mask)) 1889428d7b3dSmrg return false; 1890428d7b3dSmrg 1891428d7b3dSmrg if (!gen4_check_repeat(mask)) 1892428d7b3dSmrg return false; 1893428d7b3dSmrg 1894428d7b3dSmrg if (!gen4_check_filter(mask)) 1895428d7b3dSmrg return false; 1896428d7b3dSmrg 1897428d7b3dSmrg if (!gen4_check_format(mask->format)) 1898428d7b3dSmrg return false; 1899428d7b3dSmrg 1900428d7b3dSmrg DBG(("%s: reusing source channel for mask with a twist\n", 1901428d7b3dSmrg __FUNCTION__)); 1902428d7b3dSmrg 1903428d7b3dSmrg *mc = *sc; 1904428d7b3dSmrg mc->repeat = gen4_repeat(mask->repeat ? mask->repeatType : RepeatNone); 1905428d7b3dSmrg mc->filter = gen4_filter(mask->filter); 1906428d7b3dSmrg mc->pict_format = mask->format; 1907428d7b3dSmrg mc->card_format = gen4_get_card_format(mask->format); 1908428d7b3dSmrg mc->bo = kgem_bo_reference(mc->bo); 1909428d7b3dSmrg return true; 1910428d7b3dSmrg} 1911428d7b3dSmrg 1912428d7b3dSmrgstatic bool 1913428d7b3dSmrggen4_render_composite(struct sna *sna, 1914428d7b3dSmrg uint8_t op, 1915428d7b3dSmrg PicturePtr src, 1916428d7b3dSmrg PicturePtr mask, 1917428d7b3dSmrg PicturePtr dst, 1918428d7b3dSmrg int16_t src_x, int16_t src_y, 1919428d7b3dSmrg int16_t msk_x, int16_t msk_y, 1920428d7b3dSmrg int16_t dst_x, int16_t dst_y, 1921428d7b3dSmrg int16_t width, int16_t height, 1922428d7b3dSmrg unsigned flags, 1923428d7b3dSmrg struct sna_composite_op *tmp) 1924428d7b3dSmrg{ 1925428d7b3dSmrg DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, 1926428d7b3dSmrg width, height, sna->kgem.mode)); 1927428d7b3dSmrg 1928428d7b3dSmrg if (op >= ARRAY_SIZE(gen4_blend_op)) 1929428d7b3dSmrg return false; 1930428d7b3dSmrg 1931428d7b3dSmrg if (mask == NULL && 1932428d7b3dSmrg sna_blt_composite(sna, op, 1933428d7b3dSmrg src, dst, 1934428d7b3dSmrg src_x, src_y, 1935428d7b3dSmrg dst_x, dst_y, 1936428d7b3dSmrg width, height, 1937428d7b3dSmrg flags, tmp)) 1938428d7b3dSmrg return true; 1939428d7b3dSmrg 1940428d7b3dSmrg if (gen4_composite_fallback(sna, src, mask, dst)) 1941428d7b3dSmrg goto fallback; 1942428d7b3dSmrg 1943428d7b3dSmrg if (need_tiling(sna, width, height)) 1944428d7b3dSmrg return sna_tiling_composite(op, src, mask, dst, 1945428d7b3dSmrg src_x, src_y, 1946428d7b3dSmrg msk_x, msk_y, 1947428d7b3dSmrg dst_x, dst_y, 1948428d7b3dSmrg width, height, 1949428d7b3dSmrg tmp); 1950428d7b3dSmrg 1951428d7b3dSmrg if (!gen4_composite_set_target(sna, tmp, dst, 1952428d7b3dSmrg dst_x, dst_y, width, height, 1953428d7b3dSmrg flags & COMPOSITE_PARTIAL || op > PictOpSrc)) { 1954428d7b3dSmrg DBG(("%s: failed to set composite target\n", __FUNCTION__)); 1955428d7b3dSmrg goto fallback; 1956428d7b3dSmrg } 1957428d7b3dSmrg 1958428d7b3dSmrg tmp->op = op; 1959428d7b3dSmrg switch (gen4_composite_picture(sna, src, &tmp->src, 1960428d7b3dSmrg src_x, src_y, 1961428d7b3dSmrg width, height, 1962428d7b3dSmrg dst_x, dst_y, 1963428d7b3dSmrg dst->polyMode == PolyModePrecise)) { 1964428d7b3dSmrg case -1: 1965428d7b3dSmrg DBG(("%s: failed to prepare source\n", __FUNCTION__)); 1966428d7b3dSmrg goto cleanup_dst; 1967428d7b3dSmrg case 0: 1968428d7b3dSmrg if (!gen4_channel_init_solid(sna, &tmp->src, 0)) 1969428d7b3dSmrg goto cleanup_dst; 1970428d7b3dSmrg /* fall through to fixup */ 1971428d7b3dSmrg case 1: 1972428d7b3dSmrg if (mask == NULL && 1973428d7b3dSmrg sna_blt_composite__convert(sna, 1974428d7b3dSmrg dst_x, dst_y, width, height, 1975428d7b3dSmrg tmp)) 1976428d7b3dSmrg return true; 1977428d7b3dSmrg 1978428d7b3dSmrg gen4_composite_channel_convert(&tmp->src); 1979428d7b3dSmrg break; 1980428d7b3dSmrg } 1981428d7b3dSmrg 1982428d7b3dSmrg tmp->is_affine = tmp->src.is_affine; 1983428d7b3dSmrg tmp->has_component_alpha = false; 1984428d7b3dSmrg tmp->need_magic_ca_pass = false; 1985428d7b3dSmrg 1986428d7b3dSmrg if (mask) { 1987428d7b3dSmrg if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { 1988428d7b3dSmrg tmp->has_component_alpha = true; 1989428d7b3dSmrg 1990428d7b3dSmrg /* Check if it's component alpha that relies on a source alpha and on 1991428d7b3dSmrg * the source value. We can only get one of those into the single 1992428d7b3dSmrg * source value that we get to blend with. 1993428d7b3dSmrg */ 1994428d7b3dSmrg if (gen4_blend_op[op].src_alpha && 1995428d7b3dSmrg (gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) { 1996428d7b3dSmrg if (op != PictOpOver) { 1997428d7b3dSmrg DBG(("%s -- fallback: unhandled component alpha blend\n", 1998428d7b3dSmrg __FUNCTION__)); 1999428d7b3dSmrg 2000428d7b3dSmrg goto cleanup_src; 2001428d7b3dSmrg } 2002428d7b3dSmrg 2003428d7b3dSmrg tmp->need_magic_ca_pass = true; 2004428d7b3dSmrg tmp->op = PictOpOutReverse; 2005428d7b3dSmrg } 2006428d7b3dSmrg } 2007428d7b3dSmrg 2008428d7b3dSmrg if (!reuse_source(sna, 2009428d7b3dSmrg src, &tmp->src, src_x, src_y, 2010428d7b3dSmrg mask, &tmp->mask, msk_x, msk_y)) { 2011428d7b3dSmrg switch (gen4_composite_picture(sna, mask, &tmp->mask, 2012428d7b3dSmrg msk_x, msk_y, 2013428d7b3dSmrg width, height, 2014428d7b3dSmrg dst_x, dst_y, 2015428d7b3dSmrg dst->polyMode == PolyModePrecise)) { 2016428d7b3dSmrg case -1: 2017428d7b3dSmrg DBG(("%s: failed to prepare mask\n", __FUNCTION__)); 2018428d7b3dSmrg goto cleanup_src; 2019428d7b3dSmrg case 0: 2020428d7b3dSmrg if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) 2021428d7b3dSmrg goto cleanup_src; 2022428d7b3dSmrg /* fall through to fixup */ 2023428d7b3dSmrg case 1: 2024428d7b3dSmrg gen4_composite_channel_convert(&tmp->mask); 2025428d7b3dSmrg break; 2026428d7b3dSmrg } 2027428d7b3dSmrg } 2028428d7b3dSmrg 2029428d7b3dSmrg tmp->is_affine &= tmp->mask.is_affine; 2030428d7b3dSmrg } 2031428d7b3dSmrg 2032428d7b3dSmrg tmp->u.gen4.wm_kernel = 2033428d7b3dSmrg gen4_choose_composite_kernel(tmp->op, 2034428d7b3dSmrg tmp->mask.bo != NULL, 2035428d7b3dSmrg tmp->has_component_alpha, 2036428d7b3dSmrg tmp->is_affine); 2037428d7b3dSmrg tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp); 2038428d7b3dSmrg 2039428d7b3dSmrg tmp->blt = gen4_render_composite_blt; 2040428d7b3dSmrg tmp->box = gen4_render_composite_box; 2041428d7b3dSmrg tmp->boxes = gen4_render_composite_boxes__blt; 2042428d7b3dSmrg if (tmp->emit_boxes) { 2043428d7b3dSmrg tmp->boxes = gen4_render_composite_boxes; 2044428d7b3dSmrg#if !FORCE_FLUSH 2045428d7b3dSmrg tmp->thread_boxes = gen4_render_composite_boxes__thread; 2046428d7b3dSmrg#endif 2047428d7b3dSmrg } 2048428d7b3dSmrg tmp->done = gen4_render_composite_done; 2049428d7b3dSmrg 2050428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2051428d7b3dSmrg tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2052428d7b3dSmrg NULL)) { 2053428d7b3dSmrg kgem_submit(&sna->kgem); 2054428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2055428d7b3dSmrg tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2056428d7b3dSmrg NULL)) 2057428d7b3dSmrg goto cleanup_mask; 2058428d7b3dSmrg } 2059428d7b3dSmrg 2060428d7b3dSmrg gen4_align_vertex(sna, tmp); 2061428d7b3dSmrg gen4_bind_surfaces(sna, tmp); 2062428d7b3dSmrg return true; 2063428d7b3dSmrg 2064428d7b3dSmrgcleanup_mask: 2065428d7b3dSmrg if (tmp->mask.bo) { 2066428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 2067428d7b3dSmrg tmp->mask.bo = NULL; 2068428d7b3dSmrg } 2069428d7b3dSmrgcleanup_src: 2070428d7b3dSmrg if (tmp->src.bo) { 2071428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->src.bo); 2072428d7b3dSmrg tmp->src.bo = NULL; 2073428d7b3dSmrg } 2074428d7b3dSmrgcleanup_dst: 2075428d7b3dSmrg if (tmp->redirect.real_bo) { 2076428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 2077428d7b3dSmrg tmp->redirect.real_bo = NULL; 2078428d7b3dSmrg } 2079428d7b3dSmrgfallback: 2080428d7b3dSmrg return (mask == NULL && 2081428d7b3dSmrg sna_blt_composite(sna, op, 2082428d7b3dSmrg src, dst, 2083428d7b3dSmrg src_x, src_y, 2084428d7b3dSmrg dst_x, dst_y, 2085428d7b3dSmrg width, height, 2086428d7b3dSmrg flags | COMPOSITE_FALLBACK, tmp)); 2087428d7b3dSmrg} 2088428d7b3dSmrg 2089428d7b3dSmrg#if !NO_COMPOSITE_SPANS 2090428d7b3dSmrgfastcall static void 2091428d7b3dSmrggen4_render_composite_spans_box(struct sna *sna, 2092428d7b3dSmrg const struct sna_composite_spans_op *op, 2093428d7b3dSmrg const BoxRec *box, float opacity) 2094428d7b3dSmrg{ 2095428d7b3dSmrg DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2096428d7b3dSmrg __FUNCTION__, 2097428d7b3dSmrg op->base.src.offset[0], op->base.src.offset[1], 2098428d7b3dSmrg opacity, 2099428d7b3dSmrg op->base.dst.x, op->base.dst.y, 2100428d7b3dSmrg box->x1, box->y1, 2101428d7b3dSmrg box->x2 - box->x1, 2102428d7b3dSmrg box->y2 - box->y1)); 2103428d7b3dSmrg 2104428d7b3dSmrg gen4_get_rectangles(sna, &op->base, 1, gen4_bind_surfaces); 2105428d7b3dSmrg op->prim_emit(sna, op, box, opacity); 2106428d7b3dSmrg} 2107428d7b3dSmrg 2108428d7b3dSmrgstatic void 2109428d7b3dSmrggen4_render_composite_spans_boxes(struct sna *sna, 2110428d7b3dSmrg const struct sna_composite_spans_op *op, 2111428d7b3dSmrg const BoxRec *box, int nbox, 2112428d7b3dSmrg float opacity) 2113428d7b3dSmrg{ 2114428d7b3dSmrg DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 2115428d7b3dSmrg __FUNCTION__, nbox, 2116428d7b3dSmrg op->base.src.offset[0], op->base.src.offset[1], 2117428d7b3dSmrg opacity, 2118428d7b3dSmrg op->base.dst.x, op->base.dst.y)); 2119428d7b3dSmrg 2120428d7b3dSmrg do { 2121428d7b3dSmrg int nbox_this_time; 2122428d7b3dSmrg 2123428d7b3dSmrg nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox, 2124428d7b3dSmrg gen4_bind_surfaces); 2125428d7b3dSmrg nbox -= nbox_this_time; 2126428d7b3dSmrg 2127428d7b3dSmrg do { 2128428d7b3dSmrg DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2129428d7b3dSmrg box->x1, box->y1, 2130428d7b3dSmrg box->x2 - box->x1, 2131428d7b3dSmrg box->y2 - box->y1)); 2132428d7b3dSmrg 2133428d7b3dSmrg op->prim_emit(sna, op, box++, opacity); 2134428d7b3dSmrg } while (--nbox_this_time); 2135428d7b3dSmrg } while (nbox); 2136428d7b3dSmrg} 2137428d7b3dSmrg 2138428d7b3dSmrgfastcall static void 2139428d7b3dSmrggen4_render_composite_spans_boxes__thread(struct sna *sna, 2140428d7b3dSmrg const struct sna_composite_spans_op *op, 2141428d7b3dSmrg const struct sna_opacity_box *box, 2142428d7b3dSmrg int nbox) 2143428d7b3dSmrg{ 2144428d7b3dSmrg DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", 2145428d7b3dSmrg __FUNCTION__, nbox, 2146428d7b3dSmrg op->base.src.offset[0], op->base.src.offset[1], 2147428d7b3dSmrg op->base.dst.x, op->base.dst.y)); 2148428d7b3dSmrg assert(nbox); 2149428d7b3dSmrg 2150428d7b3dSmrg sna_vertex_lock(&sna->render); 2151428d7b3dSmrg do { 2152428d7b3dSmrg int nbox_this_time; 2153428d7b3dSmrg float *v; 2154428d7b3dSmrg 2155428d7b3dSmrg nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox, 2156428d7b3dSmrg gen4_bind_surfaces); 2157428d7b3dSmrg assert(nbox_this_time); 2158428d7b3dSmrg nbox -= nbox_this_time; 2159428d7b3dSmrg 2160428d7b3dSmrg v = sna->render.vertices + sna->render.vertex_used; 2161428d7b3dSmrg sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; 2162428d7b3dSmrg 2163428d7b3dSmrg sna_vertex_acquire__locked(&sna->render); 2164428d7b3dSmrg sna_vertex_unlock(&sna->render); 2165428d7b3dSmrg 2166428d7b3dSmrg op->emit_boxes(op, box, nbox_this_time, v); 2167428d7b3dSmrg box += nbox_this_time; 2168428d7b3dSmrg 2169428d7b3dSmrg sna_vertex_lock(&sna->render); 2170428d7b3dSmrg sna_vertex_release__locked(&sna->render); 2171428d7b3dSmrg } while (nbox); 2172428d7b3dSmrg sna_vertex_unlock(&sna->render); 2173428d7b3dSmrg} 2174428d7b3dSmrg 2175428d7b3dSmrgfastcall static void 2176428d7b3dSmrggen4_render_composite_spans_done(struct sna *sna, 2177428d7b3dSmrg const struct sna_composite_spans_op *op) 2178428d7b3dSmrg{ 2179428d7b3dSmrg if (sna->render.vertex_offset) 2180428d7b3dSmrg gen4_vertex_flush(sna); 2181428d7b3dSmrg 2182428d7b3dSmrg DBG(("%s()\n", __FUNCTION__)); 2183428d7b3dSmrg 2184428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2185428d7b3dSmrg sna_render_composite_redirect_done(sna, &op->base); 2186428d7b3dSmrg} 2187428d7b3dSmrg 2188428d7b3dSmrgstatic bool 2189428d7b3dSmrggen4_check_composite_spans(struct sna *sna, 2190428d7b3dSmrg uint8_t op, PicturePtr src, PicturePtr dst, 2191428d7b3dSmrg int16_t width, int16_t height, 2192428d7b3dSmrg unsigned flags) 2193428d7b3dSmrg{ 2194428d7b3dSmrg DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n", 2195428d7b3dSmrg __FUNCTION__, op, width, height, flags)); 2196428d7b3dSmrg 2197428d7b3dSmrg if (op >= ARRAY_SIZE(gen4_blend_op)) 2198428d7b3dSmrg return false; 2199428d7b3dSmrg 2200428d7b3dSmrg if (gen4_composite_fallback(sna, src, NULL, dst)) { 2201428d7b3dSmrg DBG(("%s: operation would fallback\n", __FUNCTION__)); 2202428d7b3dSmrg return false; 2203428d7b3dSmrg } 2204428d7b3dSmrg 2205428d7b3dSmrg if (need_tiling(sna, width, height) && 2206428d7b3dSmrg !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 2207428d7b3dSmrg DBG(("%s: fallback, tiled operation not on GPU\n", 2208428d7b3dSmrg __FUNCTION__)); 2209428d7b3dSmrg return false; 2210428d7b3dSmrg } 2211428d7b3dSmrg 2212428d7b3dSmrg if (FORCE_SPANS) 2213428d7b3dSmrg return FORCE_SPANS > 0; 2214428d7b3dSmrg 2215428d7b3dSmrg if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) { 2216428d7b3dSmrg struct sna_pixmap *priv; 2217428d7b3dSmrg 2218428d7b3dSmrg if (FORCE_NONRECTILINEAR_SPANS) 2219428d7b3dSmrg return FORCE_NONRECTILINEAR_SPANS > 0; 2220428d7b3dSmrg 2221428d7b3dSmrg if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0) 2222428d7b3dSmrg return false; 2223428d7b3dSmrg 2224428d7b3dSmrg priv = sna_pixmap_from_drawable(dst->pDrawable); 2225428d7b3dSmrg assert(priv); 2226428d7b3dSmrg 2227428d7b3dSmrg if (priv->cpu_bo && 2228428d7b3dSmrg __kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) 2229428d7b3dSmrg return true; 2230428d7b3dSmrg 2231428d7b3dSmrg if (flags & COMPOSITE_SPANS_INPLACE_HINT) 2232428d7b3dSmrg return false; 2233428d7b3dSmrg 2234428d7b3dSmrg return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo); 2235428d7b3dSmrg } 2236428d7b3dSmrg 2237428d7b3dSmrg return true; 2238428d7b3dSmrg} 2239428d7b3dSmrg 2240428d7b3dSmrgstatic bool 2241428d7b3dSmrggen4_render_composite_spans(struct sna *sna, 2242428d7b3dSmrg uint8_t op, 2243428d7b3dSmrg PicturePtr src, 2244428d7b3dSmrg PicturePtr dst, 2245428d7b3dSmrg int16_t src_x, int16_t src_y, 2246428d7b3dSmrg int16_t dst_x, int16_t dst_y, 2247428d7b3dSmrg int16_t width, int16_t height, 2248428d7b3dSmrg unsigned flags, 2249428d7b3dSmrg struct sna_composite_spans_op *tmp) 2250428d7b3dSmrg{ 2251428d7b3dSmrg DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, 2252428d7b3dSmrg width, height, flags, sna->kgem.ring)); 2253428d7b3dSmrg 2254428d7b3dSmrg assert(gen4_check_composite_spans(sna, op, src, dst, width, height, flags)); 2255428d7b3dSmrg 2256428d7b3dSmrg if (need_tiling(sna, width, height)) { 2257428d7b3dSmrg DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 2258428d7b3dSmrg __FUNCTION__, width, height)); 2259428d7b3dSmrg return sna_tiling_composite_spans(op, src, dst, 2260428d7b3dSmrg src_x, src_y, dst_x, dst_y, 2261428d7b3dSmrg width, height, flags, tmp); 2262428d7b3dSmrg } 2263428d7b3dSmrg 2264428d7b3dSmrg tmp->base.op = op; 2265428d7b3dSmrg if (!gen4_composite_set_target(sna, &tmp->base, dst, 2266428d7b3dSmrg dst_x, dst_y, width, height, true)) 2267428d7b3dSmrg return false; 2268428d7b3dSmrg 2269428d7b3dSmrg switch (gen4_composite_picture(sna, src, &tmp->base.src, 2270428d7b3dSmrg src_x, src_y, 2271428d7b3dSmrg width, height, 2272428d7b3dSmrg dst_x, dst_y, 2273428d7b3dSmrg dst->polyMode == PolyModePrecise)) { 2274428d7b3dSmrg case -1: 2275428d7b3dSmrg goto cleanup_dst; 2276428d7b3dSmrg case 0: 2277428d7b3dSmrg if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) 2278428d7b3dSmrg goto cleanup_dst; 2279428d7b3dSmrg /* fall through to fixup */ 2280428d7b3dSmrg case 1: 2281428d7b3dSmrg gen4_composite_channel_convert(&tmp->base.src); 2282428d7b3dSmrg break; 2283428d7b3dSmrg } 2284428d7b3dSmrg 2285428d7b3dSmrg tmp->base.mask.bo = NULL; 2286428d7b3dSmrg tmp->base.mask.filter = SAMPLER_FILTER_NEAREST; 2287428d7b3dSmrg tmp->base.mask.repeat = SAMPLER_EXTEND_NONE; 2288428d7b3dSmrg 2289428d7b3dSmrg tmp->base.is_affine = tmp->base.src.is_affine; 2290428d7b3dSmrg tmp->base.has_component_alpha = false; 2291428d7b3dSmrg tmp->base.need_magic_ca_pass = false; 2292428d7b3dSmrg 2293428d7b3dSmrg tmp->base.u.gen4.ve_id = gen4_choose_spans_emitter(sna, tmp); 2294428d7b3dSmrg tmp->base.u.gen4.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine; 2295428d7b3dSmrg 2296428d7b3dSmrg tmp->box = gen4_render_composite_spans_box; 2297428d7b3dSmrg tmp->boxes = gen4_render_composite_spans_boxes; 2298428d7b3dSmrg if (tmp->emit_boxes) 2299428d7b3dSmrg tmp->thread_boxes = gen4_render_composite_spans_boxes__thread; 2300428d7b3dSmrg tmp->done = gen4_render_composite_spans_done; 2301428d7b3dSmrg 2302428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2303428d7b3dSmrg tmp->base.dst.bo, tmp->base.src.bo, 2304428d7b3dSmrg NULL)) { 2305428d7b3dSmrg kgem_submit(&sna->kgem); 2306428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, 2307428d7b3dSmrg tmp->base.dst.bo, tmp->base.src.bo, 2308428d7b3dSmrg NULL)) 2309428d7b3dSmrg goto cleanup_src; 2310428d7b3dSmrg } 2311428d7b3dSmrg 2312428d7b3dSmrg gen4_align_vertex(sna, &tmp->base); 2313428d7b3dSmrg gen4_bind_surfaces(sna, &tmp->base); 2314428d7b3dSmrg return true; 2315428d7b3dSmrg 2316428d7b3dSmrgcleanup_src: 2317428d7b3dSmrg if (tmp->base.src.bo) 2318428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); 2319428d7b3dSmrgcleanup_dst: 2320428d7b3dSmrg if (tmp->base.redirect.real_bo) 2321428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); 2322428d7b3dSmrg return false; 2323428d7b3dSmrg} 2324428d7b3dSmrg#endif 2325428d7b3dSmrg 2326428d7b3dSmrgstatic void 2327428d7b3dSmrggen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op) 2328428d7b3dSmrg{ 2329428d7b3dSmrg uint32_t *binding_table; 2330428d7b3dSmrg uint16_t offset, dirty; 2331428d7b3dSmrg 2332428d7b3dSmrg gen4_get_batch(sna, op); 2333428d7b3dSmrg dirty = kgem_bo_is_dirty(op->dst.bo); 2334428d7b3dSmrg 2335428d7b3dSmrg binding_table = gen4_composite_get_binding_table(sna, &offset); 2336428d7b3dSmrg 2337428d7b3dSmrg binding_table[0] = 2338428d7b3dSmrg gen4_bind_bo(sna, 2339428d7b3dSmrg op->dst.bo, op->dst.width, op->dst.height, 2340428d7b3dSmrg gen4_get_dest_format(op->dst.format), 2341428d7b3dSmrg true); 2342428d7b3dSmrg binding_table[1] = 2343428d7b3dSmrg gen4_bind_bo(sna, 2344428d7b3dSmrg op->src.bo, op->src.width, op->src.height, 2345428d7b3dSmrg op->src.card_format, 2346428d7b3dSmrg false); 2347428d7b3dSmrg 2348428d7b3dSmrg if (sna->kgem.surface == offset && 2349428d7b3dSmrg *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table) { 2350428d7b3dSmrg sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); 2351428d7b3dSmrg offset = sna->render_state.gen4.surface_table; 2352428d7b3dSmrg } 2353428d7b3dSmrg 2354428d7b3dSmrg if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0]) 2355428d7b3dSmrg dirty = 0; 2356428d7b3dSmrg 2357428d7b3dSmrg gen4_emit_state(sna, op, offset | dirty); 2358428d7b3dSmrg} 2359428d7b3dSmrg 2360428d7b3dSmrgstatic void 2361428d7b3dSmrggen4_render_copy_one(struct sna *sna, 2362428d7b3dSmrg const struct sna_composite_op *op, 2363428d7b3dSmrg int sx, int sy, 2364428d7b3dSmrg int w, int h, 2365428d7b3dSmrg int dx, int dy) 2366428d7b3dSmrg{ 2367428d7b3dSmrg gen4_get_rectangles(sna, op, 1, gen4_copy_bind_surfaces); 2368428d7b3dSmrg 2369428d7b3dSmrg OUT_VERTEX(dx+w, dy+h); 2370428d7b3dSmrg OUT_VERTEX_F((sx+w)*op->src.scale[0]); 2371428d7b3dSmrg OUT_VERTEX_F((sy+h)*op->src.scale[1]); 2372428d7b3dSmrg 2373428d7b3dSmrg OUT_VERTEX(dx, dy+h); 2374428d7b3dSmrg OUT_VERTEX_F(sx*op->src.scale[0]); 2375428d7b3dSmrg OUT_VERTEX_F((sy+h)*op->src.scale[1]); 2376428d7b3dSmrg 2377428d7b3dSmrg OUT_VERTEX(dx, dy); 2378428d7b3dSmrg OUT_VERTEX_F(sx*op->src.scale[0]); 2379428d7b3dSmrg OUT_VERTEX_F(sy*op->src.scale[1]); 2380428d7b3dSmrg} 2381428d7b3dSmrg 2382428d7b3dSmrgstatic bool 2383428d7b3dSmrggen4_render_copy_boxes(struct sna *sna, uint8_t alu, 2384428d7b3dSmrg const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 2385428d7b3dSmrg const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 2386428d7b3dSmrg const BoxRec *box, int n, unsigned flags) 2387428d7b3dSmrg{ 2388428d7b3dSmrg struct sna_composite_op tmp; 2389428d7b3dSmrg 2390428d7b3dSmrg DBG(("%s x %d\n", __FUNCTION__, n)); 2391428d7b3dSmrg 2392428d7b3dSmrg if (sna_blt_compare_depth(src, dst) && 2393428d7b3dSmrg sna_blt_copy_boxes(sna, alu, 2394428d7b3dSmrg src_bo, src_dx, src_dy, 2395428d7b3dSmrg dst_bo, dst_dx, dst_dy, 2396428d7b3dSmrg dst->bitsPerPixel, 2397428d7b3dSmrg box, n)) 2398428d7b3dSmrg return true; 2399428d7b3dSmrg 2400428d7b3dSmrg if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) { 2401428d7b3dSmrgfallback_blt: 2402428d7b3dSmrg if (!sna_blt_compare_depth(src, dst)) 2403428d7b3dSmrg return false; 2404428d7b3dSmrg 2405428d7b3dSmrg return sna_blt_copy_boxes_fallback(sna, alu, 2406428d7b3dSmrg src, src_bo, src_dx, src_dy, 2407428d7b3dSmrg dst, dst_bo, dst_dx, dst_dy, 2408428d7b3dSmrg box, n); 2409428d7b3dSmrg } 2410428d7b3dSmrg 2411428d7b3dSmrg memset(&tmp, 0, sizeof(tmp)); 2412428d7b3dSmrg 2413428d7b3dSmrg DBG(("%s (%d, %d)->(%d, %d) x %d\n", 2414428d7b3dSmrg __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n)); 2415428d7b3dSmrg 2416428d7b3dSmrg if (dst->depth == src->depth) { 2417428d7b3dSmrg tmp.dst.format = sna_render_format_for_depth(dst->depth); 2418428d7b3dSmrg tmp.src.pict_format = tmp.dst.format; 2419428d7b3dSmrg } else { 2420428d7b3dSmrg tmp.dst.format = sna_format_for_depth(dst->depth); 2421428d7b3dSmrg tmp.src.pict_format = sna_format_for_depth(src->depth); 2422428d7b3dSmrg } 2423428d7b3dSmrg if (!gen4_check_format(tmp.src.pict_format)) 2424428d7b3dSmrg goto fallback_blt; 2425428d7b3dSmrg 2426428d7b3dSmrg tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear; 2427428d7b3dSmrg 2428428d7b3dSmrg tmp.dst.pixmap = (PixmapPtr)dst; 2429428d7b3dSmrg tmp.dst.width = dst->width; 2430428d7b3dSmrg tmp.dst.height = dst->height; 2431428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 2432428d7b3dSmrg tmp.dst.bo = dst_bo; 2433428d7b3dSmrg tmp.damage = NULL; 2434428d7b3dSmrg 2435428d7b3dSmrg sna_render_composite_redirect_init(&tmp); 2436428d7b3dSmrg if (too_large(tmp.dst.width, tmp.dst.height)) { 2437428d7b3dSmrg BoxRec extents = box[0]; 2438428d7b3dSmrg int i; 2439428d7b3dSmrg 2440428d7b3dSmrg for (i = 1; i < n; i++) { 2441428d7b3dSmrg if (box[i].x1 < extents.x1) 2442428d7b3dSmrg extents.x1 = box[i].x1; 2443428d7b3dSmrg if (box[i].y1 < extents.y1) 2444428d7b3dSmrg extents.y1 = box[i].y1; 2445428d7b3dSmrg 2446428d7b3dSmrg if (box[i].x2 > extents.x2) 2447428d7b3dSmrg extents.x2 = box[i].x2; 2448428d7b3dSmrg if (box[i].y2 > extents.y2) 2449428d7b3dSmrg extents.y2 = box[i].y2; 2450428d7b3dSmrg } 2451428d7b3dSmrg if (!sna_render_composite_redirect(sna, &tmp, 2452428d7b3dSmrg extents.x1 + dst_dx, 2453428d7b3dSmrg extents.y1 + dst_dy, 2454428d7b3dSmrg extents.x2 - extents.x1, 2455428d7b3dSmrg extents.y2 - extents.y1, 2456428d7b3dSmrg n > 1)) 2457428d7b3dSmrg goto fallback_tiled; 2458428d7b3dSmrg } 2459428d7b3dSmrg 2460428d7b3dSmrg tmp.src.filter = SAMPLER_FILTER_NEAREST; 2461428d7b3dSmrg tmp.src.repeat = SAMPLER_EXTEND_NONE; 2462428d7b3dSmrg tmp.src.card_format = gen4_get_card_format(tmp.src.pict_format); 2463428d7b3dSmrg if (too_large(src->width, src->height)) { 2464428d7b3dSmrg BoxRec extents = box[0]; 2465428d7b3dSmrg int i; 2466428d7b3dSmrg 2467428d7b3dSmrg for (i = 1; i < n; i++) { 2468428d7b3dSmrg if (box[i].x1 < extents.x1) 2469428d7b3dSmrg extents.x1 = box[i].x1; 2470428d7b3dSmrg if (box[i].y1 < extents.y1) 2471428d7b3dSmrg extents.y1 = box[i].y1; 2472428d7b3dSmrg 2473428d7b3dSmrg if (box[i].x2 > extents.x2) 2474428d7b3dSmrg extents.x2 = box[i].x2; 2475428d7b3dSmrg if (box[i].y2 > extents.y2) 2476428d7b3dSmrg extents.y2 = box[i].y2; 2477428d7b3dSmrg } 2478428d7b3dSmrg 2479428d7b3dSmrg if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, 2480428d7b3dSmrg extents.x1 + src_dx, 2481428d7b3dSmrg extents.y1 + src_dy, 2482428d7b3dSmrg extents.x2 - extents.x1, 2483428d7b3dSmrg extents.y2 - extents.y1)) 2484428d7b3dSmrg goto fallback_tiled_dst; 2485428d7b3dSmrg } else { 2486428d7b3dSmrg tmp.src.bo = kgem_bo_reference(src_bo); 2487428d7b3dSmrg tmp.src.width = src->width; 2488428d7b3dSmrg tmp.src.height = src->height; 2489428d7b3dSmrg tmp.src.offset[0] = tmp.src.offset[1] = 0; 2490428d7b3dSmrg tmp.src.scale[0] = 1.f/src->width; 2491428d7b3dSmrg tmp.src.scale[1] = 1.f/src->height; 2492428d7b3dSmrg } 2493428d7b3dSmrg 2494428d7b3dSmrg tmp.is_affine = true; 2495428d7b3dSmrg tmp.floats_per_vertex = 3; 2496428d7b3dSmrg tmp.floats_per_rect = 9; 2497428d7b3dSmrg tmp.u.gen4.wm_kernel = WM_KERNEL; 2498428d7b3dSmrg tmp.u.gen4.ve_id = 2; 2499428d7b3dSmrg 2500428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 2501428d7b3dSmrg kgem_submit(&sna->kgem); 2502428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 2503428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2504428d7b3dSmrg if (tmp.redirect.real_bo) 2505428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2506428d7b3dSmrg 2507428d7b3dSmrg goto fallback_blt; 2508428d7b3dSmrg } 2509428d7b3dSmrg } 2510428d7b3dSmrg 2511428d7b3dSmrg dst_dx += tmp.dst.x; 2512428d7b3dSmrg dst_dy += tmp.dst.y; 2513428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 2514428d7b3dSmrg 2515428d7b3dSmrg src_dx += tmp.src.offset[0]; 2516428d7b3dSmrg src_dy += tmp.src.offset[1]; 2517428d7b3dSmrg 2518428d7b3dSmrg gen4_align_vertex(sna, &tmp); 2519428d7b3dSmrg gen4_copy_bind_surfaces(sna, &tmp); 2520428d7b3dSmrg 2521428d7b3dSmrg do { 2522428d7b3dSmrg gen4_render_copy_one(sna, &tmp, 2523428d7b3dSmrg box->x1 + src_dx, box->y1 + src_dy, 2524428d7b3dSmrg box->x2 - box->x1, box->y2 - box->y1, 2525428d7b3dSmrg box->x1 + dst_dx, box->y1 + dst_dy); 2526428d7b3dSmrg box++; 2527428d7b3dSmrg } while (--n); 2528428d7b3dSmrg 2529428d7b3dSmrg gen4_vertex_flush(sna); 2530428d7b3dSmrg sna_render_composite_redirect_done(sna, &tmp); 2531428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2532428d7b3dSmrg return true; 2533428d7b3dSmrg 2534428d7b3dSmrgfallback_tiled_dst: 2535428d7b3dSmrg if (tmp.redirect.real_bo) 2536428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 2537428d7b3dSmrgfallback_tiled: 2538428d7b3dSmrg if (sna_blt_compare_depth(src, dst) && 2539428d7b3dSmrg sna_blt_copy_boxes(sna, alu, 2540428d7b3dSmrg src_bo, src_dx, src_dy, 2541428d7b3dSmrg dst_bo, dst_dx, dst_dy, 2542428d7b3dSmrg dst->bitsPerPixel, 2543428d7b3dSmrg box, n)) 2544428d7b3dSmrg return true; 2545428d7b3dSmrg 2546428d7b3dSmrg return sna_tiling_copy_boxes(sna, alu, 2547428d7b3dSmrg src, src_bo, src_dx, src_dy, 2548428d7b3dSmrg dst, dst_bo, dst_dx, dst_dy, 2549428d7b3dSmrg box, n); 2550428d7b3dSmrg} 2551428d7b3dSmrg 2552428d7b3dSmrgstatic void 2553428d7b3dSmrggen4_render_copy_blt(struct sna *sna, 2554428d7b3dSmrg const struct sna_copy_op *op, 2555428d7b3dSmrg int16_t sx, int16_t sy, 2556428d7b3dSmrg int16_t w, int16_t h, 2557428d7b3dSmrg int16_t dx, int16_t dy) 2558428d7b3dSmrg{ 2559428d7b3dSmrg gen4_render_copy_one(sna, &op->base, sx, sy, w, h, dx, dy); 2560428d7b3dSmrg} 2561428d7b3dSmrg 2562428d7b3dSmrgstatic void 2563428d7b3dSmrggen4_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 2564428d7b3dSmrg{ 2565428d7b3dSmrg if (sna->render.vertex_offset) 2566428d7b3dSmrg gen4_vertex_flush(sna); 2567428d7b3dSmrg} 2568428d7b3dSmrg 2569428d7b3dSmrgstatic bool 2570428d7b3dSmrggen4_render_copy(struct sna *sna, uint8_t alu, 2571428d7b3dSmrg PixmapPtr src, struct kgem_bo *src_bo, 2572428d7b3dSmrg PixmapPtr dst, struct kgem_bo *dst_bo, 2573428d7b3dSmrg struct sna_copy_op *op) 2574428d7b3dSmrg{ 2575428d7b3dSmrg DBG(("%s: src=%ld, dst=%ld, alu=%d\n", 2576428d7b3dSmrg __FUNCTION__, 2577428d7b3dSmrg src->drawable.serialNumber, 2578428d7b3dSmrg dst->drawable.serialNumber, 2579428d7b3dSmrg alu)); 2580428d7b3dSmrg 2581428d7b3dSmrg if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && 2582428d7b3dSmrg sna_blt_copy(sna, alu, 2583428d7b3dSmrg src_bo, dst_bo, 2584428d7b3dSmrg dst->drawable.bitsPerPixel, 2585428d7b3dSmrg op)) 2586428d7b3dSmrg return true; 2587428d7b3dSmrg 2588428d7b3dSmrg if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || 2589428d7b3dSmrg too_large(src->drawable.width, src->drawable.height) || 2590428d7b3dSmrg too_large(dst->drawable.width, dst->drawable.height)) { 2591428d7b3dSmrgfallback: 2592428d7b3dSmrg if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 2593428d7b3dSmrg return false; 2594428d7b3dSmrg 2595428d7b3dSmrg return sna_blt_copy(sna, alu, src_bo, dst_bo, 2596428d7b3dSmrg dst->drawable.bitsPerPixel, 2597428d7b3dSmrg op); 2598428d7b3dSmrg } 2599428d7b3dSmrg 2600428d7b3dSmrg if (dst->drawable.depth == src->drawable.depth) { 2601428d7b3dSmrg op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); 2602428d7b3dSmrg op->base.src.pict_format = op->base.dst.format; 2603428d7b3dSmrg } else { 2604428d7b3dSmrg op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 2605428d7b3dSmrg op->base.src.pict_format = sna_format_for_depth(src->drawable.depth); 2606428d7b3dSmrg } 2607428d7b3dSmrg if (!gen4_check_format(op->base.src.pict_format)) 2608428d7b3dSmrg goto fallback; 2609428d7b3dSmrg 2610428d7b3dSmrg op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear; 2611428d7b3dSmrg 2612428d7b3dSmrg op->base.dst.pixmap = dst; 2613428d7b3dSmrg op->base.dst.width = dst->drawable.width; 2614428d7b3dSmrg op->base.dst.height = dst->drawable.height; 2615428d7b3dSmrg op->base.dst.bo = dst_bo; 2616428d7b3dSmrg 2617428d7b3dSmrg op->base.src.bo = src_bo; 2618428d7b3dSmrg op->base.src.card_format = 2619428d7b3dSmrg gen4_get_card_format(op->base.src.pict_format); 2620428d7b3dSmrg op->base.src.width = src->drawable.width; 2621428d7b3dSmrg op->base.src.height = src->drawable.height; 2622428d7b3dSmrg op->base.src.scale[0] = 1.f/src->drawable.width; 2623428d7b3dSmrg op->base.src.scale[1] = 1.f/src->drawable.height; 2624428d7b3dSmrg op->base.src.filter = SAMPLER_FILTER_NEAREST; 2625428d7b3dSmrg op->base.src.repeat = SAMPLER_EXTEND_NONE; 2626428d7b3dSmrg 2627428d7b3dSmrg op->base.is_affine = true; 2628428d7b3dSmrg op->base.floats_per_vertex = 3; 2629428d7b3dSmrg op->base.floats_per_rect = 9; 2630428d7b3dSmrg op->base.u.gen4.wm_kernel = WM_KERNEL; 2631428d7b3dSmrg op->base.u.gen4.ve_id = 2; 2632428d7b3dSmrg 2633428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 2634428d7b3dSmrg kgem_submit(&sna->kgem); 2635428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 2636428d7b3dSmrg goto fallback; 2637428d7b3dSmrg } 2638428d7b3dSmrg 2639428d7b3dSmrg if (kgem_bo_is_dirty(src_bo)) { 2640428d7b3dSmrg if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && 2641428d7b3dSmrg sna_blt_copy(sna, alu, 2642428d7b3dSmrg src_bo, dst_bo, 2643428d7b3dSmrg dst->drawable.bitsPerPixel, 2644428d7b3dSmrg op)) 2645428d7b3dSmrg return true; 2646428d7b3dSmrg } 2647428d7b3dSmrg 2648428d7b3dSmrg gen4_align_vertex(sna, &op->base); 2649428d7b3dSmrg gen4_copy_bind_surfaces(sna, &op->base); 2650428d7b3dSmrg 2651428d7b3dSmrg op->blt = gen4_render_copy_blt; 2652428d7b3dSmrg op->done = gen4_render_copy_done; 2653428d7b3dSmrg return true; 2654428d7b3dSmrg} 2655428d7b3dSmrg 2656428d7b3dSmrgstatic void 2657428d7b3dSmrggen4_render_fill_rectangle(struct sna *sna, 2658428d7b3dSmrg const struct sna_composite_op *op, 2659428d7b3dSmrg int x, int y, int w, int h) 2660428d7b3dSmrg{ 2661428d7b3dSmrg gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces); 2662428d7b3dSmrg 2663428d7b3dSmrg OUT_VERTEX(x+w, y+h); 2664428d7b3dSmrg OUT_VERTEX_F(.5); 2665428d7b3dSmrg 2666428d7b3dSmrg OUT_VERTEX(x, y+h); 2667428d7b3dSmrg OUT_VERTEX_F(.5); 2668428d7b3dSmrg 2669428d7b3dSmrg OUT_VERTEX(x, y); 2670428d7b3dSmrg OUT_VERTEX_F(.5); 2671428d7b3dSmrg} 2672428d7b3dSmrg 2673428d7b3dSmrgstatic bool 2674428d7b3dSmrggen4_render_fill_boxes(struct sna *sna, 2675428d7b3dSmrg CARD8 op, 2676428d7b3dSmrg PictFormat format, 2677428d7b3dSmrg const xRenderColor *color, 2678428d7b3dSmrg const DrawableRec *dst, struct kgem_bo *dst_bo, 2679428d7b3dSmrg const BoxRec *box, int n) 2680428d7b3dSmrg{ 2681428d7b3dSmrg struct sna_composite_op tmp; 2682428d7b3dSmrg uint32_t pixel; 2683428d7b3dSmrg 2684428d7b3dSmrg if (op >= ARRAY_SIZE(gen4_blend_op)) { 2685428d7b3dSmrg DBG(("%s: fallback due to unhandled blend op: %d\n", 2686428d7b3dSmrg __FUNCTION__, op)); 2687428d7b3dSmrg return false; 2688428d7b3dSmrg } 2689428d7b3dSmrg 2690428d7b3dSmrg if (op <= PictOpSrc) { 2691428d7b3dSmrg uint8_t alu = GXinvalid; 2692428d7b3dSmrg 2693428d7b3dSmrg pixel = 0; 2694428d7b3dSmrg if (op == PictOpClear) 2695428d7b3dSmrg alu = GXclear; 2696428d7b3dSmrg else if (sna_get_pixel_from_rgba(&pixel, 2697428d7b3dSmrg color->red, 2698428d7b3dSmrg color->green, 2699428d7b3dSmrg color->blue, 2700428d7b3dSmrg color->alpha, 2701428d7b3dSmrg format)) 2702428d7b3dSmrg alu = GXcopy; 2703428d7b3dSmrg 2704428d7b3dSmrg if (alu != GXinvalid && 2705428d7b3dSmrg sna_blt_fill_boxes(sna, alu, 2706428d7b3dSmrg dst_bo, dst->bitsPerPixel, 2707428d7b3dSmrg pixel, box, n)) 2708428d7b3dSmrg return true; 2709428d7b3dSmrg 2710428d7b3dSmrg if (!gen4_check_dst_format(format)) 2711428d7b3dSmrg return false; 2712428d7b3dSmrg 2713428d7b3dSmrg if (too_large(dst->width, dst->height)) 2714428d7b3dSmrg return sna_tiling_fill_boxes(sna, op, format, color, 2715428d7b3dSmrg dst, dst_bo, box, n); 2716428d7b3dSmrg } 2717428d7b3dSmrg 2718428d7b3dSmrg if (op == PictOpClear) { 2719428d7b3dSmrg pixel = 0; 2720428d7b3dSmrg op = PictOpSrc; 2721428d7b3dSmrg } else if (!sna_get_pixel_from_rgba(&pixel, 2722428d7b3dSmrg color->red, 2723428d7b3dSmrg color->green, 2724428d7b3dSmrg color->blue, 2725428d7b3dSmrg color->alpha, 2726428d7b3dSmrg PICT_a8r8g8b8)) 2727428d7b3dSmrg return false; 2728428d7b3dSmrg 2729428d7b3dSmrg DBG(("%s(%08x x %d)\n", __FUNCTION__, pixel, n)); 2730428d7b3dSmrg 2731428d7b3dSmrg memset(&tmp, 0, sizeof(tmp)); 2732428d7b3dSmrg 2733428d7b3dSmrg tmp.op = op; 2734428d7b3dSmrg 2735428d7b3dSmrg tmp.dst.pixmap = (PixmapPtr)dst; 2736428d7b3dSmrg tmp.dst.width = dst->width; 2737428d7b3dSmrg tmp.dst.height = dst->height; 2738428d7b3dSmrg tmp.dst.format = format; 2739428d7b3dSmrg tmp.dst.bo = dst_bo; 2740428d7b3dSmrg 2741428d7b3dSmrg gen4_channel_init_solid(sna, &tmp.src, pixel); 2742428d7b3dSmrg 2743428d7b3dSmrg tmp.is_affine = true; 2744428d7b3dSmrg tmp.floats_per_vertex = 2; 2745428d7b3dSmrg tmp.floats_per_rect = 6; 2746428d7b3dSmrg tmp.u.gen4.wm_kernel = WM_KERNEL; 2747428d7b3dSmrg tmp.u.gen4.ve_id = 1; 2748428d7b3dSmrg 2749428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 2750428d7b3dSmrg kgem_submit(&sna->kgem); 2751428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) 2752428d7b3dSmrg return false; 2753428d7b3dSmrg } 2754428d7b3dSmrg 2755428d7b3dSmrg gen4_align_vertex(sna, &tmp); 2756428d7b3dSmrg gen4_bind_surfaces(sna, &tmp); 2757428d7b3dSmrg 2758428d7b3dSmrg do { 2759428d7b3dSmrg gen4_render_fill_rectangle(sna, &tmp, 2760428d7b3dSmrg box->x1, box->y1, 2761428d7b3dSmrg box->x2 - box->x1, 2762428d7b3dSmrg box->y2 - box->y1); 2763428d7b3dSmrg box++; 2764428d7b3dSmrg } while (--n); 2765428d7b3dSmrg 2766428d7b3dSmrg gen4_vertex_flush(sna); 2767428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2768428d7b3dSmrg return true; 2769428d7b3dSmrg} 2770428d7b3dSmrg 2771428d7b3dSmrgstatic void 2772428d7b3dSmrggen4_render_fill_op_blt(struct sna *sna, const struct sna_fill_op *op, 2773428d7b3dSmrg int16_t x, int16_t y, int16_t w, int16_t h) 2774428d7b3dSmrg{ 2775428d7b3dSmrg gen4_render_fill_rectangle(sna, &op->base, x, y, w, h); 2776428d7b3dSmrg} 2777428d7b3dSmrg 2778428d7b3dSmrgfastcall static void 2779428d7b3dSmrggen4_render_fill_op_box(struct sna *sna, 2780428d7b3dSmrg const struct sna_fill_op *op, 2781428d7b3dSmrg const BoxRec *box) 2782428d7b3dSmrg{ 2783428d7b3dSmrg gen4_render_fill_rectangle(sna, &op->base, 2784428d7b3dSmrg box->x1, box->y1, 2785428d7b3dSmrg box->x2-box->x1, box->y2-box->y1); 2786428d7b3dSmrg} 2787428d7b3dSmrg 2788428d7b3dSmrgfastcall static void 2789428d7b3dSmrggen4_render_fill_op_boxes(struct sna *sna, 2790428d7b3dSmrg const struct sna_fill_op *op, 2791428d7b3dSmrg const BoxRec *box, 2792428d7b3dSmrg int nbox) 2793428d7b3dSmrg{ 2794428d7b3dSmrg do { 2795428d7b3dSmrg gen4_render_fill_rectangle(sna, &op->base, 2796428d7b3dSmrg box->x1, box->y1, 2797428d7b3dSmrg box->x2-box->x1, box->y2-box->y1); 2798428d7b3dSmrg box++; 2799428d7b3dSmrg } while (--nbox); 2800428d7b3dSmrg} 2801428d7b3dSmrg 2802428d7b3dSmrgstatic void 2803428d7b3dSmrggen4_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) 2804428d7b3dSmrg{ 2805428d7b3dSmrg if (sna->render.vertex_offset) 2806428d7b3dSmrg gen4_vertex_flush(sna); 2807428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2808428d7b3dSmrg} 2809428d7b3dSmrg 2810428d7b3dSmrgstatic bool 2811428d7b3dSmrggen4_render_fill(struct sna *sna, uint8_t alu, 2812428d7b3dSmrg PixmapPtr dst, struct kgem_bo *dst_bo, 2813428d7b3dSmrg uint32_t color, unsigned flags, 2814428d7b3dSmrg struct sna_fill_op *op) 2815428d7b3dSmrg{ 2816428d7b3dSmrg if (sna_blt_fill(sna, alu, 2817428d7b3dSmrg dst_bo, dst->drawable.bitsPerPixel, 2818428d7b3dSmrg color, 2819428d7b3dSmrg op)) 2820428d7b3dSmrg return true; 2821428d7b3dSmrg 2822428d7b3dSmrg if (!(alu == GXcopy || alu == GXclear) || 2823428d7b3dSmrg too_large(dst->drawable.width, dst->drawable.height)) 2824428d7b3dSmrg return sna_blt_fill(sna, alu, 2825428d7b3dSmrg dst_bo, dst->drawable.bitsPerPixel, 2826428d7b3dSmrg color, 2827428d7b3dSmrg op); 2828428d7b3dSmrg 2829428d7b3dSmrg if (alu == GXclear) 2830428d7b3dSmrg color = 0; 2831428d7b3dSmrg 2832428d7b3dSmrg op->base.op = color == 0 ? PictOpClear : PictOpSrc; 2833428d7b3dSmrg 2834428d7b3dSmrg op->base.dst.pixmap = dst; 2835428d7b3dSmrg op->base.dst.width = dst->drawable.width; 2836428d7b3dSmrg op->base.dst.height = dst->drawable.height; 2837428d7b3dSmrg op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 2838428d7b3dSmrg op->base.dst.bo = dst_bo; 2839428d7b3dSmrg op->base.dst.x = op->base.dst.y = 0; 2840428d7b3dSmrg 2841428d7b3dSmrg op->base.need_magic_ca_pass = 0; 2842428d7b3dSmrg op->base.has_component_alpha = 0; 2843428d7b3dSmrg 2844428d7b3dSmrg gen4_channel_init_solid(sna, &op->base.src, 2845428d7b3dSmrg sna_rgba_for_color(color, 2846428d7b3dSmrg dst->drawable.depth)); 2847428d7b3dSmrg op->base.mask.bo = NULL; 2848428d7b3dSmrg 2849428d7b3dSmrg op->base.is_affine = true; 2850428d7b3dSmrg op->base.floats_per_vertex = 2; 2851428d7b3dSmrg op->base.floats_per_rect = 6; 2852428d7b3dSmrg op->base.u.gen4.wm_kernel = WM_KERNEL; 2853428d7b3dSmrg op->base.u.gen4.ve_id = 1; 2854428d7b3dSmrg 2855428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 2856428d7b3dSmrg kgem_submit(&sna->kgem); 2857428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 2858428d7b3dSmrg kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2859428d7b3dSmrg return false; 2860428d7b3dSmrg } 2861428d7b3dSmrg } 2862428d7b3dSmrg 2863428d7b3dSmrg gen4_align_vertex(sna, &op->base); 2864428d7b3dSmrg gen4_bind_surfaces(sna, &op->base); 2865428d7b3dSmrg 2866428d7b3dSmrg op->blt = gen4_render_fill_op_blt; 2867428d7b3dSmrg op->box = gen4_render_fill_op_box; 2868428d7b3dSmrg op->boxes = gen4_render_fill_op_boxes; 2869428d7b3dSmrg op->points = NULL; 2870428d7b3dSmrg op->done = gen4_render_fill_op_done; 2871428d7b3dSmrg return true; 2872428d7b3dSmrg} 2873428d7b3dSmrg 2874428d7b3dSmrgstatic bool 2875428d7b3dSmrggen4_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 2876428d7b3dSmrg uint32_t color, 2877428d7b3dSmrg int16_t x1, int16_t y1, int16_t x2, int16_t y2, 2878428d7b3dSmrg uint8_t alu) 2879428d7b3dSmrg{ 2880428d7b3dSmrg BoxRec box; 2881428d7b3dSmrg 2882428d7b3dSmrg box.x1 = x1; 2883428d7b3dSmrg box.y1 = y1; 2884428d7b3dSmrg box.x2 = x2; 2885428d7b3dSmrg box.y2 = y2; 2886428d7b3dSmrg 2887428d7b3dSmrg return sna_blt_fill_boxes(sna, alu, 2888428d7b3dSmrg bo, dst->drawable.bitsPerPixel, 2889428d7b3dSmrg color, &box, 1); 2890428d7b3dSmrg} 2891428d7b3dSmrg 2892428d7b3dSmrgstatic bool 2893428d7b3dSmrggen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 2894428d7b3dSmrg uint32_t color, 2895428d7b3dSmrg int16_t x1, int16_t y1, 2896428d7b3dSmrg int16_t x2, int16_t y2, 2897428d7b3dSmrg uint8_t alu) 2898428d7b3dSmrg{ 2899428d7b3dSmrg struct sna_composite_op tmp; 2900428d7b3dSmrg 2901428d7b3dSmrg DBG(("%s: color=%08x\n", __FUNCTION__, color)); 2902428d7b3dSmrg 2903428d7b3dSmrg if (gen4_render_fill_one_try_blt(sna, dst, bo, color, 2904428d7b3dSmrg x1, y1, x2, y2, alu)) 2905428d7b3dSmrg return true; 2906428d7b3dSmrg 2907428d7b3dSmrg /* Must use the BLT if we can't RENDER... */ 2908428d7b3dSmrg if (!(alu == GXcopy || alu == GXclear) || 2909428d7b3dSmrg too_large(dst->drawable.width, dst->drawable.height)) 2910428d7b3dSmrg return false; 2911428d7b3dSmrg 2912428d7b3dSmrg if (alu == GXclear) 2913428d7b3dSmrg color = 0; 2914428d7b3dSmrg 2915428d7b3dSmrg tmp.op = color == 0 ? PictOpClear : PictOpSrc; 2916428d7b3dSmrg 2917428d7b3dSmrg tmp.dst.pixmap = dst; 2918428d7b3dSmrg tmp.dst.width = dst->drawable.width; 2919428d7b3dSmrg tmp.dst.height = dst->drawable.height; 2920428d7b3dSmrg tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 2921428d7b3dSmrg tmp.dst.bo = bo; 2922428d7b3dSmrg tmp.dst.x = tmp.dst.y = 0; 2923428d7b3dSmrg 2924428d7b3dSmrg gen4_channel_init_solid(sna, &tmp.src, 2925428d7b3dSmrg sna_rgba_for_color(color, 2926428d7b3dSmrg dst->drawable.depth)); 2927428d7b3dSmrg tmp.mask.bo = NULL; 2928428d7b3dSmrg tmp.mask.filter = SAMPLER_FILTER_NEAREST; 2929428d7b3dSmrg tmp.mask.repeat = SAMPLER_EXTEND_NONE; 2930428d7b3dSmrg 2931428d7b3dSmrg tmp.is_affine = true; 2932428d7b3dSmrg tmp.floats_per_vertex = 2; 2933428d7b3dSmrg tmp.floats_per_rect = 6; 2934428d7b3dSmrg tmp.has_component_alpha = false; 2935428d7b3dSmrg tmp.need_magic_ca_pass = false; 2936428d7b3dSmrg 2937428d7b3dSmrg tmp.u.gen4.wm_kernel = WM_KERNEL; 2938428d7b3dSmrg tmp.u.gen4.ve_id = 1; 2939428d7b3dSmrg 2940428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 2941428d7b3dSmrg kgem_submit(&sna->kgem); 2942428d7b3dSmrg if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 2943428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2944428d7b3dSmrg return false; 2945428d7b3dSmrg } 2946428d7b3dSmrg } 2947428d7b3dSmrg 2948428d7b3dSmrg gen4_align_vertex(sna, &tmp); 2949428d7b3dSmrg gen4_bind_surfaces(sna, &tmp); 2950428d7b3dSmrg 2951428d7b3dSmrg gen4_render_fill_rectangle(sna, &tmp, x1, y1, x2 - x1, y2 - y1); 2952428d7b3dSmrg 2953428d7b3dSmrg gen4_vertex_flush(sna); 2954428d7b3dSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 2955428d7b3dSmrg 2956428d7b3dSmrg return true; 2957428d7b3dSmrg} 2958428d7b3dSmrg 2959428d7b3dSmrgstatic void gen4_render_reset(struct sna *sna) 2960428d7b3dSmrg{ 2961428d7b3dSmrg sna->render_state.gen4.needs_invariant = true; 2962428d7b3dSmrg sna->render_state.gen4.needs_urb = true; 2963428d7b3dSmrg sna->render_state.gen4.ve_id = -1; 2964428d7b3dSmrg sna->render_state.gen4.last_primitive = -1; 2965428d7b3dSmrg sna->render_state.gen4.last_pipelined_pointers = -1; 2966428d7b3dSmrg 2967428d7b3dSmrg sna->render_state.gen4.drawrect_offset = -1; 2968428d7b3dSmrg sna->render_state.gen4.drawrect_limit = -1; 2969428d7b3dSmrg sna->render_state.gen4.surface_table = 0; 2970428d7b3dSmrg 2971428d7b3dSmrg if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { 2972428d7b3dSmrg DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); 2973428d7b3dSmrg discard_vbo(sna); 2974428d7b3dSmrg } 2975428d7b3dSmrg 2976428d7b3dSmrg sna->render.vertex_offset = 0; 2977428d7b3dSmrg sna->render.nvertex_reloc = 0; 2978428d7b3dSmrg sna->render.vb_id = 0; 2979428d7b3dSmrg} 2980428d7b3dSmrg 2981428d7b3dSmrgstatic void gen4_render_fini(struct sna *sna) 2982428d7b3dSmrg{ 2983428d7b3dSmrg kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo); 2984428d7b3dSmrg} 2985428d7b3dSmrg 2986428d7b3dSmrgstatic uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream) 2987428d7b3dSmrg{ 2988428d7b3dSmrg struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32); 2989428d7b3dSmrg 2990428d7b3dSmrg /* Set up the vertex shader to be disabled (passthrough) */ 2991428d7b3dSmrg vs->thread4.nr_urb_entries = URB_VS_ENTRIES; 2992428d7b3dSmrg vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; 2993428d7b3dSmrg vs->vs6.vs_enable = 0; 2994428d7b3dSmrg vs->vs6.vert_cache_disable = 1; 2995428d7b3dSmrg 2996428d7b3dSmrg return sna_static_stream_offsetof(stream, vs); 2997428d7b3dSmrg} 2998428d7b3dSmrg 2999428d7b3dSmrgstatic uint32_t gen4_create_sf_state(struct sna_static_stream *stream, 3000428d7b3dSmrg uint32_t kernel) 3001428d7b3dSmrg{ 3002428d7b3dSmrg struct gen4_sf_unit_state *sf; 3003428d7b3dSmrg 3004428d7b3dSmrg sf = sna_static_stream_map(stream, sizeof(*sf), 32); 3005428d7b3dSmrg 3006428d7b3dSmrg sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF); 3007428d7b3dSmrg sf->thread0.kernel_start_pointer = kernel >> 6; 3008428d7b3dSmrg sf->thread3.const_urb_entry_read_length = 0; /* no const URBs */ 3009428d7b3dSmrg sf->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ 3010428d7b3dSmrg sf->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ 3011428d7b3dSmrg /* don't smash vertex header, read start from dw8 */ 3012428d7b3dSmrg sf->thread3.urb_entry_read_offset = 1; 3013428d7b3dSmrg sf->thread3.dispatch_grf_start_reg = 3; 3014428d7b3dSmrg sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1; 3015428d7b3dSmrg sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; 3016428d7b3dSmrg sf->thread4.nr_urb_entries = URB_SF_ENTRIES; 3017428d7b3dSmrg sf->sf5.viewport_transform = false; /* skip viewport */ 3018428d7b3dSmrg sf->sf6.cull_mode = GEN4_CULLMODE_NONE; 3019428d7b3dSmrg sf->sf6.scissor = 0; 3020428d7b3dSmrg sf->sf7.trifan_pv = 2; 3021428d7b3dSmrg sf->sf6.dest_org_vbias = 0x8; 3022428d7b3dSmrg sf->sf6.dest_org_hbias = 0x8; 3023428d7b3dSmrg 3024428d7b3dSmrg return sna_static_stream_offsetof(stream, sf); 3025428d7b3dSmrg} 3026428d7b3dSmrg 3027428d7b3dSmrgstatic uint32_t gen4_create_sampler_state(struct sna_static_stream *stream, 3028428d7b3dSmrg sampler_filter_t src_filter, 3029428d7b3dSmrg sampler_extend_t src_extend, 3030428d7b3dSmrg sampler_filter_t mask_filter, 3031428d7b3dSmrg sampler_extend_t mask_extend) 3032428d7b3dSmrg{ 3033428d7b3dSmrg struct gen4_sampler_state *sampler_state; 3034428d7b3dSmrg 3035428d7b3dSmrg sampler_state = sna_static_stream_map(stream, 3036428d7b3dSmrg sizeof(struct gen4_sampler_state) * 2, 3037428d7b3dSmrg 32); 3038428d7b3dSmrg sampler_state_init(&sampler_state[0], src_filter, src_extend); 3039428d7b3dSmrg sampler_state_init(&sampler_state[1], mask_filter, mask_extend); 3040428d7b3dSmrg 3041428d7b3dSmrg return sna_static_stream_offsetof(stream, sampler_state); 3042428d7b3dSmrg} 3043428d7b3dSmrg 3044428d7b3dSmrgstatic void gen4_init_wm_state(struct gen4_wm_unit_state *wm, 3045428d7b3dSmrg int gen, 3046428d7b3dSmrg bool has_mask, 3047428d7b3dSmrg uint32_t kernel, 3048428d7b3dSmrg uint32_t sampler) 3049428d7b3dSmrg{ 3050428d7b3dSmrg assert((kernel & 63) == 0); 3051428d7b3dSmrg wm->thread0.kernel_start_pointer = kernel >> 6; 3052428d7b3dSmrg wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF); 3053428d7b3dSmrg 3054428d7b3dSmrg wm->thread1.single_program_flow = 0; 3055428d7b3dSmrg 3056428d7b3dSmrg wm->thread3.const_urb_entry_read_length = 0; 3057428d7b3dSmrg wm->thread3.const_urb_entry_read_offset = 0; 3058428d7b3dSmrg 3059428d7b3dSmrg wm->thread3.urb_entry_read_offset = 0; 3060428d7b3dSmrg wm->thread3.dispatch_grf_start_reg = 3; 3061428d7b3dSmrg 3062428d7b3dSmrg assert((sampler & 31) == 0); 3063428d7b3dSmrg wm->wm4.sampler_state_pointer = sampler >> 5; 3064428d7b3dSmrg wm->wm4.sampler_count = 1; 3065428d7b3dSmrg 3066428d7b3dSmrg wm->wm5.max_threads = gen >= 045 ? G4X_MAX_WM_THREADS - 1 : GEN4_MAX_WM_THREADS - 1; 3067428d7b3dSmrg wm->wm5.transposed_urb_read = 0; 3068428d7b3dSmrg wm->wm5.thread_dispatch_enable = 1; 3069428d7b3dSmrg /* just use 16-pixel dispatch (4 subspans), don't need to change kernel 3070428d7b3dSmrg * start point 3071428d7b3dSmrg */ 3072428d7b3dSmrg wm->wm5.enable_16_pix = 1; 3073428d7b3dSmrg wm->wm5.enable_8_pix = 0; 3074428d7b3dSmrg wm->wm5.early_depth_test = 1; 3075428d7b3dSmrg 3076428d7b3dSmrg /* Each pair of attributes (src/mask coords) is two URB entries */ 3077428d7b3dSmrg if (has_mask) { 3078428d7b3dSmrg wm->thread1.binding_table_entry_count = 3; 3079428d7b3dSmrg wm->thread3.urb_entry_read_length = 4; 3080428d7b3dSmrg } else { 3081428d7b3dSmrg wm->thread1.binding_table_entry_count = 2; 3082428d7b3dSmrg wm->thread3.urb_entry_read_length = 2; 3083428d7b3dSmrg } 3084428d7b3dSmrg} 3085428d7b3dSmrg 3086428d7b3dSmrgstatic uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream) 3087428d7b3dSmrg{ 3088428d7b3dSmrg uint8_t *ptr, *base; 3089428d7b3dSmrg int i, j; 3090428d7b3dSmrg 3091428d7b3dSmrg base = ptr = 3092428d7b3dSmrg sna_static_stream_map(stream, 3093428d7b3dSmrg GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64, 3094428d7b3dSmrg 64); 3095428d7b3dSmrg 3096428d7b3dSmrg for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) { 3097428d7b3dSmrg for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) { 3098428d7b3dSmrg struct gen4_cc_unit_state *state = 3099428d7b3dSmrg (struct gen4_cc_unit_state *)ptr; 3100428d7b3dSmrg 3101428d7b3dSmrg state->cc3.blend_enable = 3102428d7b3dSmrg !(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE); 3103428d7b3dSmrg 3104428d7b3dSmrg state->cc5.logicop_func = 0xc; /* COPY */ 3105428d7b3dSmrg state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD; 3106428d7b3dSmrg 3107428d7b3dSmrg /* Fill in alpha blend factors same as color, for the future. */ 3108428d7b3dSmrg state->cc5.ia_src_blend_factor = i; 3109428d7b3dSmrg state->cc5.ia_dest_blend_factor = j; 3110428d7b3dSmrg 3111428d7b3dSmrg state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD; 3112428d7b3dSmrg state->cc6.clamp_post_alpha_blend = 1; 3113428d7b3dSmrg state->cc6.clamp_pre_alpha_blend = 1; 3114428d7b3dSmrg state->cc6.src_blend_factor = i; 3115428d7b3dSmrg state->cc6.dest_blend_factor = j; 3116428d7b3dSmrg 3117428d7b3dSmrg ptr += 64; 3118428d7b3dSmrg } 3119428d7b3dSmrg } 3120428d7b3dSmrg 3121428d7b3dSmrg return sna_static_stream_offsetof(stream, base); 3122428d7b3dSmrg} 3123428d7b3dSmrg 3124428d7b3dSmrgstatic bool gen4_render_setup(struct sna *sna) 3125428d7b3dSmrg{ 3126428d7b3dSmrg struct gen4_render_state *state = &sna->render_state.gen4; 3127428d7b3dSmrg struct sna_static_stream general; 3128428d7b3dSmrg struct gen4_wm_unit_state_padded *wm_state; 3129428d7b3dSmrg uint32_t sf, wm[KERNEL_COUNT]; 3130428d7b3dSmrg int i, j, k, l, m; 3131428d7b3dSmrg 3132428d7b3dSmrg sna_static_stream_init(&general); 3133428d7b3dSmrg 3134428d7b3dSmrg /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer 3135428d7b3dSmrg * dumps, you know it points to zero. 3136428d7b3dSmrg */ 3137428d7b3dSmrg null_create(&general); 3138428d7b3dSmrg 3139428d7b3dSmrg sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask); 3140428d7b3dSmrg for (m = 0; m < KERNEL_COUNT; m++) { 3141428d7b3dSmrg if (wm_kernels[m].size) { 3142428d7b3dSmrg wm[m] = sna_static_stream_add(&general, 3143428d7b3dSmrg wm_kernels[m].data, 3144428d7b3dSmrg wm_kernels[m].size, 3145428d7b3dSmrg 64); 3146428d7b3dSmrg } else { 3147428d7b3dSmrg wm[m] = sna_static_stream_compile_wm(sna, &general, 3148428d7b3dSmrg wm_kernels[m].data, 3149428d7b3dSmrg 16); 3150428d7b3dSmrg } 3151428d7b3dSmrg } 3152428d7b3dSmrg 3153428d7b3dSmrg state->vs = gen4_create_vs_unit_state(&general); 3154428d7b3dSmrg state->sf = gen4_create_sf_state(&general, sf); 3155428d7b3dSmrg 3156428d7b3dSmrg wm_state = sna_static_stream_map(&general, 3157428d7b3dSmrg sizeof(*wm_state) * KERNEL_COUNT * 3158428d7b3dSmrg FILTER_COUNT * EXTEND_COUNT * 3159428d7b3dSmrg FILTER_COUNT * EXTEND_COUNT, 3160428d7b3dSmrg 64); 3161428d7b3dSmrg state->wm = sna_static_stream_offsetof(&general, wm_state); 3162428d7b3dSmrg for (i = 0; i < FILTER_COUNT; i++) { 3163428d7b3dSmrg for (j = 0; j < EXTEND_COUNT; j++) { 3164428d7b3dSmrg for (k = 0; k < FILTER_COUNT; k++) { 3165428d7b3dSmrg for (l = 0; l < EXTEND_COUNT; l++) { 3166428d7b3dSmrg uint32_t sampler_state; 3167428d7b3dSmrg 3168428d7b3dSmrg sampler_state = 3169428d7b3dSmrg gen4_create_sampler_state(&general, 3170428d7b3dSmrg i, j, 3171428d7b3dSmrg k, l); 3172428d7b3dSmrg 3173428d7b3dSmrg for (m = 0; m < KERNEL_COUNT; m++) { 3174428d7b3dSmrg gen4_init_wm_state(&wm_state->state, 3175428d7b3dSmrg sna->kgem.gen, 3176428d7b3dSmrg wm_kernels[m].has_mask, 3177428d7b3dSmrg wm[m], sampler_state); 3178428d7b3dSmrg wm_state++; 3179428d7b3dSmrg } 3180428d7b3dSmrg } 3181428d7b3dSmrg } 3182428d7b3dSmrg } 3183428d7b3dSmrg } 3184428d7b3dSmrg 3185428d7b3dSmrg state->cc = gen4_create_cc_unit_state(&general); 3186428d7b3dSmrg 3187428d7b3dSmrg state->general_bo = sna_static_stream_fini(sna, &general); 3188428d7b3dSmrg return state->general_bo != NULL; 3189428d7b3dSmrg} 3190428d7b3dSmrg 3191428d7b3dSmrgconst char *gen4_render_init(struct sna *sna, const char *backend) 3192428d7b3dSmrg{ 3193428d7b3dSmrg if (!gen4_render_setup(sna)) 3194428d7b3dSmrg return backend; 3195428d7b3dSmrg 3196428d7b3dSmrg sna->kgem.retire = gen4_render_retire; 3197428d7b3dSmrg sna->kgem.expire = gen4_render_expire; 3198428d7b3dSmrg 3199428d7b3dSmrg#if !NO_COMPOSITE 3200428d7b3dSmrg sna->render.composite = gen4_render_composite; 3201428d7b3dSmrg sna->render.prefer_gpu |= PREFER_GPU_RENDER; 3202428d7b3dSmrg#endif 3203428d7b3dSmrg#if !NO_COMPOSITE_SPANS 3204428d7b3dSmrg sna->render.check_composite_spans = gen4_check_composite_spans; 3205428d7b3dSmrg sna->render.composite_spans = gen4_render_composite_spans; 3206428d7b3dSmrg if (0) 3207428d7b3dSmrg sna->render.prefer_gpu |= PREFER_GPU_SPANS; 3208428d7b3dSmrg#endif 3209428d7b3dSmrg 3210428d7b3dSmrg#if !NO_VIDEO 3211428d7b3dSmrg sna->render.video = gen4_render_video; 3212428d7b3dSmrg#endif 3213428d7b3dSmrg 3214428d7b3dSmrg#if !NO_COPY_BOXES 3215428d7b3dSmrg sna->render.copy_boxes = gen4_render_copy_boxes; 3216428d7b3dSmrg#endif 3217428d7b3dSmrg#if !NO_COPY 3218428d7b3dSmrg sna->render.copy = gen4_render_copy; 3219428d7b3dSmrg#endif 3220428d7b3dSmrg 3221428d7b3dSmrg#if !NO_FILL_BOXES 3222428d7b3dSmrg sna->render.fill_boxes = gen4_render_fill_boxes; 3223428d7b3dSmrg#endif 3224428d7b3dSmrg#if !NO_FILL 3225428d7b3dSmrg sna->render.fill = gen4_render_fill; 3226428d7b3dSmrg#endif 3227428d7b3dSmrg#if !NO_FILL_ONE 3228428d7b3dSmrg sna->render.fill_one = gen4_render_fill_one; 3229428d7b3dSmrg#endif 3230428d7b3dSmrg 3231428d7b3dSmrg sna->render.flush = gen4_render_flush; 3232428d7b3dSmrg sna->render.reset = gen4_render_reset; 3233428d7b3dSmrg sna->render.fini = gen4_render_fini; 3234428d7b3dSmrg 3235428d7b3dSmrg sna->render.max_3d_size = GEN4_MAX_3D_SIZE; 3236428d7b3dSmrg sna->render.max_3d_pitch = 1 << 18; 3237428d7b3dSmrg return sna->kgem.gen >= 045 ? "Eaglelake (gen4.5)" : "Broadwater (gen4)"; 3238428d7b3dSmrg} 3239