1fe8aea9eSmrg/* 2fe8aea9eSmrg * Copyright © 2012,2013 Intel Corporation 3fe8aea9eSmrg * 4fe8aea9eSmrg * Permission is hereby granted, free of charge, to any person obtaining a 5fe8aea9eSmrg * copy of this software and associated documentation files (the "Software"), 6fe8aea9eSmrg * to deal in the Software without restriction, including without limitation 7fe8aea9eSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8fe8aea9eSmrg * and/or sell copies of the Software, and to permit persons to whom the 9fe8aea9eSmrg * Software is furnished to do so, subject to the following conditions: 10fe8aea9eSmrg * 11fe8aea9eSmrg * The above copyright notice and this permission notice (including the next 12fe8aea9eSmrg * paragraph) shall be included in all copies or substantial portions of the 13fe8aea9eSmrg * Software. 14fe8aea9eSmrg * 15fe8aea9eSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16fe8aea9eSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17fe8aea9eSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18fe8aea9eSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19fe8aea9eSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20fe8aea9eSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21fe8aea9eSmrg * SOFTWARE. 22fe8aea9eSmrg * 23fe8aea9eSmrg * Authors: 24fe8aea9eSmrg * Chris Wilson <chris@chris-wilson.co.uk> 25fe8aea9eSmrg * 26fe8aea9eSmrg */ 27fe8aea9eSmrg 28fe8aea9eSmrg#ifdef HAVE_CONFIG_H 29fe8aea9eSmrg#include "config.h" 30fe8aea9eSmrg#endif 31fe8aea9eSmrg 32fe8aea9eSmrg#include "sna.h" 33fe8aea9eSmrg#include "sna_reg.h" 34fe8aea9eSmrg#include "sna_render.h" 35fe8aea9eSmrg#include "sna_render_inline.h" 36fe8aea9eSmrg#include "sna_video.h" 37fe8aea9eSmrg 38fe8aea9eSmrg#include "gen9_render.h" 39fe8aea9eSmrg#include "gen8_eu.h" 40fe8aea9eSmrg#include "gen4_common.h" 41fe8aea9eSmrg#include "gen4_source.h" 42fe8aea9eSmrg#include "gen4_vertex.h" 43fe8aea9eSmrg#include "gen6_common.h" 44fe8aea9eSmrg#include "gen8_vertex.h" 45fe8aea9eSmrg 46fe8aea9eSmrg#define SIM 1 47fe8aea9eSmrg 48fe8aea9eSmrg#define ALWAYS_INVALIDATE 0 49fe8aea9eSmrg#define ALWAYS_FLUSH 0 50fe8aea9eSmrg#define ALWAYS_STALL 0 51fe8aea9eSmrg 52fe8aea9eSmrg#define NO_COMPOSITE 0 53fe8aea9eSmrg#define NO_COMPOSITE_SPANS 0 54fe8aea9eSmrg#define NO_COPY 0 55fe8aea9eSmrg#define NO_COPY_BOXES 0 56fe8aea9eSmrg#define NO_FILL 0 57fe8aea9eSmrg#define NO_FILL_BOXES 0 58fe8aea9eSmrg#define NO_FILL_ONE 0 59fe8aea9eSmrg#define NO_FILL_CLEAR 0 60fe8aea9eSmrg#define NO_VIDEO 0 61fe8aea9eSmrg 62fe8aea9eSmrg#define USE_8_PIXEL_DISPATCH 1 63fe8aea9eSmrg#define USE_16_PIXEL_DISPATCH 1 64fe8aea9eSmrg#define USE_32_PIXEL_DISPATCH 0 65fe8aea9eSmrg 66fe8aea9eSmrg#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH 67fe8aea9eSmrg#error "Must select at least 8, 16 or 32 pixel dispatch" 68fe8aea9eSmrg#endif 69fe8aea9eSmrg 70fe8aea9eSmrg#define GEN9_MAX_SIZE 16384 71fe8aea9eSmrg#define GEN9_GT_BIAS 1 /* Each GT is bigger than previous gen */ 72fe8aea9eSmrg 73fe8aea9eSmrg/* XXX Todo 74fe8aea9eSmrg * 75fe8aea9eSmrg * STR (software tiled rendering) mode. No, really. 76fe8aea9eSmrg * 64x32 pixel blocks align with the rendering cache. Worth considering. 77fe8aea9eSmrg */ 78fe8aea9eSmrg 79fe8aea9eSmrg#define is_aligned(x, y) (((x) & ((y) - 1)) == 0) 80fe8aea9eSmrg 81fe8aea9eSmrg/* Pipeline stages: 82fe8aea9eSmrg * 1. Command Streamer (CS) 83fe8aea9eSmrg * 2. Vertex Fetch (VF) 84fe8aea9eSmrg * 3. Vertex Shader (VS) 85fe8aea9eSmrg * 4. Hull Shader (HS) 86fe8aea9eSmrg * 5. Tesselation Engine (TE) 87fe8aea9eSmrg * 6. Domain Shader (DS) 88fe8aea9eSmrg * 7. Geometry Shader (GS) 89fe8aea9eSmrg * 8. Stream Output Logic (SOL) 90fe8aea9eSmrg * 9. Clipper (CLIP) 91fe8aea9eSmrg * 10. Strip/Fan (SF) 92fe8aea9eSmrg * 11. Windower/Masker (WM) 93fe8aea9eSmrg * 12. Color Calculator (CC) 94fe8aea9eSmrg */ 95fe8aea9eSmrg 96fe8aea9eSmrg#if !NO_VIDEO 97fe8aea9eSmrgstatic const uint32_t ps_kernel_packed_bt601[][4] = { 98fe8aea9eSmrg#include "exa_wm_src_affine.g8b" 99fe8aea9eSmrg#include "exa_wm_src_sample_argb.g8b" 100fe8aea9eSmrg#include "exa_wm_yuv_rgb_bt601.g8b" 101fe8aea9eSmrg#include "exa_wm_write.g8b" 102fe8aea9eSmrg}; 103fe8aea9eSmrg 104fe8aea9eSmrgstatic const uint32_t ps_kernel_planar_bt601[][4] = { 105fe8aea9eSmrg#include "exa_wm_src_affine.g8b" 106fe8aea9eSmrg#include "exa_wm_src_sample_planar.g8b" 107fe8aea9eSmrg#include "exa_wm_yuv_rgb_bt601.g8b" 108fe8aea9eSmrg#include "exa_wm_write.g8b" 109fe8aea9eSmrg}; 110fe8aea9eSmrg 111fe8aea9eSmrgstatic const uint32_t ps_kernel_nv12_bt601[][4] = { 112fe8aea9eSmrg#include "exa_wm_src_affine.g8b" 113fe8aea9eSmrg#include "exa_wm_src_sample_nv12.g8b" 114fe8aea9eSmrg#include "exa_wm_yuv_rgb_bt601.g8b" 115fe8aea9eSmrg#include "exa_wm_write.g8b" 116fe8aea9eSmrg}; 117fe8aea9eSmrg 118fe8aea9eSmrgstatic const uint32_t ps_kernel_packed_bt709[][4] = { 119fe8aea9eSmrg#include "exa_wm_src_affine.g8b" 120fe8aea9eSmrg#include "exa_wm_src_sample_argb.g8b" 121fe8aea9eSmrg#include "exa_wm_yuv_rgb_bt709.g8b" 122fe8aea9eSmrg#include "exa_wm_write.g8b" 123fe8aea9eSmrg}; 124fe8aea9eSmrg 125fe8aea9eSmrgstatic const uint32_t ps_kernel_planar_bt709[][4] = { 126fe8aea9eSmrg#include "exa_wm_src_affine.g8b" 127fe8aea9eSmrg#include "exa_wm_src_sample_planar.g8b" 128fe8aea9eSmrg#include "exa_wm_yuv_rgb_bt709.g8b" 129fe8aea9eSmrg#include "exa_wm_write.g8b" 130fe8aea9eSmrg}; 131fe8aea9eSmrg 132fe8aea9eSmrgstatic const uint32_t ps_kernel_ayuv_bt601[][4] = { 133fe8aea9eSmrg#include "exa_wm_src_affine.g8b" 134fe8aea9eSmrg#include "exa_wm_src_sample_argb_ayuv.g8b" 135fe8aea9eSmrg#include "exa_wm_yuv_rgb_bt601.g8b" 136fe8aea9eSmrg#include "exa_wm_write.g8b" 137fe8aea9eSmrg}; 138fe8aea9eSmrg 139fe8aea9eSmrgstatic const uint32_t ps_kernel_ayuv_bt709[][4] = { 140fe8aea9eSmrg#include "exa_wm_src_affine.g8b" 141fe8aea9eSmrg#include "exa_wm_src_sample_argb_ayuv.g8b" 142fe8aea9eSmrg#include "exa_wm_yuv_rgb_bt709.g8b" 143fe8aea9eSmrg#include "exa_wm_write.g8b" 144fe8aea9eSmrg}; 145fe8aea9eSmrg 146fe8aea9eSmrgstatic const uint32_t ps_kernel_nv12_bt709[][4] = { 147fe8aea9eSmrg#include "exa_wm_src_affine.g8b" 148fe8aea9eSmrg#include "exa_wm_src_sample_nv12.g8b" 149fe8aea9eSmrg#include "exa_wm_yuv_rgb_bt709.g8b" 150fe8aea9eSmrg#include "exa_wm_write.g8b" 151fe8aea9eSmrg}; 152fe8aea9eSmrg 153fe8aea9eSmrgstatic const uint32_t ps_kernel_rgb[][4] = { 154fe8aea9eSmrg#include "exa_wm_src_affine.g8b" 155fe8aea9eSmrg#include "exa_wm_src_sample_argb.g8b" 156fe8aea9eSmrg#include "exa_wm_write.g8b" 157fe8aea9eSmrg}; 158fe8aea9eSmrg#endif 159fe8aea9eSmrg 160fe8aea9eSmrg#define SURFACE_DW (64 / sizeof(uint32_t)); 161fe8aea9eSmrg 162fe8aea9eSmrg#define KERNEL(kernel_enum, kernel, num_surfaces) \ 163fe8aea9eSmrg [GEN9_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces} 164fe8aea9eSmrg#define NOKERNEL(kernel_enum, func, num_surfaces) \ 165fe8aea9eSmrg [GEN9_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces} 166fe8aea9eSmrgstatic const struct wm_kernel_info { 167fe8aea9eSmrg const char *name; 168fe8aea9eSmrg const void *data; 169fe8aea9eSmrg unsigned int size; 170fe8aea9eSmrg int num_surfaces; 171fe8aea9eSmrg} wm_kernels[] = { 172fe8aea9eSmrg NOKERNEL(NOMASK, gen8_wm_kernel__affine, 2), 173fe8aea9eSmrg NOKERNEL(NOMASK_P, gen8_wm_kernel__projective, 2), 174fe8aea9eSmrg 175fe8aea9eSmrg NOKERNEL(MASK, gen8_wm_kernel__affine_mask, 3), 176fe8aea9eSmrg NOKERNEL(MASK_P, gen8_wm_kernel__projective_mask, 3), 177fe8aea9eSmrg 178fe8aea9eSmrg NOKERNEL(MASKCA, gen8_wm_kernel__affine_mask_ca, 3), 179fe8aea9eSmrg NOKERNEL(MASKCA_P, gen8_wm_kernel__projective_mask_ca, 3), 180fe8aea9eSmrg 181fe8aea9eSmrg NOKERNEL(MASKSA, gen8_wm_kernel__affine_mask_sa, 3), 182fe8aea9eSmrg NOKERNEL(MASKSA_P, gen8_wm_kernel__projective_mask_sa, 3), 183fe8aea9eSmrg 184fe8aea9eSmrg NOKERNEL(OPACITY, gen8_wm_kernel__affine_opacity, 2), 185fe8aea9eSmrg NOKERNEL(OPACITY_P, gen8_wm_kernel__projective_opacity, 2), 186fe8aea9eSmrg 187fe8aea9eSmrg#if !NO_VIDEO 188fe8aea9eSmrg KERNEL(VIDEO_PLANAR_BT601, ps_kernel_planar_bt601, 7), 189fe8aea9eSmrg KERNEL(VIDEO_NV12_BT601, ps_kernel_nv12_bt601, 7), 190fe8aea9eSmrg KERNEL(VIDEO_PACKED_BT601, ps_kernel_packed_bt601, 2), 191fe8aea9eSmrg KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7), 192fe8aea9eSmrg KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7), 193fe8aea9eSmrg KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2), 194fe8aea9eSmrg KERNEL(VIDEO_AYUV_BT601, ps_kernel_ayuv_bt601, 2), 195fe8aea9eSmrg KERNEL(VIDEO_AYUV_BT709, ps_kernel_ayuv_bt709, 2), 196fe8aea9eSmrg KERNEL(VIDEO_RGB, ps_kernel_rgb, 2), 197fe8aea9eSmrg#endif 198fe8aea9eSmrg}; 199fe8aea9eSmrg#undef KERNEL 200fe8aea9eSmrg 201fe8aea9eSmrgstatic const struct blendinfo { 202fe8aea9eSmrg uint8_t src_alpha; 203fe8aea9eSmrg uint8_t src_blend; 204fe8aea9eSmrg uint8_t dst_blend; 205fe8aea9eSmrg} gen9_blend_op[] = { 206fe8aea9eSmrg /* Clear */ {0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO}, 207fe8aea9eSmrg /* Src */ {0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO}, 208fe8aea9eSmrg /* Dst */ {0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE}, 209fe8aea9eSmrg /* Over */ {1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA}, 210fe8aea9eSmrg /* OverReverse */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE}, 211fe8aea9eSmrg /* In */ {0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO}, 212fe8aea9eSmrg /* InReverse */ {1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA}, 213fe8aea9eSmrg /* Out */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO}, 214fe8aea9eSmrg /* OutReverse */ {1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA}, 215fe8aea9eSmrg /* Atop */ {1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, 216fe8aea9eSmrg /* AtopReverse */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA}, 217fe8aea9eSmrg /* Xor */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, 218fe8aea9eSmrg /* Add */ {0, BLENDFACTOR_ONE, BLENDFACTOR_ONE}, 219fe8aea9eSmrg}; 220fe8aea9eSmrg 221fe8aea9eSmrg/** 222fe8aea9eSmrg * Highest-valued BLENDFACTOR used in gen9_blend_op. 223fe8aea9eSmrg * 224fe8aea9eSmrg * This leaves out GEN9_BLENDFACTOR_INV_DST_COLOR, 225fe8aea9eSmrg * GEN9_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, 226fe8aea9eSmrg * GEN9_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} 227fe8aea9eSmrg */ 228fe8aea9eSmrg#define GEN9_BLENDFACTOR_COUNT (BLENDFACTOR_INV_DST_ALPHA + 1) 229fe8aea9eSmrg 230fe8aea9eSmrg#define GEN9_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen9_blend_state), 64) 231fe8aea9eSmrg 232fe8aea9eSmrg#define BLEND_OFFSET(s, d) \ 233fe8aea9eSmrg ((d != BLENDFACTOR_ZERO) << 15 | ((s) * GEN9_BLENDFACTOR_COUNT + (d)) << 4) 234fe8aea9eSmrg 235fe8aea9eSmrg#define NO_BLEND BLEND_OFFSET(BLENDFACTOR_ONE, BLENDFACTOR_ZERO) 236fe8aea9eSmrg#define CLEAR BLEND_OFFSET(BLENDFACTOR_ZERO, BLENDFACTOR_ZERO) 237fe8aea9eSmrg 238fe8aea9eSmrg#define SAMPLER_OFFSET(sf, se, mf, me) \ 239fe8aea9eSmrg (((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) 240fe8aea9eSmrg 241fe8aea9eSmrg#define VERTEX_2s2s 0 242fe8aea9eSmrg 243fe8aea9eSmrg#define COPY_SAMPLER 0 244fe8aea9eSmrg#define COPY_VERTEX VERTEX_2s2s 245fe8aea9eSmrg#define COPY_FLAGS(a) GEN9_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, COPY_VERTEX) 246fe8aea9eSmrg 247fe8aea9eSmrg#define FILL_SAMPLER 1 248fe8aea9eSmrg#define FILL_VERTEX VERTEX_2s2s 249fe8aea9eSmrg#define FILL_FLAGS(op, format) GEN9_SET_FLAGS(FILL_SAMPLER, gen9_get_blend((op), false, (format)), FILL_VERTEX) 250fe8aea9eSmrg#define FILL_FLAGS_NOBLEND GEN9_SET_FLAGS(FILL_SAMPLER, NO_BLEND, FILL_VERTEX) 251fe8aea9eSmrg 252fe8aea9eSmrg#define GEN9_SAMPLER(f) (((f) >> 20) & 0xfff) 253fe8aea9eSmrg#define GEN9_BLEND(f) (((f) >> 4) & 0x7ff) 254fe8aea9eSmrg#define GEN9_READS_DST(f) (((f) >> 15) & 1) 255fe8aea9eSmrg#define GEN9_VERTEX(f) (((f) >> 0) & 0xf) 256fe8aea9eSmrg#define GEN9_SET_FLAGS(S, B, V) ((S) << 20 | (B) | (V)) 257fe8aea9eSmrg 258fe8aea9eSmrg#define OUT_BATCH(v) batch_emit(sna, v) 259fe8aea9eSmrg#define OUT_BATCH64(v) batch_emit64(sna, v) 260fe8aea9eSmrg#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) 261fe8aea9eSmrg#define OUT_VERTEX_F(v) vertex_emit(sna, v) 262fe8aea9eSmrg 263fe8aea9eSmrgstruct gt_info { 264fe8aea9eSmrg const char *name; 265fe8aea9eSmrg struct { 266fe8aea9eSmrg int max_vs_entries; 267fe8aea9eSmrg } urb; 268fe8aea9eSmrg}; 269fe8aea9eSmrg 270fe8aea9eSmrgstatic const struct gt_info min_gt_info = { 271fe8aea9eSmrg .name = "Skylake (gen9)", 272fe8aea9eSmrg .urb = { .max_vs_entries = 240 }, 273fe8aea9eSmrg}; 274fe8aea9eSmrg 275fe8aea9eSmrgstatic const struct gt_info skl_gt_info = { 276fe8aea9eSmrg .name = "Skylake (gen9)", 277fe8aea9eSmrg .urb = { .max_vs_entries = 960 }, 278fe8aea9eSmrg}; 279fe8aea9eSmrg 280fe8aea9eSmrgstatic const struct gt_info bxt_gt_info = { 281fe8aea9eSmrg .name = "Broxton (gen9)", 282fe8aea9eSmrg .urb = { .max_vs_entries = 320 }, 283fe8aea9eSmrg}; 284fe8aea9eSmrg 285fe8aea9eSmrgstatic const struct gt_info kbl_gt_info = { 286fe8aea9eSmrg .name = "Kabylake (gen9)", 287fe8aea9eSmrg .urb = { .max_vs_entries = 960 }, 288fe8aea9eSmrg}; 289fe8aea9eSmrg 290fe8aea9eSmrgstatic const struct gt_info glk_gt_info = { 291fe8aea9eSmrg .name = "Geminilake (gen9)", 292fe8aea9eSmrg .urb = { .max_vs_entries = 320 }, 293fe8aea9eSmrg}; 294fe8aea9eSmrg 295fe8aea9eSmrgstatic const struct gt_info cfl_gt_info = { 296fe8aea9eSmrg .name = "Coffeelake (gen9)", 297fe8aea9eSmrg .urb = { .max_vs_entries = 960 }, 298fe8aea9eSmrg}; 299fe8aea9eSmrg 300fe8aea9eSmrgstatic bool is_skl(struct sna *sna) 301fe8aea9eSmrg{ 302fe8aea9eSmrg return sna->kgem.gen == 0110; 303fe8aea9eSmrg} 304fe8aea9eSmrg 305fe8aea9eSmrgstatic bool is_bxt(struct sna *sna) 306fe8aea9eSmrg{ 307fe8aea9eSmrg return sna->kgem.gen == 0111; 308fe8aea9eSmrg} 309fe8aea9eSmrg 310fe8aea9eSmrgstatic bool is_kbl(struct sna *sna) 311fe8aea9eSmrg{ 312fe8aea9eSmrg return sna->kgem.gen == 0112; 313fe8aea9eSmrg} 314fe8aea9eSmrg 315fe8aea9eSmrgstatic bool is_glk(struct sna *sna) 316fe8aea9eSmrg{ 317fe8aea9eSmrg return sna->kgem.gen == 0113; 318fe8aea9eSmrg} 319fe8aea9eSmrg 320fe8aea9eSmrgstatic bool is_cfl(struct sna *sna) 321fe8aea9eSmrg{ 322fe8aea9eSmrg return sna->kgem.gen == 0114; 323fe8aea9eSmrg} 324fe8aea9eSmrg 325fe8aea9eSmrgstatic inline bool too_large(int width, int height) 326fe8aea9eSmrg{ 327fe8aea9eSmrg return width > GEN9_MAX_SIZE || height > GEN9_MAX_SIZE; 328fe8aea9eSmrg} 329fe8aea9eSmrg 330fe8aea9eSmrgstatic inline bool unaligned(struct kgem_bo *bo, int bpp) 331fe8aea9eSmrg{ 332fe8aea9eSmrg /* XXX What exactly do we need to meet H_ALIGN and V_ALIGN? */ 333fe8aea9eSmrg#if 0 334fe8aea9eSmrg int x, y; 335fe8aea9eSmrg 336fe8aea9eSmrg if (bo->proxy == NULL) 337fe8aea9eSmrg return false; 338fe8aea9eSmrg 339fe8aea9eSmrg /* Assume that all tiled proxies are constructed correctly. */ 340fe8aea9eSmrg if (bo->tiling) 341fe8aea9eSmrg return false; 342fe8aea9eSmrg 343fe8aea9eSmrg DBG(("%s: checking alignment of a linear proxy, offset=%d, pitch=%d, bpp=%d: => (%d, %d)\n", 344fe8aea9eSmrg __FUNCTION__, bo->delta, bo->pitch, bpp, 345fe8aea9eSmrg 8 * (bo->delta % bo->pitch) / bpp, bo->delta / bo->pitch)); 346fe8aea9eSmrg 347fe8aea9eSmrg /* This may be a random userptr map, check that it meets the 348fe8aea9eSmrg * render alignment of SURFACE_VALIGN_4 | SURFACE_HALIGN_4. 349fe8aea9eSmrg */ 350fe8aea9eSmrg y = bo->delta / bo->pitch; 351fe8aea9eSmrg if (y & 3) 352fe8aea9eSmrg return true; 353fe8aea9eSmrg 354fe8aea9eSmrg x = 8 * (bo->delta - y * bo->pitch); 355fe8aea9eSmrg if (x & (4*bpp - 1)) 356fe8aea9eSmrg return true; 357fe8aea9eSmrg 358fe8aea9eSmrg return false; 359fe8aea9eSmrg#else 360fe8aea9eSmrg return false; 361fe8aea9eSmrg#endif 362fe8aea9eSmrg} 363fe8aea9eSmrg 364fe8aea9eSmrgstatic uint32_t gen9_get_blend(int op, 365fe8aea9eSmrg bool has_component_alpha, 366fe8aea9eSmrg uint32_t dst_format) 367fe8aea9eSmrg{ 368fe8aea9eSmrg uint32_t src, dst; 369fe8aea9eSmrg 370fe8aea9eSmrg COMPILE_TIME_ASSERT(BLENDFACTOR_INV_DST_ALPHA*GEN9_BLENDFACTOR_COUNT + BLENDFACTOR_INV_DST_ALPHA <= 0x7ff); 371fe8aea9eSmrg 372fe8aea9eSmrg src = gen9_blend_op[op].src_blend; 373fe8aea9eSmrg dst = gen9_blend_op[op].dst_blend; 374fe8aea9eSmrg 375fe8aea9eSmrg /* If there's no dst alpha channel, adjust the blend op so that 376fe8aea9eSmrg * we'll treat it always as 1. 377fe8aea9eSmrg */ 378fe8aea9eSmrg if (PICT_FORMAT_A(dst_format) == 0) { 379fe8aea9eSmrg if (src == BLENDFACTOR_DST_ALPHA) 380fe8aea9eSmrg src = BLENDFACTOR_ONE; 381fe8aea9eSmrg else if (src == BLENDFACTOR_INV_DST_ALPHA) 382fe8aea9eSmrg src = BLENDFACTOR_ZERO; 383fe8aea9eSmrg } 384fe8aea9eSmrg 385fe8aea9eSmrg /* If the source alpha is being used, then we should only be in a 386fe8aea9eSmrg * case where the source blend factor is 0, and the source blend 387fe8aea9eSmrg * value is the mask channels multiplied by the source picture's alpha. 388fe8aea9eSmrg */ 389fe8aea9eSmrg if (has_component_alpha && gen9_blend_op[op].src_alpha) { 390fe8aea9eSmrg if (dst == BLENDFACTOR_SRC_ALPHA) 391fe8aea9eSmrg dst = BLENDFACTOR_SRC_COLOR; 392fe8aea9eSmrg else if (dst == BLENDFACTOR_INV_SRC_ALPHA) 393fe8aea9eSmrg dst = BLENDFACTOR_INV_SRC_COLOR; 394fe8aea9eSmrg } 395fe8aea9eSmrg 396fe8aea9eSmrg DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", 397fe8aea9eSmrg op, dst_format, PICT_FORMAT_A(dst_format), 398fe8aea9eSmrg src, dst, (int)(BLEND_OFFSET(src, dst)>>4))); 399fe8aea9eSmrg assert(BLEND_OFFSET(src, dst) >> 4 <= 0xfff); 400fe8aea9eSmrg return BLEND_OFFSET(src, dst); 401fe8aea9eSmrg} 402fe8aea9eSmrg 403fe8aea9eSmrgstatic uint32_t gen9_get_card_format(PictFormat format) 404fe8aea9eSmrg{ 405fe8aea9eSmrg switch (format) { 406fe8aea9eSmrg default: 407fe8aea9eSmrg return -1; 408fe8aea9eSmrg case PICT_a8r8g8b8: 409fe8aea9eSmrg return SURFACEFORMAT_B8G8R8A8_UNORM; 410fe8aea9eSmrg case PICT_x8r8g8b8: 411fe8aea9eSmrg return SURFACEFORMAT_B8G8R8X8_UNORM; 412fe8aea9eSmrg case PICT_a8b8g8r8: 413fe8aea9eSmrg return SURFACEFORMAT_R8G8B8A8_UNORM; 414fe8aea9eSmrg case PICT_x8b8g8r8: 415fe8aea9eSmrg return SURFACEFORMAT_R8G8B8X8_UNORM; 416fe8aea9eSmrg#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0) 417fe8aea9eSmrg case PICT_a2r10g10b10: 418fe8aea9eSmrg return SURFACEFORMAT_B10G10R10A2_UNORM; 419fe8aea9eSmrg case PICT_x2r10g10b10: 420fe8aea9eSmrg return SURFACEFORMAT_B10G10R10X2_UNORM; 421fe8aea9eSmrg#endif 422fe8aea9eSmrg case PICT_r8g8b8: 423fe8aea9eSmrg return SURFACEFORMAT_R8G8B8_UNORM; 424fe8aea9eSmrg case PICT_r5g6b5: 425fe8aea9eSmrg return SURFACEFORMAT_B5G6R5_UNORM; 426fe8aea9eSmrg case PICT_a1r5g5b5: 427fe8aea9eSmrg return SURFACEFORMAT_B5G5R5A1_UNORM; 428fe8aea9eSmrg case PICT_a8: 429fe8aea9eSmrg return SURFACEFORMAT_A8_UNORM; 430fe8aea9eSmrg case PICT_a4r4g4b4: 431fe8aea9eSmrg return SURFACEFORMAT_B4G4R4A4_UNORM; 432fe8aea9eSmrg } 433fe8aea9eSmrg} 434fe8aea9eSmrg 435fe8aea9eSmrgstatic uint32_t gen9_get_dest_format(PictFormat format) 436fe8aea9eSmrg{ 437fe8aea9eSmrg switch (format) { 438fe8aea9eSmrg default: 439fe8aea9eSmrg return -1; 440fe8aea9eSmrg case PICT_a8r8g8b8: 441fe8aea9eSmrg case PICT_x8r8g8b8: 442fe8aea9eSmrg return SURFACEFORMAT_B8G8R8A8_UNORM; 443fe8aea9eSmrg case PICT_a8b8g8r8: 444fe8aea9eSmrg case PICT_x8b8g8r8: 445fe8aea9eSmrg return SURFACEFORMAT_R8G8B8A8_UNORM; 446fe8aea9eSmrg#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0) 447fe8aea9eSmrg case PICT_a2r10g10b10: 448fe8aea9eSmrg case PICT_x2r10g10b10: 449fe8aea9eSmrg return SURFACEFORMAT_B10G10R10A2_UNORM; 450fe8aea9eSmrg#endif 451fe8aea9eSmrg case PICT_r5g6b5: 452fe8aea9eSmrg return SURFACEFORMAT_B5G6R5_UNORM; 453fe8aea9eSmrg case PICT_x1r5g5b5: 454fe8aea9eSmrg case PICT_a1r5g5b5: 455fe8aea9eSmrg return SURFACEFORMAT_B5G5R5A1_UNORM; 456fe8aea9eSmrg case PICT_a8: 457fe8aea9eSmrg return SURFACEFORMAT_A8_UNORM; 458fe8aea9eSmrg case PICT_a4r4g4b4: 459fe8aea9eSmrg case PICT_x4r4g4b4: 460fe8aea9eSmrg return SURFACEFORMAT_B4G4R4A4_UNORM; 461fe8aea9eSmrg } 462fe8aea9eSmrg} 463fe8aea9eSmrg 464fe8aea9eSmrgstatic bool gen9_check_dst_format(PictFormat format) 465fe8aea9eSmrg{ 466fe8aea9eSmrg if (gen9_get_dest_format(format) != -1) 467fe8aea9eSmrg return true; 468fe8aea9eSmrg 469fe8aea9eSmrg DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 470fe8aea9eSmrg return false; 471fe8aea9eSmrg} 472fe8aea9eSmrg 473fe8aea9eSmrgstatic bool gen9_check_format(uint32_t format) 474fe8aea9eSmrg{ 475fe8aea9eSmrg if (gen9_get_card_format(format) != -1) 476fe8aea9eSmrg return true; 477fe8aea9eSmrg 478fe8aea9eSmrg DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); 479fe8aea9eSmrg return false; 480fe8aea9eSmrg} 481fe8aea9eSmrg 482fe8aea9eSmrgstatic uint32_t gen9_filter(uint32_t filter) 483fe8aea9eSmrg{ 484fe8aea9eSmrg switch (filter) { 485fe8aea9eSmrg default: 486fe8aea9eSmrg assert(0); 487fe8aea9eSmrg case PictFilterNearest: 488fe8aea9eSmrg return SAMPLER_FILTER_NEAREST; 489fe8aea9eSmrg case PictFilterBilinear: 490fe8aea9eSmrg return SAMPLER_FILTER_BILINEAR; 491fe8aea9eSmrg } 492fe8aea9eSmrg} 493fe8aea9eSmrg 494fe8aea9eSmrgstatic uint32_t gen9_check_filter(PicturePtr picture) 495fe8aea9eSmrg{ 496fe8aea9eSmrg switch (picture->filter) { 497fe8aea9eSmrg case PictFilterNearest: 498fe8aea9eSmrg case PictFilterBilinear: 499fe8aea9eSmrg return true; 500fe8aea9eSmrg default: 501fe8aea9eSmrg return false; 502fe8aea9eSmrg } 503fe8aea9eSmrg} 504fe8aea9eSmrg 505fe8aea9eSmrgstatic uint32_t gen9_repeat(uint32_t repeat) 506fe8aea9eSmrg{ 507fe8aea9eSmrg switch (repeat) { 508fe8aea9eSmrg default: 509fe8aea9eSmrg assert(0); 510fe8aea9eSmrg case RepeatNone: 511fe8aea9eSmrg return SAMPLER_EXTEND_NONE; 512fe8aea9eSmrg case RepeatNormal: 513fe8aea9eSmrg return SAMPLER_EXTEND_REPEAT; 514fe8aea9eSmrg case RepeatPad: 515fe8aea9eSmrg return SAMPLER_EXTEND_PAD; 516fe8aea9eSmrg case RepeatReflect: 517fe8aea9eSmrg return SAMPLER_EXTEND_REFLECT; 518fe8aea9eSmrg } 519fe8aea9eSmrg} 520fe8aea9eSmrg 521fe8aea9eSmrgstatic bool gen9_check_repeat(PicturePtr picture) 522fe8aea9eSmrg{ 523fe8aea9eSmrg if (!picture->repeat) 524fe8aea9eSmrg return true; 525fe8aea9eSmrg 526fe8aea9eSmrg switch (picture->repeatType) { 527fe8aea9eSmrg case RepeatNone: 528fe8aea9eSmrg case RepeatNormal: 529fe8aea9eSmrg case RepeatPad: 530fe8aea9eSmrg case RepeatReflect: 531fe8aea9eSmrg return true; 532fe8aea9eSmrg default: 533fe8aea9eSmrg return false; 534fe8aea9eSmrg } 535fe8aea9eSmrg} 536fe8aea9eSmrg 537fe8aea9eSmrgstatic int 538fe8aea9eSmrggen9_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) 539fe8aea9eSmrg{ 540fe8aea9eSmrg int base; 541fe8aea9eSmrg 542fe8aea9eSmrg if (has_mask) { 543fe8aea9eSmrg if (is_ca) { 544fe8aea9eSmrg if (gen9_blend_op[op].src_alpha) 545fe8aea9eSmrg base = GEN9_WM_KERNEL_MASKSA; 546fe8aea9eSmrg else 547fe8aea9eSmrg base = GEN9_WM_KERNEL_MASKCA; 548fe8aea9eSmrg } else 549fe8aea9eSmrg base = GEN9_WM_KERNEL_MASK; 550fe8aea9eSmrg } else 551fe8aea9eSmrg base = GEN9_WM_KERNEL_NOMASK; 552fe8aea9eSmrg 553fe8aea9eSmrg return base + !is_affine; 554fe8aea9eSmrg} 555fe8aea9eSmrg 556fe8aea9eSmrgstatic void 557fe8aea9eSmrggen9_emit_push_constants(struct sna *sna) 558fe8aea9eSmrg{ 559fe8aea9eSmrg#if SIM 560fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2)); 561fe8aea9eSmrg OUT_BATCH(0); 562fe8aea9eSmrg 563fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2)); 564fe8aea9eSmrg OUT_BATCH(0); 565fe8aea9eSmrg 566fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2)); 567fe8aea9eSmrg OUT_BATCH(0); 568fe8aea9eSmrg 569fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2)); 570fe8aea9eSmrg OUT_BATCH(0); 571fe8aea9eSmrg 572fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); 573fe8aea9eSmrg OUT_BATCH(0); 574fe8aea9eSmrg#endif 575fe8aea9eSmrg} 576fe8aea9eSmrg 577fe8aea9eSmrgstatic void 578fe8aea9eSmrggen9_emit_urb(struct sna *sna) 579fe8aea9eSmrg{ 580fe8aea9eSmrg /* num of VS entries must be divisible by 8 if size < 9 */ 581fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_URB_VS | (2 - 2)); 582fe8aea9eSmrg OUT_BATCH(sna->render_state.gen9.info->urb.max_vs_entries << URB_ENTRY_NUMBER_SHIFT | 583fe8aea9eSmrg (2 - 1) << URB_ENTRY_SIZE_SHIFT | 584fe8aea9eSmrg 4 << URB_STARTING_ADDRESS_SHIFT); 585fe8aea9eSmrg 586fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_URB_HS | (2 - 2)); 587fe8aea9eSmrg OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT | 588fe8aea9eSmrg 4 << URB_STARTING_ADDRESS_SHIFT); 589fe8aea9eSmrg 590fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_URB_DS | (2 - 2)); 591fe8aea9eSmrg OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT | 592fe8aea9eSmrg 4 << URB_STARTING_ADDRESS_SHIFT); 593fe8aea9eSmrg 594fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_URB_GS | (2 - 2)); 595fe8aea9eSmrg OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT | 596fe8aea9eSmrg 4 << URB_STARTING_ADDRESS_SHIFT); 597fe8aea9eSmrg} 598fe8aea9eSmrg 599fe8aea9eSmrgstatic void 600fe8aea9eSmrggen9_emit_state_base_address(struct sna *sna) 601fe8aea9eSmrg{ 602fe8aea9eSmrg uint32_t num_pages; 603fe8aea9eSmrg 604fe8aea9eSmrg assert(sna->kgem.surface - sna->kgem.nbatch <= 16384); 605fe8aea9eSmrg 606fe8aea9eSmrg /* WaBindlessSurfaceStateModifyEnable:skl,bxt */ 607fe8aea9eSmrg OUT_BATCH(GEN9_STATE_BASE_ADDRESS | (19 - 1 - 2)); 608fe8aea9eSmrg OUT_BATCH64(0); /* general */ 609fe8aea9eSmrg OUT_BATCH(0); /* stateless dataport */ 610fe8aea9eSmrg OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* surface */ 611fe8aea9eSmrg sna->kgem.nbatch, 612fe8aea9eSmrg NULL, 613fe8aea9eSmrg I915_GEM_DOMAIN_INSTRUCTION << 16, 614fe8aea9eSmrg BASE_ADDRESS_MODIFY)); 615fe8aea9eSmrg OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* dynamic */ 616fe8aea9eSmrg sna->kgem.nbatch, 617fe8aea9eSmrg sna->render_state.gen9.general_bo, 618fe8aea9eSmrg I915_GEM_DOMAIN_INSTRUCTION << 16, 619fe8aea9eSmrg BASE_ADDRESS_MODIFY)); 620fe8aea9eSmrg OUT_BATCH64(0); /* indirect */ 621fe8aea9eSmrg OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* instruction */ 622fe8aea9eSmrg sna->kgem.nbatch, 623fe8aea9eSmrg sna->render_state.gen9.general_bo, 624fe8aea9eSmrg I915_GEM_DOMAIN_INSTRUCTION << 16, 625fe8aea9eSmrg BASE_ADDRESS_MODIFY)); 626fe8aea9eSmrg /* upper bounds */ 627fe8aea9eSmrg num_pages = sna->render_state.gen9.general_bo->size.pages.count; 628fe8aea9eSmrg OUT_BATCH(0); /* general */ 629fe8aea9eSmrg OUT_BATCH(num_pages << 12 | 1); /* dynamic */ 630fe8aea9eSmrg OUT_BATCH(0); /* indirect */ 631fe8aea9eSmrg OUT_BATCH(num_pages << 12 | 1); /* instruction */ 632fe8aea9eSmrg 633fe8aea9eSmrg /* Bindless */ 634fe8aea9eSmrg OUT_BATCH(0); 635fe8aea9eSmrg OUT_BATCH(0); 636fe8aea9eSmrg OUT_BATCH(0); 637fe8aea9eSmrg} 638fe8aea9eSmrg 639fe8aea9eSmrgstatic void 640fe8aea9eSmrggen9_emit_vs_invariant(struct sna *sna) 641fe8aea9eSmrg{ 642fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_VS | (9 - 2)); 643fe8aea9eSmrg OUT_BATCH64(0); /* no VS kernel */ 644fe8aea9eSmrg OUT_BATCH(0); 645fe8aea9eSmrg OUT_BATCH64(0); /* scratch */ 646fe8aea9eSmrg OUT_BATCH(0); 647fe8aea9eSmrg OUT_BATCH(1 << 1); /* pass-through */ 648fe8aea9eSmrg OUT_BATCH(1 << 16 | 1 << 21); /* urb write to SBE */ 649fe8aea9eSmrg 650fe8aea9eSmrg#if SIM 651fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_CONSTANT_VS | (11 - 2)); 652fe8aea9eSmrg OUT_BATCH(0); 653fe8aea9eSmrg OUT_BATCH(0); 654fe8aea9eSmrg OUT_BATCH64(0); 655fe8aea9eSmrg OUT_BATCH64(0); 656fe8aea9eSmrg OUT_BATCH64(0); 657fe8aea9eSmrg OUT_BATCH64(0); 658fe8aea9eSmrg 659fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2)); 660fe8aea9eSmrg OUT_BATCH(0); 661fe8aea9eSmrg 662fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2)); 663fe8aea9eSmrg OUT_BATCH(0); 664fe8aea9eSmrg#endif 665fe8aea9eSmrg} 666fe8aea9eSmrg 667fe8aea9eSmrgstatic void 668fe8aea9eSmrggen9_emit_hs_invariant(struct sna *sna) 669fe8aea9eSmrg{ 670fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_HS | (9 - 2)); 671fe8aea9eSmrg OUT_BATCH(0); 672fe8aea9eSmrg OUT_BATCH(0); 673fe8aea9eSmrg OUT_BATCH64(0); /* no HS kernel */ 674fe8aea9eSmrg OUT_BATCH64(0); /* scratch */ 675fe8aea9eSmrg OUT_BATCH(0); 676fe8aea9eSmrg OUT_BATCH(0); /* pass-through */ 677fe8aea9eSmrg 678fe8aea9eSmrg#if SIM 679fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_CONSTANT_HS | (11 - 2)); 680fe8aea9eSmrg OUT_BATCH(0); 681fe8aea9eSmrg OUT_BATCH(0); 682fe8aea9eSmrg OUT_BATCH64(0); 683fe8aea9eSmrg OUT_BATCH64(0); 684fe8aea9eSmrg OUT_BATCH64(0); 685fe8aea9eSmrg OUT_BATCH64(0); 686fe8aea9eSmrg 687fe8aea9eSmrg#if 1 688fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2)); 689fe8aea9eSmrg OUT_BATCH(0); 690fe8aea9eSmrg 691fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2)); 692fe8aea9eSmrg OUT_BATCH(0); 693fe8aea9eSmrg#endif 694fe8aea9eSmrg#endif 695fe8aea9eSmrg} 696fe8aea9eSmrg 697fe8aea9eSmrgstatic void 698fe8aea9eSmrggen9_emit_te_invariant(struct sna *sna) 699fe8aea9eSmrg{ 700fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_TE | (4 - 2)); 701fe8aea9eSmrg OUT_BATCH(0); 702fe8aea9eSmrg OUT_BATCH(0); 703fe8aea9eSmrg OUT_BATCH(0); 704fe8aea9eSmrg} 705fe8aea9eSmrg 706fe8aea9eSmrgstatic void 707fe8aea9eSmrggen9_emit_ds_invariant(struct sna *sna) 708fe8aea9eSmrg{ 709fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_DS | (11 - 2)); 710fe8aea9eSmrg OUT_BATCH64(0); /* no kernel */ 711fe8aea9eSmrg OUT_BATCH(0); 712fe8aea9eSmrg OUT_BATCH64(0); /* scratch */ 713fe8aea9eSmrg OUT_BATCH(0); 714fe8aea9eSmrg OUT_BATCH(0); 715fe8aea9eSmrg OUT_BATCH(0); 716fe8aea9eSmrg OUT_BATCH(0); 717fe8aea9eSmrg OUT_BATCH(0); 718fe8aea9eSmrg 719fe8aea9eSmrg#if SIM 720fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_CONSTANT_DS | (11 - 2)); 721fe8aea9eSmrg OUT_BATCH(0); 722fe8aea9eSmrg OUT_BATCH(0); 723fe8aea9eSmrg OUT_BATCH64(0); 724fe8aea9eSmrg OUT_BATCH64(0); 725fe8aea9eSmrg OUT_BATCH64(0); 726fe8aea9eSmrg OUT_BATCH64(0); 727fe8aea9eSmrg 728fe8aea9eSmrg#if 1 729fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2)); 730fe8aea9eSmrg OUT_BATCH(0); 731fe8aea9eSmrg 732fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2)); 733fe8aea9eSmrg OUT_BATCH(0); 734fe8aea9eSmrg#endif 735fe8aea9eSmrg#endif 736fe8aea9eSmrg} 737fe8aea9eSmrg 738fe8aea9eSmrgstatic void 739fe8aea9eSmrggen9_emit_gs_invariant(struct sna *sna) 740fe8aea9eSmrg{ 741fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_GS | (10 - 2)); 742fe8aea9eSmrg OUT_BATCH64(0); /* no GS kernel */ 743fe8aea9eSmrg OUT_BATCH(0); 744fe8aea9eSmrg OUT_BATCH64(0); /* scratch */ 745fe8aea9eSmrg OUT_BATCH(0); 746fe8aea9eSmrg OUT_BATCH(0); /* pass-through */ 747fe8aea9eSmrg OUT_BATCH(0); 748fe8aea9eSmrg OUT_BATCH(0); 749fe8aea9eSmrg 750fe8aea9eSmrg#if SIM 751fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_CONSTANT_GS | (11 - 2)); 752fe8aea9eSmrg OUT_BATCH(0); 753fe8aea9eSmrg OUT_BATCH(0); 754fe8aea9eSmrg OUT_BATCH64(0); 755fe8aea9eSmrg OUT_BATCH64(0); 756fe8aea9eSmrg OUT_BATCH64(0); 757fe8aea9eSmrg OUT_BATCH64(0); 758fe8aea9eSmrg 759fe8aea9eSmrg#if 1 760fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2)); 761fe8aea9eSmrg OUT_BATCH(0); 762fe8aea9eSmrg 763fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2)); 764fe8aea9eSmrg OUT_BATCH(0); 765fe8aea9eSmrg#endif 766fe8aea9eSmrg#endif 767fe8aea9eSmrg} 768fe8aea9eSmrg 769fe8aea9eSmrgstatic void 770fe8aea9eSmrggen9_emit_sol_invariant(struct sna *sna) 771fe8aea9eSmrg{ 772fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_STREAMOUT | (5 - 2)); 773fe8aea9eSmrg OUT_BATCH(0); 774fe8aea9eSmrg OUT_BATCH(0); 775fe8aea9eSmrg OUT_BATCH(0); 776fe8aea9eSmrg OUT_BATCH(0); 777fe8aea9eSmrg} 778fe8aea9eSmrg 779fe8aea9eSmrgstatic void 780fe8aea9eSmrggen9_emit_sf_invariant(struct sna *sna) 781fe8aea9eSmrg{ 782fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_SF | (4 - 2)); 783fe8aea9eSmrg OUT_BATCH(0); 784fe8aea9eSmrg OUT_BATCH(0); 785fe8aea9eSmrg OUT_BATCH(0); 786fe8aea9eSmrg} 787fe8aea9eSmrg 788fe8aea9eSmrgstatic void 789fe8aea9eSmrggen9_emit_clip_invariant(struct sna *sna) 790fe8aea9eSmrg{ 791fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_CLIP | (4 - 2)); 792fe8aea9eSmrg OUT_BATCH(0); 793fe8aea9eSmrg OUT_BATCH(0); /* pass-through */ 794fe8aea9eSmrg OUT_BATCH(0); 795fe8aea9eSmrg 796fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP | (2 - 2)); 797fe8aea9eSmrg OUT_BATCH(0); 798fe8aea9eSmrg 799fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); 800fe8aea9eSmrg OUT_BATCH(0); 801fe8aea9eSmrg} 802fe8aea9eSmrg 803fe8aea9eSmrgstatic void 804fe8aea9eSmrggen9_emit_null_depth_buffer(struct sna *sna) 805fe8aea9eSmrg{ 806fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_DEPTH_BUFFER | (8 - 2)); 807fe8aea9eSmrg#if 1 808fe8aea9eSmrg OUT_BATCH(SURFACE_NULL << DEPTH_BUFFER_TYPE_SHIFT | 809fe8aea9eSmrg DEPTHFORMAT_D32_FLOAT << DEPTH_BUFFER_FORMAT_SHIFT); 810fe8aea9eSmrg#else 811fe8aea9eSmrg OUT_BATCH(SURFACE_2D << DEPTH_BUFFER_TYPE_SHIFT | 812fe8aea9eSmrg DEPTHFORMAT_D16_UNORM << DEPTH_BUFFER_FORMAT_SHIFT); 813fe8aea9eSmrg#endif 814fe8aea9eSmrg OUT_BATCH64(0); 815fe8aea9eSmrg OUT_BATCH(0); 816fe8aea9eSmrg OUT_BATCH(0); 817fe8aea9eSmrg OUT_BATCH(0); 818fe8aea9eSmrg OUT_BATCH(0); 819fe8aea9eSmrg 820fe8aea9eSmrg#if SIM 821fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2)); 822fe8aea9eSmrg OUT_BATCH(0); 823fe8aea9eSmrg OUT_BATCH64(0); 824fe8aea9eSmrg OUT_BATCH(0); 825fe8aea9eSmrg#endif 826fe8aea9eSmrg 827fe8aea9eSmrg#if SIM 828fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_STENCIL_BUFFER | (5 - 2)); 829fe8aea9eSmrg OUT_BATCH(0); 830fe8aea9eSmrg OUT_BATCH64(0); 831fe8aea9eSmrg OUT_BATCH(0); 832fe8aea9eSmrg#endif 833fe8aea9eSmrg 834fe8aea9eSmrg#if SIM 835fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_WM_DEPTH_STENCIL | (4 - 2)); 836fe8aea9eSmrg OUT_BATCH(0); 837fe8aea9eSmrg OUT_BATCH(0); 838fe8aea9eSmrg OUT_BATCH(0); 839fe8aea9eSmrg#endif 840fe8aea9eSmrg 841fe8aea9eSmrg#if SIM 842fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_CLEAR_PARAMS | (3 - 2)); 843fe8aea9eSmrg OUT_BATCH(0); 844fe8aea9eSmrg OUT_BATCH(0); 845fe8aea9eSmrg#endif 846fe8aea9eSmrg} 847fe8aea9eSmrg 848fe8aea9eSmrgstatic void 849fe8aea9eSmrggen9_emit_wm_invariant(struct sna *sna) 850fe8aea9eSmrg{ 851fe8aea9eSmrg gen9_emit_null_depth_buffer(sna); 852fe8aea9eSmrg 853fe8aea9eSmrg#if SIM 854fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_SCISSOR_STATE_POINTERS | (2 - 2)); 855fe8aea9eSmrg OUT_BATCH(0); 856fe8aea9eSmrg#endif 857fe8aea9eSmrg 858fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_WM | (2 - 2)); 859fe8aea9eSmrg //OUT_BATCH(WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC); /* XXX */ 860fe8aea9eSmrg OUT_BATCH(WM_PERSPECTIVE_PIXEL_BARYCENTRIC); 861fe8aea9eSmrg 862fe8aea9eSmrg#if SIM 863fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_WM_CHROMAKEY | (2 - 2)); 864fe8aea9eSmrg OUT_BATCH(0); 865fe8aea9eSmrg#endif 866fe8aea9eSmrg 867fe8aea9eSmrg#if 0 868fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_WM_HZ_OP | (5 - 2)); 869fe8aea9eSmrg OUT_BATCH(0); 870fe8aea9eSmrg OUT_BATCH(0); 871fe8aea9eSmrg OUT_BATCH(0); 872fe8aea9eSmrg OUT_BATCH(0); 873fe8aea9eSmrg#endif 874fe8aea9eSmrg 875fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_PS_EXTRA | (2 - 2)); 876fe8aea9eSmrg OUT_BATCH(PSX_PIXEL_SHADER_VALID | 877fe8aea9eSmrg PSX_ATTRIBUTE_ENABLE); 878fe8aea9eSmrg 879fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_RASTER | (5 - 2)); 880fe8aea9eSmrg OUT_BATCH(RASTER_FRONT_WINDING_CCW | 881fe8aea9eSmrg RASTER_CULL_NONE); 882fe8aea9eSmrg OUT_BATCH(0); 883fe8aea9eSmrg OUT_BATCH(0); 884fe8aea9eSmrg OUT_BATCH(0); 885fe8aea9eSmrg 886fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_SBE_SWIZ | (11 - 2)); 887fe8aea9eSmrg OUT_BATCH(0); 888fe8aea9eSmrg OUT_BATCH(0); 889fe8aea9eSmrg OUT_BATCH(0); 890fe8aea9eSmrg OUT_BATCH(0); 891fe8aea9eSmrg OUT_BATCH(0); 892fe8aea9eSmrg OUT_BATCH(0); 893fe8aea9eSmrg OUT_BATCH(0); 894fe8aea9eSmrg OUT_BATCH(0); 895fe8aea9eSmrg OUT_BATCH(0); 896fe8aea9eSmrg OUT_BATCH(0); 897fe8aea9eSmrg 898fe8aea9eSmrg#if SIM 899fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_CONSTANT_PS | (11 - 2)); 900fe8aea9eSmrg OUT_BATCH(0); 901fe8aea9eSmrg OUT_BATCH(0); 902fe8aea9eSmrg OUT_BATCH64(0); 903fe8aea9eSmrg OUT_BATCH64(0); 904fe8aea9eSmrg OUT_BATCH64(0); 905fe8aea9eSmrg OUT_BATCH64(0); 906fe8aea9eSmrg#endif 907fe8aea9eSmrg} 908fe8aea9eSmrg 909fe8aea9eSmrgstatic void 910fe8aea9eSmrggen9_emit_cc_invariant(struct sna *sna) 911fe8aea9eSmrg{ 912fe8aea9eSmrg} 913fe8aea9eSmrg 914fe8aea9eSmrgstatic void 915fe8aea9eSmrggen9_emit_vf_invariant(struct sna *sna) 916fe8aea9eSmrg{ 917fe8aea9eSmrg int n; 918fe8aea9eSmrg 919fe8aea9eSmrg#if 1 920fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_VF | (2 - 2)); 921fe8aea9eSmrg OUT_BATCH(0); 922fe8aea9eSmrg#endif 923fe8aea9eSmrg 924fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_VF_SGVS | (2 - 2)); 925fe8aea9eSmrg OUT_BATCH(0); 926fe8aea9eSmrg 927fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_VF_TOPOLOGY | (2 - 2)); 928fe8aea9eSmrg OUT_BATCH(RECTLIST); 929fe8aea9eSmrg 930fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_VF_STATISTICS | 0); 931fe8aea9eSmrg 932fe8aea9eSmrg for (n = 1; n <= 3; n++) { 933fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_VF_INSTANCING | (3 - 2)); 934fe8aea9eSmrg OUT_BATCH(n); 935fe8aea9eSmrg OUT_BATCH(0); 936fe8aea9eSmrg } 937fe8aea9eSmrg} 938fe8aea9eSmrg 939fe8aea9eSmrgstatic void 940fe8aea9eSmrggen9_emit_invariant(struct sna *sna) 941fe8aea9eSmrg{ 942fe8aea9eSmrg OUT_BATCH(GEN9_PIPELINE_SELECT | 943fe8aea9eSmrg PIPELINE_SELECTION_MASK | 944fe8aea9eSmrg PIPELINE_SELECT_3D); 945fe8aea9eSmrg 946fe8aea9eSmrg#if SIM 947fe8aea9eSmrg OUT_BATCH(GEN9_STATE_SIP | (3 - 2)); 948fe8aea9eSmrg OUT_BATCH64(0); 949fe8aea9eSmrg#endif 950fe8aea9eSmrg 951fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_MULTISAMPLE | (2 - 2)); 952fe8aea9eSmrg OUT_BATCH(MULTISAMPLE_PIXEL_LOCATION_CENTER | 953fe8aea9eSmrg MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ 954fe8aea9eSmrg 955fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_SAMPLE_MASK | (2 - 2)); 956fe8aea9eSmrg OUT_BATCH(1); 957fe8aea9eSmrg 958fe8aea9eSmrg#if SIM 959fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_SAMPLE_PATTERN | (5 - 2)); 960fe8aea9eSmrg OUT_BATCH(0); 961fe8aea9eSmrg OUT_BATCH(0); 962fe8aea9eSmrg OUT_BATCH(0); 963fe8aea9eSmrg //OUT_BATCH(8<<20 | 8<<16); 964fe8aea9eSmrg OUT_BATCH(0); 965fe8aea9eSmrg#endif 966fe8aea9eSmrg 967fe8aea9eSmrg gen9_emit_push_constants(sna); 968fe8aea9eSmrg gen9_emit_urb(sna); 969fe8aea9eSmrg 970fe8aea9eSmrg gen9_emit_state_base_address(sna); 971fe8aea9eSmrg 972fe8aea9eSmrg gen9_emit_vf_invariant(sna); 973fe8aea9eSmrg gen9_emit_vs_invariant(sna); 974fe8aea9eSmrg gen9_emit_hs_invariant(sna); 975fe8aea9eSmrg gen9_emit_te_invariant(sna); 976fe8aea9eSmrg gen9_emit_ds_invariant(sna); 977fe8aea9eSmrg gen9_emit_gs_invariant(sna); 978fe8aea9eSmrg gen9_emit_sol_invariant(sna); 979fe8aea9eSmrg gen9_emit_clip_invariant(sna); 980fe8aea9eSmrg gen9_emit_sf_invariant(sna); 981fe8aea9eSmrg gen9_emit_wm_invariant(sna); 982fe8aea9eSmrg gen9_emit_cc_invariant(sna); 983fe8aea9eSmrg 984fe8aea9eSmrg sna->render_state.gen9.needs_invariant = false; 985fe8aea9eSmrg} 986fe8aea9eSmrg 987fe8aea9eSmrgstatic void 988fe8aea9eSmrggen9_emit_cc(struct sna *sna, uint32_t blend) 989fe8aea9eSmrg{ 990fe8aea9eSmrg struct gen9_render_state *render = &sna->render_state.gen9; 991fe8aea9eSmrg 992fe8aea9eSmrg if (render->blend == blend) 993fe8aea9eSmrg return; 994fe8aea9eSmrg 995fe8aea9eSmrg DBG(("%s: blend=%x (current=%x), src=%d, dst=%d\n", 996fe8aea9eSmrg __FUNCTION__, blend, render->blend, 997fe8aea9eSmrg blend / GEN9_BLENDFACTOR_COUNT, 998fe8aea9eSmrg blend % GEN9_BLENDFACTOR_COUNT)); 999fe8aea9eSmrg 1000fe8aea9eSmrg assert(blend < GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT); 1001fe8aea9eSmrg assert(blend / GEN9_BLENDFACTOR_COUNT > 0); 1002fe8aea9eSmrg assert(blend % GEN9_BLENDFACTOR_COUNT > 0); 1003fe8aea9eSmrg 1004fe8aea9eSmrg /* XXX can have up to 8 blend states preload, selectable via 1005fe8aea9eSmrg * Render Target Index. What other side-effects of Render Target Index? 1006fe8aea9eSmrg */ 1007fe8aea9eSmrg 1008fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_PS_BLEND | (2 - 2)); 1009fe8aea9eSmrg if (blend != GEN9_BLEND(NO_BLEND)) { 1010fe8aea9eSmrg uint32_t src = blend / GEN9_BLENDFACTOR_COUNT; 1011fe8aea9eSmrg uint32_t dst = blend % GEN9_BLENDFACTOR_COUNT; 1012fe8aea9eSmrg OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT | 1013fe8aea9eSmrg PS_BLEND_COLOR_BLEND_ENABLE | 1014fe8aea9eSmrg src << PS_BLEND_SRC_ALPHA_SHIFT | 1015fe8aea9eSmrg dst << PS_BLEND_DST_ALPHA_SHIFT | 1016fe8aea9eSmrg src << PS_BLEND_SRC_SHIFT | 1017fe8aea9eSmrg dst << PS_BLEND_DST_SHIFT); 1018fe8aea9eSmrg } else 1019fe8aea9eSmrg OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT); 1020fe8aea9eSmrg 1021fe8aea9eSmrg assert(is_aligned(render->cc_blend + blend * GEN9_BLEND_STATE_PADDED_SIZE, 64)); 1022fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); 1023fe8aea9eSmrg OUT_BATCH((render->cc_blend + blend * GEN9_BLEND_STATE_PADDED_SIZE) | 1); 1024fe8aea9eSmrg 1025fe8aea9eSmrg /* Force a CC_STATE pointer change to improve blend performance */ 1026fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_CC_STATE_POINTERS | (2 - 2)); 1027fe8aea9eSmrg OUT_BATCH(0); 1028fe8aea9eSmrg 1029fe8aea9eSmrg render->blend = blend; 1030fe8aea9eSmrg} 1031fe8aea9eSmrg 1032fe8aea9eSmrgstatic void 1033fe8aea9eSmrggen9_emit_sampler(struct sna *sna, uint32_t state) 1034fe8aea9eSmrg{ 1035fe8aea9eSmrg if (sna->render_state.gen9.samplers == state) 1036fe8aea9eSmrg return; 1037fe8aea9eSmrg 1038fe8aea9eSmrg sna->render_state.gen9.samplers = state; 1039fe8aea9eSmrg 1040fe8aea9eSmrg DBG(("%s: sampler = %x\n", __FUNCTION__, state)); 1041fe8aea9eSmrg 1042fe8aea9eSmrg assert(2 * sizeof(struct gen9_sampler_state) == 32); 1043fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2)); 1044fe8aea9eSmrg OUT_BATCH(sna->render_state.gen9.wm_state + state * 2 * sizeof(struct gen9_sampler_state)); 1045fe8aea9eSmrg} 1046fe8aea9eSmrg 1047fe8aea9eSmrgstatic void 1048fe8aea9eSmrggen9_emit_sf(struct sna *sna, bool has_mask) 1049fe8aea9eSmrg{ 1050fe8aea9eSmrg int num_sf_outputs = has_mask ? 2 : 1; 1051fe8aea9eSmrg 1052fe8aea9eSmrg if (sna->render_state.gen9.num_sf_outputs == num_sf_outputs) 1053fe8aea9eSmrg return; 1054fe8aea9eSmrg 1055fe8aea9eSmrg DBG(("%s: num_sf_outputs=%d\n", __FUNCTION__, num_sf_outputs)); 1056fe8aea9eSmrg 1057fe8aea9eSmrg sna->render_state.gen9.num_sf_outputs = num_sf_outputs; 1058fe8aea9eSmrg 1059fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_SBE | (6 - 2)); 1060fe8aea9eSmrg OUT_BATCH(num_sf_outputs << SBE_NUM_OUTPUTS_SHIFT | 1061fe8aea9eSmrg SBE_FORCE_VERTEX_URB_READ_LENGTH | /* forced is faster */ 1062fe8aea9eSmrg SBE_FORCE_VERTEX_URB_READ_OFFSET | 1063fe8aea9eSmrg 1 << SBE_URB_ENTRY_READ_LENGTH_SHIFT | 1064fe8aea9eSmrg 1 << SBE_URB_ENTRY_READ_OFFSET_SHIFT); 1065fe8aea9eSmrg OUT_BATCH(0); 1066fe8aea9eSmrg OUT_BATCH(0); 1067fe8aea9eSmrg OUT_BATCH(SBE_ACTIVE_COMPONENT_XYZW << 0 | 1068fe8aea9eSmrg SBE_ACTIVE_COMPONENT_XYZW << 1); 1069fe8aea9eSmrg OUT_BATCH(0); 1070fe8aea9eSmrg} 1071fe8aea9eSmrg 1072fe8aea9eSmrgstatic void 1073fe8aea9eSmrggen9_emit_wm(struct sna *sna, int kernel) 1074fe8aea9eSmrg{ 1075fe8aea9eSmrg const uint32_t *kernels; 1076fe8aea9eSmrg 1077fe8aea9eSmrg assert(kernel < ARRAY_SIZE(wm_kernels)); 1078fe8aea9eSmrg if (sna->render_state.gen9.kernel == kernel) 1079fe8aea9eSmrg return; 1080fe8aea9eSmrg 1081fe8aea9eSmrg sna->render_state.gen9.kernel = kernel; 1082fe8aea9eSmrg kernels = sna->render_state.gen9.wm_kernel[kernel]; 1083fe8aea9eSmrg 1084fe8aea9eSmrg DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n", 1085fe8aea9eSmrg __FUNCTION__, 1086fe8aea9eSmrg wm_kernels[kernel].name, 1087fe8aea9eSmrg wm_kernels[kernel].num_surfaces, 1088fe8aea9eSmrg kernels[0], kernels[1], kernels[2])); 1089fe8aea9eSmrg assert(is_aligned(kernels[0], 64)); 1090fe8aea9eSmrg assert(is_aligned(kernels[1], 64)); 1091fe8aea9eSmrg assert(is_aligned(kernels[2], 64)); 1092fe8aea9eSmrg 1093fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_PS | (12 - 2)); 1094fe8aea9eSmrg OUT_BATCH64(kernels[0] ?: kernels[1] ?: kernels[2]); 1095fe8aea9eSmrg OUT_BATCH(1 << PS_SAMPLER_COUNT_SHIFT | 1096fe8aea9eSmrg PS_VECTOR_MASK_ENABLE | 1097fe8aea9eSmrg wm_kernels[kernel].num_surfaces << PS_BINDING_TABLE_ENTRY_COUNT_SHIFT); 1098fe8aea9eSmrg OUT_BATCH64(0); /* scratch address */ 1099fe8aea9eSmrg OUT_BATCH(PS_MAX_THREADS | 1100fe8aea9eSmrg (kernels[0] ? PS_8_DISPATCH_ENABLE : 0) | 1101fe8aea9eSmrg (kernels[1] ? PS_16_DISPATCH_ENABLE : 0) | 1102fe8aea9eSmrg (kernels[2] ? PS_32_DISPATCH_ENABLE : 0)); 1103fe8aea9eSmrg OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << PS_DISPATCH_START_GRF_SHIFT_0 | 1104fe8aea9eSmrg 8 << PS_DISPATCH_START_GRF_SHIFT_1 | 1105fe8aea9eSmrg 6 << PS_DISPATCH_START_GRF_SHIFT_2); 1106fe8aea9eSmrg OUT_BATCH64(kernels[2]); 1107fe8aea9eSmrg OUT_BATCH64(kernels[1]); 1108fe8aea9eSmrg} 1109fe8aea9eSmrg 1110fe8aea9eSmrgstatic bool 1111fe8aea9eSmrggen9_emit_binding_table(struct sna *sna, uint16_t offset) 1112fe8aea9eSmrg{ 1113fe8aea9eSmrg if (sna->render_state.gen9.surface_table == offset) 1114fe8aea9eSmrg return false; 1115fe8aea9eSmrg 1116fe8aea9eSmrg /* Binding table pointers */ 1117fe8aea9eSmrg assert(is_aligned(4*offset, 32)); 1118fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); 1119fe8aea9eSmrg OUT_BATCH(offset*4); 1120fe8aea9eSmrg 1121fe8aea9eSmrg sna->render_state.gen9.surface_table = offset; 1122fe8aea9eSmrg return true; 1123fe8aea9eSmrg} 1124fe8aea9eSmrg 1125fe8aea9eSmrgstatic bool 1126fe8aea9eSmrggen9_emit_drawing_rectangle(struct sna *sna, 1127fe8aea9eSmrg const struct sna_composite_op *op) 1128fe8aea9eSmrg{ 1129fe8aea9eSmrg uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); 1130fe8aea9eSmrg uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; 1131fe8aea9eSmrg 1132fe8aea9eSmrg assert(!too_large(abs(op->dst.x), abs(op->dst.y))); 1133fe8aea9eSmrg assert(!too_large(op->dst.width, op->dst.height)); 1134fe8aea9eSmrg 1135fe8aea9eSmrg if (sna->render_state.gen9.drawrect_limit == limit && 1136fe8aea9eSmrg sna->render_state.gen9.drawrect_offset == offset) 1137fe8aea9eSmrg return true; 1138fe8aea9eSmrg 1139fe8aea9eSmrg sna->render_state.gen9.drawrect_offset = offset; 1140fe8aea9eSmrg sna->render_state.gen9.drawrect_limit = limit; 1141fe8aea9eSmrg 1142fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); 1143fe8aea9eSmrg OUT_BATCH(0); 1144fe8aea9eSmrg OUT_BATCH(limit); 1145fe8aea9eSmrg OUT_BATCH(offset); 1146fe8aea9eSmrg return false; 1147fe8aea9eSmrg} 1148fe8aea9eSmrg 1149fe8aea9eSmrgstatic void 1150fe8aea9eSmrggen9_emit_vertex_elements(struct sna *sna, 1151fe8aea9eSmrg const struct sna_composite_op *op) 1152fe8aea9eSmrg{ 1153fe8aea9eSmrg /* 1154fe8aea9eSmrg * vertex data in vertex buffer 1155fe8aea9eSmrg * position: (x, y) 1156fe8aea9eSmrg * texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0) 1157fe8aea9eSmrg * texture coordinate 1 if (has_mask is true): same as above 1158fe8aea9eSmrg */ 1159fe8aea9eSmrg struct gen9_render_state *render = &sna->render_state.gen9; 1160fe8aea9eSmrg uint32_t src_format, dw; 1161fe8aea9eSmrg int id = GEN9_VERTEX(op->u.gen9.flags); 1162fe8aea9eSmrg bool has_mask; 1163fe8aea9eSmrg 1164fe8aea9eSmrg DBG(("%s: setup id=%d\n", __FUNCTION__, id)); 1165fe8aea9eSmrg 1166fe8aea9eSmrg if (render->ve_id == id) 1167fe8aea9eSmrg return; 1168fe8aea9eSmrg render->ve_id = id; 1169fe8aea9eSmrg 1170fe8aea9eSmrg if (render->ve_dirty) { 1171fe8aea9eSmrg /* dummy primitive to flush vertex before change? */ 1172fe8aea9eSmrg OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2)); 1173fe8aea9eSmrg OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ 1174fe8aea9eSmrg OUT_BATCH(0); 1175fe8aea9eSmrg OUT_BATCH(0); 1176fe8aea9eSmrg OUT_BATCH(1); /* single instance */ 1177fe8aea9eSmrg OUT_BATCH(0); /* start instance location */ 1178fe8aea9eSmrg OUT_BATCH(0); /* index buffer offset, ignored */ 1179fe8aea9eSmrg } 1180fe8aea9eSmrg 1181fe8aea9eSmrg /* The VUE layout 1182fe8aea9eSmrg * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) 1183fe8aea9eSmrg * dword 4-7: position (x, y, 1.0, 1.0), 1184fe8aea9eSmrg * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) 1185fe8aea9eSmrg * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) 1186fe8aea9eSmrg * 1187fe8aea9eSmrg * dword 4-15 are fetched from vertex buffer 1188fe8aea9eSmrg */ 1189fe8aea9eSmrg has_mask = (id >> 2) != 0; 1190fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_VERTEX_ELEMENTS | 1191fe8aea9eSmrg ((2 * (3 + has_mask)) + 1 - 2)); 1192fe8aea9eSmrg 1193fe8aea9eSmrg OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | 1194fe8aea9eSmrg SURFACEFORMAT_R32G32B32A32_FLOAT << VE_FORMAT_SHIFT | 1195fe8aea9eSmrg 0 << VE_OFFSET_SHIFT); 1196fe8aea9eSmrg OUT_BATCH(COMPONENT_STORE_0 << VE_COMPONENT_0_SHIFT | 1197fe8aea9eSmrg COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT | 1198fe8aea9eSmrg COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT | 1199fe8aea9eSmrg COMPONENT_STORE_0 << VE_COMPONENT_3_SHIFT); 1200fe8aea9eSmrg 1201fe8aea9eSmrg /* x,y */ 1202fe8aea9eSmrg OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | 1203fe8aea9eSmrg SURFACEFORMAT_R16G16_SSCALED << VE_FORMAT_SHIFT | 1204fe8aea9eSmrg 0 << VE_OFFSET_SHIFT); 1205fe8aea9eSmrg OUT_BATCH(COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT | 1206fe8aea9eSmrg COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT | 1207fe8aea9eSmrg COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT | 1208fe8aea9eSmrg COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT); 1209fe8aea9eSmrg 1210fe8aea9eSmrg /* u0, v0, w0 */ 1211fe8aea9eSmrg DBG(("%s: first channel %d floats, offset=4\n", __FUNCTION__, id & 3)); 1212fe8aea9eSmrg dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT; 1213fe8aea9eSmrg switch (id & 3) { 1214fe8aea9eSmrg default: 1215fe8aea9eSmrg assert(0); 1216fe8aea9eSmrg case 0: 1217fe8aea9eSmrg src_format = SURFACEFORMAT_R16G16_SSCALED; 1218fe8aea9eSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1219fe8aea9eSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; 1220fe8aea9eSmrg dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; 1221fe8aea9eSmrg break; 1222fe8aea9eSmrg case 1: 1223fe8aea9eSmrg src_format = SURFACEFORMAT_R32_FLOAT; 1224fe8aea9eSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1225fe8aea9eSmrg dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT; 1226fe8aea9eSmrg dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; 1227fe8aea9eSmrg break; 1228fe8aea9eSmrg case 2: 1229fe8aea9eSmrg src_format = SURFACEFORMAT_R32G32_FLOAT; 1230fe8aea9eSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1231fe8aea9eSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; 1232fe8aea9eSmrg dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; 1233fe8aea9eSmrg break; 1234fe8aea9eSmrg case 3: 1235fe8aea9eSmrg src_format = SURFACEFORMAT_R32G32B32_FLOAT; 1236fe8aea9eSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1237fe8aea9eSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; 1238fe8aea9eSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT; 1239fe8aea9eSmrg break; 1240fe8aea9eSmrg } 1241fe8aea9eSmrg OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | 1242fe8aea9eSmrg src_format << VE_FORMAT_SHIFT | 1243fe8aea9eSmrg 4 << VE_OFFSET_SHIFT); 1244fe8aea9eSmrg OUT_BATCH(dw); 1245fe8aea9eSmrg 1246fe8aea9eSmrg /* u1, v1, w1 */ 1247fe8aea9eSmrg if (has_mask) { 1248fe8aea9eSmrg unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); 1249fe8aea9eSmrg DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset)); 1250fe8aea9eSmrg dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT; 1251fe8aea9eSmrg switch (id >> 2) { 1252fe8aea9eSmrg case 1: 1253fe8aea9eSmrg src_format = SURFACEFORMAT_R32_FLOAT; 1254fe8aea9eSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1255fe8aea9eSmrg dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT; 1256fe8aea9eSmrg dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; 1257fe8aea9eSmrg break; 1258fe8aea9eSmrg default: 1259fe8aea9eSmrg assert(0); 1260fe8aea9eSmrg case 2: 1261fe8aea9eSmrg src_format = SURFACEFORMAT_R32G32_FLOAT; 1262fe8aea9eSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1263fe8aea9eSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; 1264fe8aea9eSmrg dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT; 1265fe8aea9eSmrg break; 1266fe8aea9eSmrg case 3: 1267fe8aea9eSmrg src_format = SURFACEFORMAT_R32G32B32_FLOAT; 1268fe8aea9eSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT; 1269fe8aea9eSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT; 1270fe8aea9eSmrg dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT; 1271fe8aea9eSmrg break; 1272fe8aea9eSmrg } 1273fe8aea9eSmrg OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID | 1274fe8aea9eSmrg src_format << VE_FORMAT_SHIFT | 1275fe8aea9eSmrg offset << VE_OFFSET_SHIFT); 1276fe8aea9eSmrg OUT_BATCH(dw); 1277fe8aea9eSmrg } 1278fe8aea9eSmrg 1279fe8aea9eSmrg render->ve_dirty = true; 1280fe8aea9eSmrg} 1281fe8aea9eSmrg 1282fe8aea9eSmrginline static void 1283fe8aea9eSmrggen9_emit_pipe_invalidate(struct sna *sna) 1284fe8aea9eSmrg{ 1285fe8aea9eSmrg OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2)); 1286fe8aea9eSmrg OUT_BATCH(PIPE_CONTROL_WC_FLUSH | 1287fe8aea9eSmrg PIPE_CONTROL_TC_FLUSH | 1288fe8aea9eSmrg PIPE_CONTROL_CS_STALL); 1289fe8aea9eSmrg OUT_BATCH64(0); 1290fe8aea9eSmrg OUT_BATCH64(0); 1291fe8aea9eSmrg} 1292fe8aea9eSmrg 1293fe8aea9eSmrginline static void 1294fe8aea9eSmrggen9_emit_pipe_flush(struct sna *sna, bool need_stall) 1295fe8aea9eSmrg{ 1296fe8aea9eSmrg unsigned stall; 1297fe8aea9eSmrg 1298fe8aea9eSmrg stall = 0; 1299fe8aea9eSmrg if (need_stall) 1300fe8aea9eSmrg stall = (PIPE_CONTROL_CS_STALL | 1301fe8aea9eSmrg PIPE_CONTROL_STALL_AT_SCOREBOARD); 1302fe8aea9eSmrg 1303fe8aea9eSmrg OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2)); 1304fe8aea9eSmrg OUT_BATCH(PIPE_CONTROL_WC_FLUSH | stall); 1305fe8aea9eSmrg OUT_BATCH64(0); 1306fe8aea9eSmrg OUT_BATCH64(0); 1307fe8aea9eSmrg} 1308fe8aea9eSmrg 1309fe8aea9eSmrginline static void 1310fe8aea9eSmrggen9_emit_pipe_stall(struct sna *sna) 1311fe8aea9eSmrg{ 1312fe8aea9eSmrg OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2)); 1313fe8aea9eSmrg OUT_BATCH(PIPE_CONTROL_CS_STALL | 1314fe8aea9eSmrg PIPE_CONTROL_FLUSH | 1315fe8aea9eSmrg PIPE_CONTROL_STALL_AT_SCOREBOARD); 1316fe8aea9eSmrg OUT_BATCH64(0); 1317fe8aea9eSmrg OUT_BATCH64(0); 1318fe8aea9eSmrg} 1319fe8aea9eSmrg 1320fe8aea9eSmrgstatic void 1321fe8aea9eSmrggen9_emit_state(struct sna *sna, 1322fe8aea9eSmrg const struct sna_composite_op *op, 1323fe8aea9eSmrg uint16_t wm_binding_table) 1324fe8aea9eSmrg{ 1325fe8aea9eSmrg bool need_invalidate; 1326fe8aea9eSmrg bool need_flush; 1327fe8aea9eSmrg bool need_stall; 1328fe8aea9eSmrg 1329fe8aea9eSmrg assert(op->dst.bo->exec); 1330fe8aea9eSmrg 1331fe8aea9eSmrg need_flush = wm_binding_table & 1 || 1332fe8aea9eSmrg (sna->render_state.gen9.emit_flush && GEN9_READS_DST(op->u.gen9.flags)); 1333fe8aea9eSmrg if (ALWAYS_FLUSH) 1334fe8aea9eSmrg need_flush = true; 1335fe8aea9eSmrg 1336fe8aea9eSmrg wm_binding_table &= ~1; 1337fe8aea9eSmrg 1338fe8aea9eSmrg need_stall = sna->render_state.gen9.surface_table != wm_binding_table; 1339fe8aea9eSmrg 1340fe8aea9eSmrg need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo); 1341fe8aea9eSmrg if (ALWAYS_INVALIDATE) 1342fe8aea9eSmrg need_invalidate = true; 1343fe8aea9eSmrg 1344fe8aea9eSmrg need_stall &= gen9_emit_drawing_rectangle(sna, op); 1345fe8aea9eSmrg if (ALWAYS_STALL) 1346fe8aea9eSmrg need_stall = true; 1347fe8aea9eSmrg 1348fe8aea9eSmrg if (need_invalidate) { 1349fe8aea9eSmrg gen9_emit_pipe_invalidate(sna); 1350fe8aea9eSmrg kgem_clear_dirty(&sna->kgem); 1351fe8aea9eSmrg assert(op->dst.bo->exec); 1352fe8aea9eSmrg kgem_bo_mark_dirty(op->dst.bo); 1353fe8aea9eSmrg 1354fe8aea9eSmrg need_flush = false; 1355fe8aea9eSmrg need_stall = false; 1356fe8aea9eSmrg } 1357fe8aea9eSmrg if (need_flush) { 1358fe8aea9eSmrg gen9_emit_pipe_flush(sna, need_stall); 1359fe8aea9eSmrg need_stall = false; 1360fe8aea9eSmrg } 1361fe8aea9eSmrg if (need_stall) 1362fe8aea9eSmrg gen9_emit_pipe_stall(sna); 1363fe8aea9eSmrg 1364fe8aea9eSmrg gen9_emit_cc(sna, GEN9_BLEND(op->u.gen9.flags)); 1365fe8aea9eSmrg gen9_emit_sampler(sna, GEN9_SAMPLER(op->u.gen9.flags)); 1366fe8aea9eSmrg gen9_emit_sf(sna, GEN9_VERTEX(op->u.gen9.flags) >> 2); 1367fe8aea9eSmrg gen9_emit_wm(sna, op->u.gen9.wm_kernel); 1368fe8aea9eSmrg gen9_emit_vertex_elements(sna, op); 1369fe8aea9eSmrg gen9_emit_binding_table(sna, wm_binding_table); 1370fe8aea9eSmrg 1371fe8aea9eSmrg sna->render_state.gen9.emit_flush = GEN9_READS_DST(op->u.gen9.flags); 1372fe8aea9eSmrg} 1373fe8aea9eSmrg 1374fe8aea9eSmrgstatic bool gen9_magic_ca_pass(struct sna *sna, 1375fe8aea9eSmrg const struct sna_composite_op *op) 1376fe8aea9eSmrg{ 1377fe8aea9eSmrg struct gen9_render_state *state = &sna->render_state.gen9; 1378fe8aea9eSmrg 1379fe8aea9eSmrg if (!op->need_magic_ca_pass) 1380fe8aea9eSmrg return false; 1381fe8aea9eSmrg 1382fe8aea9eSmrg DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, 1383fe8aea9eSmrg sna->render.vertex_start, sna->render.vertex_index)); 1384fe8aea9eSmrg 1385fe8aea9eSmrg gen9_emit_pipe_stall(sna); 1386fe8aea9eSmrg 1387fe8aea9eSmrg gen9_emit_cc(sna, 1388fe8aea9eSmrg GEN9_BLEND(gen9_get_blend(PictOpAdd, true, 1389fe8aea9eSmrg op->dst.format))); 1390fe8aea9eSmrg gen9_emit_wm(sna, 1391fe8aea9eSmrg gen9_choose_composite_kernel(PictOpAdd, 1392fe8aea9eSmrg true, true, 1393fe8aea9eSmrg op->is_affine)); 1394fe8aea9eSmrg 1395fe8aea9eSmrg OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2)); 1396fe8aea9eSmrg OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ 1397fe8aea9eSmrg OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); 1398fe8aea9eSmrg OUT_BATCH(sna->render.vertex_start); 1399fe8aea9eSmrg OUT_BATCH(1); /* single instance */ 1400fe8aea9eSmrg OUT_BATCH(0); /* start instance location */ 1401fe8aea9eSmrg OUT_BATCH(0); /* index buffer offset, ignored */ 1402fe8aea9eSmrg 1403fe8aea9eSmrg state->last_primitive = sna->kgem.nbatch; 1404fe8aea9eSmrg state->ve_dirty = false; 1405fe8aea9eSmrg return true; 1406fe8aea9eSmrg} 1407fe8aea9eSmrg 1408fe8aea9eSmrgstatic void null_create(struct sna_static_stream *stream) 1409fe8aea9eSmrg{ 1410fe8aea9eSmrg /* A bunch of zeros useful for legacy border color and depth-stencil */ 1411fe8aea9eSmrg sna_static_stream_map(stream, 64, 64); 1412fe8aea9eSmrg} 1413fe8aea9eSmrg 1414fe8aea9eSmrgstatic void 1415fe8aea9eSmrgsampler_state_init(struct gen9_sampler_state *sampler_state, 1416fe8aea9eSmrg sampler_filter_t filter, 1417fe8aea9eSmrg sampler_extend_t extend) 1418fe8aea9eSmrg{ 1419fe8aea9eSmrg COMPILE_TIME_ASSERT(sizeof(*sampler_state) == 4*sizeof(uint32_t)); 1420fe8aea9eSmrg 1421fe8aea9eSmrg sampler_state->ss0.lod_preclamp = 2; /* GL mode */ 1422fe8aea9eSmrg sampler_state->ss0.default_color_mode = 1; 1423fe8aea9eSmrg 1424fe8aea9eSmrg switch (filter) { 1425fe8aea9eSmrg default: 1426fe8aea9eSmrg case SAMPLER_FILTER_NEAREST: 1427fe8aea9eSmrg sampler_state->ss0.min_filter = MAPFILTER_NEAREST; 1428fe8aea9eSmrg sampler_state->ss0.mag_filter = MAPFILTER_NEAREST; 1429fe8aea9eSmrg break; 1430fe8aea9eSmrg case SAMPLER_FILTER_BILINEAR: 1431fe8aea9eSmrg sampler_state->ss0.min_filter = MAPFILTER_LINEAR; 1432fe8aea9eSmrg sampler_state->ss0.mag_filter = MAPFILTER_LINEAR; 1433fe8aea9eSmrg break; 1434fe8aea9eSmrg } 1435fe8aea9eSmrg 1436fe8aea9eSmrg /* XXX bicubic filter using MAPFILTER_FLEXIBLE */ 1437fe8aea9eSmrg 1438fe8aea9eSmrg switch (extend) { 1439fe8aea9eSmrg default: 1440fe8aea9eSmrg case SAMPLER_EXTEND_NONE: 1441fe8aea9eSmrg sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; 1442fe8aea9eSmrg sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; 1443fe8aea9eSmrg sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP_BORDER; 1444fe8aea9eSmrg break; 1445fe8aea9eSmrg case SAMPLER_EXTEND_REPEAT: 1446fe8aea9eSmrg sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_WRAP; 1447fe8aea9eSmrg sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_WRAP; 1448fe8aea9eSmrg sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_WRAP; 1449fe8aea9eSmrg break; 1450fe8aea9eSmrg case SAMPLER_EXTEND_PAD: 1451fe8aea9eSmrg sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP; 1452fe8aea9eSmrg sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP; 1453fe8aea9eSmrg sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP; 1454fe8aea9eSmrg break; 1455fe8aea9eSmrg case SAMPLER_EXTEND_REFLECT: 1456fe8aea9eSmrg sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_MIRROR; 1457fe8aea9eSmrg sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_MIRROR; 1458fe8aea9eSmrg sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_MIRROR; 1459fe8aea9eSmrg break; 1460fe8aea9eSmrg } 1461fe8aea9eSmrg} 1462fe8aea9eSmrg 1463fe8aea9eSmrgstatic void 1464fe8aea9eSmrgsampler_copy_init(struct gen9_sampler_state *ss) 1465fe8aea9eSmrg{ 1466fe8aea9eSmrg sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1467fe8aea9eSmrg ss->ss3.non_normalized_coord = 1; 1468fe8aea9eSmrg 1469fe8aea9eSmrg sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1470fe8aea9eSmrg} 1471fe8aea9eSmrg 1472fe8aea9eSmrgstatic void 1473fe8aea9eSmrgsampler_fill_init(struct gen9_sampler_state *ss) 1474fe8aea9eSmrg{ 1475fe8aea9eSmrg sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT); 1476fe8aea9eSmrg ss->ss3.non_normalized_coord = 1; 1477fe8aea9eSmrg 1478fe8aea9eSmrg sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); 1479fe8aea9eSmrg} 1480fe8aea9eSmrg 1481fe8aea9eSmrgstatic uint32_t 1482fe8aea9eSmrggen9_tiling_bits(uint32_t tiling) 1483fe8aea9eSmrg{ 1484fe8aea9eSmrg switch (tiling) { 1485fe8aea9eSmrg default: assert(0); 1486fe8aea9eSmrg case I915_TILING_NONE: return 0; 1487fe8aea9eSmrg case I915_TILING_X: return SURFACE_TILED; 1488fe8aea9eSmrg case I915_TILING_Y: return SURFACE_TILED | SURFACE_TILED_Y; 1489fe8aea9eSmrg } 1490fe8aea9eSmrg} 1491fe8aea9eSmrg 1492fe8aea9eSmrg#define MOCS_PTE (1 << 1) 1493fe8aea9eSmrg#define MOCS_WB (2 << 1) 1494fe8aea9eSmrg 1495fe8aea9eSmrg/** 1496fe8aea9eSmrg * Sets up the common fields for a surface state buffer for the given 1497fe8aea9eSmrg * picture in the given surface state buffer. 1498fe8aea9eSmrg */ 1499fe8aea9eSmrgstatic uint32_t 1500fe8aea9eSmrggen9_bind_bo(struct sna *sna, 1501fe8aea9eSmrg struct kgem_bo *bo, 1502fe8aea9eSmrg uint32_t width, 1503fe8aea9eSmrg uint32_t height, 1504fe8aea9eSmrg uint32_t format, 1505fe8aea9eSmrg bool is_dst) 1506fe8aea9eSmrg{ 1507fe8aea9eSmrg uint32_t *ss; 1508fe8aea9eSmrg uint32_t domains; 1509fe8aea9eSmrg int offset; 1510fe8aea9eSmrg uint32_t is_scanout = is_dst && bo->scanout; 1511fe8aea9eSmrg 1512fe8aea9eSmrg /* After the first bind, we manage the cache domains within the batch */ 1513fe8aea9eSmrg offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31); 1514fe8aea9eSmrg if (offset) { 1515fe8aea9eSmrg if (is_dst) 1516fe8aea9eSmrg kgem_bo_mark_dirty(bo); 1517fe8aea9eSmrg assert(offset >= sna->kgem.surface); 1518fe8aea9eSmrg return offset * sizeof(uint32_t); 1519fe8aea9eSmrg } 1520fe8aea9eSmrg 1521fe8aea9eSmrg offset = sna->kgem.surface -= SURFACE_DW; 1522fe8aea9eSmrg ss = sna->kgem.batch + offset; 1523fe8aea9eSmrg ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT | 1524fe8aea9eSmrg gen9_tiling_bits(bo->tiling) | 1525fe8aea9eSmrg format << SURFACE_FORMAT_SHIFT | 1526fe8aea9eSmrg SURFACE_VALIGN_4 | SURFACE_HALIGN_4); 1527fe8aea9eSmrg if (is_dst) { 1528fe8aea9eSmrg ss[0] |= SURFACE_RC_READ_WRITE; 1529fe8aea9eSmrg domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER; 1530fe8aea9eSmrg } else 1531fe8aea9eSmrg domains = I915_GEM_DOMAIN_SAMPLER << 16; 1532fe8aea9eSmrg ss[1] = (is_scanout || (is_dst && is_uncached(sna, bo))) ? MOCS_PTE << 24 : MOCS_WB << 24; 1533fe8aea9eSmrg ss[2] = ((width - 1) << SURFACE_WIDTH_SHIFT | 1534fe8aea9eSmrg (height - 1) << SURFACE_HEIGHT_SHIFT); 1535fe8aea9eSmrg ss[3] = (bo->pitch - 1) << SURFACE_PITCH_SHIFT; 1536fe8aea9eSmrg ss[4] = 0; 1537fe8aea9eSmrg ss[5] = 0; 1538fe8aea9eSmrg ss[6] = 0; 1539fe8aea9eSmrg ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); 1540fe8aea9eSmrg *(uint64_t *)(ss+8) = kgem_add_reloc64(&sna->kgem, offset + 8, bo, domains, 0); 1541fe8aea9eSmrg ss[10] = 0; 1542fe8aea9eSmrg ss[11] = 0; 1543fe8aea9eSmrg ss[12] = 0; 1544fe8aea9eSmrg ss[13] = 0; 1545fe8aea9eSmrg ss[14] = 0; 1546fe8aea9eSmrg ss[15] = 0; 1547fe8aea9eSmrg 1548fe8aea9eSmrg kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset); 1549fe8aea9eSmrg 1550fe8aea9eSmrg DBG(("[%x] bind bo(handle=%d, addr=%lx), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", 1551fe8aea9eSmrg offset, bo->handle, *(uint64_t *)(ss+8), 1552fe8aea9eSmrg format, width, height, bo->pitch, bo->tiling, 1553fe8aea9eSmrg domains & 0xffff ? "render" : "sampler")); 1554fe8aea9eSmrg 1555fe8aea9eSmrg return offset * sizeof(uint32_t); 1556fe8aea9eSmrg} 1557fe8aea9eSmrg 1558fe8aea9eSmrgstatic void gen9_emit_vertex_buffer(struct sna *sna, 1559fe8aea9eSmrg const struct sna_composite_op *op) 1560fe8aea9eSmrg{ 1561fe8aea9eSmrg int id = GEN9_VERTEX(op->u.gen9.flags); 1562fe8aea9eSmrg 1563fe8aea9eSmrg OUT_BATCH(GEN9_3DSTATE_VERTEX_BUFFERS | (5 - 2)); 1564fe8aea9eSmrg OUT_BATCH(id << VB_INDEX_SHIFT | VB_MODIFY_ENABLE | 1565fe8aea9eSmrg 4*op->floats_per_vertex); 1566fe8aea9eSmrg sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; 1567fe8aea9eSmrg OUT_BATCH64(0); 1568fe8aea9eSmrg OUT_BATCH(~0); /* buffer size: disabled */ 1569fe8aea9eSmrg 1570fe8aea9eSmrg sna->render.vb_id |= 1 << id; 1571fe8aea9eSmrg} 1572fe8aea9eSmrg 1573fe8aea9eSmrgstatic void gen9_emit_primitive(struct sna *sna) 1574fe8aea9eSmrg{ 1575fe8aea9eSmrg if (sna->kgem.nbatch == sna->render_state.gen9.last_primitive) { 1576fe8aea9eSmrg sna->render.vertex_offset = sna->kgem.nbatch - 5; 1577fe8aea9eSmrg return; 1578fe8aea9eSmrg } 1579fe8aea9eSmrg 1580fe8aea9eSmrg OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2)); 1581fe8aea9eSmrg OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */ 1582fe8aea9eSmrg sna->render.vertex_offset = sna->kgem.nbatch; 1583fe8aea9eSmrg OUT_BATCH(0); /* vertex count, to be filled in later */ 1584fe8aea9eSmrg OUT_BATCH(sna->render.vertex_index); 1585fe8aea9eSmrg OUT_BATCH(1); /* single instance */ 1586fe8aea9eSmrg OUT_BATCH(0); /* start instance location */ 1587fe8aea9eSmrg OUT_BATCH(0); /* index buffer offset, ignored */ 1588fe8aea9eSmrg sna->render.vertex_start = sna->render.vertex_index; 1589fe8aea9eSmrg 1590fe8aea9eSmrg sna->render_state.gen9.last_primitive = sna->kgem.nbatch; 1591fe8aea9eSmrg sna->render_state.gen9.ve_dirty = false; 1592fe8aea9eSmrg} 1593fe8aea9eSmrg 1594fe8aea9eSmrgstatic bool gen9_rectangle_begin(struct sna *sna, 1595fe8aea9eSmrg const struct sna_composite_op *op) 1596fe8aea9eSmrg{ 1597fe8aea9eSmrg int id = 1 << GEN9_VERTEX(op->u.gen9.flags); 1598fe8aea9eSmrg int ndwords; 1599fe8aea9eSmrg 1600fe8aea9eSmrg if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) 1601fe8aea9eSmrg return true; 1602fe8aea9eSmrg 1603fe8aea9eSmrg ndwords = op->need_magic_ca_pass ? 60 : 6; 1604fe8aea9eSmrg if ((sna->render.vb_id & id) == 0) 1605fe8aea9eSmrg ndwords += 5; 1606fe8aea9eSmrg if (!kgem_check_batch(&sna->kgem, ndwords)) 1607fe8aea9eSmrg return false; 1608fe8aea9eSmrg 1609fe8aea9eSmrg if ((sna->render.vb_id & id) == 0) 1610fe8aea9eSmrg gen9_emit_vertex_buffer(sna, op); 1611fe8aea9eSmrg 1612fe8aea9eSmrg gen9_emit_primitive(sna); 1613fe8aea9eSmrg return true; 1614fe8aea9eSmrg} 1615fe8aea9eSmrg 1616fe8aea9eSmrgstatic int gen9_get_rectangles__flush(struct sna *sna, 1617fe8aea9eSmrg const struct sna_composite_op *op) 1618fe8aea9eSmrg{ 1619fe8aea9eSmrg /* Preventing discarding new vbo after lock contention */ 1620fe8aea9eSmrg if (sna_vertex_wait__locked(&sna->render)) { 1621fe8aea9eSmrg int rem = vertex_space(sna); 1622fe8aea9eSmrg if (rem > op->floats_per_rect) 1623fe8aea9eSmrg return rem; 1624fe8aea9eSmrg } 1625fe8aea9eSmrg 1626fe8aea9eSmrg if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6)) 1627fe8aea9eSmrg return 0; 1628fe8aea9eSmrg if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) 1629fe8aea9eSmrg return 0; 1630fe8aea9eSmrg 1631fe8aea9eSmrg if (sna->render.vertex_offset) { 1632fe8aea9eSmrg gen8_vertex_flush(sna); 1633fe8aea9eSmrg if (gen9_magic_ca_pass(sna, op)) { 1634fe8aea9eSmrg gen9_emit_pipe_invalidate(sna); 1635fe8aea9eSmrg gen9_emit_cc(sna, GEN9_BLEND(op->u.gen9.flags)); 1636fe8aea9eSmrg gen9_emit_wm(sna, op->u.gen9.wm_kernel); 1637fe8aea9eSmrg } 1638fe8aea9eSmrg } 1639fe8aea9eSmrg 1640fe8aea9eSmrg return gen8_vertex_finish(sna); 1641fe8aea9eSmrg} 1642fe8aea9eSmrg 1643fe8aea9eSmrginline static int gen9_get_rectangles(struct sna *sna, 1644fe8aea9eSmrg const struct sna_composite_op *op, 1645fe8aea9eSmrg int want, 1646fe8aea9eSmrg void (*emit_state)(struct sna *sna, const struct sna_composite_op *op)) 1647fe8aea9eSmrg{ 1648fe8aea9eSmrg int rem; 1649fe8aea9eSmrg 1650fe8aea9eSmrg assert(want); 1651fe8aea9eSmrg 1652fe8aea9eSmrgstart: 1653fe8aea9eSmrg rem = vertex_space(sna); 1654fe8aea9eSmrg if (unlikely(rem < op->floats_per_rect)) { 1655fe8aea9eSmrg DBG(("flushing vbo for %s: %d < %d\n", 1656fe8aea9eSmrg __FUNCTION__, rem, op->floats_per_rect)); 1657fe8aea9eSmrg rem = gen9_get_rectangles__flush(sna, op); 1658fe8aea9eSmrg if (unlikely(rem == 0)) 1659fe8aea9eSmrg goto flush; 1660fe8aea9eSmrg } 1661fe8aea9eSmrg 1662fe8aea9eSmrg if (unlikely(sna->render.vertex_offset == 0)) { 1663fe8aea9eSmrg if (!gen9_rectangle_begin(sna, op)) 1664fe8aea9eSmrg goto flush; 1665fe8aea9eSmrg else 1666fe8aea9eSmrg goto start; 1667fe8aea9eSmrg } 1668fe8aea9eSmrg 1669fe8aea9eSmrg assert(rem <= vertex_space(sna)); 1670fe8aea9eSmrg assert(op->floats_per_rect <= rem); 1671fe8aea9eSmrg if (want > 1 && want * op->floats_per_rect > rem) 1672fe8aea9eSmrg want = rem / op->floats_per_rect; 1673fe8aea9eSmrg 1674fe8aea9eSmrg assert(want > 0); 1675fe8aea9eSmrg sna->render.vertex_index += 3*want; 1676fe8aea9eSmrg return want; 1677fe8aea9eSmrg 1678fe8aea9eSmrgflush: 1679fe8aea9eSmrg if (sna->render.vertex_offset) { 1680fe8aea9eSmrg gen8_vertex_flush(sna); 1681fe8aea9eSmrg gen9_magic_ca_pass(sna, op); 1682fe8aea9eSmrg } 1683fe8aea9eSmrg sna_vertex_wait__locked(&sna->render); 1684fe8aea9eSmrg _kgem_submit(&sna->kgem); 1685fe8aea9eSmrg emit_state(sna, op); 1686fe8aea9eSmrg goto start; 1687fe8aea9eSmrg} 1688fe8aea9eSmrg 1689fe8aea9eSmrginline static uint32_t *gen9_composite_get_binding_table(struct sna *sna, 1690fe8aea9eSmrg uint16_t *offset) 1691fe8aea9eSmrg{ 1692fe8aea9eSmrg uint32_t *table; 1693fe8aea9eSmrg 1694fe8aea9eSmrg assert(sna->kgem.surface <= 16384); 1695fe8aea9eSmrg sna->kgem.surface -= SURFACE_DW; 1696fe8aea9eSmrg /* Clear all surplus entries to zero in case of prefetch */ 1697fe8aea9eSmrg table = memset(sna->kgem.batch + sna->kgem.surface, 0, 64); 1698fe8aea9eSmrg 1699fe8aea9eSmrg DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); 1700fe8aea9eSmrg 1701fe8aea9eSmrg *offset = sna->kgem.surface; 1702fe8aea9eSmrg return table; 1703fe8aea9eSmrg} 1704fe8aea9eSmrg 1705fe8aea9eSmrgstatic void 1706fe8aea9eSmrggen9_get_batch(struct sna *sna, const struct sna_composite_op *op) 1707fe8aea9eSmrg{ 1708fe8aea9eSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); 1709fe8aea9eSmrg 1710fe8aea9eSmrg if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 2*(1+3))) { 1711fe8aea9eSmrg DBG(("%s: flushing batch: %d < %d+%d\n", 1712fe8aea9eSmrg __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, 1713fe8aea9eSmrg 150, 4*8*2)); 1714fe8aea9eSmrg _kgem_submit(&sna->kgem); 1715fe8aea9eSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 1716fe8aea9eSmrg } 1717fe8aea9eSmrg 1718fe8aea9eSmrg assert(sna->kgem.mode == KGEM_RENDER); 1719fe8aea9eSmrg assert(sna->kgem.ring == KGEM_RENDER); 1720fe8aea9eSmrg 1721fe8aea9eSmrg if (sna->render_state.gen9.needs_invariant) 1722fe8aea9eSmrg gen9_emit_invariant(sna); 1723fe8aea9eSmrg} 1724fe8aea9eSmrg 1725fe8aea9eSmrgstatic void gen9_emit_composite_state(struct sna *sna, 1726fe8aea9eSmrg const struct sna_composite_op *op) 1727fe8aea9eSmrg{ 1728fe8aea9eSmrg uint32_t *binding_table; 1729fe8aea9eSmrg uint16_t offset, dirty; 1730fe8aea9eSmrg 1731fe8aea9eSmrg gen9_get_batch(sna, op); 1732fe8aea9eSmrg 1733fe8aea9eSmrg binding_table = gen9_composite_get_binding_table(sna, &offset); 1734fe8aea9eSmrg 1735fe8aea9eSmrg dirty = kgem_bo_is_dirty(op->dst.bo); 1736fe8aea9eSmrg 1737fe8aea9eSmrg binding_table[0] = 1738fe8aea9eSmrg gen9_bind_bo(sna, 1739fe8aea9eSmrg op->dst.bo, op->dst.width, op->dst.height, 1740fe8aea9eSmrg gen9_get_dest_format(op->dst.format), 1741fe8aea9eSmrg true); 1742fe8aea9eSmrg binding_table[1] = 1743fe8aea9eSmrg gen9_bind_bo(sna, 1744fe8aea9eSmrg op->src.bo, op->src.width, op->src.height, 1745fe8aea9eSmrg op->src.card_format, 1746fe8aea9eSmrg false); 1747fe8aea9eSmrg if (op->mask.bo) { 1748fe8aea9eSmrg binding_table[2] = 1749fe8aea9eSmrg gen9_bind_bo(sna, 1750fe8aea9eSmrg op->mask.bo, 1751fe8aea9eSmrg op->mask.width, 1752fe8aea9eSmrg op->mask.height, 1753fe8aea9eSmrg op->mask.card_format, 1754fe8aea9eSmrg false); 1755fe8aea9eSmrg } 1756fe8aea9eSmrg 1757fe8aea9eSmrg if (sna->kgem.surface == offset && 1758fe8aea9eSmrg *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table && 1759fe8aea9eSmrg (op->mask.bo == NULL || 1760fe8aea9eSmrg sna->kgem.batch[sna->render_state.gen9.surface_table+2] == binding_table[2])) { 1761fe8aea9eSmrg sna->kgem.surface += SURFACE_DW; 1762fe8aea9eSmrg offset = sna->render_state.gen9.surface_table; 1763fe8aea9eSmrg } 1764fe8aea9eSmrg 1765fe8aea9eSmrg if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0]) 1766fe8aea9eSmrg dirty = 0; 1767fe8aea9eSmrg 1768fe8aea9eSmrg gen9_emit_state(sna, op, offset | dirty); 1769fe8aea9eSmrg} 1770fe8aea9eSmrg 1771fe8aea9eSmrgstatic void 1772fe8aea9eSmrggen9_align_vertex(struct sna *sna, const struct sna_composite_op *op) 1773fe8aea9eSmrg{ 1774fe8aea9eSmrg if (op->floats_per_vertex != sna->render_state.gen9.floats_per_vertex) { 1775fe8aea9eSmrg DBG(("aligning vertex: was %d, now %d floats per vertex\n", 1776fe8aea9eSmrg sna->render_state.gen9.floats_per_vertex, op->floats_per_vertex)); 1777fe8aea9eSmrg gen8_vertex_align(sna, op); 1778fe8aea9eSmrg sna->render_state.gen9.floats_per_vertex = op->floats_per_vertex; 1779fe8aea9eSmrg } 1780fe8aea9eSmrg} 1781fe8aea9eSmrg 1782fe8aea9eSmrgfastcall static void 1783fe8aea9eSmrggen9_render_composite_blt(struct sna *sna, 1784fe8aea9eSmrg const struct sna_composite_op *op, 1785fe8aea9eSmrg const struct sna_composite_rectangles *r) 1786fe8aea9eSmrg{ 1787fe8aea9eSmrg gen9_get_rectangles(sna, op, 1, gen9_emit_composite_state); 1788fe8aea9eSmrg op->prim_emit(sna, op, r); 1789fe8aea9eSmrg} 1790fe8aea9eSmrg 1791fe8aea9eSmrgfastcall static void 1792fe8aea9eSmrggen9_render_composite_box(struct sna *sna, 1793fe8aea9eSmrg const struct sna_composite_op *op, 1794fe8aea9eSmrg const BoxRec *box) 1795fe8aea9eSmrg{ 1796fe8aea9eSmrg struct sna_composite_rectangles r; 1797fe8aea9eSmrg 1798fe8aea9eSmrg gen9_get_rectangles(sna, op, 1, gen9_emit_composite_state); 1799fe8aea9eSmrg 1800fe8aea9eSmrg DBG((" %s: (%d, %d), (%d, %d)\n", 1801fe8aea9eSmrg __FUNCTION__, 1802fe8aea9eSmrg box->x1, box->y1, box->x2, box->y2)); 1803fe8aea9eSmrg 1804fe8aea9eSmrg r.dst.x = box->x1; 1805fe8aea9eSmrg r.dst.y = box->y1; 1806fe8aea9eSmrg r.width = box->x2 - box->x1; 1807fe8aea9eSmrg r.height = box->y2 - box->y1; 1808fe8aea9eSmrg r.src = r.mask = r.dst; 1809fe8aea9eSmrg 1810fe8aea9eSmrg op->prim_emit(sna, op, &r); 1811fe8aea9eSmrg} 1812fe8aea9eSmrg 1813fe8aea9eSmrgstatic void 1814fe8aea9eSmrggen9_render_composite_boxes__blt(struct sna *sna, 1815fe8aea9eSmrg const struct sna_composite_op *op, 1816fe8aea9eSmrg const BoxRec *box, int nbox) 1817fe8aea9eSmrg{ 1818fe8aea9eSmrg DBG(("composite_boxes(%d)\n", nbox)); 1819fe8aea9eSmrg 1820fe8aea9eSmrg do { 1821fe8aea9eSmrg int nbox_this_time; 1822fe8aea9eSmrg 1823fe8aea9eSmrg nbox_this_time = gen9_get_rectangles(sna, op, nbox, 1824fe8aea9eSmrg gen9_emit_composite_state); 1825fe8aea9eSmrg nbox -= nbox_this_time; 1826fe8aea9eSmrg 1827fe8aea9eSmrg do { 1828fe8aea9eSmrg struct sna_composite_rectangles r; 1829fe8aea9eSmrg 1830fe8aea9eSmrg DBG((" %s: (%d, %d), (%d, %d)\n", 1831fe8aea9eSmrg __FUNCTION__, 1832fe8aea9eSmrg box->x1, box->y1, box->x2, box->y2)); 1833fe8aea9eSmrg 1834fe8aea9eSmrg r.dst.x = box->x1; 1835fe8aea9eSmrg r.dst.y = box->y1; 1836fe8aea9eSmrg r.width = box->x2 - box->x1; 1837fe8aea9eSmrg r.height = box->y2 - box->y1; 1838fe8aea9eSmrg r.src = r.mask = r.dst; 1839fe8aea9eSmrg 1840fe8aea9eSmrg op->prim_emit(sna, op, &r); 1841fe8aea9eSmrg box++; 1842fe8aea9eSmrg } while (--nbox_this_time); 1843fe8aea9eSmrg } while (nbox); 1844fe8aea9eSmrg} 1845fe8aea9eSmrg 1846fe8aea9eSmrgstatic void 1847fe8aea9eSmrggen9_render_composite_boxes(struct sna *sna, 1848fe8aea9eSmrg const struct sna_composite_op *op, 1849fe8aea9eSmrg const BoxRec *box, int nbox) 1850fe8aea9eSmrg{ 1851fe8aea9eSmrg DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1852fe8aea9eSmrg 1853fe8aea9eSmrg do { 1854fe8aea9eSmrg int nbox_this_time; 1855fe8aea9eSmrg float *v; 1856fe8aea9eSmrg 1857fe8aea9eSmrg nbox_this_time = gen9_get_rectangles(sna, op, nbox, 1858fe8aea9eSmrg gen9_emit_composite_state); 1859fe8aea9eSmrg assert(nbox_this_time); 1860fe8aea9eSmrg nbox -= nbox_this_time; 1861fe8aea9eSmrg 1862fe8aea9eSmrg v = sna->render.vertices + sna->render.vertex_used; 1863fe8aea9eSmrg sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1864fe8aea9eSmrg 1865fe8aea9eSmrg op->emit_boxes(op, box, nbox_this_time, v); 1866fe8aea9eSmrg box += nbox_this_time; 1867fe8aea9eSmrg } while (nbox); 1868fe8aea9eSmrg} 1869fe8aea9eSmrg 1870fe8aea9eSmrgstatic void 1871fe8aea9eSmrggen9_render_composite_boxes__thread(struct sna *sna, 1872fe8aea9eSmrg const struct sna_composite_op *op, 1873fe8aea9eSmrg const BoxRec *box, int nbox) 1874fe8aea9eSmrg{ 1875fe8aea9eSmrg DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); 1876fe8aea9eSmrg 1877fe8aea9eSmrg sna_vertex_lock(&sna->render); 1878fe8aea9eSmrg do { 1879fe8aea9eSmrg int nbox_this_time; 1880fe8aea9eSmrg float *v; 1881fe8aea9eSmrg 1882fe8aea9eSmrg nbox_this_time = gen9_get_rectangles(sna, op, nbox, 1883fe8aea9eSmrg gen9_emit_composite_state); 1884fe8aea9eSmrg assert(nbox_this_time); 1885fe8aea9eSmrg nbox -= nbox_this_time; 1886fe8aea9eSmrg 1887fe8aea9eSmrg v = sna->render.vertices + sna->render.vertex_used; 1888fe8aea9eSmrg sna->render.vertex_used += nbox_this_time * op->floats_per_rect; 1889fe8aea9eSmrg 1890fe8aea9eSmrg sna_vertex_acquire__locked(&sna->render); 1891fe8aea9eSmrg sna_vertex_unlock(&sna->render); 1892fe8aea9eSmrg 1893fe8aea9eSmrg op->emit_boxes(op, box, nbox_this_time, v); 1894fe8aea9eSmrg box += nbox_this_time; 1895fe8aea9eSmrg 1896fe8aea9eSmrg sna_vertex_lock(&sna->render); 1897fe8aea9eSmrg sna_vertex_release__locked(&sna->render); 1898fe8aea9eSmrg } while (nbox); 1899fe8aea9eSmrg sna_vertex_unlock(&sna->render); 1900fe8aea9eSmrg} 1901fe8aea9eSmrg 1902fe8aea9eSmrgstatic uint32_t 1903fe8aea9eSmrggen9_create_blend_state(struct sna_static_stream *stream) 1904fe8aea9eSmrg{ 1905fe8aea9eSmrg char *base, *ptr; 1906fe8aea9eSmrg int src, dst; 1907fe8aea9eSmrg 1908fe8aea9eSmrg COMPILE_TIME_ASSERT(((GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT << 4) & (1 << 15)) == 0); 1909fe8aea9eSmrg 1910fe8aea9eSmrg base = sna_static_stream_map(stream, 1911fe8aea9eSmrg GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT * GEN9_BLEND_STATE_PADDED_SIZE, 1912fe8aea9eSmrg 64); 1913fe8aea9eSmrg 1914fe8aea9eSmrg ptr = base; 1915fe8aea9eSmrg for (src = 0; src < GEN9_BLENDFACTOR_COUNT; src++) { 1916fe8aea9eSmrg for (dst = 0; dst < GEN9_BLENDFACTOR_COUNT; dst++) { 1917fe8aea9eSmrg struct gen9_blend_state *blend = 1918fe8aea9eSmrg (struct gen9_blend_state *)ptr; 1919fe8aea9eSmrg 1920fe8aea9eSmrg assert(((ptr - base) & 63) == 0); 1921fe8aea9eSmrg COMPILE_TIME_ASSERT(sizeof(blend->common) == 4); 1922fe8aea9eSmrg COMPILE_TIME_ASSERT(sizeof(blend->rt) == 8); 1923fe8aea9eSmrg COMPILE_TIME_ASSERT((char *)&blend->rt - (char *)blend == 4); 1924fe8aea9eSmrg 1925fe8aea9eSmrg blend->rt.post_blend_clamp = 1; 1926fe8aea9eSmrg blend->rt.pre_blend_clamp = 1; 1927fe8aea9eSmrg 1928fe8aea9eSmrg blend->rt.color_blend = 1929fe8aea9eSmrg !(dst == BLENDFACTOR_ZERO && src == BLENDFACTOR_ONE); 1930fe8aea9eSmrg blend->rt.dest_blend_factor = dst; 1931fe8aea9eSmrg blend->rt.source_blend_factor = src; 1932fe8aea9eSmrg blend->rt.color_blend_function = BLENDFUNCTION_ADD; 1933fe8aea9eSmrg 1934fe8aea9eSmrg blend->rt.dest_alpha_blend_factor = dst; 1935fe8aea9eSmrg blend->rt.source_alpha_blend_factor = src; 1936fe8aea9eSmrg blend->rt.alpha_blend_function = BLENDFUNCTION_ADD; 1937fe8aea9eSmrg 1938fe8aea9eSmrg ptr += GEN9_BLEND_STATE_PADDED_SIZE; 1939fe8aea9eSmrg } 1940fe8aea9eSmrg } 1941fe8aea9eSmrg 1942fe8aea9eSmrg return sna_static_stream_offsetof(stream, base); 1943fe8aea9eSmrg} 1944fe8aea9eSmrg 1945fe8aea9eSmrgstatic int 1946fe8aea9eSmrggen9_composite_picture(struct sna *sna, 1947fe8aea9eSmrg PicturePtr picture, 1948fe8aea9eSmrg struct sna_composite_channel *channel, 1949fe8aea9eSmrg int x, int y, 1950fe8aea9eSmrg int w, int h, 1951fe8aea9eSmrg int dst_x, int dst_y, 1952fe8aea9eSmrg bool precise) 1953fe8aea9eSmrg{ 1954fe8aea9eSmrg PixmapPtr pixmap; 1955fe8aea9eSmrg uint32_t color; 1956fe8aea9eSmrg int16_t dx, dy; 1957fe8aea9eSmrg 1958fe8aea9eSmrg DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", 1959fe8aea9eSmrg __FUNCTION__, x, y, w, h, dst_x, dst_y)); 1960fe8aea9eSmrg 1961fe8aea9eSmrg channel->is_solid = false; 1962fe8aea9eSmrg channel->card_format = -1; 1963fe8aea9eSmrg 1964fe8aea9eSmrg if (sna_picture_is_solid(picture, &color)) 1965fe8aea9eSmrg return gen4_channel_init_solid(sna, channel, color); 1966fe8aea9eSmrg 1967fe8aea9eSmrg if (picture->pDrawable == NULL) { 1968fe8aea9eSmrg int ret; 1969fe8aea9eSmrg 1970fe8aea9eSmrg if (picture->pSourcePict->type == SourcePictTypeLinear) 1971fe8aea9eSmrg return gen4_channel_init_linear(sna, picture, channel, 1972fe8aea9eSmrg x, y, 1973fe8aea9eSmrg w, h, 1974fe8aea9eSmrg dst_x, dst_y); 1975fe8aea9eSmrg 1976fe8aea9eSmrg DBG(("%s -- fixup, gradient\n", __FUNCTION__)); 1977fe8aea9eSmrg ret = -1; 1978fe8aea9eSmrg if (!precise) 1979fe8aea9eSmrg ret = sna_render_picture_approximate_gradient(sna, picture, channel, 1980fe8aea9eSmrg x, y, w, h, dst_x, dst_y); 1981fe8aea9eSmrg if (ret == -1) 1982fe8aea9eSmrg ret = sna_render_picture_fixup(sna, picture, channel, 1983fe8aea9eSmrg x, y, w, h, dst_x, dst_y); 1984fe8aea9eSmrg return ret; 1985fe8aea9eSmrg } 1986fe8aea9eSmrg 1987fe8aea9eSmrg if (picture->alphaMap) { 1988fe8aea9eSmrg DBG(("%s -- fallback, alphamap\n", __FUNCTION__)); 1989fe8aea9eSmrg return sna_render_picture_fixup(sna, picture, channel, 1990fe8aea9eSmrg x, y, w, h, dst_x, dst_y); 1991fe8aea9eSmrg } 1992fe8aea9eSmrg 1993fe8aea9eSmrg if (!gen9_check_repeat(picture)) 1994fe8aea9eSmrg return sna_render_picture_fixup(sna, picture, channel, 1995fe8aea9eSmrg x, y, w, h, dst_x, dst_y); 1996fe8aea9eSmrg 1997fe8aea9eSmrg if (!gen9_check_filter(picture)) 1998fe8aea9eSmrg return sna_render_picture_fixup(sna, picture, channel, 1999fe8aea9eSmrg x, y, w, h, dst_x, dst_y); 2000fe8aea9eSmrg 2001fe8aea9eSmrg channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; 2002fe8aea9eSmrg channel->filter = picture->filter; 2003fe8aea9eSmrg 2004fe8aea9eSmrg pixmap = get_drawable_pixmap(picture->pDrawable); 2005fe8aea9eSmrg get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); 2006fe8aea9eSmrg 2007fe8aea9eSmrg x += dx + picture->pDrawable->x; 2008fe8aea9eSmrg y += dy + picture->pDrawable->y; 2009fe8aea9eSmrg 2010fe8aea9eSmrg channel->is_affine = sna_transform_is_affine(picture->transform); 2011fe8aea9eSmrg if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) { 2012fe8aea9eSmrg DBG(("%s: integer translation (%d, %d), removing\n", 2013fe8aea9eSmrg __FUNCTION__, dx, dy)); 2014fe8aea9eSmrg x += dx; 2015fe8aea9eSmrg y += dy; 2016fe8aea9eSmrg channel->transform = NULL; 2017fe8aea9eSmrg channel->filter = PictFilterNearest; 2018fe8aea9eSmrg 2019fe8aea9eSmrg if (channel->repeat || 2020fe8aea9eSmrg (x >= 0 && 2021fe8aea9eSmrg y >= 0 && 2022fe8aea9eSmrg x + w <= pixmap->drawable.width && 2023fe8aea9eSmrg y + h <= pixmap->drawable.height)) { 2024fe8aea9eSmrg struct sna_pixmap *priv = sna_pixmap(pixmap); 2025fe8aea9eSmrg if (priv && priv->clear) { 2026fe8aea9eSmrg DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color)); 2027fe8aea9eSmrg return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color)); 2028fe8aea9eSmrg } 2029fe8aea9eSmrg } 2030fe8aea9eSmrg } else 2031fe8aea9eSmrg channel->transform = picture->transform; 2032fe8aea9eSmrg 2033fe8aea9eSmrg channel->pict_format = picture->format; 2034fe8aea9eSmrg channel->card_format = gen9_get_card_format(picture->format); 2035fe8aea9eSmrg if (channel->card_format == (unsigned)-1) 2036fe8aea9eSmrg return sna_render_picture_convert(sna, picture, channel, pixmap, 2037fe8aea9eSmrg x, y, w, h, dst_x, dst_y, 2038fe8aea9eSmrg false); 2039fe8aea9eSmrg 2040fe8aea9eSmrg if (too_large(pixmap->drawable.width, pixmap->drawable.height)) { 2041fe8aea9eSmrg DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__, 2042fe8aea9eSmrg pixmap->drawable.width, pixmap->drawable.height)); 2043fe8aea9eSmrg return sna_render_picture_extract(sna, picture, channel, 2044fe8aea9eSmrg x, y, w, h, dst_x, dst_y); 2045fe8aea9eSmrg } 2046fe8aea9eSmrg 2047fe8aea9eSmrg return sna_render_pixmap_bo(sna, channel, pixmap, 2048fe8aea9eSmrg x, y, w, h, dst_x, dst_y); 2049fe8aea9eSmrg} 2050fe8aea9eSmrg 2051fe8aea9eSmrginline static bool gen9_composite_channel_convert(struct sna_composite_channel *channel) 2052fe8aea9eSmrg{ 2053fe8aea9eSmrg if (unaligned(channel->bo, PICT_FORMAT_BPP(channel->pict_format))) 2054fe8aea9eSmrg return false; 2055fe8aea9eSmrg 2056fe8aea9eSmrg channel->repeat = gen9_repeat(channel->repeat); 2057fe8aea9eSmrg channel->filter = gen9_filter(channel->filter); 2058fe8aea9eSmrg if (channel->card_format == (unsigned)-1) 2059fe8aea9eSmrg channel->card_format = gen9_get_card_format(channel->pict_format); 2060fe8aea9eSmrg assert(channel->card_format != (unsigned)-1); 2061fe8aea9eSmrg 2062fe8aea9eSmrg return true; 2063fe8aea9eSmrg} 2064fe8aea9eSmrg 2065fe8aea9eSmrgstatic void gen9_render_composite_done(struct sna *sna, 2066fe8aea9eSmrg const struct sna_composite_op *op) 2067fe8aea9eSmrg{ 2068fe8aea9eSmrg if (sna->render.vertex_offset) { 2069fe8aea9eSmrg gen8_vertex_flush(sna); 2070fe8aea9eSmrg gen9_magic_ca_pass(sna, op); 2071fe8aea9eSmrg } 2072fe8aea9eSmrg 2073fe8aea9eSmrg if (op->mask.bo) 2074fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, op->mask.bo); 2075fe8aea9eSmrg if (op->src.bo) 2076fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, op->src.bo); 2077fe8aea9eSmrg 2078fe8aea9eSmrg sna_render_composite_redirect_done(sna, op); 2079fe8aea9eSmrg} 2080fe8aea9eSmrg 2081fe8aea9eSmrginline static bool 2082fe8aea9eSmrggen9_composite_set_target(struct sna *sna, 2083fe8aea9eSmrg struct sna_composite_op *op, 2084fe8aea9eSmrg PicturePtr dst, 2085fe8aea9eSmrg int x, int y, int w, int h, 2086fe8aea9eSmrg bool partial) 2087fe8aea9eSmrg{ 2088fe8aea9eSmrg BoxRec box; 2089fe8aea9eSmrg unsigned int hint; 2090fe8aea9eSmrg 2091fe8aea9eSmrg DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial)); 2092fe8aea9eSmrg 2093fe8aea9eSmrg op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); 2094fe8aea9eSmrg op->dst.format = dst->format; 2095fe8aea9eSmrg op->dst.width = op->dst.pixmap->drawable.width; 2096fe8aea9eSmrg op->dst.height = op->dst.pixmap->drawable.height; 2097fe8aea9eSmrg 2098fe8aea9eSmrg if (w | h) { 2099fe8aea9eSmrg assert(w && h); 2100fe8aea9eSmrg box.x1 = x; 2101fe8aea9eSmrg box.y1 = y; 2102fe8aea9eSmrg box.x2 = x + w; 2103fe8aea9eSmrg box.y2 = y + h; 2104fe8aea9eSmrg } else 2105fe8aea9eSmrg sna_render_picture_extents(dst, &box); 2106fe8aea9eSmrg 2107fe8aea9eSmrg hint = PREFER_GPU | RENDER_GPU; 2108fe8aea9eSmrg if (!need_tiling(sna, op->dst.width, op->dst.height)) 2109fe8aea9eSmrg hint |= FORCE_GPU; 2110fe8aea9eSmrg if (!partial) { 2111fe8aea9eSmrg hint |= IGNORE_DAMAGE; 2112fe8aea9eSmrg if (w == op->dst.width && h == op->dst.height) 2113fe8aea9eSmrg hint |= REPLACES; 2114fe8aea9eSmrg } 2115fe8aea9eSmrg 2116fe8aea9eSmrg op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage); 2117fe8aea9eSmrg if (op->dst.bo == NULL) 2118fe8aea9eSmrg return false; 2119fe8aea9eSmrg 2120fe8aea9eSmrg assert(!op->damage || !DAMAGE_IS_ALL(*op->damage)); 2121fe8aea9eSmrg 2122fe8aea9eSmrg if (unaligned(op->dst.bo, dst->pDrawable->bitsPerPixel)) 2123fe8aea9eSmrg return false; 2124fe8aea9eSmrg 2125fe8aea9eSmrg if (hint & REPLACES) { 2126fe8aea9eSmrg struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap); 2127fe8aea9eSmrg kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo); 2128fe8aea9eSmrg } 2129fe8aea9eSmrg 2130fe8aea9eSmrg get_drawable_deltas(dst->pDrawable, op->dst.pixmap, 2131fe8aea9eSmrg &op->dst.x, &op->dst.y); 2132fe8aea9eSmrg 2133fe8aea9eSmrg DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", 2134fe8aea9eSmrg __FUNCTION__, 2135fe8aea9eSmrg op->dst.pixmap->drawable.serialNumber, (int)op->dst.format, 2136fe8aea9eSmrg op->dst.width, op->dst.height, 2137fe8aea9eSmrg op->dst.bo->pitch, 2138fe8aea9eSmrg op->dst.x, op->dst.y, 2139fe8aea9eSmrg op->damage ? *op->damage : (void *)-1)); 2140fe8aea9eSmrg 2141fe8aea9eSmrg assert(op->dst.bo->proxy == NULL); 2142fe8aea9eSmrg 2143fe8aea9eSmrg if (too_large(op->dst.width, op->dst.height) && 2144fe8aea9eSmrg !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) 2145fe8aea9eSmrg return false; 2146fe8aea9eSmrg 2147fe8aea9eSmrg return true; 2148fe8aea9eSmrg} 2149fe8aea9eSmrg 2150fe8aea9eSmrgstatic bool 2151fe8aea9eSmrgtry_blt(struct sna *sna, 2152fe8aea9eSmrg uint8_t op, 2153fe8aea9eSmrg PicturePtr src, 2154fe8aea9eSmrg PicturePtr mask, 2155fe8aea9eSmrg PicturePtr dst, 2156fe8aea9eSmrg int16_t src_x, int16_t src_y, 2157fe8aea9eSmrg int16_t msk_x, int16_t msk_y, 2158fe8aea9eSmrg int16_t dst_x, int16_t dst_y, 2159fe8aea9eSmrg int16_t width, int16_t height, 2160fe8aea9eSmrg unsigned flags, 2161fe8aea9eSmrg struct sna_composite_op *tmp) 2162fe8aea9eSmrg{ 2163fe8aea9eSmrg struct kgem_bo *bo; 2164fe8aea9eSmrg 2165fe8aea9eSmrg if (sna->kgem.mode == KGEM_BLT) { 2166fe8aea9eSmrg DBG(("%s: already performing BLT\n", __FUNCTION__)); 2167fe8aea9eSmrg goto execute; 2168fe8aea9eSmrg } 2169fe8aea9eSmrg 2170fe8aea9eSmrg if (too_large(width, height)) { 2171fe8aea9eSmrg DBG(("%s: operation too large for 3D pipe (%d, %d)\n", 2172fe8aea9eSmrg __FUNCTION__, width, height)); 2173fe8aea9eSmrg goto execute; 2174fe8aea9eSmrg } 2175fe8aea9eSmrg 2176fe8aea9eSmrg bo = __sna_drawable_peek_bo(dst->pDrawable); 2177fe8aea9eSmrg if (bo == NULL) 2178fe8aea9eSmrg goto execute; 2179fe8aea9eSmrg 2180fe8aea9eSmrg if (untiled_tlb_miss(bo)) 2181fe8aea9eSmrg goto execute; 2182fe8aea9eSmrg 2183fe8aea9eSmrg if (bo->rq) { 2184fe8aea9eSmrg if (RQ_IS_BLT(bo->rq)) 2185fe8aea9eSmrg goto execute; 2186fe8aea9eSmrg 2187fe8aea9eSmrg return false; 2188fe8aea9eSmrg } 2189fe8aea9eSmrg 2190fe8aea9eSmrg if (bo->tiling == I915_TILING_Y) 2191fe8aea9eSmrg goto upload; 2192fe8aea9eSmrg 2193fe8aea9eSmrg if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0)) 2194fe8aea9eSmrg goto execute; 2195fe8aea9eSmrg 2196fe8aea9eSmrg if (src->pDrawable == dst->pDrawable && 2197fe8aea9eSmrg (sna->render_state.gt < 3 || width*height < 1024) && 2198fe8aea9eSmrg can_switch_to_blt(sna, bo, 0)) 2199fe8aea9eSmrg goto execute; 2200fe8aea9eSmrg 2201fe8aea9eSmrg if (src->pDrawable) { 2202fe8aea9eSmrg struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable); 2203fe8aea9eSmrg if (s == NULL) 2204fe8aea9eSmrg goto upload; 2205fe8aea9eSmrg 2206fe8aea9eSmrg if (prefer_blt_bo(sna, s, bo)) 2207fe8aea9eSmrg goto execute; 2208fe8aea9eSmrg } 2209fe8aea9eSmrg 2210fe8aea9eSmrg if (sna->kgem.ring == KGEM_BLT) { 2211fe8aea9eSmrg DBG(("%s: already performing BLT\n", __FUNCTION__)); 2212fe8aea9eSmrg goto execute; 2213fe8aea9eSmrg } 2214fe8aea9eSmrg 2215fe8aea9eSmrgupload: 2216fe8aea9eSmrg flags |= COMPOSITE_UPLOAD; 2217fe8aea9eSmrgexecute: 2218fe8aea9eSmrg return sna_blt_composite(sna, op, 2219fe8aea9eSmrg src, dst, 2220fe8aea9eSmrg src_x, src_y, 2221fe8aea9eSmrg dst_x, dst_y, 2222fe8aea9eSmrg width, height, 2223fe8aea9eSmrg flags, tmp); 2224fe8aea9eSmrg} 2225fe8aea9eSmrg 2226fe8aea9eSmrgstatic bool 2227fe8aea9eSmrgcheck_gradient(PicturePtr picture, bool precise) 2228fe8aea9eSmrg{ 2229fe8aea9eSmrg if (picture->pDrawable) 2230fe8aea9eSmrg return false; 2231fe8aea9eSmrg 2232fe8aea9eSmrg switch (picture->pSourcePict->type) { 2233fe8aea9eSmrg case SourcePictTypeSolidFill: 2234fe8aea9eSmrg case SourcePictTypeLinear: 2235fe8aea9eSmrg return false; 2236fe8aea9eSmrg default: 2237fe8aea9eSmrg return precise; 2238fe8aea9eSmrg } 2239fe8aea9eSmrg} 2240fe8aea9eSmrg 2241fe8aea9eSmrgstatic bool 2242fe8aea9eSmrghas_alphamap(PicturePtr p) 2243fe8aea9eSmrg{ 2244fe8aea9eSmrg return p->alphaMap != NULL; 2245fe8aea9eSmrg} 2246fe8aea9eSmrg 2247fe8aea9eSmrgstatic bool 2248fe8aea9eSmrgneed_upload(PicturePtr p) 2249fe8aea9eSmrg{ 2250fe8aea9eSmrg return p->pDrawable && unattached(p->pDrawable) && untransformed(p); 2251fe8aea9eSmrg} 2252fe8aea9eSmrg 2253fe8aea9eSmrgstatic bool 2254fe8aea9eSmrgsource_is_busy(PixmapPtr pixmap) 2255fe8aea9eSmrg{ 2256fe8aea9eSmrg struct sna_pixmap *priv = sna_pixmap(pixmap); 2257fe8aea9eSmrg if (priv == NULL || priv->clear) 2258fe8aea9eSmrg return false; 2259fe8aea9eSmrg 2260fe8aea9eSmrg if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) 2261fe8aea9eSmrg return true; 2262fe8aea9eSmrg 2263fe8aea9eSmrg if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) 2264fe8aea9eSmrg return true; 2265fe8aea9eSmrg 2266fe8aea9eSmrg return priv->gpu_damage && !priv->cpu_damage; 2267fe8aea9eSmrg} 2268fe8aea9eSmrg 2269fe8aea9eSmrgstatic bool 2270fe8aea9eSmrgsource_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) 2271fe8aea9eSmrg{ 2272fe8aea9eSmrg if (sna_picture_is_solid(p, NULL)) 2273fe8aea9eSmrg return false; 2274fe8aea9eSmrg 2275fe8aea9eSmrg if (p->pSourcePict) 2276fe8aea9eSmrg return check_gradient(p, precise); 2277fe8aea9eSmrg 2278fe8aea9eSmrg if (!gen9_check_repeat(p) || !gen9_check_format(p->format)) 2279fe8aea9eSmrg return true; 2280fe8aea9eSmrg 2281fe8aea9eSmrg if (pixmap && source_is_busy(pixmap)) 2282fe8aea9eSmrg return false; 2283fe8aea9eSmrg 2284fe8aea9eSmrg return has_alphamap(p) || !gen9_check_filter(p) || need_upload(p); 2285fe8aea9eSmrg} 2286fe8aea9eSmrg 2287fe8aea9eSmrgstatic bool 2288fe8aea9eSmrggen9_composite_fallback(struct sna *sna, 2289fe8aea9eSmrg PicturePtr src, 2290fe8aea9eSmrg PicturePtr mask, 2291fe8aea9eSmrg PicturePtr dst) 2292fe8aea9eSmrg{ 2293fe8aea9eSmrg PixmapPtr src_pixmap; 2294fe8aea9eSmrg PixmapPtr mask_pixmap; 2295fe8aea9eSmrg PixmapPtr dst_pixmap; 2296fe8aea9eSmrg bool src_fallback, mask_fallback; 2297fe8aea9eSmrg 2298fe8aea9eSmrg if (!gen9_check_dst_format(dst->format)) { 2299fe8aea9eSmrg DBG(("%s: unknown destination format: %d\n", 2300fe8aea9eSmrg __FUNCTION__, dst->format)); 2301fe8aea9eSmrg return true; 2302fe8aea9eSmrg } 2303fe8aea9eSmrg 2304fe8aea9eSmrg dst_pixmap = get_drawable_pixmap(dst->pDrawable); 2305fe8aea9eSmrg 2306fe8aea9eSmrg src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; 2307fe8aea9eSmrg src_fallback = source_fallback(src, src_pixmap, 2308fe8aea9eSmrg dst->polyMode == PolyModePrecise); 2309fe8aea9eSmrg 2310fe8aea9eSmrg if (mask) { 2311fe8aea9eSmrg mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; 2312fe8aea9eSmrg mask_fallback = source_fallback(mask, mask_pixmap, 2313fe8aea9eSmrg dst->polyMode == PolyModePrecise); 2314fe8aea9eSmrg } else { 2315fe8aea9eSmrg mask_pixmap = NULL; 2316fe8aea9eSmrg mask_fallback = false; 2317fe8aea9eSmrg } 2318fe8aea9eSmrg 2319fe8aea9eSmrg /* If we are using the destination as a source and need to 2320fe8aea9eSmrg * readback in order to upload the source, do it all 2321fe8aea9eSmrg * on the cpu. 2322fe8aea9eSmrg */ 2323fe8aea9eSmrg if (src_pixmap == dst_pixmap && src_fallback) { 2324fe8aea9eSmrg DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); 2325fe8aea9eSmrg return true; 2326fe8aea9eSmrg } 2327fe8aea9eSmrg if (mask_pixmap == dst_pixmap && mask_fallback) { 2328fe8aea9eSmrg DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); 2329fe8aea9eSmrg return true; 2330fe8aea9eSmrg } 2331fe8aea9eSmrg 2332fe8aea9eSmrg /* If anything is on the GPU, push everything out to the GPU */ 2333fe8aea9eSmrg if (dst_use_gpu(dst_pixmap)) { 2334fe8aea9eSmrg DBG(("%s: dst is already on the GPU, try to use GPU\n", 2335fe8aea9eSmrg __FUNCTION__)); 2336fe8aea9eSmrg return false; 2337fe8aea9eSmrg } 2338fe8aea9eSmrg 2339fe8aea9eSmrg if (src_pixmap && !src_fallback) { 2340fe8aea9eSmrg DBG(("%s: src is already on the GPU, try to use GPU\n", 2341fe8aea9eSmrg __FUNCTION__)); 2342fe8aea9eSmrg return false; 2343fe8aea9eSmrg } 2344fe8aea9eSmrg if (mask_pixmap && !mask_fallback) { 2345fe8aea9eSmrg DBG(("%s: mask is already on the GPU, try to use GPU\n", 2346fe8aea9eSmrg __FUNCTION__)); 2347fe8aea9eSmrg return false; 2348fe8aea9eSmrg } 2349fe8aea9eSmrg 2350fe8aea9eSmrg /* However if the dst is not on the GPU and we need to 2351fe8aea9eSmrg * render one of the sources using the CPU, we may 2352fe8aea9eSmrg * as well do the entire operation in place onthe CPU. 2353fe8aea9eSmrg */ 2354fe8aea9eSmrg if (src_fallback) { 2355fe8aea9eSmrg DBG(("%s: dst is on the CPU and src will fallback\n", 2356fe8aea9eSmrg __FUNCTION__)); 2357fe8aea9eSmrg return true; 2358fe8aea9eSmrg } 2359fe8aea9eSmrg 2360fe8aea9eSmrg if (mask && mask_fallback) { 2361fe8aea9eSmrg DBG(("%s: dst is on the CPU and mask will fallback\n", 2362fe8aea9eSmrg __FUNCTION__)); 2363fe8aea9eSmrg return true; 2364fe8aea9eSmrg } 2365fe8aea9eSmrg 2366fe8aea9eSmrg if (too_large(dst_pixmap->drawable.width, 2367fe8aea9eSmrg dst_pixmap->drawable.height) && 2368fe8aea9eSmrg dst_is_cpu(dst_pixmap)) { 2369fe8aea9eSmrg DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); 2370fe8aea9eSmrg return true; 2371fe8aea9eSmrg } 2372fe8aea9eSmrg 2373fe8aea9eSmrg DBG(("%s: dst is not on the GPU and the operation should not fallback\n", 2374fe8aea9eSmrg __FUNCTION__)); 2375fe8aea9eSmrg return dst_use_cpu(dst_pixmap); 2376fe8aea9eSmrg} 2377fe8aea9eSmrg 2378fe8aea9eSmrgstatic int 2379fe8aea9eSmrgreuse_source(struct sna *sna, 2380fe8aea9eSmrg PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, 2381fe8aea9eSmrg PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) 2382fe8aea9eSmrg{ 2383fe8aea9eSmrg uint32_t color; 2384fe8aea9eSmrg 2385fe8aea9eSmrg if (src_x != msk_x || src_y != msk_y) 2386fe8aea9eSmrg return false; 2387fe8aea9eSmrg 2388fe8aea9eSmrg if (src == mask) { 2389fe8aea9eSmrg DBG(("%s: mask is source\n", __FUNCTION__)); 2390fe8aea9eSmrg *mc = *sc; 2391fe8aea9eSmrg mc->bo = kgem_bo_reference(mc->bo); 2392fe8aea9eSmrg return true; 2393fe8aea9eSmrg } 2394fe8aea9eSmrg 2395fe8aea9eSmrg if (sna_picture_is_solid(mask, &color)) 2396fe8aea9eSmrg return gen4_channel_init_solid(sna, mc, color); 2397fe8aea9eSmrg 2398fe8aea9eSmrg if (sc->is_solid) 2399fe8aea9eSmrg return false; 2400fe8aea9eSmrg 2401fe8aea9eSmrg if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) 2402fe8aea9eSmrg return false; 2403fe8aea9eSmrg 2404fe8aea9eSmrg DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); 2405fe8aea9eSmrg 2406fe8aea9eSmrg if (!sna_transform_equal(src->transform, mask->transform)) 2407fe8aea9eSmrg return false; 2408fe8aea9eSmrg 2409fe8aea9eSmrg if (!sna_picture_alphamap_equal(src, mask)) 2410fe8aea9eSmrg return false; 2411fe8aea9eSmrg 2412fe8aea9eSmrg if (!gen9_check_repeat(mask)) 2413fe8aea9eSmrg return false; 2414fe8aea9eSmrg 2415fe8aea9eSmrg if (!gen9_check_filter(mask)) 2416fe8aea9eSmrg return false; 2417fe8aea9eSmrg 2418fe8aea9eSmrg if (!gen9_check_format(mask->format)) 2419fe8aea9eSmrg return false; 2420fe8aea9eSmrg 2421fe8aea9eSmrg DBG(("%s: reusing source channel for mask with a twist\n", 2422fe8aea9eSmrg __FUNCTION__)); 2423fe8aea9eSmrg 2424fe8aea9eSmrg *mc = *sc; 2425fe8aea9eSmrg mc->repeat = gen9_repeat(mask->repeat ? mask->repeatType : RepeatNone); 2426fe8aea9eSmrg mc->filter = gen9_filter(mask->filter); 2427fe8aea9eSmrg mc->pict_format = mask->format; 2428fe8aea9eSmrg mc->card_format = gen9_get_card_format(mask->format); 2429fe8aea9eSmrg mc->bo = kgem_bo_reference(mc->bo); 2430fe8aea9eSmrg return true; 2431fe8aea9eSmrg} 2432fe8aea9eSmrg 2433fe8aea9eSmrgstatic bool 2434fe8aea9eSmrggen9_render_composite(struct sna *sna, 2435fe8aea9eSmrg uint8_t op, 2436fe8aea9eSmrg PicturePtr src, 2437fe8aea9eSmrg PicturePtr mask, 2438fe8aea9eSmrg PicturePtr dst, 2439fe8aea9eSmrg int16_t src_x, int16_t src_y, 2440fe8aea9eSmrg int16_t msk_x, int16_t msk_y, 2441fe8aea9eSmrg int16_t dst_x, int16_t dst_y, 2442fe8aea9eSmrg int16_t width, int16_t height, 2443fe8aea9eSmrg unsigned flags, 2444fe8aea9eSmrg struct sna_composite_op *tmp) 2445fe8aea9eSmrg{ 2446fe8aea9eSmrg if (op >= ARRAY_SIZE(gen9_blend_op)) 2447fe8aea9eSmrg return false; 2448fe8aea9eSmrg 2449fe8aea9eSmrg DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__, 2450fe8aea9eSmrg width, height, sna->kgem.mode, sna->kgem.ring)); 2451fe8aea9eSmrg 2452fe8aea9eSmrg if (mask == NULL && 2453fe8aea9eSmrg try_blt(sna, op, 2454fe8aea9eSmrg src, mask, dst, 2455fe8aea9eSmrg src_x, src_y, 2456fe8aea9eSmrg msk_x, msk_y, 2457fe8aea9eSmrg dst_x, dst_y, 2458fe8aea9eSmrg width, height, 2459fe8aea9eSmrg flags, tmp)) 2460fe8aea9eSmrg return true; 2461fe8aea9eSmrg 2462fe8aea9eSmrg if (gen9_composite_fallback(sna, src, mask, dst)) 2463fe8aea9eSmrg goto fallback; 2464fe8aea9eSmrg 2465fe8aea9eSmrg if (need_tiling(sna, width, height)) 2466fe8aea9eSmrg return sna_tiling_composite(op, src, mask, dst, 2467fe8aea9eSmrg src_x, src_y, 2468fe8aea9eSmrg msk_x, msk_y, 2469fe8aea9eSmrg dst_x, dst_y, 2470fe8aea9eSmrg width, height, 2471fe8aea9eSmrg tmp); 2472fe8aea9eSmrg 2473fe8aea9eSmrg if (op == PictOpClear && src == sna->clear) 2474fe8aea9eSmrg op = PictOpSrc; 2475fe8aea9eSmrg tmp->op = op; 2476fe8aea9eSmrg if (!gen9_composite_set_target(sna, tmp, dst, 2477fe8aea9eSmrg dst_x, dst_y, width, height, 2478fe8aea9eSmrg flags & COMPOSITE_PARTIAL || op > PictOpSrc)) 2479fe8aea9eSmrg goto fallback; 2480fe8aea9eSmrg 2481fe8aea9eSmrg switch (gen9_composite_picture(sna, src, &tmp->src, 2482fe8aea9eSmrg src_x, src_y, 2483fe8aea9eSmrg width, height, 2484fe8aea9eSmrg dst_x, dst_y, 2485fe8aea9eSmrg dst->polyMode == PolyModePrecise)) { 2486fe8aea9eSmrg case -1: 2487fe8aea9eSmrg goto cleanup_dst; 2488fe8aea9eSmrg case 0: 2489fe8aea9eSmrg if (!gen4_channel_init_solid(sna, &tmp->src, 0)) 2490fe8aea9eSmrg goto cleanup_dst; 2491fe8aea9eSmrg /* fall through to fixup */ 2492fe8aea9eSmrg case 1: 2493fe8aea9eSmrg /* Did we just switch rings to prepare the source? */ 2494fe8aea9eSmrg if (mask == NULL && 2495fe8aea9eSmrg (prefer_blt_composite(sna, tmp) || 2496fe8aea9eSmrg unaligned(tmp->src.bo, PICT_FORMAT_BPP(tmp->src.pict_format))) && 2497fe8aea9eSmrg sna_blt_composite__convert(sna, 2498fe8aea9eSmrg dst_x, dst_y, width, height, 2499fe8aea9eSmrg tmp)) 2500fe8aea9eSmrg return true; 2501fe8aea9eSmrg 2502fe8aea9eSmrg if (!gen9_composite_channel_convert(&tmp->src)) 2503fe8aea9eSmrg goto cleanup_src; 2504fe8aea9eSmrg 2505fe8aea9eSmrg break; 2506fe8aea9eSmrg } 2507fe8aea9eSmrg 2508fe8aea9eSmrg tmp->is_affine = tmp->src.is_affine; 2509fe8aea9eSmrg tmp->has_component_alpha = false; 2510fe8aea9eSmrg tmp->need_magic_ca_pass = false; 2511fe8aea9eSmrg 2512fe8aea9eSmrg tmp->mask.bo = NULL; 2513fe8aea9eSmrg tmp->mask.filter = SAMPLER_FILTER_NEAREST; 2514fe8aea9eSmrg tmp->mask.repeat = SAMPLER_EXTEND_NONE; 2515fe8aea9eSmrg 2516fe8aea9eSmrg if (mask) { 2517fe8aea9eSmrg if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { 2518fe8aea9eSmrg tmp->has_component_alpha = true; 2519fe8aea9eSmrg 2520fe8aea9eSmrg /* Check if it's component alpha that relies on a source alpha and on 2521fe8aea9eSmrg * the source value. We can only get one of those into the single 2522fe8aea9eSmrg * source value that we get to blend with. 2523fe8aea9eSmrg */ 2524fe8aea9eSmrg if (gen9_blend_op[op].src_alpha && 2525fe8aea9eSmrg (gen9_blend_op[op].src_blend != BLENDFACTOR_ZERO)) { 2526fe8aea9eSmrg if (op != PictOpOver) 2527fe8aea9eSmrg goto cleanup_src; 2528fe8aea9eSmrg 2529fe8aea9eSmrg tmp->need_magic_ca_pass = true; 2530fe8aea9eSmrg tmp->op = PictOpOutReverse; 2531fe8aea9eSmrg } 2532fe8aea9eSmrg } 2533fe8aea9eSmrg 2534fe8aea9eSmrg if (!reuse_source(sna, 2535fe8aea9eSmrg src, &tmp->src, src_x, src_y, 2536fe8aea9eSmrg mask, &tmp->mask, msk_x, msk_y)) { 2537fe8aea9eSmrg switch (gen9_composite_picture(sna, mask, &tmp->mask, 2538fe8aea9eSmrg msk_x, msk_y, 2539fe8aea9eSmrg width, height, 2540fe8aea9eSmrg dst_x, dst_y, 2541fe8aea9eSmrg dst->polyMode == PolyModePrecise)) { 2542fe8aea9eSmrg case -1: 2543fe8aea9eSmrg goto cleanup_src; 2544fe8aea9eSmrg case 0: 2545fe8aea9eSmrg if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) 2546fe8aea9eSmrg goto cleanup_src; 2547fe8aea9eSmrg /* fall through to fixup */ 2548fe8aea9eSmrg case 1: 2549fe8aea9eSmrg if (!gen9_composite_channel_convert(&tmp->mask)) 2550fe8aea9eSmrg goto cleanup_mask; 2551fe8aea9eSmrg break; 2552fe8aea9eSmrg } 2553fe8aea9eSmrg } 2554fe8aea9eSmrg 2555fe8aea9eSmrg tmp->is_affine &= tmp->mask.is_affine; 2556fe8aea9eSmrg } 2557fe8aea9eSmrg 2558fe8aea9eSmrg tmp->u.gen9.flags = 2559fe8aea9eSmrg GEN9_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, 2560fe8aea9eSmrg tmp->src.repeat, 2561fe8aea9eSmrg tmp->mask.filter, 2562fe8aea9eSmrg tmp->mask.repeat), 2563fe8aea9eSmrg gen9_get_blend(tmp->op, 2564fe8aea9eSmrg tmp->has_component_alpha, 2565fe8aea9eSmrg tmp->dst.format), 2566fe8aea9eSmrg gen4_choose_composite_emitter(sna, tmp)); 2567fe8aea9eSmrg tmp->u.gen9.wm_kernel = gen9_choose_composite_kernel(tmp->op, 2568fe8aea9eSmrg tmp->mask.bo != NULL, 2569fe8aea9eSmrg tmp->has_component_alpha, 2570fe8aea9eSmrg tmp->is_affine); 2571fe8aea9eSmrg 2572fe8aea9eSmrg tmp->blt = gen9_render_composite_blt; 2573fe8aea9eSmrg tmp->box = gen9_render_composite_box; 2574fe8aea9eSmrg tmp->boxes = gen9_render_composite_boxes__blt; 2575fe8aea9eSmrg if (tmp->emit_boxes){ 2576fe8aea9eSmrg tmp->boxes = gen9_render_composite_boxes; 2577fe8aea9eSmrg tmp->thread_boxes = gen9_render_composite_boxes__thread; 2578fe8aea9eSmrg } 2579fe8aea9eSmrg tmp->done = gen9_render_composite_done; 2580fe8aea9eSmrg 2581fe8aea9eSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); 2582fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, 2583fe8aea9eSmrg tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2584fe8aea9eSmrg NULL)) { 2585fe8aea9eSmrg kgem_submit(&sna->kgem); 2586fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, 2587fe8aea9eSmrg tmp->dst.bo, tmp->src.bo, tmp->mask.bo, 2588fe8aea9eSmrg NULL)) 2589fe8aea9eSmrg goto cleanup_mask; 2590fe8aea9eSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2591fe8aea9eSmrg } 2592fe8aea9eSmrg 2593fe8aea9eSmrg gen9_align_vertex(sna, tmp); 2594fe8aea9eSmrg gen9_emit_composite_state(sna, tmp); 2595fe8aea9eSmrg return true; 2596fe8aea9eSmrg 2597fe8aea9eSmrgcleanup_mask: 2598fe8aea9eSmrg if (tmp->mask.bo) { 2599fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, tmp->mask.bo); 2600fe8aea9eSmrg tmp->mask.bo = NULL; 2601fe8aea9eSmrg } 2602fe8aea9eSmrgcleanup_src: 2603fe8aea9eSmrg if (tmp->src.bo) { 2604fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, tmp->src.bo); 2605fe8aea9eSmrg tmp->src.bo = NULL; 2606fe8aea9eSmrg } 2607fe8aea9eSmrgcleanup_dst: 2608fe8aea9eSmrg if (tmp->redirect.real_bo) { 2609fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, tmp->dst.bo); 2610fe8aea9eSmrg tmp->redirect.real_bo = NULL; 2611fe8aea9eSmrg } 2612fe8aea9eSmrgfallback: 2613fe8aea9eSmrg return (mask == NULL && 2614fe8aea9eSmrg sna_blt_composite(sna, op, 2615fe8aea9eSmrg src, dst, 2616fe8aea9eSmrg src_x, src_y, 2617fe8aea9eSmrg dst_x, dst_y, 2618fe8aea9eSmrg width, height, 2619fe8aea9eSmrg flags | COMPOSITE_FALLBACK, tmp)); 2620fe8aea9eSmrg} 2621fe8aea9eSmrg 2622fe8aea9eSmrg#if !NO_COMPOSITE_SPANS 2623fe8aea9eSmrgfastcall static void 2624fe8aea9eSmrggen9_render_composite_spans_box(struct sna *sna, 2625fe8aea9eSmrg const struct sna_composite_spans_op *op, 2626fe8aea9eSmrg const BoxRec *box, float opacity) 2627fe8aea9eSmrg{ 2628fe8aea9eSmrg DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", 2629fe8aea9eSmrg __FUNCTION__, 2630fe8aea9eSmrg op->base.src.offset[0], op->base.src.offset[1], 2631fe8aea9eSmrg opacity, 2632fe8aea9eSmrg op->base.dst.x, op->base.dst.y, 2633fe8aea9eSmrg box->x1, box->y1, 2634fe8aea9eSmrg box->x2 - box->x1, 2635fe8aea9eSmrg box->y2 - box->y1)); 2636fe8aea9eSmrg 2637fe8aea9eSmrg gen9_get_rectangles(sna, &op->base, 1, gen9_emit_composite_state); 2638fe8aea9eSmrg op->prim_emit(sna, op, box, opacity); 2639fe8aea9eSmrg} 2640fe8aea9eSmrg 2641fe8aea9eSmrgstatic void 2642fe8aea9eSmrggen9_render_composite_spans_boxes(struct sna *sna, 2643fe8aea9eSmrg const struct sna_composite_spans_op *op, 2644fe8aea9eSmrg const BoxRec *box, int nbox, 2645fe8aea9eSmrg float opacity) 2646fe8aea9eSmrg{ 2647fe8aea9eSmrg DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", 2648fe8aea9eSmrg __FUNCTION__, nbox, 2649fe8aea9eSmrg op->base.src.offset[0], op->base.src.offset[1], 2650fe8aea9eSmrg opacity, 2651fe8aea9eSmrg op->base.dst.x, op->base.dst.y)); 2652fe8aea9eSmrg 2653fe8aea9eSmrg do { 2654fe8aea9eSmrg int nbox_this_time; 2655fe8aea9eSmrg 2656fe8aea9eSmrg nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox, 2657fe8aea9eSmrg gen9_emit_composite_state); 2658fe8aea9eSmrg nbox -= nbox_this_time; 2659fe8aea9eSmrg 2660fe8aea9eSmrg do { 2661fe8aea9eSmrg DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, 2662fe8aea9eSmrg box->x1, box->y1, 2663fe8aea9eSmrg box->x2 - box->x1, 2664fe8aea9eSmrg box->y2 - box->y1)); 2665fe8aea9eSmrg 2666fe8aea9eSmrg op->prim_emit(sna, op, box++, opacity); 2667fe8aea9eSmrg } while (--nbox_this_time); 2668fe8aea9eSmrg } while (nbox); 2669fe8aea9eSmrg} 2670fe8aea9eSmrg 2671fe8aea9eSmrgfastcall static void 2672fe8aea9eSmrggen9_render_composite_spans_boxes__thread(struct sna *sna, 2673fe8aea9eSmrg const struct sna_composite_spans_op *op, 2674fe8aea9eSmrg const struct sna_opacity_box *box, 2675fe8aea9eSmrg int nbox) 2676fe8aea9eSmrg{ 2677fe8aea9eSmrg DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", 2678fe8aea9eSmrg __FUNCTION__, nbox, 2679fe8aea9eSmrg op->base.src.offset[0], op->base.src.offset[1], 2680fe8aea9eSmrg op->base.dst.x, op->base.dst.y)); 2681fe8aea9eSmrg 2682fe8aea9eSmrg sna_vertex_lock(&sna->render); 2683fe8aea9eSmrg do { 2684fe8aea9eSmrg int nbox_this_time; 2685fe8aea9eSmrg float *v; 2686fe8aea9eSmrg 2687fe8aea9eSmrg nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox, 2688fe8aea9eSmrg gen9_emit_composite_state); 2689fe8aea9eSmrg assert(nbox_this_time); 2690fe8aea9eSmrg nbox -= nbox_this_time; 2691fe8aea9eSmrg 2692fe8aea9eSmrg v = sna->render.vertices + sna->render.vertex_used; 2693fe8aea9eSmrg sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; 2694fe8aea9eSmrg 2695fe8aea9eSmrg sna_vertex_acquire__locked(&sna->render); 2696fe8aea9eSmrg sna_vertex_unlock(&sna->render); 2697fe8aea9eSmrg 2698fe8aea9eSmrg op->emit_boxes(op, box, nbox_this_time, v); 2699fe8aea9eSmrg box += nbox_this_time; 2700fe8aea9eSmrg 2701fe8aea9eSmrg sna_vertex_lock(&sna->render); 2702fe8aea9eSmrg sna_vertex_release__locked(&sna->render); 2703fe8aea9eSmrg } while (nbox); 2704fe8aea9eSmrg sna_vertex_unlock(&sna->render); 2705fe8aea9eSmrg} 2706fe8aea9eSmrg 2707fe8aea9eSmrgfastcall static void 2708fe8aea9eSmrggen9_render_composite_spans_done(struct sna *sna, 2709fe8aea9eSmrg const struct sna_composite_spans_op *op) 2710fe8aea9eSmrg{ 2711fe8aea9eSmrg if (sna->render.vertex_offset) 2712fe8aea9eSmrg gen8_vertex_flush(sna); 2713fe8aea9eSmrg 2714fe8aea9eSmrg DBG(("%s()\n", __FUNCTION__)); 2715fe8aea9eSmrg 2716fe8aea9eSmrg if (op->base.src.bo) 2717fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, op->base.src.bo); 2718fe8aea9eSmrg 2719fe8aea9eSmrg sna_render_composite_redirect_done(sna, &op->base); 2720fe8aea9eSmrg} 2721fe8aea9eSmrg 2722fe8aea9eSmrgstatic bool 2723fe8aea9eSmrggen9_check_composite_spans(struct sna *sna, 2724fe8aea9eSmrg uint8_t op, PicturePtr src, PicturePtr dst, 2725fe8aea9eSmrg int16_t width, int16_t height, unsigned flags) 2726fe8aea9eSmrg{ 2727fe8aea9eSmrg if (op >= ARRAY_SIZE(gen9_blend_op)) 2728fe8aea9eSmrg return false; 2729fe8aea9eSmrg 2730fe8aea9eSmrg if (gen9_composite_fallback(sna, src, NULL, dst)) 2731fe8aea9eSmrg return false; 2732fe8aea9eSmrg 2733fe8aea9eSmrg if (need_tiling(sna, width, height) && 2734fe8aea9eSmrg !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { 2735fe8aea9eSmrg DBG(("%s: fallback, tiled operation not on GPU\n", 2736fe8aea9eSmrg __FUNCTION__)); 2737fe8aea9eSmrg return false; 2738fe8aea9eSmrg } 2739fe8aea9eSmrg 2740fe8aea9eSmrg return true; 2741fe8aea9eSmrg} 2742fe8aea9eSmrg 2743fe8aea9eSmrgstatic bool 2744fe8aea9eSmrggen9_render_composite_spans(struct sna *sna, 2745fe8aea9eSmrg uint8_t op, 2746fe8aea9eSmrg PicturePtr src, 2747fe8aea9eSmrg PicturePtr dst, 2748fe8aea9eSmrg int16_t src_x, int16_t src_y, 2749fe8aea9eSmrg int16_t dst_x, int16_t dst_y, 2750fe8aea9eSmrg int16_t width, int16_t height, 2751fe8aea9eSmrg unsigned flags, 2752fe8aea9eSmrg struct sna_composite_spans_op *tmp) 2753fe8aea9eSmrg{ 2754fe8aea9eSmrg DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, 2755fe8aea9eSmrg width, height, flags, sna->kgem.ring)); 2756fe8aea9eSmrg 2757fe8aea9eSmrg assert(gen9_check_composite_spans(sna, op, src, dst, width, height, flags)); 2758fe8aea9eSmrg 2759fe8aea9eSmrg if (need_tiling(sna, width, height)) { 2760fe8aea9eSmrg DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", 2761fe8aea9eSmrg __FUNCTION__, width, height)); 2762fe8aea9eSmrg return sna_tiling_composite_spans(op, src, dst, 2763fe8aea9eSmrg src_x, src_y, dst_x, dst_y, 2764fe8aea9eSmrg width, height, flags, tmp); 2765fe8aea9eSmrg } 2766fe8aea9eSmrg 2767fe8aea9eSmrg tmp->base.op = op; 2768fe8aea9eSmrg if (!gen9_composite_set_target(sna, &tmp->base, dst, 2769fe8aea9eSmrg dst_x, dst_y, width, height, true)) 2770fe8aea9eSmrg return false; 2771fe8aea9eSmrg 2772fe8aea9eSmrg switch (gen9_composite_picture(sna, src, &tmp->base.src, 2773fe8aea9eSmrg src_x, src_y, 2774fe8aea9eSmrg width, height, 2775fe8aea9eSmrg dst_x, dst_y, 2776fe8aea9eSmrg dst->polyMode == PolyModePrecise)) { 2777fe8aea9eSmrg case -1: 2778fe8aea9eSmrg goto cleanup_dst; 2779fe8aea9eSmrg case 0: 2780fe8aea9eSmrg if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) 2781fe8aea9eSmrg goto cleanup_dst; 2782fe8aea9eSmrg /* fall through to fixup */ 2783fe8aea9eSmrg case 1: 2784fe8aea9eSmrg if (!gen9_composite_channel_convert(&tmp->base.src)) 2785fe8aea9eSmrg goto cleanup_src; 2786fe8aea9eSmrg break; 2787fe8aea9eSmrg } 2788fe8aea9eSmrg tmp->base.mask.bo = NULL; 2789fe8aea9eSmrg 2790fe8aea9eSmrg tmp->base.is_affine = tmp->base.src.is_affine; 2791fe8aea9eSmrg tmp->base.need_magic_ca_pass = false; 2792fe8aea9eSmrg 2793fe8aea9eSmrg tmp->base.u.gen9.flags = 2794fe8aea9eSmrg GEN9_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, 2795fe8aea9eSmrg tmp->base.src.repeat, 2796fe8aea9eSmrg SAMPLER_FILTER_NEAREST, 2797fe8aea9eSmrg SAMPLER_EXTEND_PAD), 2798fe8aea9eSmrg gen9_get_blend(tmp->base.op, false, tmp->base.dst.format), 2799fe8aea9eSmrg gen4_choose_spans_emitter(sna, tmp)); 2800fe8aea9eSmrg tmp->base.u.gen9.wm_kernel = 2801fe8aea9eSmrg GEN9_WM_KERNEL_OPACITY | !tmp->base.is_affine; 2802fe8aea9eSmrg 2803fe8aea9eSmrg tmp->box = gen9_render_composite_spans_box; 2804fe8aea9eSmrg tmp->boxes = gen9_render_composite_spans_boxes; 2805fe8aea9eSmrg if (tmp->emit_boxes) 2806fe8aea9eSmrg tmp->thread_boxes = gen9_render_composite_spans_boxes__thread; 2807fe8aea9eSmrg tmp->done = gen9_render_composite_spans_done; 2808fe8aea9eSmrg 2809fe8aea9eSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); 2810fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, 2811fe8aea9eSmrg tmp->base.dst.bo, tmp->base.src.bo, 2812fe8aea9eSmrg NULL)) { 2813fe8aea9eSmrg kgem_submit(&sna->kgem); 2814fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, 2815fe8aea9eSmrg tmp->base.dst.bo, tmp->base.src.bo, 2816fe8aea9eSmrg NULL)) 2817fe8aea9eSmrg goto cleanup_src; 2818fe8aea9eSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 2819fe8aea9eSmrg } 2820fe8aea9eSmrg 2821fe8aea9eSmrg gen9_align_vertex(sna, &tmp->base); 2822fe8aea9eSmrg gen9_emit_composite_state(sna, &tmp->base); 2823fe8aea9eSmrg return true; 2824fe8aea9eSmrg 2825fe8aea9eSmrgcleanup_src: 2826fe8aea9eSmrg if (tmp->base.src.bo) 2827fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); 2828fe8aea9eSmrgcleanup_dst: 2829fe8aea9eSmrg if (tmp->base.redirect.real_bo) 2830fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); 2831fe8aea9eSmrg return false; 2832fe8aea9eSmrg} 2833fe8aea9eSmrg#endif 2834fe8aea9eSmrg 2835fe8aea9eSmrgstatic void 2836fe8aea9eSmrggen9_emit_copy_state(struct sna *sna, 2837fe8aea9eSmrg const struct sna_composite_op *op) 2838fe8aea9eSmrg{ 2839fe8aea9eSmrg uint32_t *binding_table; 2840fe8aea9eSmrg uint16_t offset, dirty; 2841fe8aea9eSmrg 2842fe8aea9eSmrg gen9_get_batch(sna, op); 2843fe8aea9eSmrg 2844fe8aea9eSmrg binding_table = gen9_composite_get_binding_table(sna, &offset); 2845fe8aea9eSmrg 2846fe8aea9eSmrg dirty = kgem_bo_is_dirty(op->dst.bo); 2847fe8aea9eSmrg 2848fe8aea9eSmrg binding_table[0] = 2849fe8aea9eSmrg gen9_bind_bo(sna, 2850fe8aea9eSmrg op->dst.bo, op->dst.width, op->dst.height, 2851fe8aea9eSmrg gen9_get_dest_format(op->dst.format), 2852fe8aea9eSmrg true); 2853fe8aea9eSmrg binding_table[1] = 2854fe8aea9eSmrg gen9_bind_bo(sna, 2855fe8aea9eSmrg op->src.bo, op->src.width, op->src.height, 2856fe8aea9eSmrg op->src.card_format, 2857fe8aea9eSmrg false); 2858fe8aea9eSmrg 2859fe8aea9eSmrg if (sna->kgem.surface == offset && 2860fe8aea9eSmrg *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table) { 2861fe8aea9eSmrg sna->kgem.surface += SURFACE_DW; 2862fe8aea9eSmrg offset = sna->render_state.gen9.surface_table; 2863fe8aea9eSmrg } 2864fe8aea9eSmrg 2865fe8aea9eSmrg if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0]) 2866fe8aea9eSmrg dirty = 0; 2867fe8aea9eSmrg 2868fe8aea9eSmrg assert(!GEN9_READS_DST(op->u.gen9.flags)); 2869fe8aea9eSmrg gen9_emit_state(sna, op, offset | dirty); 2870fe8aea9eSmrg} 2871fe8aea9eSmrg 2872fe8aea9eSmrgstatic inline bool 2873fe8aea9eSmrgprefer_blt_copy(struct sna *sna, 2874fe8aea9eSmrg struct kgem_bo *src_bo, 2875fe8aea9eSmrg struct kgem_bo *dst_bo, 2876fe8aea9eSmrg unsigned flags) 2877fe8aea9eSmrg{ 2878fe8aea9eSmrg if (sna->kgem.mode == KGEM_BLT) 2879fe8aea9eSmrg return true; 2880fe8aea9eSmrg 2881fe8aea9eSmrg assert((flags & COPY_SYNC) == 0); 2882fe8aea9eSmrg 2883fe8aea9eSmrg if (untiled_tlb_miss(src_bo) || 2884fe8aea9eSmrg untiled_tlb_miss(dst_bo)) 2885fe8aea9eSmrg return true; 2886fe8aea9eSmrg 2887fe8aea9eSmrg if (flags & COPY_DRI && !sna->kgem.has_semaphores) 2888fe8aea9eSmrg return false; 2889fe8aea9eSmrg 2890fe8aea9eSmrg if (force_blt_ring(sna, dst_bo)) 2891fe8aea9eSmrg return true; 2892fe8aea9eSmrg 2893fe8aea9eSmrg if ((flags & COPY_SMALL || 2894fe8aea9eSmrg (sna->render_state.gt < 3 && src_bo == dst_bo)) && 2895fe8aea9eSmrg can_switch_to_blt(sna, dst_bo, flags)) 2896fe8aea9eSmrg return true; 2897fe8aea9eSmrg 2898fe8aea9eSmrg if (kgem_bo_is_render(dst_bo) || 2899fe8aea9eSmrg kgem_bo_is_render(src_bo)) 2900fe8aea9eSmrg return false; 2901fe8aea9eSmrg 2902fe8aea9eSmrg if (flags & COPY_LAST && 2903fe8aea9eSmrg sna->render_state.gt < 3 && 2904fe8aea9eSmrg can_switch_to_blt(sna, dst_bo, flags)) 2905fe8aea9eSmrg return true; 2906fe8aea9eSmrg 2907fe8aea9eSmrg if (prefer_render_ring(sna, dst_bo)) 2908fe8aea9eSmrg return false; 2909fe8aea9eSmrg 2910fe8aea9eSmrg if (!prefer_blt_ring(sna, dst_bo, flags)) 2911fe8aea9eSmrg return false; 2912fe8aea9eSmrg 2913fe8aea9eSmrg return prefer_blt_bo(sna, src_bo, dst_bo); 2914fe8aea9eSmrg} 2915fe8aea9eSmrg 2916fe8aea9eSmrgstatic bool 2917fe8aea9eSmrggen9_render_copy_boxes(struct sna *sna, uint8_t alu, 2918fe8aea9eSmrg const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, 2919fe8aea9eSmrg const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, 2920fe8aea9eSmrg const BoxRec *box, int n, unsigned flags) 2921fe8aea9eSmrg{ 2922fe8aea9eSmrg struct sna_composite_op tmp; 2923fe8aea9eSmrg BoxRec extents; 2924fe8aea9eSmrg 2925fe8aea9eSmrg DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? %d\n", 2926fe8aea9eSmrg __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags, 2927fe8aea9eSmrg src_bo == dst_bo, 2928fe8aea9eSmrg overlaps(sna, 2929fe8aea9eSmrg src_bo, src_dx, src_dy, 2930fe8aea9eSmrg dst_bo, dst_dx, dst_dy, 2931fe8aea9eSmrg box, n, flags, &extents))); 2932fe8aea9eSmrg 2933fe8aea9eSmrg if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && 2934fe8aea9eSmrg sna_blt_compare_depth(src, dst) && 2935fe8aea9eSmrg sna_blt_copy_boxes(sna, alu, 2936fe8aea9eSmrg src_bo, src_dx, src_dy, 2937fe8aea9eSmrg dst_bo, dst_dx, dst_dy, 2938fe8aea9eSmrg dst->bitsPerPixel, 2939fe8aea9eSmrg box, n)) 2940fe8aea9eSmrg return true; 2941fe8aea9eSmrg 2942fe8aea9eSmrg if (!(alu == GXcopy || alu == GXclear) || 2943fe8aea9eSmrg unaligned(src_bo, src->bitsPerPixel) || 2944fe8aea9eSmrg unaligned(dst_bo, dst->bitsPerPixel)) { 2945fe8aea9eSmrgfallback_blt: 2946fe8aea9eSmrg DBG(("%s: fallback blt\n", __FUNCTION__)); 2947fe8aea9eSmrg if (!sna_blt_compare_depth(src, dst)) 2948fe8aea9eSmrg return false; 2949fe8aea9eSmrg 2950fe8aea9eSmrg return sna_blt_copy_boxes_fallback(sna, alu, 2951fe8aea9eSmrg src, src_bo, src_dx, src_dy, 2952fe8aea9eSmrg dst, dst_bo, dst_dx, dst_dy, 2953fe8aea9eSmrg box, n); 2954fe8aea9eSmrg } 2955fe8aea9eSmrg 2956fe8aea9eSmrg if (overlaps(sna, 2957fe8aea9eSmrg src_bo, src_dx, src_dy, 2958fe8aea9eSmrg dst_bo, dst_dx, dst_dy, 2959fe8aea9eSmrg box, n, flags, 2960fe8aea9eSmrg &extents)) { 2961fe8aea9eSmrg bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1); 2962fe8aea9eSmrg 2963fe8aea9eSmrg if ((big || !prefer_render_ring(sna, dst_bo)) && 2964fe8aea9eSmrg sna_blt_copy_boxes(sna, alu, 2965fe8aea9eSmrg src_bo, src_dx, src_dy, 2966fe8aea9eSmrg dst_bo, dst_dx, dst_dy, 2967fe8aea9eSmrg dst->bitsPerPixel, 2968fe8aea9eSmrg box, n)) 2969fe8aea9eSmrg return true; 2970fe8aea9eSmrg 2971fe8aea9eSmrg if (big) 2972fe8aea9eSmrg goto fallback_blt; 2973fe8aea9eSmrg 2974fe8aea9eSmrg assert(src_bo == dst_bo); 2975fe8aea9eSmrg assert(src->depth == dst->depth); 2976fe8aea9eSmrg assert(src->width == dst->width); 2977fe8aea9eSmrg assert(src->height == dst->height); 2978fe8aea9eSmrg return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo, 2979fe8aea9eSmrg src_dx, src_dy, 2980fe8aea9eSmrg dst_dx, dst_dy, 2981fe8aea9eSmrg box, n, &extents); 2982fe8aea9eSmrg } 2983fe8aea9eSmrg 2984fe8aea9eSmrg if (dst->depth == src->depth) { 2985fe8aea9eSmrg tmp.dst.format = sna_render_format_for_depth(dst->depth); 2986fe8aea9eSmrg tmp.src.pict_format = tmp.dst.format; 2987fe8aea9eSmrg } else { 2988fe8aea9eSmrg tmp.dst.format = sna_format_for_depth(dst->depth); 2989fe8aea9eSmrg tmp.src.pict_format = sna_format_for_depth(src->depth); 2990fe8aea9eSmrg } 2991fe8aea9eSmrg if (!gen9_check_format(tmp.src.pict_format)) 2992fe8aea9eSmrg goto fallback_blt; 2993fe8aea9eSmrg 2994fe8aea9eSmrg tmp.dst.pixmap = (PixmapPtr)dst; 2995fe8aea9eSmrg tmp.dst.width = dst->width; 2996fe8aea9eSmrg tmp.dst.height = dst->height; 2997fe8aea9eSmrg tmp.dst.bo = dst_bo; 2998fe8aea9eSmrg tmp.dst.x = tmp.dst.y = 0; 2999fe8aea9eSmrg tmp.damage = NULL; 3000fe8aea9eSmrg 3001fe8aea9eSmrg sna_render_composite_redirect_init(&tmp); 3002fe8aea9eSmrg if (too_large(tmp.dst.width, tmp.dst.height)) { 3003fe8aea9eSmrg int i; 3004fe8aea9eSmrg 3005fe8aea9eSmrg extents = box[0]; 3006fe8aea9eSmrg for (i = 1; i < n; i++) { 3007fe8aea9eSmrg if (box[i].x1 < extents.x1) 3008fe8aea9eSmrg extents.x1 = box[i].x1; 3009fe8aea9eSmrg if (box[i].y1 < extents.y1) 3010fe8aea9eSmrg extents.y1 = box[i].y1; 3011fe8aea9eSmrg 3012fe8aea9eSmrg if (box[i].x2 > extents.x2) 3013fe8aea9eSmrg extents.x2 = box[i].x2; 3014fe8aea9eSmrg if (box[i].y2 > extents.y2) 3015fe8aea9eSmrg extents.y2 = box[i].y2; 3016fe8aea9eSmrg } 3017fe8aea9eSmrg 3018fe8aea9eSmrg if (!sna_render_composite_redirect(sna, &tmp, 3019fe8aea9eSmrg extents.x1 + dst_dx, 3020fe8aea9eSmrg extents.y1 + dst_dy, 3021fe8aea9eSmrg extents.x2 - extents.x1, 3022fe8aea9eSmrg extents.y2 - extents.y1, 3023fe8aea9eSmrg n > 1)) 3024fe8aea9eSmrg goto fallback_tiled; 3025fe8aea9eSmrg } 3026fe8aea9eSmrg 3027fe8aea9eSmrg tmp.src.card_format = gen9_get_card_format(tmp.src.pict_format); 3028fe8aea9eSmrg if (too_large(src->width, src->height)) { 3029fe8aea9eSmrg int i; 3030fe8aea9eSmrg 3031fe8aea9eSmrg extents = box[0]; 3032fe8aea9eSmrg for (i = 1; i < n; i++) { 3033fe8aea9eSmrg if (box[i].x1 < extents.x1) 3034fe8aea9eSmrg extents.x1 = box[i].x1; 3035fe8aea9eSmrg if (box[i].y1 < extents.y1) 3036fe8aea9eSmrg extents.y1 = box[i].y1; 3037fe8aea9eSmrg 3038fe8aea9eSmrg if (box[i].x2 > extents.x2) 3039fe8aea9eSmrg extents.x2 = box[i].x2; 3040fe8aea9eSmrg if (box[i].y2 > extents.y2) 3041fe8aea9eSmrg extents.y2 = box[i].y2; 3042fe8aea9eSmrg } 3043fe8aea9eSmrg 3044fe8aea9eSmrg if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, 3045fe8aea9eSmrg extents.x1 + src_dx, 3046fe8aea9eSmrg extents.y1 + src_dy, 3047fe8aea9eSmrg extents.x2 - extents.x1, 3048fe8aea9eSmrg extents.y2 - extents.y1)) 3049fe8aea9eSmrg goto fallback_tiled_dst; 3050fe8aea9eSmrg } else { 3051fe8aea9eSmrg tmp.src.bo = src_bo; 3052fe8aea9eSmrg tmp.src.width = src->width; 3053fe8aea9eSmrg tmp.src.height = src->height; 3054fe8aea9eSmrg tmp.src.offset[0] = tmp.src.offset[1] = 0; 3055fe8aea9eSmrg } 3056fe8aea9eSmrg 3057fe8aea9eSmrg tmp.mask.bo = NULL; 3058fe8aea9eSmrg 3059fe8aea9eSmrg tmp.floats_per_vertex = 2; 3060fe8aea9eSmrg tmp.floats_per_rect = 6; 3061fe8aea9eSmrg tmp.need_magic_ca_pass = 0; 3062fe8aea9eSmrg 3063fe8aea9eSmrg tmp.u.gen9.flags = COPY_FLAGS(alu); 3064fe8aea9eSmrg tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK; 3065fe8aea9eSmrg 3066fe8aea9eSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 3067fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 3068fe8aea9eSmrg kgem_submit(&sna->kgem); 3069fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { 3070fe8aea9eSmrg if (tmp.src.bo != src_bo) 3071fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3072fe8aea9eSmrg if (tmp.redirect.real_bo) 3073fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 3074fe8aea9eSmrg goto fallback_blt; 3075fe8aea9eSmrg } 3076fe8aea9eSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3077fe8aea9eSmrg } 3078fe8aea9eSmrg 3079fe8aea9eSmrg src_dx += tmp.src.offset[0]; 3080fe8aea9eSmrg src_dy += tmp.src.offset[1]; 3081fe8aea9eSmrg 3082fe8aea9eSmrg dst_dx += tmp.dst.x; 3083fe8aea9eSmrg dst_dy += tmp.dst.y; 3084fe8aea9eSmrg 3085fe8aea9eSmrg tmp.dst.x = tmp.dst.y = 0; 3086fe8aea9eSmrg 3087fe8aea9eSmrg gen9_align_vertex(sna, &tmp); 3088fe8aea9eSmrg gen9_emit_copy_state(sna, &tmp); 3089fe8aea9eSmrg 3090fe8aea9eSmrg do { 3091fe8aea9eSmrg int16_t *v; 3092fe8aea9eSmrg int n_this_time; 3093fe8aea9eSmrg 3094fe8aea9eSmrg n_this_time = gen9_get_rectangles(sna, &tmp, n, 3095fe8aea9eSmrg gen9_emit_copy_state); 3096fe8aea9eSmrg n -= n_this_time; 3097fe8aea9eSmrg 3098fe8aea9eSmrg v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 3099fe8aea9eSmrg sna->render.vertex_used += 6 * n_this_time; 3100fe8aea9eSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3101fe8aea9eSmrg do { 3102fe8aea9eSmrg 3103fe8aea9eSmrg DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", 3104fe8aea9eSmrg box->x1 + src_dx, box->y1 + src_dy, 3105fe8aea9eSmrg box->x1 + dst_dx, box->y1 + dst_dy, 3106fe8aea9eSmrg box->x2 - box->x1, box->y2 - box->y1)); 3107fe8aea9eSmrg v[0] = box->x2 + dst_dx; 3108fe8aea9eSmrg v[2] = box->x2 + src_dx; 3109fe8aea9eSmrg v[1] = v[5] = box->y2 + dst_dy; 3110fe8aea9eSmrg v[3] = v[7] = box->y2 + src_dy; 3111fe8aea9eSmrg v[8] = v[4] = box->x1 + dst_dx; 3112fe8aea9eSmrg v[10] = v[6] = box->x1 + src_dx; 3113fe8aea9eSmrg v[9] = box->y1 + dst_dy; 3114fe8aea9eSmrg v[11] = box->y1 + src_dy; 3115fe8aea9eSmrg v += 12; box++; 3116fe8aea9eSmrg } while (--n_this_time); 3117fe8aea9eSmrg } while (n); 3118fe8aea9eSmrg 3119fe8aea9eSmrg gen8_vertex_flush(sna); 3120fe8aea9eSmrg sna_render_composite_redirect_done(sna, &tmp); 3121fe8aea9eSmrg if (tmp.src.bo != src_bo) 3122fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3123fe8aea9eSmrg return true; 3124fe8aea9eSmrg 3125fe8aea9eSmrgfallback_tiled_dst: 3126fe8aea9eSmrg if (tmp.redirect.real_bo) 3127fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 3128fe8aea9eSmrgfallback_tiled: 3129fe8aea9eSmrg DBG(("%s: fallback tiled\n", __FUNCTION__)); 3130fe8aea9eSmrg if (sna_blt_compare_depth(src, dst) && 3131fe8aea9eSmrg sna_blt_copy_boxes(sna, alu, 3132fe8aea9eSmrg src_bo, src_dx, src_dy, 3133fe8aea9eSmrg dst_bo, dst_dx, dst_dy, 3134fe8aea9eSmrg dst->bitsPerPixel, 3135fe8aea9eSmrg box, n)) 3136fe8aea9eSmrg return true; 3137fe8aea9eSmrg 3138fe8aea9eSmrg return sna_tiling_copy_boxes(sna, alu, 3139fe8aea9eSmrg src, src_bo, src_dx, src_dy, 3140fe8aea9eSmrg dst, dst_bo, dst_dx, dst_dy, 3141fe8aea9eSmrg box, n); 3142fe8aea9eSmrg} 3143fe8aea9eSmrg 3144fe8aea9eSmrgstatic void 3145fe8aea9eSmrggen9_render_copy_blt(struct sna *sna, 3146fe8aea9eSmrg const struct sna_copy_op *op, 3147fe8aea9eSmrg int16_t sx, int16_t sy, 3148fe8aea9eSmrg int16_t w, int16_t h, 3149fe8aea9eSmrg int16_t dx, int16_t dy) 3150fe8aea9eSmrg{ 3151fe8aea9eSmrg int16_t *v; 3152fe8aea9eSmrg 3153fe8aea9eSmrg gen9_get_rectangles(sna, &op->base, 1, gen9_emit_copy_state); 3154fe8aea9eSmrg 3155fe8aea9eSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3156fe8aea9eSmrg sna->render.vertex_used += 6; 3157fe8aea9eSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3158fe8aea9eSmrg 3159fe8aea9eSmrg v[0] = dx+w; v[1] = dy+h; 3160fe8aea9eSmrg v[2] = sx+w; v[3] = sy+h; 3161fe8aea9eSmrg v[4] = dx; v[5] = dy+h; 3162fe8aea9eSmrg v[6] = sx; v[7] = sy+h; 3163fe8aea9eSmrg v[8] = dx; v[9] = dy; 3164fe8aea9eSmrg v[10] = sx; v[11] = sy; 3165fe8aea9eSmrg} 3166fe8aea9eSmrg 3167fe8aea9eSmrgstatic void 3168fe8aea9eSmrggen9_render_copy_done(struct sna *sna, const struct sna_copy_op *op) 3169fe8aea9eSmrg{ 3170fe8aea9eSmrg if (sna->render.vertex_offset) 3171fe8aea9eSmrg gen8_vertex_flush(sna); 3172fe8aea9eSmrg} 3173fe8aea9eSmrg 3174fe8aea9eSmrgstatic bool 3175fe8aea9eSmrggen9_render_copy(struct sna *sna, uint8_t alu, 3176fe8aea9eSmrg PixmapPtr src, struct kgem_bo *src_bo, 3177fe8aea9eSmrg PixmapPtr dst, struct kgem_bo *dst_bo, 3178fe8aea9eSmrg struct sna_copy_op *op) 3179fe8aea9eSmrg{ 3180fe8aea9eSmrg DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n", 3181fe8aea9eSmrg __FUNCTION__, alu, 3182fe8aea9eSmrg src->drawable.width, src->drawable.height, 3183fe8aea9eSmrg dst->drawable.width, dst->drawable.height)); 3184fe8aea9eSmrg 3185fe8aea9eSmrg if (prefer_blt_copy(sna, src_bo, dst_bo, 0) && 3186fe8aea9eSmrg sna_blt_compare_depth(&src->drawable, &dst->drawable) && 3187fe8aea9eSmrg sna_blt_copy(sna, alu, 3188fe8aea9eSmrg src_bo, dst_bo, 3189fe8aea9eSmrg dst->drawable.bitsPerPixel, 3190fe8aea9eSmrg op)) 3191fe8aea9eSmrg return true; 3192fe8aea9eSmrg 3193fe8aea9eSmrg if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || 3194fe8aea9eSmrg too_large(src->drawable.width, src->drawable.height) || 3195fe8aea9eSmrg too_large(dst->drawable.width, dst->drawable.height) || 3196fe8aea9eSmrg unaligned(src_bo, src->drawable.bitsPerPixel) || 3197fe8aea9eSmrg unaligned(dst_bo, dst->drawable.bitsPerPixel)) { 3198fe8aea9eSmrgfallback: 3199fe8aea9eSmrg if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) 3200fe8aea9eSmrg return false; 3201fe8aea9eSmrg 3202fe8aea9eSmrg return sna_blt_copy(sna, alu, src_bo, dst_bo, 3203fe8aea9eSmrg dst->drawable.bitsPerPixel, 3204fe8aea9eSmrg op); 3205fe8aea9eSmrg } 3206fe8aea9eSmrg 3207fe8aea9eSmrg if (dst->drawable.depth == src->drawable.depth) { 3208fe8aea9eSmrg op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); 3209fe8aea9eSmrg op->base.src.pict_format = op->base.dst.format; 3210fe8aea9eSmrg } else { 3211fe8aea9eSmrg op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3212fe8aea9eSmrg op->base.src.pict_format = sna_format_for_depth(src->drawable.depth); 3213fe8aea9eSmrg } 3214fe8aea9eSmrg if (!gen9_check_format(op->base.src.pict_format)) 3215fe8aea9eSmrg goto fallback; 3216fe8aea9eSmrg 3217fe8aea9eSmrg op->base.dst.pixmap = dst; 3218fe8aea9eSmrg op->base.dst.width = dst->drawable.width; 3219fe8aea9eSmrg op->base.dst.height = dst->drawable.height; 3220fe8aea9eSmrg op->base.dst.bo = dst_bo; 3221fe8aea9eSmrg 3222fe8aea9eSmrg op->base.src.bo = src_bo; 3223fe8aea9eSmrg op->base.src.card_format = 3224fe8aea9eSmrg gen9_get_card_format(op->base.src.pict_format); 3225fe8aea9eSmrg op->base.src.width = src->drawable.width; 3226fe8aea9eSmrg op->base.src.height = src->drawable.height; 3227fe8aea9eSmrg 3228fe8aea9eSmrg op->base.mask.bo = NULL; 3229fe8aea9eSmrg 3230fe8aea9eSmrg op->base.floats_per_vertex = 2; 3231fe8aea9eSmrg op->base.floats_per_rect = 6; 3232fe8aea9eSmrg 3233fe8aea9eSmrg op->base.u.gen9.flags = COPY_FLAGS(alu); 3234fe8aea9eSmrg op->base.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK; 3235fe8aea9eSmrg 3236fe8aea9eSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3237fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { 3238fe8aea9eSmrg kgem_submit(&sna->kgem); 3239fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) 3240fe8aea9eSmrg goto fallback; 3241fe8aea9eSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3242fe8aea9eSmrg } 3243fe8aea9eSmrg 3244fe8aea9eSmrg gen9_align_vertex(sna, &op->base); 3245fe8aea9eSmrg gen9_emit_copy_state(sna, &op->base); 3246fe8aea9eSmrg 3247fe8aea9eSmrg op->blt = gen9_render_copy_blt; 3248fe8aea9eSmrg op->done = gen9_render_copy_done; 3249fe8aea9eSmrg return true; 3250fe8aea9eSmrg} 3251fe8aea9eSmrg 3252fe8aea9eSmrgstatic void 3253fe8aea9eSmrggen9_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) 3254fe8aea9eSmrg{ 3255fe8aea9eSmrg uint32_t *binding_table; 3256fe8aea9eSmrg uint16_t offset, dirty; 3257fe8aea9eSmrg 3258fe8aea9eSmrg /* XXX Render Target Fast Clear 3259fe8aea9eSmrg * Set RTFC Enable in PS and render a rectangle. 3260fe8aea9eSmrg * Limited to a clearing the full MSC surface only with a 3261fe8aea9eSmrg * specific kernel. 3262fe8aea9eSmrg */ 3263fe8aea9eSmrg 3264fe8aea9eSmrg gen9_get_batch(sna, op); 3265fe8aea9eSmrg 3266fe8aea9eSmrg binding_table = gen9_composite_get_binding_table(sna, &offset); 3267fe8aea9eSmrg 3268fe8aea9eSmrg dirty = kgem_bo_is_dirty(op->dst.bo); 3269fe8aea9eSmrg 3270fe8aea9eSmrg binding_table[0] = 3271fe8aea9eSmrg gen9_bind_bo(sna, 3272fe8aea9eSmrg op->dst.bo, op->dst.width, op->dst.height, 3273fe8aea9eSmrg gen9_get_dest_format(op->dst.format), 3274fe8aea9eSmrg true); 3275fe8aea9eSmrg binding_table[1] = 3276fe8aea9eSmrg gen9_bind_bo(sna, 3277fe8aea9eSmrg op->src.bo, 1, 1, 3278fe8aea9eSmrg SURFACEFORMAT_B8G8R8A8_UNORM, 3279fe8aea9eSmrg false); 3280fe8aea9eSmrg 3281fe8aea9eSmrg if (sna->kgem.surface == offset && 3282fe8aea9eSmrg *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table) { 3283fe8aea9eSmrg sna->kgem.surface += SURFACE_DW; 3284fe8aea9eSmrg offset = sna->render_state.gen9.surface_table; 3285fe8aea9eSmrg } 3286fe8aea9eSmrg 3287fe8aea9eSmrg if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0]) 3288fe8aea9eSmrg dirty = 0; 3289fe8aea9eSmrg 3290fe8aea9eSmrg gen9_emit_state(sna, op, offset | dirty); 3291fe8aea9eSmrg} 3292fe8aea9eSmrg 3293fe8aea9eSmrgstatic bool 3294fe8aea9eSmrggen9_render_fill_boxes(struct sna *sna, 3295fe8aea9eSmrg CARD8 op, 3296fe8aea9eSmrg PictFormat format, 3297fe8aea9eSmrg const xRenderColor *color, 3298fe8aea9eSmrg const DrawableRec *dst, struct kgem_bo *dst_bo, 3299fe8aea9eSmrg const BoxRec *box, int n) 3300fe8aea9eSmrg{ 3301fe8aea9eSmrg struct sna_composite_op tmp; 3302fe8aea9eSmrg uint32_t pixel; 3303fe8aea9eSmrg 3304fe8aea9eSmrg DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", 3305fe8aea9eSmrg __FUNCTION__, op, 3306fe8aea9eSmrg color->red, color->green, color->blue, color->alpha, (int)format)); 3307fe8aea9eSmrg 3308fe8aea9eSmrg if (op >= ARRAY_SIZE(gen9_blend_op)) { 3309fe8aea9eSmrg DBG(("%s: fallback due to unhandled blend op: %d\n", 3310fe8aea9eSmrg __FUNCTION__, op)); 3311fe8aea9eSmrg return false; 3312fe8aea9eSmrg } 3313fe8aea9eSmrg 3314fe8aea9eSmrg if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) || 3315fe8aea9eSmrg !gen9_check_dst_format(format) || 3316fe8aea9eSmrg unaligned(dst_bo, PICT_FORMAT_BPP(format))) { 3317fe8aea9eSmrg uint8_t alu = GXinvalid; 3318fe8aea9eSmrg 3319fe8aea9eSmrg if (op <= PictOpSrc) { 3320fe8aea9eSmrg pixel = 0; 3321fe8aea9eSmrg if (op == PictOpClear) 3322fe8aea9eSmrg alu = GXclear; 3323fe8aea9eSmrg else if (sna_get_pixel_from_rgba(&pixel, 3324fe8aea9eSmrg color->red, 3325fe8aea9eSmrg color->green, 3326fe8aea9eSmrg color->blue, 3327fe8aea9eSmrg color->alpha, 3328fe8aea9eSmrg format)) 3329fe8aea9eSmrg alu = GXcopy; 3330fe8aea9eSmrg } 3331fe8aea9eSmrg 3332fe8aea9eSmrg if (alu != GXinvalid && 3333fe8aea9eSmrg sna_blt_fill_boxes(sna, alu, 3334fe8aea9eSmrg dst_bo, dst->bitsPerPixel, 3335fe8aea9eSmrg pixel, box, n)) 3336fe8aea9eSmrg return true; 3337fe8aea9eSmrg 3338fe8aea9eSmrg if (!gen9_check_dst_format(format)) 3339fe8aea9eSmrg return false; 3340fe8aea9eSmrg } 3341fe8aea9eSmrg 3342fe8aea9eSmrg if (op == PictOpClear) { 3343fe8aea9eSmrg pixel = 0; 3344fe8aea9eSmrg op = PictOpSrc; 3345fe8aea9eSmrg } else if (!sna_get_pixel_from_rgba(&pixel, 3346fe8aea9eSmrg color->red, 3347fe8aea9eSmrg color->green, 3348fe8aea9eSmrg color->blue, 3349fe8aea9eSmrg color->alpha, 3350fe8aea9eSmrg PICT_a8r8g8b8)) 3351fe8aea9eSmrg return false; 3352fe8aea9eSmrg 3353fe8aea9eSmrg DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", 3354fe8aea9eSmrg __FUNCTION__, pixel, n, 3355fe8aea9eSmrg box[0].x1, box[0].y1, box[0].x2, box[0].y2)); 3356fe8aea9eSmrg 3357fe8aea9eSmrg tmp.dst.pixmap = (PixmapPtr)dst; 3358fe8aea9eSmrg tmp.dst.width = dst->width; 3359fe8aea9eSmrg tmp.dst.height = dst->height; 3360fe8aea9eSmrg tmp.dst.format = format; 3361fe8aea9eSmrg tmp.dst.bo = dst_bo; 3362fe8aea9eSmrg tmp.dst.x = tmp.dst.y = 0; 3363fe8aea9eSmrg tmp.damage = NULL; 3364fe8aea9eSmrg 3365fe8aea9eSmrg sna_render_composite_redirect_init(&tmp); 3366fe8aea9eSmrg if (too_large(dst->width, dst->height)) { 3367fe8aea9eSmrg BoxRec extents; 3368fe8aea9eSmrg 3369fe8aea9eSmrg boxes_extents(box, n, &extents); 3370fe8aea9eSmrg if (!sna_render_composite_redirect(sna, &tmp, 3371fe8aea9eSmrg extents.x1, extents.y1, 3372fe8aea9eSmrg extents.x2 - extents.x1, 3373fe8aea9eSmrg extents.y2 - extents.y1, 3374fe8aea9eSmrg n > 1)) 3375fe8aea9eSmrg return sna_tiling_fill_boxes(sna, op, format, color, 3376fe8aea9eSmrg dst, dst_bo, box, n); 3377fe8aea9eSmrg } 3378fe8aea9eSmrg 3379fe8aea9eSmrg tmp.src.bo = sna_render_get_solid(sna, pixel); 3380fe8aea9eSmrg tmp.mask.bo = NULL; 3381fe8aea9eSmrg 3382fe8aea9eSmrg tmp.floats_per_vertex = 2; 3383fe8aea9eSmrg tmp.floats_per_rect = 6; 3384fe8aea9eSmrg tmp.need_magic_ca_pass = false; 3385fe8aea9eSmrg 3386fe8aea9eSmrg tmp.u.gen9.flags = FILL_FLAGS(op, format); 3387fe8aea9eSmrg tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK; 3388fe8aea9eSmrg 3389fe8aea9eSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3390fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3391fe8aea9eSmrg kgem_submit(&sna->kgem); 3392fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3393fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3394fe8aea9eSmrg tmp.src.bo = NULL; 3395fe8aea9eSmrg 3396fe8aea9eSmrg if (tmp.redirect.real_bo) { 3397fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, tmp.dst.bo); 3398fe8aea9eSmrg tmp.redirect.real_bo = NULL; 3399fe8aea9eSmrg } 3400fe8aea9eSmrg 3401fe8aea9eSmrg return false; 3402fe8aea9eSmrg } 3403fe8aea9eSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3404fe8aea9eSmrg } 3405fe8aea9eSmrg 3406fe8aea9eSmrg gen9_align_vertex(sna, &tmp); 3407fe8aea9eSmrg gen9_emit_fill_state(sna, &tmp); 3408fe8aea9eSmrg 3409fe8aea9eSmrg do { 3410fe8aea9eSmrg int n_this_time; 3411fe8aea9eSmrg int16_t *v; 3412fe8aea9eSmrg 3413fe8aea9eSmrg n_this_time = gen9_get_rectangles(sna, &tmp, n, 3414fe8aea9eSmrg gen9_emit_fill_state); 3415fe8aea9eSmrg n -= n_this_time; 3416fe8aea9eSmrg 3417fe8aea9eSmrg v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); 3418fe8aea9eSmrg sna->render.vertex_used += 6 * n_this_time; 3419fe8aea9eSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3420fe8aea9eSmrg do { 3421fe8aea9eSmrg DBG((" (%d, %d), (%d, %d)\n", 3422fe8aea9eSmrg box->x1, box->y1, box->x2, box->y2)); 3423fe8aea9eSmrg 3424fe8aea9eSmrg v[0] = box->x2; 3425fe8aea9eSmrg v[5] = v[1] = box->y2; 3426fe8aea9eSmrg v[8] = v[4] = box->x1; 3427fe8aea9eSmrg v[9] = box->y1; 3428fe8aea9eSmrg v[2] = v[3] = v[7] = 1; 3429fe8aea9eSmrg v[6] = v[10] = v[11] = 0; 3430fe8aea9eSmrg v += 12; box++; 3431fe8aea9eSmrg } while (--n_this_time); 3432fe8aea9eSmrg } while (n); 3433fe8aea9eSmrg 3434fe8aea9eSmrg gen8_vertex_flush(sna); 3435fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3436fe8aea9eSmrg sna_render_composite_redirect_done(sna, &tmp); 3437fe8aea9eSmrg return true; 3438fe8aea9eSmrg} 3439fe8aea9eSmrg 3440fe8aea9eSmrgstatic void 3441fe8aea9eSmrggen9_render_fill_op_blt(struct sna *sna, 3442fe8aea9eSmrg const struct sna_fill_op *op, 3443fe8aea9eSmrg int16_t x, int16_t y, int16_t w, int16_t h) 3444fe8aea9eSmrg{ 3445fe8aea9eSmrg int16_t *v; 3446fe8aea9eSmrg 3447fe8aea9eSmrg DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); 3448fe8aea9eSmrg 3449fe8aea9eSmrg gen9_get_rectangles(sna, &op->base, 1, gen9_emit_fill_state); 3450fe8aea9eSmrg 3451fe8aea9eSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3452fe8aea9eSmrg sna->render.vertex_used += 6; 3453fe8aea9eSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3454fe8aea9eSmrg 3455fe8aea9eSmrg v[0] = x+w; 3456fe8aea9eSmrg v[4] = v[8] = x; 3457fe8aea9eSmrg v[1] = v[5] = y+h; 3458fe8aea9eSmrg v[9] = y; 3459fe8aea9eSmrg 3460fe8aea9eSmrg v[2] = v[3] = v[7] = 1; 3461fe8aea9eSmrg v[6] = v[10] = v[11] = 0; 3462fe8aea9eSmrg} 3463fe8aea9eSmrg 3464fe8aea9eSmrgfastcall static void 3465fe8aea9eSmrggen9_render_fill_op_box(struct sna *sna, 3466fe8aea9eSmrg const struct sna_fill_op *op, 3467fe8aea9eSmrg const BoxRec *box) 3468fe8aea9eSmrg{ 3469fe8aea9eSmrg int16_t *v; 3470fe8aea9eSmrg 3471fe8aea9eSmrg DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, 3472fe8aea9eSmrg box->x1, box->y1, box->x2, box->y2)); 3473fe8aea9eSmrg 3474fe8aea9eSmrg gen9_get_rectangles(sna, &op->base, 1, gen9_emit_fill_state); 3475fe8aea9eSmrg 3476fe8aea9eSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3477fe8aea9eSmrg sna->render.vertex_used += 6; 3478fe8aea9eSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3479fe8aea9eSmrg 3480fe8aea9eSmrg v[0] = box->x2; 3481fe8aea9eSmrg v[8] = v[4] = box->x1; 3482fe8aea9eSmrg v[5] = v[1] = box->y2; 3483fe8aea9eSmrg v[9] = box->y1; 3484fe8aea9eSmrg 3485fe8aea9eSmrg v[7] = v[2] = v[3] = 1; 3486fe8aea9eSmrg v[6] = v[10] = v[11] = 0; 3487fe8aea9eSmrg} 3488fe8aea9eSmrg 3489fe8aea9eSmrgfastcall static void 3490fe8aea9eSmrggen9_render_fill_op_boxes(struct sna *sna, 3491fe8aea9eSmrg const struct sna_fill_op *op, 3492fe8aea9eSmrg const BoxRec *box, 3493fe8aea9eSmrg int nbox) 3494fe8aea9eSmrg{ 3495fe8aea9eSmrg DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__, 3496fe8aea9eSmrg box->x1, box->y1, box->x2, box->y2, nbox)); 3497fe8aea9eSmrg 3498fe8aea9eSmrg do { 3499fe8aea9eSmrg int nbox_this_time; 3500fe8aea9eSmrg int16_t *v; 3501fe8aea9eSmrg 3502fe8aea9eSmrg nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox, 3503fe8aea9eSmrg gen9_emit_fill_state); 3504fe8aea9eSmrg nbox -= nbox_this_time; 3505fe8aea9eSmrg 3506fe8aea9eSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3507fe8aea9eSmrg sna->render.vertex_used += 6 * nbox_this_time; 3508fe8aea9eSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3509fe8aea9eSmrg 3510fe8aea9eSmrg do { 3511fe8aea9eSmrg v[0] = box->x2; 3512fe8aea9eSmrg v[8] = v[4] = box->x1; 3513fe8aea9eSmrg v[5] = v[1] = box->y2; 3514fe8aea9eSmrg v[9] = box->y1; 3515fe8aea9eSmrg v[7] = v[2] = v[3] = 1; 3516fe8aea9eSmrg v[6] = v[10] = v[11] = 0; 3517fe8aea9eSmrg box++; v += 12; 3518fe8aea9eSmrg } while (--nbox_this_time); 3519fe8aea9eSmrg } while (nbox); 3520fe8aea9eSmrg} 3521fe8aea9eSmrg 3522fe8aea9eSmrgstatic void 3523fe8aea9eSmrggen9_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op) 3524fe8aea9eSmrg{ 3525fe8aea9eSmrg if (sna->render.vertex_offset) 3526fe8aea9eSmrg gen8_vertex_flush(sna); 3527fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, op->base.src.bo); 3528fe8aea9eSmrg} 3529fe8aea9eSmrg 3530fe8aea9eSmrgstatic bool 3531fe8aea9eSmrggen9_render_fill(struct sna *sna, uint8_t alu, 3532fe8aea9eSmrg PixmapPtr dst, struct kgem_bo *dst_bo, 3533fe8aea9eSmrg uint32_t color, unsigned flags, 3534fe8aea9eSmrg struct sna_fill_op *op) 3535fe8aea9eSmrg{ 3536fe8aea9eSmrg DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); 3537fe8aea9eSmrg 3538fe8aea9eSmrg if (prefer_blt_fill(sna, dst_bo, flags) && 3539fe8aea9eSmrg sna_blt_fill(sna, alu, 3540fe8aea9eSmrg dst_bo, dst->drawable.bitsPerPixel, 3541fe8aea9eSmrg color, 3542fe8aea9eSmrg op)) 3543fe8aea9eSmrg return true; 3544fe8aea9eSmrg 3545fe8aea9eSmrg if (!(alu == GXcopy || alu == GXclear) || 3546fe8aea9eSmrg too_large(dst->drawable.width, dst->drawable.height) || 3547fe8aea9eSmrg unaligned(dst_bo, dst->drawable.bitsPerPixel)) 3548fe8aea9eSmrg return sna_blt_fill(sna, alu, 3549fe8aea9eSmrg dst_bo, dst->drawable.bitsPerPixel, 3550fe8aea9eSmrg color, 3551fe8aea9eSmrg op); 3552fe8aea9eSmrg 3553fe8aea9eSmrg if (alu == GXclear) 3554fe8aea9eSmrg color = 0; 3555fe8aea9eSmrg 3556fe8aea9eSmrg op->base.dst.pixmap = dst; 3557fe8aea9eSmrg op->base.dst.width = dst->drawable.width; 3558fe8aea9eSmrg op->base.dst.height = dst->drawable.height; 3559fe8aea9eSmrg op->base.dst.format = sna_format_for_depth(dst->drawable.depth); 3560fe8aea9eSmrg op->base.dst.bo = dst_bo; 3561fe8aea9eSmrg op->base.dst.x = op->base.dst.y = 0; 3562fe8aea9eSmrg 3563fe8aea9eSmrg op->base.src.bo = 3564fe8aea9eSmrg sna_render_get_solid(sna, 3565fe8aea9eSmrg sna_rgba_for_color(color, 3566fe8aea9eSmrg dst->drawable.depth)); 3567fe8aea9eSmrg op->base.mask.bo = NULL; 3568fe8aea9eSmrg 3569fe8aea9eSmrg op->base.need_magic_ca_pass = false; 3570fe8aea9eSmrg op->base.floats_per_vertex = 2; 3571fe8aea9eSmrg op->base.floats_per_rect = 6; 3572fe8aea9eSmrg 3573fe8aea9eSmrg op->base.u.gen9.flags = FILL_FLAGS_NOBLEND; 3574fe8aea9eSmrg op->base.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK; 3575fe8aea9eSmrg 3576fe8aea9eSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); 3577fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3578fe8aea9eSmrg kgem_submit(&sna->kgem); 3579fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { 3580fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, op->base.src.bo); 3581fe8aea9eSmrg return false; 3582fe8aea9eSmrg } 3583fe8aea9eSmrg 3584fe8aea9eSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3585fe8aea9eSmrg } 3586fe8aea9eSmrg 3587fe8aea9eSmrg gen9_align_vertex(sna, &op->base); 3588fe8aea9eSmrg gen9_emit_fill_state(sna, &op->base); 3589fe8aea9eSmrg 3590fe8aea9eSmrg op->blt = gen9_render_fill_op_blt; 3591fe8aea9eSmrg op->box = gen9_render_fill_op_box; 3592fe8aea9eSmrg op->boxes = gen9_render_fill_op_boxes; 3593fe8aea9eSmrg op->points = NULL; 3594fe8aea9eSmrg op->done = gen9_render_fill_op_done; 3595fe8aea9eSmrg return true; 3596fe8aea9eSmrg} 3597fe8aea9eSmrg 3598fe8aea9eSmrgstatic bool 3599fe8aea9eSmrggen9_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3600fe8aea9eSmrg uint32_t color, 3601fe8aea9eSmrg int16_t x1, int16_t y1, int16_t x2, int16_t y2, 3602fe8aea9eSmrg uint8_t alu) 3603fe8aea9eSmrg{ 3604fe8aea9eSmrg BoxRec box; 3605fe8aea9eSmrg 3606fe8aea9eSmrg box.x1 = x1; 3607fe8aea9eSmrg box.y1 = y1; 3608fe8aea9eSmrg box.x2 = x2; 3609fe8aea9eSmrg box.y2 = y2; 3610fe8aea9eSmrg 3611fe8aea9eSmrg return sna_blt_fill_boxes(sna, alu, 3612fe8aea9eSmrg bo, dst->drawable.bitsPerPixel, 3613fe8aea9eSmrg color, &box, 1); 3614fe8aea9eSmrg} 3615fe8aea9eSmrg 3616fe8aea9eSmrgstatic bool 3617fe8aea9eSmrggen9_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, 3618fe8aea9eSmrg uint32_t color, 3619fe8aea9eSmrg int16_t x1, int16_t y1, 3620fe8aea9eSmrg int16_t x2, int16_t y2, 3621fe8aea9eSmrg uint8_t alu) 3622fe8aea9eSmrg{ 3623fe8aea9eSmrg struct sna_composite_op tmp; 3624fe8aea9eSmrg int16_t *v; 3625fe8aea9eSmrg 3626fe8aea9eSmrg /* Prefer to use the BLT if already engaged */ 3627fe8aea9eSmrg if (prefer_blt_fill(sna, bo, FILL_BOXES) && 3628fe8aea9eSmrg gen9_render_fill_one_try_blt(sna, dst, bo, color, 3629fe8aea9eSmrg x1, y1, x2, y2, alu)) 3630fe8aea9eSmrg return true; 3631fe8aea9eSmrg 3632fe8aea9eSmrg /* Must use the BLT if we can't RENDER... */ 3633fe8aea9eSmrg if (!(alu == GXcopy || alu == GXclear) || 3634fe8aea9eSmrg too_large(dst->drawable.width, dst->drawable.height) || 3635fe8aea9eSmrg unaligned(bo, dst->drawable.bitsPerPixel)) 3636fe8aea9eSmrg return gen9_render_fill_one_try_blt(sna, dst, bo, color, 3637fe8aea9eSmrg x1, y1, x2, y2, alu); 3638fe8aea9eSmrg 3639fe8aea9eSmrg if (alu == GXclear) 3640fe8aea9eSmrg color = 0; 3641fe8aea9eSmrg 3642fe8aea9eSmrg tmp.dst.pixmap = dst; 3643fe8aea9eSmrg tmp.dst.width = dst->drawable.width; 3644fe8aea9eSmrg tmp.dst.height = dst->drawable.height; 3645fe8aea9eSmrg tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3646fe8aea9eSmrg tmp.dst.bo = bo; 3647fe8aea9eSmrg tmp.dst.x = tmp.dst.y = 0; 3648fe8aea9eSmrg 3649fe8aea9eSmrg tmp.src.bo = 3650fe8aea9eSmrg sna_render_get_solid(sna, 3651fe8aea9eSmrg sna_rgba_for_color(color, 3652fe8aea9eSmrg dst->drawable.depth)); 3653fe8aea9eSmrg tmp.mask.bo = NULL; 3654fe8aea9eSmrg 3655fe8aea9eSmrg tmp.floats_per_vertex = 2; 3656fe8aea9eSmrg tmp.floats_per_rect = 6; 3657fe8aea9eSmrg tmp.need_magic_ca_pass = false; 3658fe8aea9eSmrg 3659fe8aea9eSmrg tmp.u.gen9.flags = FILL_FLAGS_NOBLEND; 3660fe8aea9eSmrg tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK; 3661fe8aea9eSmrg 3662fe8aea9eSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 3663fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3664fe8aea9eSmrg kgem_submit(&sna->kgem); 3665fe8aea9eSmrg if (kgem_check_bo(&sna->kgem, bo, NULL)) { 3666fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3667fe8aea9eSmrg return false; 3668fe8aea9eSmrg } 3669fe8aea9eSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3670fe8aea9eSmrg } 3671fe8aea9eSmrg 3672fe8aea9eSmrg gen9_align_vertex(sna, &tmp); 3673fe8aea9eSmrg gen9_emit_fill_state(sna, &tmp); 3674fe8aea9eSmrg 3675fe8aea9eSmrg gen9_get_rectangles(sna, &tmp, 1, gen9_emit_fill_state); 3676fe8aea9eSmrg 3677fe8aea9eSmrg DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2)); 3678fe8aea9eSmrg 3679fe8aea9eSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3680fe8aea9eSmrg sna->render.vertex_used += 6; 3681fe8aea9eSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3682fe8aea9eSmrg 3683fe8aea9eSmrg v[0] = x2; 3684fe8aea9eSmrg v[8] = v[4] = x1; 3685fe8aea9eSmrg v[5] = v[1] = y2; 3686fe8aea9eSmrg v[9] = y1; 3687fe8aea9eSmrg v[7] = v[2] = v[3] = 1; 3688fe8aea9eSmrg v[6] = v[10] = v[11] = 0; 3689fe8aea9eSmrg 3690fe8aea9eSmrg gen8_vertex_flush(sna); 3691fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3692fe8aea9eSmrg 3693fe8aea9eSmrg return true; 3694fe8aea9eSmrg} 3695fe8aea9eSmrg 3696fe8aea9eSmrgstatic bool 3697fe8aea9eSmrggen9_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) 3698fe8aea9eSmrg{ 3699fe8aea9eSmrg BoxRec box; 3700fe8aea9eSmrg 3701fe8aea9eSmrg box.x1 = 0; 3702fe8aea9eSmrg box.y1 = 0; 3703fe8aea9eSmrg box.x2 = dst->drawable.width; 3704fe8aea9eSmrg box.y2 = dst->drawable.height; 3705fe8aea9eSmrg 3706fe8aea9eSmrg return sna_blt_fill_boxes(sna, GXclear, 3707fe8aea9eSmrg bo, dst->drawable.bitsPerPixel, 3708fe8aea9eSmrg 0, &box, 1); 3709fe8aea9eSmrg} 3710fe8aea9eSmrg 3711fe8aea9eSmrgstatic bool 3712fe8aea9eSmrggen9_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) 3713fe8aea9eSmrg{ 3714fe8aea9eSmrg struct sna_composite_op tmp; 3715fe8aea9eSmrg int16_t *v; 3716fe8aea9eSmrg 3717fe8aea9eSmrg DBG(("%s: %dx%d\n", 3718fe8aea9eSmrg __FUNCTION__, 3719fe8aea9eSmrg dst->drawable.width, 3720fe8aea9eSmrg dst->drawable.height)); 3721fe8aea9eSmrg 3722fe8aea9eSmrg /* Prefer to use the BLT if already engaged */ 3723fe8aea9eSmrg if (sna->kgem.mode == KGEM_BLT && 3724fe8aea9eSmrg gen9_render_clear_try_blt(sna, dst, bo)) 3725fe8aea9eSmrg return true; 3726fe8aea9eSmrg 3727fe8aea9eSmrg /* Must use the BLT if we can't RENDER... */ 3728fe8aea9eSmrg if (too_large(dst->drawable.width, dst->drawable.height) || 3729fe8aea9eSmrg unaligned(bo, dst->drawable.bitsPerPixel)) 3730fe8aea9eSmrg return gen9_render_clear_try_blt(sna, dst, bo); 3731fe8aea9eSmrg 3732fe8aea9eSmrg tmp.dst.pixmap = dst; 3733fe8aea9eSmrg tmp.dst.width = dst->drawable.width; 3734fe8aea9eSmrg tmp.dst.height = dst->drawable.height; 3735fe8aea9eSmrg tmp.dst.format = sna_format_for_depth(dst->drawable.depth); 3736fe8aea9eSmrg tmp.dst.bo = bo; 3737fe8aea9eSmrg tmp.dst.x = tmp.dst.y = 0; 3738fe8aea9eSmrg 3739fe8aea9eSmrg tmp.src.bo = sna_render_get_solid(sna, 0); 3740fe8aea9eSmrg tmp.mask.bo = NULL; 3741fe8aea9eSmrg 3742fe8aea9eSmrg tmp.floats_per_vertex = 2; 3743fe8aea9eSmrg tmp.floats_per_rect = 6; 3744fe8aea9eSmrg tmp.need_magic_ca_pass = false; 3745fe8aea9eSmrg 3746fe8aea9eSmrg tmp.u.gen9.flags = FILL_FLAGS_NOBLEND; 3747fe8aea9eSmrg tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK; 3748fe8aea9eSmrg 3749fe8aea9eSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); 3750fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3751fe8aea9eSmrg kgem_submit(&sna->kgem); 3752fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, bo, NULL)) { 3753fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3754fe8aea9eSmrg return false; 3755fe8aea9eSmrg } 3756fe8aea9eSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 3757fe8aea9eSmrg } 3758fe8aea9eSmrg 3759fe8aea9eSmrg gen9_align_vertex(sna, &tmp); 3760fe8aea9eSmrg gen9_emit_fill_state(sna, &tmp); 3761fe8aea9eSmrg 3762fe8aea9eSmrg gen9_get_rectangles(sna, &tmp, 1, gen9_emit_fill_state); 3763fe8aea9eSmrg 3764fe8aea9eSmrg v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; 3765fe8aea9eSmrg sna->render.vertex_used += 6; 3766fe8aea9eSmrg assert(sna->render.vertex_used <= sna->render.vertex_size); 3767fe8aea9eSmrg 3768fe8aea9eSmrg v[0] = dst->drawable.width; 3769fe8aea9eSmrg v[5] = v[1] = dst->drawable.height; 3770fe8aea9eSmrg v[8] = v[4] = 0; 3771fe8aea9eSmrg v[9] = 0; 3772fe8aea9eSmrg 3773fe8aea9eSmrg v[7] = v[2] = v[3] = 1; 3774fe8aea9eSmrg v[6] = v[10] = v[11] = 0; 3775fe8aea9eSmrg 3776fe8aea9eSmrg gen8_vertex_flush(sna); 3777fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, tmp.src.bo); 3778fe8aea9eSmrg 3779fe8aea9eSmrg return true; 3780fe8aea9eSmrg} 3781fe8aea9eSmrg 3782fe8aea9eSmrg#if !NO_VIDEO 3783fe8aea9eSmrgstatic uint32_t gen9_bind_video_source(struct sna *sna, 3784fe8aea9eSmrg struct kgem_bo *bo, 3785fe8aea9eSmrg uint32_t delta, 3786fe8aea9eSmrg int width, 3787fe8aea9eSmrg int height, 3788fe8aea9eSmrg int pitch, 3789fe8aea9eSmrg uint32_t format) 3790fe8aea9eSmrg{ 3791fe8aea9eSmrg uint32_t *ss; 3792fe8aea9eSmrg int offset; 3793fe8aea9eSmrg 3794fe8aea9eSmrg offset = sna->kgem.surface -= SURFACE_DW; 3795fe8aea9eSmrg ss = sna->kgem.batch + offset; 3796fe8aea9eSmrg ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT | 3797fe8aea9eSmrg gen9_tiling_bits(bo->tiling) | 3798fe8aea9eSmrg format << SURFACE_FORMAT_SHIFT | 3799fe8aea9eSmrg SURFACE_VALIGN_4 | SURFACE_HALIGN_4); 3800fe8aea9eSmrg ss[1] = 0; 3801fe8aea9eSmrg ss[2] = ((width - 1) << SURFACE_WIDTH_SHIFT | 3802fe8aea9eSmrg (height - 1) << SURFACE_HEIGHT_SHIFT); 3803fe8aea9eSmrg ss[3] = (pitch - 1) << SURFACE_PITCH_SHIFT; 3804fe8aea9eSmrg ss[4] = 0; 3805fe8aea9eSmrg ss[5] = 0; 3806fe8aea9eSmrg ss[6] = 0; 3807fe8aea9eSmrg ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); 3808fe8aea9eSmrg *(uint64_t *)(ss+8) = 3809fe8aea9eSmrg kgem_add_reloc64(&sna->kgem, offset + 8, bo, 3810fe8aea9eSmrg I915_GEM_DOMAIN_SAMPLER << 16, 3811fe8aea9eSmrg delta); 3812fe8aea9eSmrg ss[10] = 0; 3813fe8aea9eSmrg ss[11] = 0; 3814fe8aea9eSmrg ss[12] = 0; 3815fe8aea9eSmrg ss[13] = 0; 3816fe8aea9eSmrg ss[14] = 0; 3817fe8aea9eSmrg ss[15] = 0; 3818fe8aea9eSmrg 3819fe8aea9eSmrg DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> sampler\n", 3820fe8aea9eSmrg offset, bo->handle, ss[1], 3821fe8aea9eSmrg format, width, height, bo->pitch, bo->tiling)); 3822fe8aea9eSmrg 3823fe8aea9eSmrg return offset * sizeof(uint32_t); 3824fe8aea9eSmrg} 3825fe8aea9eSmrg 3826fe8aea9eSmrgstatic void gen9_emit_video_state(struct sna *sna, 3827fe8aea9eSmrg const struct sna_composite_op *op) 3828fe8aea9eSmrg{ 3829fe8aea9eSmrg struct sna_video_frame *frame = op->priv; 3830fe8aea9eSmrg uint32_t src_surf_format[6]; 3831fe8aea9eSmrg uint32_t src_surf_base[6]; 3832fe8aea9eSmrg int src_width[6]; 3833fe8aea9eSmrg int src_height[6]; 3834fe8aea9eSmrg int src_pitch[6]; 3835fe8aea9eSmrg uint32_t *binding_table; 3836fe8aea9eSmrg uint16_t offset; 3837fe8aea9eSmrg int n_src, n; 3838fe8aea9eSmrg 3839fe8aea9eSmrg /* XXX VeBox, bicubic */ 3840fe8aea9eSmrg 3841fe8aea9eSmrg gen9_get_batch(sna, op); 3842fe8aea9eSmrg 3843fe8aea9eSmrg src_surf_base[0] = 0; 3844fe8aea9eSmrg src_surf_base[1] = 0; 3845fe8aea9eSmrg src_surf_base[2] = frame->VBufOffset; 3846fe8aea9eSmrg src_surf_base[3] = frame->VBufOffset; 3847fe8aea9eSmrg src_surf_base[4] = frame->UBufOffset; 3848fe8aea9eSmrg src_surf_base[5] = frame->UBufOffset; 3849fe8aea9eSmrg 3850fe8aea9eSmrg if (is_planar_fourcc(frame->id)) { 3851fe8aea9eSmrg for (n = 0; n < 2; n++) { 3852fe8aea9eSmrg src_surf_format[n] = SURFACEFORMAT_R8_UNORM; 3853fe8aea9eSmrg src_width[n] = frame->width; 3854fe8aea9eSmrg src_height[n] = frame->height; 3855fe8aea9eSmrg src_pitch[n] = frame->pitch[1]; 3856fe8aea9eSmrg } 3857fe8aea9eSmrg for (; n < 6; n++) { 3858fe8aea9eSmrg if (is_nv12_fourcc(frame->id)) 3859fe8aea9eSmrg src_surf_format[n] = SURFACEFORMAT_R8G8_UNORM; 3860fe8aea9eSmrg else 3861fe8aea9eSmrg src_surf_format[n] = SURFACEFORMAT_R8_UNORM; 3862fe8aea9eSmrg src_width[n] = frame->width / 2; 3863fe8aea9eSmrg src_height[n] = frame->height / 2; 3864fe8aea9eSmrg src_pitch[n] = frame->pitch[0]; 3865fe8aea9eSmrg } 3866fe8aea9eSmrg n_src = 6; 3867fe8aea9eSmrg } else { 3868fe8aea9eSmrg if (frame->id == FOURCC_RGB888) 3869fe8aea9eSmrg src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM; 3870fe8aea9eSmrg else if (frame->id == FOURCC_UYVY) 3871fe8aea9eSmrg src_surf_format[0] = SURFACEFORMAT_YCRCB_SWAPY; 3872fe8aea9eSmrg else if (is_ayuv_fourcc(frame->id)) 3873fe8aea9eSmrg src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM; 3874fe8aea9eSmrg else 3875fe8aea9eSmrg src_surf_format[0] = SURFACEFORMAT_YCRCB_NORMAL; 3876fe8aea9eSmrg 3877fe8aea9eSmrg src_width[0] = frame->width; 3878fe8aea9eSmrg src_height[0] = frame->height; 3879fe8aea9eSmrg src_pitch[0] = frame->pitch[0]; 3880fe8aea9eSmrg n_src = 1; 3881fe8aea9eSmrg } 3882fe8aea9eSmrg 3883fe8aea9eSmrg binding_table = gen9_composite_get_binding_table(sna, &offset); 3884fe8aea9eSmrg 3885fe8aea9eSmrg binding_table[0] = 3886fe8aea9eSmrg gen9_bind_bo(sna, 3887fe8aea9eSmrg op->dst.bo, op->dst.width, op->dst.height, 3888fe8aea9eSmrg gen9_get_dest_format(op->dst.format), 3889fe8aea9eSmrg true); 3890fe8aea9eSmrg for (n = 0; n < n_src; n++) { 3891fe8aea9eSmrg binding_table[1+n] = 3892fe8aea9eSmrg gen9_bind_video_source(sna, 3893fe8aea9eSmrg frame->bo, 3894fe8aea9eSmrg src_surf_base[n], 3895fe8aea9eSmrg src_width[n], 3896fe8aea9eSmrg src_height[n], 3897fe8aea9eSmrg src_pitch[n], 3898fe8aea9eSmrg src_surf_format[n]); 3899fe8aea9eSmrg } 3900fe8aea9eSmrg 3901fe8aea9eSmrg gen9_emit_state(sna, op, offset); 3902fe8aea9eSmrg} 3903fe8aea9eSmrg 3904fe8aea9eSmrgstatic unsigned select_video_kernel(const struct sna_video *video, 3905fe8aea9eSmrg const struct sna_video_frame *frame) 3906fe8aea9eSmrg{ 3907fe8aea9eSmrg switch (frame->id) { 3908fe8aea9eSmrg case FOURCC_YV12: 3909fe8aea9eSmrg case FOURCC_I420: 3910fe8aea9eSmrg case FOURCC_XVMC: 3911fe8aea9eSmrg return video->colorspace ? 3912fe8aea9eSmrg GEN9_WM_KERNEL_VIDEO_PLANAR_BT709 : 3913fe8aea9eSmrg GEN9_WM_KERNEL_VIDEO_PLANAR_BT601; 3914fe8aea9eSmrg 3915fe8aea9eSmrg case FOURCC_NV12: 3916fe8aea9eSmrg return video->colorspace ? 3917fe8aea9eSmrg GEN9_WM_KERNEL_VIDEO_NV12_BT709 : 3918fe8aea9eSmrg GEN9_WM_KERNEL_VIDEO_NV12_BT601; 3919fe8aea9eSmrg 3920fe8aea9eSmrg case FOURCC_RGB888: 3921fe8aea9eSmrg case FOURCC_RGB565: 3922fe8aea9eSmrg return GEN9_WM_KERNEL_VIDEO_RGB; 3923fe8aea9eSmrg 3924fe8aea9eSmrg case FOURCC_AYUV: 3925fe8aea9eSmrg return video->colorspace ? 3926fe8aea9eSmrg GEN9_WM_KERNEL_VIDEO_AYUV_BT709 : 3927fe8aea9eSmrg GEN9_WM_KERNEL_VIDEO_AYUV_BT601; 3928fe8aea9eSmrg 3929fe8aea9eSmrg default: 3930fe8aea9eSmrg return video->colorspace ? 3931fe8aea9eSmrg GEN9_WM_KERNEL_VIDEO_PACKED_BT709 : 3932fe8aea9eSmrg GEN9_WM_KERNEL_VIDEO_PACKED_BT601; 3933fe8aea9eSmrg } 3934fe8aea9eSmrg} 3935fe8aea9eSmrg 3936fe8aea9eSmrgstatic bool 3937fe8aea9eSmrggen9_render_video(struct sna *sna, 3938fe8aea9eSmrg struct sna_video *video, 3939fe8aea9eSmrg struct sna_video_frame *frame, 3940fe8aea9eSmrg RegionPtr dstRegion, 3941fe8aea9eSmrg PixmapPtr pixmap) 3942fe8aea9eSmrg{ 3943fe8aea9eSmrg struct sna_composite_op tmp; 3944fe8aea9eSmrg struct sna_pixmap *priv = sna_pixmap(pixmap); 3945fe8aea9eSmrg int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; 3946fe8aea9eSmrg int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; 3947fe8aea9eSmrg int src_width = frame->src.x2 - frame->src.x1; 3948fe8aea9eSmrg int src_height = frame->src.y2 - frame->src.y1; 3949fe8aea9eSmrg float src_offset_x, src_offset_y; 3950fe8aea9eSmrg float src_scale_x, src_scale_y; 3951fe8aea9eSmrg unsigned filter; 3952fe8aea9eSmrg const BoxRec *box; 3953fe8aea9eSmrg int nbox; 3954fe8aea9eSmrg 3955fe8aea9eSmrg DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", 3956fe8aea9eSmrg __FUNCTION__, 3957fe8aea9eSmrg src_width, src_height, dst_width, dst_height, 3958fe8aea9eSmrg region_num_rects(dstRegion), 3959fe8aea9eSmrg REGION_EXTENTS(NULL, dstRegion)->x1, 3960fe8aea9eSmrg REGION_EXTENTS(NULL, dstRegion)->y1, 3961fe8aea9eSmrg REGION_EXTENTS(NULL, dstRegion)->x2, 3962fe8aea9eSmrg REGION_EXTENTS(NULL, dstRegion)->y2)); 3963fe8aea9eSmrg 3964fe8aea9eSmrg assert(priv->gpu_bo); 3965fe8aea9eSmrg assert(!too_large(pixmap->drawable.width, pixmap->drawable.height)); 3966fe8aea9eSmrg assert(!unaligned(priv->gpu_bo, pixmap->drawable.bitsPerPixel)); 3967fe8aea9eSmrg 3968fe8aea9eSmrg memset(&tmp, 0, sizeof(tmp)); 3969fe8aea9eSmrg 3970fe8aea9eSmrg tmp.dst.pixmap = pixmap; 3971fe8aea9eSmrg tmp.dst.width = pixmap->drawable.width; 3972fe8aea9eSmrg tmp.dst.height = pixmap->drawable.height; 3973fe8aea9eSmrg tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth); 3974fe8aea9eSmrg tmp.dst.bo = priv->gpu_bo; 3975fe8aea9eSmrg 3976fe8aea9eSmrg tmp.src.bo = frame->bo; 3977fe8aea9eSmrg tmp.mask.bo = NULL; 3978fe8aea9eSmrg 3979fe8aea9eSmrg tmp.floats_per_vertex = 3; 3980fe8aea9eSmrg tmp.floats_per_rect = 9; 3981fe8aea9eSmrg 3982fe8aea9eSmrg DBG(("%s: scaling?=%d, planar?=%d [%x]\n", 3983fe8aea9eSmrg __FUNCTION__, 3984fe8aea9eSmrg src_width != dst_width || src_height != dst_height, 3985fe8aea9eSmrg is_planar_fourcc(frame->id), frame->id)); 3986fe8aea9eSmrg 3987fe8aea9eSmrg if (src_width == dst_width && src_height == dst_height) 3988fe8aea9eSmrg filter = SAMPLER_FILTER_NEAREST; 3989fe8aea9eSmrg else 3990fe8aea9eSmrg filter = SAMPLER_FILTER_BILINEAR; 3991fe8aea9eSmrg 3992fe8aea9eSmrg tmp.u.gen9.flags = 3993fe8aea9eSmrg GEN9_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, 3994fe8aea9eSmrg SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), 3995fe8aea9eSmrg NO_BLEND, 3996fe8aea9eSmrg 2); 3997fe8aea9eSmrg tmp.u.gen9.wm_kernel = select_video_kernel(video, frame); 3998fe8aea9eSmrg tmp.priv = frame; 3999fe8aea9eSmrg 4000fe8aea9eSmrg kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); 4001fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { 4002fe8aea9eSmrg kgem_submit(&sna->kgem); 4003fe8aea9eSmrg if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) 4004fe8aea9eSmrg return false; 4005fe8aea9eSmrg 4006fe8aea9eSmrg _kgem_set_mode(&sna->kgem, KGEM_RENDER); 4007fe8aea9eSmrg } 4008fe8aea9eSmrg 4009fe8aea9eSmrg gen9_align_vertex(sna, &tmp); 4010fe8aea9eSmrg gen9_emit_video_state(sna, &tmp); 4011fe8aea9eSmrg 4012fe8aea9eSmrg DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n", 4013fe8aea9eSmrg __FUNCTION__, 4014fe8aea9eSmrg frame->src.x1, frame->src.y1, 4015fe8aea9eSmrg src_width, src_height, 4016fe8aea9eSmrg dst_width, dst_height, 4017fe8aea9eSmrg frame->width, frame->height)); 4018fe8aea9eSmrg 4019fe8aea9eSmrg src_scale_x = (float)src_width / dst_width / frame->width; 4020fe8aea9eSmrg src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; 4021fe8aea9eSmrg 4022fe8aea9eSmrg src_scale_y = (float)src_height / dst_height / frame->height; 4023fe8aea9eSmrg src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; 4024fe8aea9eSmrg 4025fe8aea9eSmrg DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n", 4026fe8aea9eSmrg __FUNCTION__, 4027fe8aea9eSmrg src_scale_x, src_scale_y, 4028fe8aea9eSmrg src_offset_x, src_offset_y)); 4029fe8aea9eSmrg 4030fe8aea9eSmrg box = region_rects(dstRegion); 4031fe8aea9eSmrg nbox = region_num_rects(dstRegion); 4032fe8aea9eSmrg while (nbox--) { 4033fe8aea9eSmrg DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n", 4034fe8aea9eSmrg __FUNCTION__, 4035fe8aea9eSmrg box->x1, box->y1, 4036fe8aea9eSmrg box->x2, box->y2, 4037fe8aea9eSmrg box->x1 * src_scale_x + src_offset_x, 4038fe8aea9eSmrg box->y1 * src_scale_y + src_offset_y, 4039fe8aea9eSmrg box->x2 * src_scale_x + src_offset_x, 4040fe8aea9eSmrg box->y2 * src_scale_y + src_offset_y)); 4041fe8aea9eSmrg 4042fe8aea9eSmrg gen9_get_rectangles(sna, &tmp, 1, gen9_emit_video_state); 4043fe8aea9eSmrg 4044fe8aea9eSmrg OUT_VERTEX(box->x2, box->y2); 4045fe8aea9eSmrg OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); 4046fe8aea9eSmrg OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 4047fe8aea9eSmrg 4048fe8aea9eSmrg OUT_VERTEX(box->x1, box->y2); 4049fe8aea9eSmrg OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 4050fe8aea9eSmrg OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); 4051fe8aea9eSmrg 4052fe8aea9eSmrg OUT_VERTEX(box->x1, box->y1); 4053fe8aea9eSmrg OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); 4054fe8aea9eSmrg OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); 4055fe8aea9eSmrg 4056fe8aea9eSmrg box++; 4057fe8aea9eSmrg } 4058fe8aea9eSmrg gen8_vertex_flush(sna); 4059fe8aea9eSmrg 4060fe8aea9eSmrg if (!DAMAGE_IS_ALL(priv->gpu_damage)) 4061fe8aea9eSmrg sna_damage_add(&priv->gpu_damage, dstRegion); 4062fe8aea9eSmrg 4063fe8aea9eSmrg return true; 4064fe8aea9eSmrg} 4065fe8aea9eSmrg#endif 4066fe8aea9eSmrg 4067fe8aea9eSmrgstatic void gen9_render_flush(struct sna *sna) 4068fe8aea9eSmrg{ 4069fe8aea9eSmrg gen8_vertex_close(sna); 4070fe8aea9eSmrg 4071fe8aea9eSmrg assert(sna->render.vb_id == 0); 4072fe8aea9eSmrg assert(sna->render.vertex_offset == 0); 4073fe8aea9eSmrg} 4074fe8aea9eSmrg 4075fe8aea9eSmrgstatic void gen9_render_reset(struct sna *sna) 4076fe8aea9eSmrg{ 4077fe8aea9eSmrg sna->render_state.gen9.emit_flush = false; 4078fe8aea9eSmrg sna->render_state.gen9.needs_invariant = true; 4079fe8aea9eSmrg sna->render_state.gen9.ve_id = 3 << 2; 4080fe8aea9eSmrg sna->render_state.gen9.ve_dirty = false; 4081fe8aea9eSmrg sna->render_state.gen9.last_primitive = -1; 4082fe8aea9eSmrg 4083fe8aea9eSmrg sna->render_state.gen9.num_sf_outputs = 0; 4084fe8aea9eSmrg sna->render_state.gen9.samplers = -1; 4085fe8aea9eSmrg sna->render_state.gen9.blend = -1; 4086fe8aea9eSmrg sna->render_state.gen9.kernel = -1; 4087fe8aea9eSmrg sna->render_state.gen9.drawrect_offset = -1; 4088fe8aea9eSmrg sna->render_state.gen9.drawrect_limit = -1; 4089fe8aea9eSmrg sna->render_state.gen9.surface_table = 0; 4090fe8aea9eSmrg 4091fe8aea9eSmrg if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { 4092fe8aea9eSmrg DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); 4093fe8aea9eSmrg discard_vbo(sna); 4094fe8aea9eSmrg } 4095fe8aea9eSmrg 4096fe8aea9eSmrg sna->render.vertex_offset = 0; 4097fe8aea9eSmrg sna->render.nvertex_reloc = 0; 4098fe8aea9eSmrg sna->render.vb_id = 0; 4099fe8aea9eSmrg} 4100fe8aea9eSmrg 4101fe8aea9eSmrgstatic void gen9_render_fini(struct sna *sna) 4102fe8aea9eSmrg{ 4103fe8aea9eSmrg kgem_bo_destroy(&sna->kgem, sna->render_state.gen9.general_bo); 4104fe8aea9eSmrg} 4105fe8aea9eSmrg 4106fe8aea9eSmrgstatic bool gen9_render_setup(struct sna *sna) 4107fe8aea9eSmrg{ 4108fe8aea9eSmrg struct gen9_render_state *state = &sna->render_state.gen9; 4109fe8aea9eSmrg struct sna_static_stream general; 4110fe8aea9eSmrg struct gen9_sampler_state *ss; 4111fe8aea9eSmrg int i, j, k, l, m; 4112fe8aea9eSmrg uint32_t devid; 4113fe8aea9eSmrg 4114fe8aea9eSmrg devid = intel_get_device_id(sna->dev); 4115fe8aea9eSmrg if (devid & 0xf) 4116fe8aea9eSmrg state->gt = GEN9_GT_BIAS + ((devid >> 4) & 0xf) + 1; 4117fe8aea9eSmrg DBG(("%s: gt=%d\n", __FUNCTION__, state->gt)); 4118fe8aea9eSmrg 4119fe8aea9eSmrg state->info = &min_gt_info; 4120fe8aea9eSmrg if (is_skl(sna)) 4121fe8aea9eSmrg state->info = &skl_gt_info; 4122fe8aea9eSmrg if (is_bxt(sna)) 4123fe8aea9eSmrg state->info = &bxt_gt_info; 4124fe8aea9eSmrg if (is_kbl(sna)) 4125fe8aea9eSmrg state->info = &kbl_gt_info; 4126fe8aea9eSmrg if (is_glk(sna)) 4127fe8aea9eSmrg state->info = &glk_gt_info; 4128fe8aea9eSmrg if (is_cfl(sna)) 4129fe8aea9eSmrg state->info = &cfl_gt_info; 4130fe8aea9eSmrg 4131fe8aea9eSmrg sna_static_stream_init(&general); 4132fe8aea9eSmrg 4133fe8aea9eSmrg /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer 4134fe8aea9eSmrg * dumps, you know it points to zero. 4135fe8aea9eSmrg */ 4136fe8aea9eSmrg null_create(&general); 4137fe8aea9eSmrg 4138fe8aea9eSmrg for (m = 0; m < ARRAY_SIZE(wm_kernels); m++) { 4139fe8aea9eSmrg if (wm_kernels[m].size) { 4140fe8aea9eSmrg state->wm_kernel[m][1] = 4141fe8aea9eSmrg sna_static_stream_add(&general, 4142fe8aea9eSmrg wm_kernels[m].data, 4143fe8aea9eSmrg wm_kernels[m].size, 4144fe8aea9eSmrg 64); 4145fe8aea9eSmrg } else { 4146fe8aea9eSmrg if (USE_8_PIXEL_DISPATCH) { 4147fe8aea9eSmrg state->wm_kernel[m][0] = 4148fe8aea9eSmrg sna_static_stream_compile_wm(sna, &general, 4149fe8aea9eSmrg wm_kernels[m].data, 8); 4150fe8aea9eSmrg } 4151fe8aea9eSmrg 4152fe8aea9eSmrg if (USE_16_PIXEL_DISPATCH) { 4153fe8aea9eSmrg state->wm_kernel[m][1] = 4154fe8aea9eSmrg sna_static_stream_compile_wm(sna, &general, 4155fe8aea9eSmrg wm_kernels[m].data, 16); 4156fe8aea9eSmrg } 4157fe8aea9eSmrg 4158fe8aea9eSmrg if (USE_32_PIXEL_DISPATCH) { 4159fe8aea9eSmrg state->wm_kernel[m][2] = 4160fe8aea9eSmrg sna_static_stream_compile_wm(sna, &general, 4161fe8aea9eSmrg wm_kernels[m].data, 32); 4162fe8aea9eSmrg } 4163fe8aea9eSmrg } 4164fe8aea9eSmrg assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]); 4165fe8aea9eSmrg } 4166fe8aea9eSmrg 4167fe8aea9eSmrg COMPILE_TIME_ASSERT(GEN9_WM_KERNEL_COUNT <= 4168fe8aea9eSmrg 1 << (sizeof(((struct sna_composite_op *)NULL)->u.gen9.wm_kernel) * 8)); 4169fe8aea9eSmrg 4170fe8aea9eSmrg COMPILE_TIME_ASSERT(SAMPLER_OFFSET(FILTER_COUNT, EXTEND_COUNT, FILTER_COUNT, EXTEND_COUNT) <= 0x7ff); 4171fe8aea9eSmrg ss = sna_static_stream_map(&general, 4172fe8aea9eSmrg 2 * sizeof(*ss) * 4173fe8aea9eSmrg (2 + 4174fe8aea9eSmrg FILTER_COUNT * EXTEND_COUNT * 4175fe8aea9eSmrg FILTER_COUNT * EXTEND_COUNT), 4176fe8aea9eSmrg 32); 4177fe8aea9eSmrg state->wm_state = sna_static_stream_offsetof(&general, ss); 4178fe8aea9eSmrg sampler_copy_init(ss); ss += 2; 4179fe8aea9eSmrg sampler_fill_init(ss); ss += 2; 4180fe8aea9eSmrg for (i = 0; i < FILTER_COUNT; i++) { 4181fe8aea9eSmrg for (j = 0; j < EXTEND_COUNT; j++) { 4182fe8aea9eSmrg for (k = 0; k < FILTER_COUNT; k++) { 4183fe8aea9eSmrg for (l = 0; l < EXTEND_COUNT; l++) { 4184fe8aea9eSmrg sampler_state_init(ss++, i, j); 4185fe8aea9eSmrg sampler_state_init(ss++, k, l); 4186fe8aea9eSmrg } 4187fe8aea9eSmrg } 4188fe8aea9eSmrg } 4189fe8aea9eSmrg } 4190fe8aea9eSmrg 4191fe8aea9eSmrg state->cc_blend = gen9_create_blend_state(&general); 4192fe8aea9eSmrg 4193fe8aea9eSmrg state->general_bo = sna_static_stream_fini(sna, &general); 4194fe8aea9eSmrg return state->general_bo != NULL; 4195fe8aea9eSmrg} 4196fe8aea9eSmrg 4197fe8aea9eSmrgconst char *gen9_render_init(struct sna *sna, const char *backend) 4198fe8aea9eSmrg{ 4199fe8aea9eSmrg if (!gen9_render_setup(sna)) 4200fe8aea9eSmrg return backend; 4201fe8aea9eSmrg 4202fe8aea9eSmrg sna->kgem.context_switch = gen6_render_context_switch; 4203fe8aea9eSmrg sna->kgem.retire = gen6_render_retire; 4204fe8aea9eSmrg sna->kgem.expire = gen4_render_expire; 4205fe8aea9eSmrg 4206fe8aea9eSmrg#if !NO_COMPOSITE 4207fe8aea9eSmrg sna->render.composite = gen9_render_composite; 4208fe8aea9eSmrg sna->render.prefer_gpu |= PREFER_GPU_RENDER; 4209fe8aea9eSmrg#endif 4210fe8aea9eSmrg#if !NO_COMPOSITE_SPANS 4211fe8aea9eSmrg sna->render.check_composite_spans = gen9_check_composite_spans; 4212fe8aea9eSmrg sna->render.composite_spans = gen9_render_composite_spans; 4213fe8aea9eSmrg sna->render.prefer_gpu |= PREFER_GPU_SPANS; 4214fe8aea9eSmrg#endif 4215fe8aea9eSmrg#if !NO_VIDEO 4216fe8aea9eSmrg sna->render.video = gen9_render_video; 4217fe8aea9eSmrg#endif 4218fe8aea9eSmrg 4219fe8aea9eSmrg#if !NO_COPY_BOXES 4220fe8aea9eSmrg sna->render.copy_boxes = gen9_render_copy_boxes; 4221fe8aea9eSmrg#endif 4222fe8aea9eSmrg#if !NO_COPY 4223fe8aea9eSmrg sna->render.copy = gen9_render_copy; 4224fe8aea9eSmrg#endif 4225fe8aea9eSmrg 4226fe8aea9eSmrg#if !NO_FILL_BOXES 4227fe8aea9eSmrg sna->render.fill_boxes = gen9_render_fill_boxes; 4228fe8aea9eSmrg#endif 4229fe8aea9eSmrg#if !NO_FILL 4230fe8aea9eSmrg sna->render.fill = gen9_render_fill; 4231fe8aea9eSmrg#endif 4232fe8aea9eSmrg#if !NO_FILL_ONE 4233fe8aea9eSmrg sna->render.fill_one = gen9_render_fill_one; 4234fe8aea9eSmrg#endif 4235fe8aea9eSmrg#if !NO_FILL_CLEAR 4236fe8aea9eSmrg sna->render.clear = gen9_render_clear; 4237fe8aea9eSmrg#endif 4238fe8aea9eSmrg 4239fe8aea9eSmrg sna->render.flush = gen9_render_flush; 4240fe8aea9eSmrg sna->render.reset = gen9_render_reset; 4241fe8aea9eSmrg sna->render.fini = gen9_render_fini; 4242fe8aea9eSmrg 4243fe8aea9eSmrg sna->render.max_3d_size = GEN9_MAX_SIZE; 4244fe8aea9eSmrg sna->render.max_3d_pitch = 1 << 18; 4245fe8aea9eSmrg return sna->render_state.gen9.info->name; 4246fe8aea9eSmrg} 4247