1fe8aea9eSmrg/*
2fe8aea9eSmrg * Copyright © 2012,2013 Intel Corporation
3fe8aea9eSmrg *
4fe8aea9eSmrg * Permission is hereby granted, free of charge, to any person obtaining a
5fe8aea9eSmrg * copy of this software and associated documentation files (the "Software"),
6fe8aea9eSmrg * to deal in the Software without restriction, including without limitation
7fe8aea9eSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8fe8aea9eSmrg * and/or sell copies of the Software, and to permit persons to whom the
9fe8aea9eSmrg * Software is furnished to do so, subject to the following conditions:
10fe8aea9eSmrg *
11fe8aea9eSmrg * The above copyright notice and this permission notice (including the next
12fe8aea9eSmrg * paragraph) shall be included in all copies or substantial portions of the
13fe8aea9eSmrg * Software.
14fe8aea9eSmrg *
15fe8aea9eSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16fe8aea9eSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17fe8aea9eSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18fe8aea9eSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19fe8aea9eSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20fe8aea9eSmrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21fe8aea9eSmrg * SOFTWARE.
22fe8aea9eSmrg *
23fe8aea9eSmrg * Authors:
24fe8aea9eSmrg *    Chris Wilson <chris@chris-wilson.co.uk>
25fe8aea9eSmrg *
26fe8aea9eSmrg */
27fe8aea9eSmrg
28fe8aea9eSmrg#ifdef HAVE_CONFIG_H
29fe8aea9eSmrg#include "config.h"
30fe8aea9eSmrg#endif
31fe8aea9eSmrg
32fe8aea9eSmrg#include "sna.h"
33fe8aea9eSmrg#include "sna_reg.h"
34fe8aea9eSmrg#include "sna_render.h"
35fe8aea9eSmrg#include "sna_render_inline.h"
36fe8aea9eSmrg#include "sna_video.h"
37fe8aea9eSmrg
38fe8aea9eSmrg#include "gen9_render.h"
39fe8aea9eSmrg#include "gen8_eu.h"
40fe8aea9eSmrg#include "gen4_common.h"
41fe8aea9eSmrg#include "gen4_source.h"
42fe8aea9eSmrg#include "gen4_vertex.h"
43fe8aea9eSmrg#include "gen6_common.h"
44fe8aea9eSmrg#include "gen8_vertex.h"
45fe8aea9eSmrg
/* Debug switch: when non-zero, the "#if SIM" sections in this file emit
 * additional zeroed state packets.
 */
#define SIM 1

/* NOTE(review): these are consumed outside this part of the file;
 * presumably they force the named flush behaviour everywhere - confirm.
 */
#define ALWAYS_INVALIDATE 0
#define ALWAYS_FLUSH 0
#define ALWAYS_STALL 0

/* Per-operation kill switches. NO_VIDEO compiles out the video shader
 * kernels and their table entries in this file; the remaining switches are
 * presumably tested by the matching render entry points (not visible here).
 */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0
#define NO_VIDEO 0

/* Dispatch widths to enable; at least one of them is mandatory. */
#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0

#if !(USE_8_PIXEL_DISPATCH || USE_16_PIXEL_DISPATCH || USE_32_PIXEL_DISPATCH)
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif

/* Largest width or height accepted by too_large(). */
#define GEN9_MAX_SIZE 16384
#define GEN9_GT_BIAS 1 /* Each GT is bigger than previous gen */
72fe8aea9eSmrg
73fe8aea9eSmrg/* XXX Todo
74fe8aea9eSmrg *
75fe8aea9eSmrg * STR (software tiled rendering) mode. No, really.
76fe8aea9eSmrg * 64x32 pixel blocks align with the rendering cache. Worth considering.
77fe8aea9eSmrg */
78fe8aea9eSmrg
/* Non-zero when x is a multiple of y. The test masks x with (y - 1), so it
 * is only meaningful when y is a power of two.
 */
#define is_aligned(x, y) (!((x) & ((y) - 1)))
80fe8aea9eSmrg
/* Pipeline stages:
 *  1. Command Streamer (CS)
 *  2. Vertex Fetch (VF)
 *  3. Vertex Shader (VS)
 *  4. Hull Shader (HS)
 *  5. Tessellation Engine (TE)
 *  6. Domain Shader (DS)
 *  7. Geometry Shader (GS)
 *  8. Stream Output Logic (SOL)
 *  9. Clipper (CLIP)
 * 10. Strip/Fan (SF)
 * 11. Windower/Masker (WM)
 * 12. Color Calculator (CC)
 */
95fe8aea9eSmrg
96fe8aea9eSmrg#if !NO_VIDEO
97fe8aea9eSmrgstatic const uint32_t ps_kernel_packed_bt601[][4] = {
98fe8aea9eSmrg#include "exa_wm_src_affine.g8b"
99fe8aea9eSmrg#include "exa_wm_src_sample_argb.g8b"
100fe8aea9eSmrg#include "exa_wm_yuv_rgb_bt601.g8b"
101fe8aea9eSmrg#include "exa_wm_write.g8b"
102fe8aea9eSmrg};
103fe8aea9eSmrg
104fe8aea9eSmrgstatic const uint32_t ps_kernel_planar_bt601[][4] = {
105fe8aea9eSmrg#include "exa_wm_src_affine.g8b"
106fe8aea9eSmrg#include "exa_wm_src_sample_planar.g8b"
107fe8aea9eSmrg#include "exa_wm_yuv_rgb_bt601.g8b"
108fe8aea9eSmrg#include "exa_wm_write.g8b"
109fe8aea9eSmrg};
110fe8aea9eSmrg
111fe8aea9eSmrgstatic const uint32_t ps_kernel_nv12_bt601[][4] = {
112fe8aea9eSmrg#include "exa_wm_src_affine.g8b"
113fe8aea9eSmrg#include "exa_wm_src_sample_nv12.g8b"
114fe8aea9eSmrg#include "exa_wm_yuv_rgb_bt601.g8b"
115fe8aea9eSmrg#include "exa_wm_write.g8b"
116fe8aea9eSmrg};
117fe8aea9eSmrg
118fe8aea9eSmrgstatic const uint32_t ps_kernel_packed_bt709[][4] = {
119fe8aea9eSmrg#include "exa_wm_src_affine.g8b"
120fe8aea9eSmrg#include "exa_wm_src_sample_argb.g8b"
121fe8aea9eSmrg#include "exa_wm_yuv_rgb_bt709.g8b"
122fe8aea9eSmrg#include "exa_wm_write.g8b"
123fe8aea9eSmrg};
124fe8aea9eSmrg
125fe8aea9eSmrgstatic const uint32_t ps_kernel_planar_bt709[][4] = {
126fe8aea9eSmrg#include "exa_wm_src_affine.g8b"
127fe8aea9eSmrg#include "exa_wm_src_sample_planar.g8b"
128fe8aea9eSmrg#include "exa_wm_yuv_rgb_bt709.g8b"
129fe8aea9eSmrg#include "exa_wm_write.g8b"
130fe8aea9eSmrg};
131fe8aea9eSmrg
132fe8aea9eSmrgstatic const uint32_t ps_kernel_ayuv_bt601[][4] = {
133fe8aea9eSmrg#include "exa_wm_src_affine.g8b"
134fe8aea9eSmrg#include "exa_wm_src_sample_argb_ayuv.g8b"
135fe8aea9eSmrg#include "exa_wm_yuv_rgb_bt601.g8b"
136fe8aea9eSmrg#include "exa_wm_write.g8b"
137fe8aea9eSmrg};
138fe8aea9eSmrg
139fe8aea9eSmrgstatic const uint32_t ps_kernel_ayuv_bt709[][4] = {
140fe8aea9eSmrg#include "exa_wm_src_affine.g8b"
141fe8aea9eSmrg#include "exa_wm_src_sample_argb_ayuv.g8b"
142fe8aea9eSmrg#include "exa_wm_yuv_rgb_bt709.g8b"
143fe8aea9eSmrg#include "exa_wm_write.g8b"
144fe8aea9eSmrg};
145fe8aea9eSmrg
146fe8aea9eSmrgstatic const uint32_t ps_kernel_nv12_bt709[][4] = {
147fe8aea9eSmrg#include "exa_wm_src_affine.g8b"
148fe8aea9eSmrg#include "exa_wm_src_sample_nv12.g8b"
149fe8aea9eSmrg#include "exa_wm_yuv_rgb_bt709.g8b"
150fe8aea9eSmrg#include "exa_wm_write.g8b"
151fe8aea9eSmrg};
152fe8aea9eSmrg
153fe8aea9eSmrgstatic const uint32_t ps_kernel_rgb[][4] = {
154fe8aea9eSmrg#include "exa_wm_src_affine.g8b"
155fe8aea9eSmrg#include "exa_wm_src_sample_argb.g8b"
156fe8aea9eSmrg#include "exa_wm_write.g8b"
157fe8aea9eSmrg};
158fe8aea9eSmrg#endif
159fe8aea9eSmrg
/* Number of dwords in 64 bytes (presumably the size of one surface-state
 * entry - confirm against the users of this macro).
 *
 * The definition deliberately carries no trailing semicolon so that it
 * can be used inside larger expressions as well as in plain statements.
 */
#define SURFACE_DW (64 / sizeof(uint32_t))
161fe8aea9eSmrg
162fe8aea9eSmrg#define KERNEL(kernel_enum, kernel, num_surfaces) \
163fe8aea9eSmrg    [GEN9_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces}
164fe8aea9eSmrg#define NOKERNEL(kernel_enum, func, num_surfaces) \
165fe8aea9eSmrg    [GEN9_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces}
166fe8aea9eSmrgstatic const struct wm_kernel_info {
167fe8aea9eSmrg	const char *name;
168fe8aea9eSmrg	const void *data;
169fe8aea9eSmrg	unsigned int size;
170fe8aea9eSmrg	int num_surfaces;
171fe8aea9eSmrg} wm_kernels[] = {
172fe8aea9eSmrg	NOKERNEL(NOMASK, gen8_wm_kernel__affine, 2),
173fe8aea9eSmrg	NOKERNEL(NOMASK_P, gen8_wm_kernel__projective, 2),
174fe8aea9eSmrg
175fe8aea9eSmrg	NOKERNEL(MASK, gen8_wm_kernel__affine_mask, 3),
176fe8aea9eSmrg	NOKERNEL(MASK_P, gen8_wm_kernel__projective_mask, 3),
177fe8aea9eSmrg
178fe8aea9eSmrg	NOKERNEL(MASKCA, gen8_wm_kernel__affine_mask_ca, 3),
179fe8aea9eSmrg	NOKERNEL(MASKCA_P, gen8_wm_kernel__projective_mask_ca, 3),
180fe8aea9eSmrg
181fe8aea9eSmrg	NOKERNEL(MASKSA, gen8_wm_kernel__affine_mask_sa, 3),
182fe8aea9eSmrg	NOKERNEL(MASKSA_P, gen8_wm_kernel__projective_mask_sa, 3),
183fe8aea9eSmrg
184fe8aea9eSmrg	NOKERNEL(OPACITY, gen8_wm_kernel__affine_opacity, 2),
185fe8aea9eSmrg	NOKERNEL(OPACITY_P, gen8_wm_kernel__projective_opacity, 2),
186fe8aea9eSmrg
187fe8aea9eSmrg#if !NO_VIDEO
188fe8aea9eSmrg	KERNEL(VIDEO_PLANAR_BT601, ps_kernel_planar_bt601, 7),
189fe8aea9eSmrg	KERNEL(VIDEO_NV12_BT601, ps_kernel_nv12_bt601, 7),
190fe8aea9eSmrg	KERNEL(VIDEO_PACKED_BT601, ps_kernel_packed_bt601, 2),
191fe8aea9eSmrg	KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7),
192fe8aea9eSmrg	KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7),
193fe8aea9eSmrg	KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2),
194fe8aea9eSmrg	KERNEL(VIDEO_AYUV_BT601, ps_kernel_ayuv_bt601, 2),
195fe8aea9eSmrg	KERNEL(VIDEO_AYUV_BT709, ps_kernel_ayuv_bt709, 2),
196fe8aea9eSmrg	KERNEL(VIDEO_RGB, ps_kernel_rgb, 2),
197fe8aea9eSmrg#endif
198fe8aea9eSmrg};
199fe8aea9eSmrg#undef KERNEL
200fe8aea9eSmrg
201fe8aea9eSmrgstatic const struct blendinfo {
202fe8aea9eSmrg	uint8_t src_alpha;
203fe8aea9eSmrg	uint8_t src_blend;
204fe8aea9eSmrg	uint8_t dst_blend;
205fe8aea9eSmrg} gen9_blend_op[] = {
206fe8aea9eSmrg	/* Clear */	{0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO},
207fe8aea9eSmrg	/* Src */	{0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO},
208fe8aea9eSmrg	/* Dst */	{0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE},
209fe8aea9eSmrg	/* Over */	{1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA},
210fe8aea9eSmrg	/* OverReverse */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE},
211fe8aea9eSmrg	/* In */	{0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO},
212fe8aea9eSmrg	/* InReverse */	{1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA},
213fe8aea9eSmrg	/* Out */	{0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO},
214fe8aea9eSmrg	/* OutReverse */ {1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA},
215fe8aea9eSmrg	/* Atop */	{1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
216fe8aea9eSmrg	/* AtopReverse */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA},
217fe8aea9eSmrg	/* Xor */	{1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
218fe8aea9eSmrg	/* Add */	{0, BLENDFACTOR_ONE, BLENDFACTOR_ONE},
219fe8aea9eSmrg};
220fe8aea9eSmrg
/**
 * Number of blend factors covered by the blend-state table: one more than
 * BLENDFACTOR_INV_DST_ALPHA, the highest-valued factor used in
 * gen9_blend_op.
 *
 * This leaves out BLENDFACTOR_INV_DST_COLOR,
 * BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN9_BLENDFACTOR_COUNT (BLENDFACTOR_INV_DST_ALPHA + 1)

#define GEN9_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen9_blend_state), 64)

/* Encode a (source factor, destination factor) pair:
 *   bit 15     - set when the destination factor is not ZERO
 *   bits 4..   - (s * GEN9_BLENDFACTOR_COUNT + d)
 * Both arguments are fully parenthesised so that arbitrary expressions
 * (e.g. a conditional) can be passed safely.
 */
#define BLEND_OFFSET(s, d) \
	(((d) != BLENDFACTOR_ZERO) << 15 | ((s) * GEN9_BLENDFACTOR_COUNT + (d)) << 4)

#define NO_BLEND BLEND_OFFSET(BLENDFACTOR_ONE, BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(BLENDFACTOR_ZERO, BLENDFACTOR_ZERO)
237fe8aea9eSmrg
/* Linear index of a (sf, se, mf, me) sampler combination. The leading 2
 * apparently skips the two fixed slots COPY_SAMPLER and FILL_SAMPLER.
 */
#define SAMPLER_OFFSET(sf, se, mf, me) \
	(2 + ((me) + EXTEND_COUNT * ((mf) + FILTER_COUNT * ((se) + EXTEND_COUNT * (sf)))))

/* The only vertex layout named here. */
#define VERTEX_2s2s 0

/* Flags for copies: GXcopy selects NO_BLEND, any other alu selects CLEAR. */
#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) \
	GEN9_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, COPY_VERTEX)

/* Flags for fills: blend chosen by gen9_get_blend() without component
 * alpha, or no blending at all for the _NOBLEND variant.
 */
#define FILL_SAMPLER 1
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) \
	GEN9_SET_FLAGS(FILL_SAMPLER, gen9_get_blend((op), false, (format)), FILL_VERTEX)
#define FILL_FLAGS_NOBLEND \
	GEN9_SET_FLAGS(FILL_SAMPLER, NO_BLEND, FILL_VERTEX)
251fe8aea9eSmrg
/* Packed state flags:
 *   bits 20..31  sampler index
 *   bit  15      blend reads the destination
 *   bits  4..14  blend index
 *   bits  0..3   vertex layout
 * The blend value handed to GEN9_SET_FLAGS is already shifted into place.
 */
#define GEN9_SET_FLAGS(S, B, V)  ((S) << 20 | (B) | (V))
#define GEN9_SAMPLER(flags) (((flags) >> 20) & 0xfff)
#define GEN9_READS_DST(flags) (((flags) >> 15) & 1)
#define GEN9_BLEND(flags) (((flags) >> 4) & 0x7ff)
#define GEN9_VERTEX(flags) ((flags) & 0xf)
257fe8aea9eSmrg
/* Emission shorthands. They all expand to a call that passes a variable
 * named `sna`, so one must be in scope wherever they are used.
 */
#define OUT_BATCH(dword) batch_emit(sna, dword)
#define OUT_BATCH64(qword) batch_emit64(sna, qword)
#define OUT_VERTEX(vx, vy) vertex_emit_2s(sna, vx, vy)
#define OUT_VERTEX_F(value) vertex_emit(sna, value)
262fe8aea9eSmrg
/* Per-device parameters used when programming the pipeline. */
struct gt_info {
	const char *name;	/* human readable device name */
	struct {
		int max_vs_entries;	/* entry count written by gen9_emit_urb() */
	} urb;
};
269fe8aea9eSmrg
270fe8aea9eSmrgstatic const struct gt_info min_gt_info = {
271fe8aea9eSmrg	.name = "Skylake (gen9)",
272fe8aea9eSmrg	.urb = { .max_vs_entries = 240 },
273fe8aea9eSmrg};
274fe8aea9eSmrg
275fe8aea9eSmrgstatic const struct gt_info skl_gt_info = {
276fe8aea9eSmrg	.name = "Skylake (gen9)",
277fe8aea9eSmrg	.urb = { .max_vs_entries = 960 },
278fe8aea9eSmrg};
279fe8aea9eSmrg
280fe8aea9eSmrgstatic const struct gt_info bxt_gt_info = {
281fe8aea9eSmrg	.name = "Broxton (gen9)",
282fe8aea9eSmrg	.urb = { .max_vs_entries = 320 },
283fe8aea9eSmrg};
284fe8aea9eSmrg
285fe8aea9eSmrgstatic const struct gt_info kbl_gt_info = {
286fe8aea9eSmrg	.name = "Kabylake (gen9)",
287fe8aea9eSmrg	.urb = { .max_vs_entries = 960 },
288fe8aea9eSmrg};
289fe8aea9eSmrg
290fe8aea9eSmrgstatic const struct gt_info glk_gt_info = {
291fe8aea9eSmrg	.name = "Geminilake (gen9)",
292fe8aea9eSmrg	.urb = { .max_vs_entries = 320 },
293fe8aea9eSmrg};
294fe8aea9eSmrg
295fe8aea9eSmrgstatic const struct gt_info cfl_gt_info = {
296fe8aea9eSmrg	.name = "Coffeelake (gen9)",
297fe8aea9eSmrg	.urb = { .max_vs_entries = 960 },
298fe8aea9eSmrg};
299fe8aea9eSmrg
300fe8aea9eSmrgstatic bool is_skl(struct sna *sna)
301fe8aea9eSmrg{
302fe8aea9eSmrg	return sna->kgem.gen == 0110;
303fe8aea9eSmrg}
304fe8aea9eSmrg
305fe8aea9eSmrgstatic bool is_bxt(struct sna *sna)
306fe8aea9eSmrg{
307fe8aea9eSmrg	return sna->kgem.gen == 0111;
308fe8aea9eSmrg}
309fe8aea9eSmrg
310fe8aea9eSmrgstatic bool is_kbl(struct sna *sna)
311fe8aea9eSmrg{
312fe8aea9eSmrg	return sna->kgem.gen == 0112;
313fe8aea9eSmrg}
314fe8aea9eSmrg
315fe8aea9eSmrgstatic bool is_glk(struct sna *sna)
316fe8aea9eSmrg{
317fe8aea9eSmrg	return sna->kgem.gen == 0113;
318fe8aea9eSmrg}
319fe8aea9eSmrg
320fe8aea9eSmrgstatic bool is_cfl(struct sna *sna)
321fe8aea9eSmrg{
322fe8aea9eSmrg	return sna->kgem.gen == 0114;
323fe8aea9eSmrg}
324fe8aea9eSmrg
325fe8aea9eSmrgstatic inline bool too_large(int width, int height)
326fe8aea9eSmrg{
327fe8aea9eSmrg	return width > GEN9_MAX_SIZE || height > GEN9_MAX_SIZE;
328fe8aea9eSmrg}
329fe8aea9eSmrg
/* Alignment check for a buffer object. The real check is compiled out, so
 * as written this always reports "aligned" (false).
 */
static inline bool unaligned(struct kgem_bo *bo, int bpp)
{
	/* XXX What exactly do we need to meet H_ALIGN and V_ALIGN? */
#if 0
	int x, y;

	if (bo->proxy == NULL)
		return false;

	/* Assume that all tiled proxies are constructed correctly. */
	if (bo->tiling)
		return false;

	DBG(("%s: checking alignment of a linear proxy, offset=%d, pitch=%d, bpp=%d: => (%d, %d)\n",
	     __FUNCTION__, bo->delta, bo->pitch, bpp,
	     8 * (bo->delta % bo->pitch) / bpp, bo->delta / bo->pitch));

	/* This may be a random userptr map, check that it meets the
	 * render alignment of SURFACE_VALIGN_4 | SURFACE_HALIGN_4.
	 */
	y = bo->delta / bo->pitch;
	if (y & 3)
		return true;

	x = 8 * (bo->delta - y * bo->pitch);
	if (x & (4*bpp - 1))
		return true;
#endif

	return false;
}
363fe8aea9eSmrg
364fe8aea9eSmrgstatic uint32_t gen9_get_blend(int op,
365fe8aea9eSmrg			       bool has_component_alpha,
366fe8aea9eSmrg			       uint32_t dst_format)
367fe8aea9eSmrg{
368fe8aea9eSmrg	uint32_t src, dst;
369fe8aea9eSmrg
370fe8aea9eSmrg	COMPILE_TIME_ASSERT(BLENDFACTOR_INV_DST_ALPHA*GEN9_BLENDFACTOR_COUNT + BLENDFACTOR_INV_DST_ALPHA <= 0x7ff);
371fe8aea9eSmrg
372fe8aea9eSmrg	src = gen9_blend_op[op].src_blend;
373fe8aea9eSmrg	dst = gen9_blend_op[op].dst_blend;
374fe8aea9eSmrg
375fe8aea9eSmrg	/* If there's no dst alpha channel, adjust the blend op so that
376fe8aea9eSmrg	 * we'll treat it always as 1.
377fe8aea9eSmrg	 */
378fe8aea9eSmrg	if (PICT_FORMAT_A(dst_format) == 0) {
379fe8aea9eSmrg		if (src == BLENDFACTOR_DST_ALPHA)
380fe8aea9eSmrg			src = BLENDFACTOR_ONE;
381fe8aea9eSmrg		else if (src == BLENDFACTOR_INV_DST_ALPHA)
382fe8aea9eSmrg			src = BLENDFACTOR_ZERO;
383fe8aea9eSmrg	}
384fe8aea9eSmrg
385fe8aea9eSmrg	/* If the source alpha is being used, then we should only be in a
386fe8aea9eSmrg	 * case where the source blend factor is 0, and the source blend
387fe8aea9eSmrg	 * value is the mask channels multiplied by the source picture's alpha.
388fe8aea9eSmrg	 */
389fe8aea9eSmrg	if (has_component_alpha && gen9_blend_op[op].src_alpha) {
390fe8aea9eSmrg		if (dst == BLENDFACTOR_SRC_ALPHA)
391fe8aea9eSmrg			dst = BLENDFACTOR_SRC_COLOR;
392fe8aea9eSmrg		else if (dst == BLENDFACTOR_INV_SRC_ALPHA)
393fe8aea9eSmrg			dst = BLENDFACTOR_INV_SRC_COLOR;
394fe8aea9eSmrg	}
395fe8aea9eSmrg
396fe8aea9eSmrg	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
397fe8aea9eSmrg	     op, dst_format, PICT_FORMAT_A(dst_format),
398fe8aea9eSmrg	     src, dst, (int)(BLEND_OFFSET(src, dst)>>4)));
399fe8aea9eSmrg	assert(BLEND_OFFSET(src, dst) >> 4 <= 0xfff);
400fe8aea9eSmrg	return BLEND_OFFSET(src, dst);
401fe8aea9eSmrg}
402fe8aea9eSmrg
403fe8aea9eSmrgstatic uint32_t gen9_get_card_format(PictFormat format)
404fe8aea9eSmrg{
405fe8aea9eSmrg	switch (format) {
406fe8aea9eSmrg	default:
407fe8aea9eSmrg		return -1;
408fe8aea9eSmrg	case PICT_a8r8g8b8:
409fe8aea9eSmrg		return SURFACEFORMAT_B8G8R8A8_UNORM;
410fe8aea9eSmrg	case PICT_x8r8g8b8:
411fe8aea9eSmrg		return SURFACEFORMAT_B8G8R8X8_UNORM;
412fe8aea9eSmrg	case PICT_a8b8g8r8:
413fe8aea9eSmrg		return SURFACEFORMAT_R8G8B8A8_UNORM;
414fe8aea9eSmrg	case PICT_x8b8g8r8:
415fe8aea9eSmrg		return SURFACEFORMAT_R8G8B8X8_UNORM;
416fe8aea9eSmrg#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
417fe8aea9eSmrg	case PICT_a2r10g10b10:
418fe8aea9eSmrg		return SURFACEFORMAT_B10G10R10A2_UNORM;
419fe8aea9eSmrg	case PICT_x2r10g10b10:
420fe8aea9eSmrg		return SURFACEFORMAT_B10G10R10X2_UNORM;
421fe8aea9eSmrg#endif
422fe8aea9eSmrg	case PICT_r8g8b8:
423fe8aea9eSmrg		return SURFACEFORMAT_R8G8B8_UNORM;
424fe8aea9eSmrg	case PICT_r5g6b5:
425fe8aea9eSmrg		return SURFACEFORMAT_B5G6R5_UNORM;
426fe8aea9eSmrg	case PICT_a1r5g5b5:
427fe8aea9eSmrg		return SURFACEFORMAT_B5G5R5A1_UNORM;
428fe8aea9eSmrg	case PICT_a8:
429fe8aea9eSmrg		return SURFACEFORMAT_A8_UNORM;
430fe8aea9eSmrg	case PICT_a4r4g4b4:
431fe8aea9eSmrg		return SURFACEFORMAT_B4G4R4A4_UNORM;
432fe8aea9eSmrg	}
433fe8aea9eSmrg}
434fe8aea9eSmrg
435fe8aea9eSmrgstatic uint32_t gen9_get_dest_format(PictFormat format)
436fe8aea9eSmrg{
437fe8aea9eSmrg	switch (format) {
438fe8aea9eSmrg	default:
439fe8aea9eSmrg		return -1;
440fe8aea9eSmrg	case PICT_a8r8g8b8:
441fe8aea9eSmrg	case PICT_x8r8g8b8:
442fe8aea9eSmrg		return SURFACEFORMAT_B8G8R8A8_UNORM;
443fe8aea9eSmrg	case PICT_a8b8g8r8:
444fe8aea9eSmrg	case PICT_x8b8g8r8:
445fe8aea9eSmrg		return SURFACEFORMAT_R8G8B8A8_UNORM;
446fe8aea9eSmrg#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
447fe8aea9eSmrg	case PICT_a2r10g10b10:
448fe8aea9eSmrg	case PICT_x2r10g10b10:
449fe8aea9eSmrg		return SURFACEFORMAT_B10G10R10A2_UNORM;
450fe8aea9eSmrg#endif
451fe8aea9eSmrg	case PICT_r5g6b5:
452fe8aea9eSmrg		return SURFACEFORMAT_B5G6R5_UNORM;
453fe8aea9eSmrg	case PICT_x1r5g5b5:
454fe8aea9eSmrg	case PICT_a1r5g5b5:
455fe8aea9eSmrg		return SURFACEFORMAT_B5G5R5A1_UNORM;
456fe8aea9eSmrg	case PICT_a8:
457fe8aea9eSmrg		return SURFACEFORMAT_A8_UNORM;
458fe8aea9eSmrg	case PICT_a4r4g4b4:
459fe8aea9eSmrg	case PICT_x4r4g4b4:
460fe8aea9eSmrg		return SURFACEFORMAT_B4G4R4A4_UNORM;
461fe8aea9eSmrg	}
462fe8aea9eSmrg}
463fe8aea9eSmrg
464fe8aea9eSmrgstatic bool gen9_check_dst_format(PictFormat format)
465fe8aea9eSmrg{
466fe8aea9eSmrg	if (gen9_get_dest_format(format) != -1)
467fe8aea9eSmrg		return true;
468fe8aea9eSmrg
469fe8aea9eSmrg	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
470fe8aea9eSmrg	return false;
471fe8aea9eSmrg}
472fe8aea9eSmrg
/* True when gen9_get_card_format() has a mapping for the format; logs the
 * format otherwise.
 */
static bool gen9_check_format(uint32_t format)
{
	if (gen9_get_card_format(format) == -1) {
		DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
		return false;
	}

	return true;
}
481fe8aea9eSmrg
482fe8aea9eSmrgstatic uint32_t gen9_filter(uint32_t filter)
483fe8aea9eSmrg{
484fe8aea9eSmrg	switch (filter) {
485fe8aea9eSmrg	default:
486fe8aea9eSmrg		assert(0);
487fe8aea9eSmrg	case PictFilterNearest:
488fe8aea9eSmrg		return SAMPLER_FILTER_NEAREST;
489fe8aea9eSmrg	case PictFilterBilinear:
490fe8aea9eSmrg		return SAMPLER_FILTER_BILINEAR;
491fe8aea9eSmrg	}
492fe8aea9eSmrg}
493fe8aea9eSmrg
494fe8aea9eSmrgstatic uint32_t gen9_check_filter(PicturePtr picture)
495fe8aea9eSmrg{
496fe8aea9eSmrg	switch (picture->filter) {
497fe8aea9eSmrg	case PictFilterNearest:
498fe8aea9eSmrg	case PictFilterBilinear:
499fe8aea9eSmrg		return true;
500fe8aea9eSmrg	default:
501fe8aea9eSmrg		return false;
502fe8aea9eSmrg	}
503fe8aea9eSmrg}
504fe8aea9eSmrg
505fe8aea9eSmrgstatic uint32_t gen9_repeat(uint32_t repeat)
506fe8aea9eSmrg{
507fe8aea9eSmrg	switch (repeat) {
508fe8aea9eSmrg	default:
509fe8aea9eSmrg		assert(0);
510fe8aea9eSmrg	case RepeatNone:
511fe8aea9eSmrg		return SAMPLER_EXTEND_NONE;
512fe8aea9eSmrg	case RepeatNormal:
513fe8aea9eSmrg		return SAMPLER_EXTEND_REPEAT;
514fe8aea9eSmrg	case RepeatPad:
515fe8aea9eSmrg		return SAMPLER_EXTEND_PAD;
516fe8aea9eSmrg	case RepeatReflect:
517fe8aea9eSmrg		return SAMPLER_EXTEND_REFLECT;
518fe8aea9eSmrg	}
519fe8aea9eSmrg}
520fe8aea9eSmrg
521fe8aea9eSmrgstatic bool gen9_check_repeat(PicturePtr picture)
522fe8aea9eSmrg{
523fe8aea9eSmrg	if (!picture->repeat)
524fe8aea9eSmrg		return true;
525fe8aea9eSmrg
526fe8aea9eSmrg	switch (picture->repeatType) {
527fe8aea9eSmrg	case RepeatNone:
528fe8aea9eSmrg	case RepeatNormal:
529fe8aea9eSmrg	case RepeatPad:
530fe8aea9eSmrg	case RepeatReflect:
531fe8aea9eSmrg		return true;
532fe8aea9eSmrg	default:
533fe8aea9eSmrg		return false;
534fe8aea9eSmrg	}
535fe8aea9eSmrg}
536fe8aea9eSmrg
537fe8aea9eSmrgstatic int
538fe8aea9eSmrggen9_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
539fe8aea9eSmrg{
540fe8aea9eSmrg	int base;
541fe8aea9eSmrg
542fe8aea9eSmrg	if (has_mask) {
543fe8aea9eSmrg		if (is_ca) {
544fe8aea9eSmrg			if (gen9_blend_op[op].src_alpha)
545fe8aea9eSmrg				base = GEN9_WM_KERNEL_MASKSA;
546fe8aea9eSmrg			else
547fe8aea9eSmrg				base = GEN9_WM_KERNEL_MASKCA;
548fe8aea9eSmrg		} else
549fe8aea9eSmrg			base = GEN9_WM_KERNEL_MASK;
550fe8aea9eSmrg	} else
551fe8aea9eSmrg		base = GEN9_WM_KERNEL_NOMASK;
552fe8aea9eSmrg
553fe8aea9eSmrg	return base + !is_affine;
554fe8aea9eSmrg}
555fe8aea9eSmrg
/* With SIM enabled, emit a zeroed push-constant allocation packet for
 * each of VS, HS, DS, GS and PS, in that order. Emits nothing otherwise.
 */
static void
gen9_emit_push_constants(struct sna *sna)
{
#if SIM
	const uint32_t alloc_cmds[] = {
		GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS,
		GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS,
		GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS,
		GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS,
		GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS,
	};
	unsigned int i;

	for (i = 0; i < sizeof(alloc_cmds) / sizeof(alloc_cmds[0]); i++) {
		OUT_BATCH(alloc_cmds[i] | (2 - 2));
		OUT_BATCH(0);
	}
#endif
}
576fe8aea9eSmrg
577fe8aea9eSmrgstatic void
578fe8aea9eSmrggen9_emit_urb(struct sna *sna)
579fe8aea9eSmrg{
580fe8aea9eSmrg	/* num of VS entries must be divisible by 8 if size < 9 */
581fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_URB_VS | (2 - 2));
582fe8aea9eSmrg	OUT_BATCH(sna->render_state.gen9.info->urb.max_vs_entries << URB_ENTRY_NUMBER_SHIFT |
583fe8aea9eSmrg		  (2 - 1) << URB_ENTRY_SIZE_SHIFT |
584fe8aea9eSmrg		  4 << URB_STARTING_ADDRESS_SHIFT);
585fe8aea9eSmrg
586fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_URB_HS | (2 - 2));
587fe8aea9eSmrg	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
588fe8aea9eSmrg		  4 << URB_STARTING_ADDRESS_SHIFT);
589fe8aea9eSmrg
590fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_URB_DS | (2 - 2));
591fe8aea9eSmrg	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
592fe8aea9eSmrg		  4 << URB_STARTING_ADDRESS_SHIFT);
593fe8aea9eSmrg
594fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_URB_GS | (2 - 2));
595fe8aea9eSmrg	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
596fe8aea9eSmrg		  4 << URB_STARTING_ADDRESS_SHIFT);
597fe8aea9eSmrg}
598fe8aea9eSmrg
599fe8aea9eSmrgstatic void
600fe8aea9eSmrggen9_emit_state_base_address(struct sna *sna)
601fe8aea9eSmrg{
602fe8aea9eSmrg	uint32_t num_pages;
603fe8aea9eSmrg
604fe8aea9eSmrg	assert(sna->kgem.surface - sna->kgem.nbatch <= 16384);
605fe8aea9eSmrg
606fe8aea9eSmrg	/* WaBindlessSurfaceStateModifyEnable:skl,bxt */
607fe8aea9eSmrg	OUT_BATCH(GEN9_STATE_BASE_ADDRESS | (19 - 1 - 2));
608fe8aea9eSmrg	OUT_BATCH64(0); /* general */
609fe8aea9eSmrg	OUT_BATCH(0); /* stateless dataport */
610fe8aea9eSmrg	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* surface */
611fe8aea9eSmrg				     sna->kgem.nbatch,
612fe8aea9eSmrg				     NULL,
613fe8aea9eSmrg				     I915_GEM_DOMAIN_INSTRUCTION << 16,
614fe8aea9eSmrg				     BASE_ADDRESS_MODIFY));
615fe8aea9eSmrg	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* dynamic */
616fe8aea9eSmrg				     sna->kgem.nbatch,
617fe8aea9eSmrg				     sna->render_state.gen9.general_bo,
618fe8aea9eSmrg				     I915_GEM_DOMAIN_INSTRUCTION << 16,
619fe8aea9eSmrg				     BASE_ADDRESS_MODIFY));
620fe8aea9eSmrg	OUT_BATCH64(0); /* indirect */
621fe8aea9eSmrg	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* instruction */
622fe8aea9eSmrg				     sna->kgem.nbatch,
623fe8aea9eSmrg				     sna->render_state.gen9.general_bo,
624fe8aea9eSmrg				     I915_GEM_DOMAIN_INSTRUCTION << 16,
625fe8aea9eSmrg				     BASE_ADDRESS_MODIFY));
626fe8aea9eSmrg	/* upper bounds */
627fe8aea9eSmrg	num_pages = sna->render_state.gen9.general_bo->size.pages.count;
628fe8aea9eSmrg	OUT_BATCH(0); /* general */
629fe8aea9eSmrg	OUT_BATCH(num_pages << 12 | 1); /* dynamic */
630fe8aea9eSmrg	OUT_BATCH(0); /* indirect */
631fe8aea9eSmrg	OUT_BATCH(num_pages << 12 | 1); /* instruction */
632fe8aea9eSmrg
633fe8aea9eSmrg	/* Bindless */
634fe8aea9eSmrg	OUT_BATCH(0);
635fe8aea9eSmrg	OUT_BATCH(0);
636fe8aea9eSmrg	OUT_BATCH(0);
637fe8aea9eSmrg}
638fe8aea9eSmrg
639fe8aea9eSmrgstatic void
640fe8aea9eSmrggen9_emit_vs_invariant(struct sna *sna)
641fe8aea9eSmrg{
642fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_VS | (9 - 2));
643fe8aea9eSmrg	OUT_BATCH64(0); /* no VS kernel */
644fe8aea9eSmrg	OUT_BATCH(0);
645fe8aea9eSmrg	OUT_BATCH64(0); /* scratch */
646fe8aea9eSmrg	OUT_BATCH(0);
647fe8aea9eSmrg	OUT_BATCH(1 << 1); /* pass-through */
648fe8aea9eSmrg	OUT_BATCH(1 << 16 | 1 << 21); /* urb write to SBE */
649fe8aea9eSmrg
650fe8aea9eSmrg#if SIM
651fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_CONSTANT_VS | (11 - 2));
652fe8aea9eSmrg	OUT_BATCH(0);
653fe8aea9eSmrg	OUT_BATCH(0);
654fe8aea9eSmrg	OUT_BATCH64(0);
655fe8aea9eSmrg	OUT_BATCH64(0);
656fe8aea9eSmrg	OUT_BATCH64(0);
657fe8aea9eSmrg	OUT_BATCH64(0);
658fe8aea9eSmrg
659fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
660fe8aea9eSmrg	OUT_BATCH(0);
661fe8aea9eSmrg
662fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
663fe8aea9eSmrg	OUT_BATCH(0);
664fe8aea9eSmrg#endif
665fe8aea9eSmrg}
666fe8aea9eSmrg
667fe8aea9eSmrgstatic void
668fe8aea9eSmrggen9_emit_hs_invariant(struct sna *sna)
669fe8aea9eSmrg{
670fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_HS | (9 - 2));
671fe8aea9eSmrg	OUT_BATCH(0);
672fe8aea9eSmrg	OUT_BATCH(0);
673fe8aea9eSmrg	OUT_BATCH64(0); /* no HS kernel */
674fe8aea9eSmrg	OUT_BATCH64(0); /* scratch */
675fe8aea9eSmrg	OUT_BATCH(0);
676fe8aea9eSmrg	OUT_BATCH(0); /* pass-through */
677fe8aea9eSmrg
678fe8aea9eSmrg#if SIM
679fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_CONSTANT_HS | (11 - 2));
680fe8aea9eSmrg	OUT_BATCH(0);
681fe8aea9eSmrg	OUT_BATCH(0);
682fe8aea9eSmrg	OUT_BATCH64(0);
683fe8aea9eSmrg	OUT_BATCH64(0);
684fe8aea9eSmrg	OUT_BATCH64(0);
685fe8aea9eSmrg	OUT_BATCH64(0);
686fe8aea9eSmrg
687fe8aea9eSmrg#if 1
688fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
689fe8aea9eSmrg	OUT_BATCH(0);
690fe8aea9eSmrg
691fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
692fe8aea9eSmrg	OUT_BATCH(0);
693fe8aea9eSmrg#endif
694fe8aea9eSmrg#endif
695fe8aea9eSmrg}
696fe8aea9eSmrg
697fe8aea9eSmrgstatic void
698fe8aea9eSmrggen9_emit_te_invariant(struct sna *sna)
699fe8aea9eSmrg{
700fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_TE | (4 - 2));
701fe8aea9eSmrg	OUT_BATCH(0);
702fe8aea9eSmrg	OUT_BATCH(0);
703fe8aea9eSmrg	OUT_BATCH(0);
704fe8aea9eSmrg}
705fe8aea9eSmrg
706fe8aea9eSmrgstatic void
707fe8aea9eSmrggen9_emit_ds_invariant(struct sna *sna)
708fe8aea9eSmrg{
709fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_DS | (11 - 2));
710fe8aea9eSmrg	OUT_BATCH64(0); /* no kernel */
711fe8aea9eSmrg	OUT_BATCH(0);
712fe8aea9eSmrg	OUT_BATCH64(0); /* scratch */
713fe8aea9eSmrg	OUT_BATCH(0);
714fe8aea9eSmrg	OUT_BATCH(0);
715fe8aea9eSmrg	OUT_BATCH(0);
716fe8aea9eSmrg	OUT_BATCH(0);
717fe8aea9eSmrg	OUT_BATCH(0);
718fe8aea9eSmrg
719fe8aea9eSmrg#if SIM
720fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_CONSTANT_DS | (11 - 2));
721fe8aea9eSmrg	OUT_BATCH(0);
722fe8aea9eSmrg	OUT_BATCH(0);
723fe8aea9eSmrg	OUT_BATCH64(0);
724fe8aea9eSmrg	OUT_BATCH64(0);
725fe8aea9eSmrg	OUT_BATCH64(0);
726fe8aea9eSmrg	OUT_BATCH64(0);
727fe8aea9eSmrg
728fe8aea9eSmrg#if 1
729fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
730fe8aea9eSmrg	OUT_BATCH(0);
731fe8aea9eSmrg
732fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
733fe8aea9eSmrg	OUT_BATCH(0);
734fe8aea9eSmrg#endif
735fe8aea9eSmrg#endif
736fe8aea9eSmrg}
737fe8aea9eSmrg
738fe8aea9eSmrgstatic void
739fe8aea9eSmrggen9_emit_gs_invariant(struct sna *sna)
740fe8aea9eSmrg{
741fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_GS | (10 - 2));
742fe8aea9eSmrg	OUT_BATCH64(0); /* no GS kernel */
743fe8aea9eSmrg	OUT_BATCH(0);
744fe8aea9eSmrg	OUT_BATCH64(0); /* scratch */
745fe8aea9eSmrg	OUT_BATCH(0);
746fe8aea9eSmrg	OUT_BATCH(0); /* pass-through */
747fe8aea9eSmrg	OUT_BATCH(0);
748fe8aea9eSmrg	OUT_BATCH(0);
749fe8aea9eSmrg
750fe8aea9eSmrg#if SIM
751fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_CONSTANT_GS | (11 - 2));
752fe8aea9eSmrg	OUT_BATCH(0);
753fe8aea9eSmrg	OUT_BATCH(0);
754fe8aea9eSmrg	OUT_BATCH64(0);
755fe8aea9eSmrg	OUT_BATCH64(0);
756fe8aea9eSmrg	OUT_BATCH64(0);
757fe8aea9eSmrg	OUT_BATCH64(0);
758fe8aea9eSmrg
759fe8aea9eSmrg#if 1
760fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
761fe8aea9eSmrg	OUT_BATCH(0);
762fe8aea9eSmrg
763fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
764fe8aea9eSmrg	OUT_BATCH(0);
765fe8aea9eSmrg#endif
766fe8aea9eSmrg#endif
767fe8aea9eSmrg}
768fe8aea9eSmrg
769fe8aea9eSmrgstatic void
770fe8aea9eSmrggen9_emit_sol_invariant(struct sna *sna)
771fe8aea9eSmrg{
772fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_STREAMOUT | (5 - 2));
773fe8aea9eSmrg	OUT_BATCH(0);
774fe8aea9eSmrg	OUT_BATCH(0);
775fe8aea9eSmrg	OUT_BATCH(0);
776fe8aea9eSmrg	OUT_BATCH(0);
777fe8aea9eSmrg}
778fe8aea9eSmrg
779fe8aea9eSmrgstatic void
780fe8aea9eSmrggen9_emit_sf_invariant(struct sna *sna)
781fe8aea9eSmrg{
782fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_SF | (4 - 2));
783fe8aea9eSmrg	OUT_BATCH(0);
784fe8aea9eSmrg	OUT_BATCH(0);
785fe8aea9eSmrg	OUT_BATCH(0);
786fe8aea9eSmrg}
787fe8aea9eSmrg
788fe8aea9eSmrgstatic void
789fe8aea9eSmrggen9_emit_clip_invariant(struct sna *sna)
790fe8aea9eSmrg{
791fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_CLIP | (4 - 2));
792fe8aea9eSmrg	OUT_BATCH(0);
793fe8aea9eSmrg	OUT_BATCH(0); /* pass-through */
794fe8aea9eSmrg	OUT_BATCH(0);
795fe8aea9eSmrg
796fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP | (2 - 2));
797fe8aea9eSmrg	OUT_BATCH(0);
798fe8aea9eSmrg
799fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
800fe8aea9eSmrg	OUT_BATCH(0);
801fe8aea9eSmrg}
802fe8aea9eSmrg
803fe8aea9eSmrgstatic void
804fe8aea9eSmrggen9_emit_null_depth_buffer(struct sna *sna)
805fe8aea9eSmrg{
806fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_DEPTH_BUFFER | (8 - 2));
807fe8aea9eSmrg#if 1
808fe8aea9eSmrg	OUT_BATCH(SURFACE_NULL << DEPTH_BUFFER_TYPE_SHIFT |
809fe8aea9eSmrg		  DEPTHFORMAT_D32_FLOAT << DEPTH_BUFFER_FORMAT_SHIFT);
810fe8aea9eSmrg#else
811fe8aea9eSmrg	OUT_BATCH(SURFACE_2D << DEPTH_BUFFER_TYPE_SHIFT |
812fe8aea9eSmrg		  DEPTHFORMAT_D16_UNORM << DEPTH_BUFFER_FORMAT_SHIFT);
813fe8aea9eSmrg#endif
814fe8aea9eSmrg	OUT_BATCH64(0);
815fe8aea9eSmrg	OUT_BATCH(0);
816fe8aea9eSmrg	OUT_BATCH(0);
817fe8aea9eSmrg	OUT_BATCH(0);
818fe8aea9eSmrg	OUT_BATCH(0);
819fe8aea9eSmrg
820fe8aea9eSmrg#if SIM
821fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
822fe8aea9eSmrg	OUT_BATCH(0);
823fe8aea9eSmrg	OUT_BATCH64(0);
824fe8aea9eSmrg	OUT_BATCH(0);
825fe8aea9eSmrg#endif
826fe8aea9eSmrg
827fe8aea9eSmrg#if SIM
828fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_STENCIL_BUFFER | (5 - 2));
829fe8aea9eSmrg	OUT_BATCH(0);
830fe8aea9eSmrg	OUT_BATCH64(0);
831fe8aea9eSmrg	OUT_BATCH(0);
832fe8aea9eSmrg#endif
833fe8aea9eSmrg
834fe8aea9eSmrg#if SIM
835fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_WM_DEPTH_STENCIL | (4 - 2));
836fe8aea9eSmrg	OUT_BATCH(0);
837fe8aea9eSmrg	OUT_BATCH(0);
838fe8aea9eSmrg	OUT_BATCH(0);
839fe8aea9eSmrg#endif
840fe8aea9eSmrg
841fe8aea9eSmrg#if SIM
842fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_CLEAR_PARAMS | (3 - 2));
843fe8aea9eSmrg	OUT_BATCH(0);
844fe8aea9eSmrg	OUT_BATCH(0);
845fe8aea9eSmrg#endif
846fe8aea9eSmrg}
847fe8aea9eSmrg
848fe8aea9eSmrgstatic void
849fe8aea9eSmrggen9_emit_wm_invariant(struct sna *sna)
850fe8aea9eSmrg{
851fe8aea9eSmrg	gen9_emit_null_depth_buffer(sna);
852fe8aea9eSmrg
853fe8aea9eSmrg#if SIM
854fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_SCISSOR_STATE_POINTERS | (2 - 2));
855fe8aea9eSmrg	OUT_BATCH(0);
856fe8aea9eSmrg#endif
857fe8aea9eSmrg
858fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_WM | (2 - 2));
859fe8aea9eSmrg	//OUT_BATCH(WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC); /* XXX */
860fe8aea9eSmrg	OUT_BATCH(WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
861fe8aea9eSmrg
862fe8aea9eSmrg#if SIM
863fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_WM_CHROMAKEY | (2 - 2));
864fe8aea9eSmrg	OUT_BATCH(0);
865fe8aea9eSmrg#endif
866fe8aea9eSmrg
867fe8aea9eSmrg#if 0
868fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_WM_HZ_OP | (5 - 2));
869fe8aea9eSmrg	OUT_BATCH(0);
870fe8aea9eSmrg	OUT_BATCH(0);
871fe8aea9eSmrg	OUT_BATCH(0);
872fe8aea9eSmrg	OUT_BATCH(0);
873fe8aea9eSmrg#endif
874fe8aea9eSmrg
875fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_PS_EXTRA | (2 - 2));
876fe8aea9eSmrg	OUT_BATCH(PSX_PIXEL_SHADER_VALID |
877fe8aea9eSmrg		  PSX_ATTRIBUTE_ENABLE);
878fe8aea9eSmrg
879fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_RASTER | (5 - 2));
880fe8aea9eSmrg	OUT_BATCH(RASTER_FRONT_WINDING_CCW |
881fe8aea9eSmrg		  RASTER_CULL_NONE);
882fe8aea9eSmrg	OUT_BATCH(0);
883fe8aea9eSmrg	OUT_BATCH(0);
884fe8aea9eSmrg	OUT_BATCH(0);
885fe8aea9eSmrg
886fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_SBE_SWIZ | (11 - 2));
887fe8aea9eSmrg	OUT_BATCH(0);
888fe8aea9eSmrg	OUT_BATCH(0);
889fe8aea9eSmrg	OUT_BATCH(0);
890fe8aea9eSmrg	OUT_BATCH(0);
891fe8aea9eSmrg	OUT_BATCH(0);
892fe8aea9eSmrg	OUT_BATCH(0);
893fe8aea9eSmrg	OUT_BATCH(0);
894fe8aea9eSmrg	OUT_BATCH(0);
895fe8aea9eSmrg	OUT_BATCH(0);
896fe8aea9eSmrg	OUT_BATCH(0);
897fe8aea9eSmrg
898fe8aea9eSmrg#if SIM
899fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_CONSTANT_PS | (11 - 2));
900fe8aea9eSmrg	OUT_BATCH(0);
901fe8aea9eSmrg	OUT_BATCH(0);
902fe8aea9eSmrg	OUT_BATCH64(0);
903fe8aea9eSmrg	OUT_BATCH64(0);
904fe8aea9eSmrg	OUT_BATCH64(0);
905fe8aea9eSmrg	OUT_BATCH64(0);
906fe8aea9eSmrg#endif
907fe8aea9eSmrg}
908fe8aea9eSmrg
/*
 * Placeholder: no fixed colour-calculator state is emitted here.  The
 * function is kept so gen9_emit_invariant() has a hook for every stage.
 */
static void
gen9_emit_cc_invariant(struct sna *sna)
{
	(void)sna;
}
913fe8aea9eSmrg
914fe8aea9eSmrgstatic void
915fe8aea9eSmrggen9_emit_vf_invariant(struct sna *sna)
916fe8aea9eSmrg{
917fe8aea9eSmrg	int n;
918fe8aea9eSmrg
919fe8aea9eSmrg#if 1
920fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_VF | (2 - 2));
921fe8aea9eSmrg	OUT_BATCH(0);
922fe8aea9eSmrg#endif
923fe8aea9eSmrg
924fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_VF_SGVS | (2 - 2));
925fe8aea9eSmrg	OUT_BATCH(0);
926fe8aea9eSmrg
927fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_VF_TOPOLOGY | (2 - 2));
928fe8aea9eSmrg	OUT_BATCH(RECTLIST);
929fe8aea9eSmrg
930fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_VF_STATISTICS | 0);
931fe8aea9eSmrg
932fe8aea9eSmrg	for (n = 1; n <= 3; n++) {
933fe8aea9eSmrg		OUT_BATCH(GEN9_3DSTATE_VF_INSTANCING | (3 - 2));
934fe8aea9eSmrg		OUT_BATCH(n);
935fe8aea9eSmrg		OUT_BATCH(0);
936fe8aea9eSmrg	}
937fe8aea9eSmrg}
938fe8aea9eSmrg
939fe8aea9eSmrgstatic void
940fe8aea9eSmrggen9_emit_invariant(struct sna *sna)
941fe8aea9eSmrg{
942fe8aea9eSmrg	OUT_BATCH(GEN9_PIPELINE_SELECT |
943fe8aea9eSmrg		  PIPELINE_SELECTION_MASK |
944fe8aea9eSmrg		  PIPELINE_SELECT_3D);
945fe8aea9eSmrg
946fe8aea9eSmrg#if SIM
947fe8aea9eSmrg	OUT_BATCH(GEN9_STATE_SIP | (3 - 2));
948fe8aea9eSmrg	OUT_BATCH64(0);
949fe8aea9eSmrg#endif
950fe8aea9eSmrg
951fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_MULTISAMPLE | (2 - 2));
952fe8aea9eSmrg	OUT_BATCH(MULTISAMPLE_PIXEL_LOCATION_CENTER |
953fe8aea9eSmrg		  MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
954fe8aea9eSmrg
955fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_SAMPLE_MASK | (2 - 2));
956fe8aea9eSmrg	OUT_BATCH(1);
957fe8aea9eSmrg
958fe8aea9eSmrg#if SIM
959fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_SAMPLE_PATTERN | (5 - 2));
960fe8aea9eSmrg	OUT_BATCH(0);
961fe8aea9eSmrg	OUT_BATCH(0);
962fe8aea9eSmrg	OUT_BATCH(0);
963fe8aea9eSmrg	//OUT_BATCH(8<<20 | 8<<16);
964fe8aea9eSmrg	OUT_BATCH(0);
965fe8aea9eSmrg#endif
966fe8aea9eSmrg
967fe8aea9eSmrg	gen9_emit_push_constants(sna);
968fe8aea9eSmrg	gen9_emit_urb(sna);
969fe8aea9eSmrg
970fe8aea9eSmrg	gen9_emit_state_base_address(sna);
971fe8aea9eSmrg
972fe8aea9eSmrg	gen9_emit_vf_invariant(sna);
973fe8aea9eSmrg	gen9_emit_vs_invariant(sna);
974fe8aea9eSmrg	gen9_emit_hs_invariant(sna);
975fe8aea9eSmrg	gen9_emit_te_invariant(sna);
976fe8aea9eSmrg	gen9_emit_ds_invariant(sna);
977fe8aea9eSmrg	gen9_emit_gs_invariant(sna);
978fe8aea9eSmrg	gen9_emit_sol_invariant(sna);
979fe8aea9eSmrg	gen9_emit_clip_invariant(sna);
980fe8aea9eSmrg	gen9_emit_sf_invariant(sna);
981fe8aea9eSmrg	gen9_emit_wm_invariant(sna);
982fe8aea9eSmrg	gen9_emit_cc_invariant(sna);
983fe8aea9eSmrg
984fe8aea9eSmrg	sna->render_state.gen9.needs_invariant = false;
985fe8aea9eSmrg}
986fe8aea9eSmrg
987fe8aea9eSmrgstatic void
988fe8aea9eSmrggen9_emit_cc(struct sna *sna, uint32_t blend)
989fe8aea9eSmrg{
990fe8aea9eSmrg	struct gen9_render_state *render = &sna->render_state.gen9;
991fe8aea9eSmrg
992fe8aea9eSmrg	if (render->blend == blend)
993fe8aea9eSmrg		return;
994fe8aea9eSmrg
995fe8aea9eSmrg	DBG(("%s: blend=%x (current=%x), src=%d, dst=%d\n",
996fe8aea9eSmrg	     __FUNCTION__, blend, render->blend,
997fe8aea9eSmrg	     blend / GEN9_BLENDFACTOR_COUNT,
998fe8aea9eSmrg	     blend % GEN9_BLENDFACTOR_COUNT));
999fe8aea9eSmrg
1000fe8aea9eSmrg	assert(blend < GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT);
1001fe8aea9eSmrg	assert(blend / GEN9_BLENDFACTOR_COUNT > 0);
1002fe8aea9eSmrg	assert(blend % GEN9_BLENDFACTOR_COUNT > 0);
1003fe8aea9eSmrg
1004fe8aea9eSmrg	/* XXX can have up to 8 blend states preload, selectable via
1005fe8aea9eSmrg	 * Render Target Index. What other side-effects of Render Target Index?
1006fe8aea9eSmrg	 */
1007fe8aea9eSmrg
1008fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_PS_BLEND | (2 - 2));
1009fe8aea9eSmrg	if (blend != GEN9_BLEND(NO_BLEND)) {
1010fe8aea9eSmrg		uint32_t src = blend / GEN9_BLENDFACTOR_COUNT;
1011fe8aea9eSmrg		uint32_t dst = blend % GEN9_BLENDFACTOR_COUNT;
1012fe8aea9eSmrg		OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT |
1013fe8aea9eSmrg			  PS_BLEND_COLOR_BLEND_ENABLE |
1014fe8aea9eSmrg			  src << PS_BLEND_SRC_ALPHA_SHIFT |
1015fe8aea9eSmrg			  dst << PS_BLEND_DST_ALPHA_SHIFT |
1016fe8aea9eSmrg			  src << PS_BLEND_SRC_SHIFT |
1017fe8aea9eSmrg			  dst << PS_BLEND_DST_SHIFT);
1018fe8aea9eSmrg	} else
1019fe8aea9eSmrg		OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT);
1020fe8aea9eSmrg
1021fe8aea9eSmrg	assert(is_aligned(render->cc_blend + blend * GEN9_BLEND_STATE_PADDED_SIZE, 64));
1022fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
1023fe8aea9eSmrg	OUT_BATCH((render->cc_blend + blend * GEN9_BLEND_STATE_PADDED_SIZE) | 1);
1024fe8aea9eSmrg
1025fe8aea9eSmrg	/* Force a CC_STATE pointer change to improve blend performance */
1026fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_CC_STATE_POINTERS | (2 - 2));
1027fe8aea9eSmrg	OUT_BATCH(0);
1028fe8aea9eSmrg
1029fe8aea9eSmrg	render->blend = blend;
1030fe8aea9eSmrg}
1031fe8aea9eSmrg
1032fe8aea9eSmrgstatic void
1033fe8aea9eSmrggen9_emit_sampler(struct sna *sna, uint32_t state)
1034fe8aea9eSmrg{
1035fe8aea9eSmrg	if (sna->render_state.gen9.samplers == state)
1036fe8aea9eSmrg		return;
1037fe8aea9eSmrg
1038fe8aea9eSmrg	sna->render_state.gen9.samplers = state;
1039fe8aea9eSmrg
1040fe8aea9eSmrg	DBG(("%s: sampler = %x\n", __FUNCTION__, state));
1041fe8aea9eSmrg
1042fe8aea9eSmrg	assert(2 * sizeof(struct gen9_sampler_state) == 32);
1043fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
1044fe8aea9eSmrg	OUT_BATCH(sna->render_state.gen9.wm_state + state * 2 * sizeof(struct gen9_sampler_state));
1045fe8aea9eSmrg}
1046fe8aea9eSmrg
1047fe8aea9eSmrgstatic void
1048fe8aea9eSmrggen9_emit_sf(struct sna *sna, bool has_mask)
1049fe8aea9eSmrg{
1050fe8aea9eSmrg	int num_sf_outputs = has_mask ? 2 : 1;
1051fe8aea9eSmrg
1052fe8aea9eSmrg	if (sna->render_state.gen9.num_sf_outputs == num_sf_outputs)
1053fe8aea9eSmrg		return;
1054fe8aea9eSmrg
1055fe8aea9eSmrg	DBG(("%s: num_sf_outputs=%d\n", __FUNCTION__, num_sf_outputs));
1056fe8aea9eSmrg
1057fe8aea9eSmrg	sna->render_state.gen9.num_sf_outputs = num_sf_outputs;
1058fe8aea9eSmrg
1059fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_SBE | (6 - 2));
1060fe8aea9eSmrg	OUT_BATCH(num_sf_outputs << SBE_NUM_OUTPUTS_SHIFT |
1061fe8aea9eSmrg		  SBE_FORCE_VERTEX_URB_READ_LENGTH | /* forced is faster */
1062fe8aea9eSmrg		  SBE_FORCE_VERTEX_URB_READ_OFFSET |
1063fe8aea9eSmrg		  1 << SBE_URB_ENTRY_READ_LENGTH_SHIFT |
1064fe8aea9eSmrg		  1 << SBE_URB_ENTRY_READ_OFFSET_SHIFT);
1065fe8aea9eSmrg	OUT_BATCH(0);
1066fe8aea9eSmrg	OUT_BATCH(0);
1067fe8aea9eSmrg        OUT_BATCH(SBE_ACTIVE_COMPONENT_XYZW << 0 |
1068fe8aea9eSmrg		  SBE_ACTIVE_COMPONENT_XYZW << 1);
1069fe8aea9eSmrg        OUT_BATCH(0);
1070fe8aea9eSmrg}
1071fe8aea9eSmrg
1072fe8aea9eSmrgstatic void
1073fe8aea9eSmrggen9_emit_wm(struct sna *sna, int kernel)
1074fe8aea9eSmrg{
1075fe8aea9eSmrg	const uint32_t *kernels;
1076fe8aea9eSmrg
1077fe8aea9eSmrg	assert(kernel < ARRAY_SIZE(wm_kernels));
1078fe8aea9eSmrg	if (sna->render_state.gen9.kernel == kernel)
1079fe8aea9eSmrg		return;
1080fe8aea9eSmrg
1081fe8aea9eSmrg	sna->render_state.gen9.kernel = kernel;
1082fe8aea9eSmrg	kernels = sna->render_state.gen9.wm_kernel[kernel];
1083fe8aea9eSmrg
1084fe8aea9eSmrg	DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
1085fe8aea9eSmrg	     __FUNCTION__,
1086fe8aea9eSmrg	     wm_kernels[kernel].name,
1087fe8aea9eSmrg	     wm_kernels[kernel].num_surfaces,
1088fe8aea9eSmrg	     kernels[0], kernels[1], kernels[2]));
1089fe8aea9eSmrg	assert(is_aligned(kernels[0], 64));
1090fe8aea9eSmrg	assert(is_aligned(kernels[1], 64));
1091fe8aea9eSmrg	assert(is_aligned(kernels[2], 64));
1092fe8aea9eSmrg
1093fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_PS | (12 - 2));
1094fe8aea9eSmrg	OUT_BATCH64(kernels[0] ?: kernels[1] ?: kernels[2]);
1095fe8aea9eSmrg	OUT_BATCH(1 << PS_SAMPLER_COUNT_SHIFT |
1096fe8aea9eSmrg		  PS_VECTOR_MASK_ENABLE |
1097fe8aea9eSmrg		  wm_kernels[kernel].num_surfaces << PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
1098fe8aea9eSmrg	OUT_BATCH64(0); /* scratch address */
1099fe8aea9eSmrg	OUT_BATCH(PS_MAX_THREADS |
1100fe8aea9eSmrg		  (kernels[0] ? PS_8_DISPATCH_ENABLE : 0) |
1101fe8aea9eSmrg		  (kernels[1] ? PS_16_DISPATCH_ENABLE : 0) |
1102fe8aea9eSmrg		  (kernels[2] ? PS_32_DISPATCH_ENABLE : 0));
1103fe8aea9eSmrg	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << PS_DISPATCH_START_GRF_SHIFT_0 |
1104fe8aea9eSmrg		  8 << PS_DISPATCH_START_GRF_SHIFT_1 |
1105fe8aea9eSmrg		  6 << PS_DISPATCH_START_GRF_SHIFT_2);
1106fe8aea9eSmrg	OUT_BATCH64(kernels[2]);
1107fe8aea9eSmrg	OUT_BATCH64(kernels[1]);
1108fe8aea9eSmrg}
1109fe8aea9eSmrg
1110fe8aea9eSmrgstatic bool
1111fe8aea9eSmrggen9_emit_binding_table(struct sna *sna, uint16_t offset)
1112fe8aea9eSmrg{
1113fe8aea9eSmrg	if (sna->render_state.gen9.surface_table == offset)
1114fe8aea9eSmrg		return false;
1115fe8aea9eSmrg
1116fe8aea9eSmrg	/* Binding table pointers */
1117fe8aea9eSmrg	assert(is_aligned(4*offset, 32));
1118fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
1119fe8aea9eSmrg	OUT_BATCH(offset*4);
1120fe8aea9eSmrg
1121fe8aea9eSmrg	sna->render_state.gen9.surface_table = offset;
1122fe8aea9eSmrg	return true;
1123fe8aea9eSmrg}
1124fe8aea9eSmrg
1125fe8aea9eSmrgstatic bool
1126fe8aea9eSmrggen9_emit_drawing_rectangle(struct sna *sna,
1127fe8aea9eSmrg			    const struct sna_composite_op *op)
1128fe8aea9eSmrg{
1129fe8aea9eSmrg	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
1130fe8aea9eSmrg	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
1131fe8aea9eSmrg
1132fe8aea9eSmrg	assert(!too_large(abs(op->dst.x), abs(op->dst.y)));
1133fe8aea9eSmrg	assert(!too_large(op->dst.width, op->dst.height));
1134fe8aea9eSmrg
1135fe8aea9eSmrg	if (sna->render_state.gen9.drawrect_limit == limit &&
1136fe8aea9eSmrg	    sna->render_state.gen9.drawrect_offset == offset)
1137fe8aea9eSmrg		return true;
1138fe8aea9eSmrg
1139fe8aea9eSmrg	sna->render_state.gen9.drawrect_offset = offset;
1140fe8aea9eSmrg	sna->render_state.gen9.drawrect_limit = limit;
1141fe8aea9eSmrg
1142fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
1143fe8aea9eSmrg	OUT_BATCH(0);
1144fe8aea9eSmrg	OUT_BATCH(limit);
1145fe8aea9eSmrg	OUT_BATCH(offset);
1146fe8aea9eSmrg	return false;
1147fe8aea9eSmrg}
1148fe8aea9eSmrg
1149fe8aea9eSmrgstatic void
1150fe8aea9eSmrggen9_emit_vertex_elements(struct sna *sna,
1151fe8aea9eSmrg			  const struct sna_composite_op *op)
1152fe8aea9eSmrg{
1153fe8aea9eSmrg	/*
1154fe8aea9eSmrg	 * vertex data in vertex buffer
1155fe8aea9eSmrg	 *    position: (x, y)
1156fe8aea9eSmrg	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
1157fe8aea9eSmrg	 *    texture coordinate 1 if (has_mask is true): same as above
1158fe8aea9eSmrg	 */
1159fe8aea9eSmrg	struct gen9_render_state *render = &sna->render_state.gen9;
1160fe8aea9eSmrg	uint32_t src_format, dw;
1161fe8aea9eSmrg	int id = GEN9_VERTEX(op->u.gen9.flags);
1162fe8aea9eSmrg	bool has_mask;
1163fe8aea9eSmrg
1164fe8aea9eSmrg	DBG(("%s: setup id=%d\n", __FUNCTION__, id));
1165fe8aea9eSmrg
1166fe8aea9eSmrg	if (render->ve_id == id)
1167fe8aea9eSmrg		return;
1168fe8aea9eSmrg	render->ve_id = id;
1169fe8aea9eSmrg
1170fe8aea9eSmrg	if (render->ve_dirty) {
1171fe8aea9eSmrg		/* dummy primitive to flush vertex before change? */
1172fe8aea9eSmrg		OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2));
1173fe8aea9eSmrg		OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */
1174fe8aea9eSmrg		OUT_BATCH(0);
1175fe8aea9eSmrg		OUT_BATCH(0);
1176fe8aea9eSmrg		OUT_BATCH(1);	/* single instance */
1177fe8aea9eSmrg		OUT_BATCH(0);	/* start instance location */
1178fe8aea9eSmrg		OUT_BATCH(0);	/* index buffer offset, ignored */
1179fe8aea9eSmrg	}
1180fe8aea9eSmrg
1181fe8aea9eSmrg	/* The VUE layout
1182fe8aea9eSmrg	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
1183fe8aea9eSmrg	 *    dword 4-7: position (x, y, 1.0, 1.0),
1184fe8aea9eSmrg	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
1185fe8aea9eSmrg	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
1186fe8aea9eSmrg	 *
1187fe8aea9eSmrg	 * dword 4-15 are fetched from vertex buffer
1188fe8aea9eSmrg	 */
1189fe8aea9eSmrg	has_mask = (id >> 2) != 0;
1190fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_VERTEX_ELEMENTS |
1191fe8aea9eSmrg		((2 * (3 + has_mask)) + 1 - 2));
1192fe8aea9eSmrg
1193fe8aea9eSmrg	OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
1194fe8aea9eSmrg		  SURFACEFORMAT_R32G32B32A32_FLOAT << VE_FORMAT_SHIFT |
1195fe8aea9eSmrg		  0 << VE_OFFSET_SHIFT);
1196fe8aea9eSmrg	OUT_BATCH(COMPONENT_STORE_0 << VE_COMPONENT_0_SHIFT |
1197fe8aea9eSmrg		  COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT |
1198fe8aea9eSmrg		  COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT |
1199fe8aea9eSmrg		  COMPONENT_STORE_0 << VE_COMPONENT_3_SHIFT);
1200fe8aea9eSmrg
1201fe8aea9eSmrg	/* x,y */
1202fe8aea9eSmrg	OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
1203fe8aea9eSmrg		  SURFACEFORMAT_R16G16_SSCALED << VE_FORMAT_SHIFT |
1204fe8aea9eSmrg		  0 << VE_OFFSET_SHIFT);
1205fe8aea9eSmrg	OUT_BATCH(COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT |
1206fe8aea9eSmrg		  COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT |
1207fe8aea9eSmrg		  COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT |
1208fe8aea9eSmrg		  COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT);
1209fe8aea9eSmrg
1210fe8aea9eSmrg	/* u0, v0, w0 */
1211fe8aea9eSmrg	DBG(("%s: first channel %d floats, offset=4\n", __FUNCTION__, id & 3));
1212fe8aea9eSmrg	dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT;
1213fe8aea9eSmrg	switch (id & 3) {
1214fe8aea9eSmrg	default:
1215fe8aea9eSmrg		assert(0);
1216fe8aea9eSmrg	case 0:
1217fe8aea9eSmrg		src_format = SURFACEFORMAT_R16G16_SSCALED;
1218fe8aea9eSmrg		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
1219fe8aea9eSmrg		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
1220fe8aea9eSmrg		dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
1221fe8aea9eSmrg		break;
1222fe8aea9eSmrg	case 1:
1223fe8aea9eSmrg		src_format = SURFACEFORMAT_R32_FLOAT;
1224fe8aea9eSmrg		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
1225fe8aea9eSmrg		dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT;
1226fe8aea9eSmrg		dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
1227fe8aea9eSmrg		break;
1228fe8aea9eSmrg	case 2:
1229fe8aea9eSmrg		src_format = SURFACEFORMAT_R32G32_FLOAT;
1230fe8aea9eSmrg		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
1231fe8aea9eSmrg		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
1232fe8aea9eSmrg		dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
1233fe8aea9eSmrg		break;
1234fe8aea9eSmrg	case 3:
1235fe8aea9eSmrg		src_format = SURFACEFORMAT_R32G32B32_FLOAT;
1236fe8aea9eSmrg		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
1237fe8aea9eSmrg		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
1238fe8aea9eSmrg		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT;
1239fe8aea9eSmrg		break;
1240fe8aea9eSmrg	}
1241fe8aea9eSmrg	OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
1242fe8aea9eSmrg		  src_format << VE_FORMAT_SHIFT |
1243fe8aea9eSmrg		  4 << VE_OFFSET_SHIFT);
1244fe8aea9eSmrg	OUT_BATCH(dw);
1245fe8aea9eSmrg
1246fe8aea9eSmrg	/* u1, v1, w1 */
1247fe8aea9eSmrg	if (has_mask) {
1248fe8aea9eSmrg		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
1249fe8aea9eSmrg		DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset));
1250fe8aea9eSmrg		dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT;
1251fe8aea9eSmrg		switch (id >> 2) {
1252fe8aea9eSmrg		case 1:
1253fe8aea9eSmrg			src_format = SURFACEFORMAT_R32_FLOAT;
1254fe8aea9eSmrg			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
1255fe8aea9eSmrg			dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT;
1256fe8aea9eSmrg			dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
1257fe8aea9eSmrg			break;
1258fe8aea9eSmrg		default:
1259fe8aea9eSmrg			assert(0);
1260fe8aea9eSmrg		case 2:
1261fe8aea9eSmrg			src_format = SURFACEFORMAT_R32G32_FLOAT;
1262fe8aea9eSmrg			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
1263fe8aea9eSmrg			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
1264fe8aea9eSmrg			dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
1265fe8aea9eSmrg			break;
1266fe8aea9eSmrg		case 3:
1267fe8aea9eSmrg			src_format = SURFACEFORMAT_R32G32B32_FLOAT;
1268fe8aea9eSmrg			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
1269fe8aea9eSmrg			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
1270fe8aea9eSmrg			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT;
1271fe8aea9eSmrg			break;
1272fe8aea9eSmrg		}
1273fe8aea9eSmrg		OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
1274fe8aea9eSmrg			  src_format << VE_FORMAT_SHIFT |
1275fe8aea9eSmrg			  offset << VE_OFFSET_SHIFT);
1276fe8aea9eSmrg		OUT_BATCH(dw);
1277fe8aea9eSmrg	}
1278fe8aea9eSmrg
1279fe8aea9eSmrg	render->ve_dirty = true;
1280fe8aea9eSmrg}
1281fe8aea9eSmrg
1282fe8aea9eSmrginline static void
1283fe8aea9eSmrggen9_emit_pipe_invalidate(struct sna *sna)
1284fe8aea9eSmrg{
1285fe8aea9eSmrg	OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2));
1286fe8aea9eSmrg	OUT_BATCH(PIPE_CONTROL_WC_FLUSH |
1287fe8aea9eSmrg		  PIPE_CONTROL_TC_FLUSH |
1288fe8aea9eSmrg		  PIPE_CONTROL_CS_STALL);
1289fe8aea9eSmrg	OUT_BATCH64(0);
1290fe8aea9eSmrg	OUT_BATCH64(0);
1291fe8aea9eSmrg}
1292fe8aea9eSmrg
1293fe8aea9eSmrginline static void
1294fe8aea9eSmrggen9_emit_pipe_flush(struct sna *sna, bool need_stall)
1295fe8aea9eSmrg{
1296fe8aea9eSmrg	unsigned stall;
1297fe8aea9eSmrg
1298fe8aea9eSmrg	stall = 0;
1299fe8aea9eSmrg	if (need_stall)
1300fe8aea9eSmrg		stall = (PIPE_CONTROL_CS_STALL |
1301fe8aea9eSmrg			 PIPE_CONTROL_STALL_AT_SCOREBOARD);
1302fe8aea9eSmrg
1303fe8aea9eSmrg	OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2));
1304fe8aea9eSmrg	OUT_BATCH(PIPE_CONTROL_WC_FLUSH | stall);
1305fe8aea9eSmrg	OUT_BATCH64(0);
1306fe8aea9eSmrg	OUT_BATCH64(0);
1307fe8aea9eSmrg}
1308fe8aea9eSmrg
1309fe8aea9eSmrginline static void
1310fe8aea9eSmrggen9_emit_pipe_stall(struct sna *sna)
1311fe8aea9eSmrg{
1312fe8aea9eSmrg	OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2));
1313fe8aea9eSmrg	OUT_BATCH(PIPE_CONTROL_CS_STALL |
1314fe8aea9eSmrg		  PIPE_CONTROL_FLUSH |
1315fe8aea9eSmrg		  PIPE_CONTROL_STALL_AT_SCOREBOARD);
1316fe8aea9eSmrg	OUT_BATCH64(0);
1317fe8aea9eSmrg	OUT_BATCH64(0);
1318fe8aea9eSmrg}
1319fe8aea9eSmrg
1320fe8aea9eSmrgstatic void
1321fe8aea9eSmrggen9_emit_state(struct sna *sna,
1322fe8aea9eSmrg		const struct sna_composite_op *op,
1323fe8aea9eSmrg		uint16_t wm_binding_table)
1324fe8aea9eSmrg{
1325fe8aea9eSmrg	bool need_invalidate;
1326fe8aea9eSmrg	bool need_flush;
1327fe8aea9eSmrg	bool need_stall;
1328fe8aea9eSmrg
1329fe8aea9eSmrg	assert(op->dst.bo->exec);
1330fe8aea9eSmrg
1331fe8aea9eSmrg	need_flush = wm_binding_table & 1 ||
1332fe8aea9eSmrg		(sna->render_state.gen9.emit_flush && GEN9_READS_DST(op->u.gen9.flags));
1333fe8aea9eSmrg	if (ALWAYS_FLUSH)
1334fe8aea9eSmrg		need_flush = true;
1335fe8aea9eSmrg
1336fe8aea9eSmrg	wm_binding_table &= ~1;
1337fe8aea9eSmrg
1338fe8aea9eSmrg	need_stall = sna->render_state.gen9.surface_table != wm_binding_table;
1339fe8aea9eSmrg
1340fe8aea9eSmrg	need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo);
1341fe8aea9eSmrg	if (ALWAYS_INVALIDATE)
1342fe8aea9eSmrg		need_invalidate = true;
1343fe8aea9eSmrg
1344fe8aea9eSmrg	need_stall &= gen9_emit_drawing_rectangle(sna, op);
1345fe8aea9eSmrg	if (ALWAYS_STALL)
1346fe8aea9eSmrg		need_stall = true;
1347fe8aea9eSmrg
1348fe8aea9eSmrg	if (need_invalidate) {
1349fe8aea9eSmrg		gen9_emit_pipe_invalidate(sna);
1350fe8aea9eSmrg		kgem_clear_dirty(&sna->kgem);
1351fe8aea9eSmrg		assert(op->dst.bo->exec);
1352fe8aea9eSmrg		kgem_bo_mark_dirty(op->dst.bo);
1353fe8aea9eSmrg
1354fe8aea9eSmrg		need_flush = false;
1355fe8aea9eSmrg		need_stall = false;
1356fe8aea9eSmrg	}
1357fe8aea9eSmrg	if (need_flush) {
1358fe8aea9eSmrg		gen9_emit_pipe_flush(sna, need_stall);
1359fe8aea9eSmrg		need_stall = false;
1360fe8aea9eSmrg	}
1361fe8aea9eSmrg	if (need_stall)
1362fe8aea9eSmrg		gen9_emit_pipe_stall(sna);
1363fe8aea9eSmrg
1364fe8aea9eSmrg	gen9_emit_cc(sna, GEN9_BLEND(op->u.gen9.flags));
1365fe8aea9eSmrg	gen9_emit_sampler(sna, GEN9_SAMPLER(op->u.gen9.flags));
1366fe8aea9eSmrg	gen9_emit_sf(sna, GEN9_VERTEX(op->u.gen9.flags) >> 2);
1367fe8aea9eSmrg	gen9_emit_wm(sna, op->u.gen9.wm_kernel);
1368fe8aea9eSmrg	gen9_emit_vertex_elements(sna, op);
1369fe8aea9eSmrg	gen9_emit_binding_table(sna, wm_binding_table);
1370fe8aea9eSmrg
1371fe8aea9eSmrg	sna->render_state.gen9.emit_flush = GEN9_READS_DST(op->u.gen9.flags);
1372fe8aea9eSmrg}
1373fe8aea9eSmrg
1374fe8aea9eSmrgstatic bool gen9_magic_ca_pass(struct sna *sna,
1375fe8aea9eSmrg			       const struct sna_composite_op *op)
1376fe8aea9eSmrg{
1377fe8aea9eSmrg	struct gen9_render_state *state = &sna->render_state.gen9;
1378fe8aea9eSmrg
1379fe8aea9eSmrg	if (!op->need_magic_ca_pass)
1380fe8aea9eSmrg		return false;
1381fe8aea9eSmrg
1382fe8aea9eSmrg	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
1383fe8aea9eSmrg	     sna->render.vertex_start, sna->render.vertex_index));
1384fe8aea9eSmrg
1385fe8aea9eSmrg	gen9_emit_pipe_stall(sna);
1386fe8aea9eSmrg
1387fe8aea9eSmrg	gen9_emit_cc(sna,
1388fe8aea9eSmrg		     GEN9_BLEND(gen9_get_blend(PictOpAdd, true,
1389fe8aea9eSmrg					       op->dst.format)));
1390fe8aea9eSmrg	gen9_emit_wm(sna,
1391fe8aea9eSmrg		     gen9_choose_composite_kernel(PictOpAdd,
1392fe8aea9eSmrg						  true, true,
1393fe8aea9eSmrg						  op->is_affine));
1394fe8aea9eSmrg
1395fe8aea9eSmrg	OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2));
1396fe8aea9eSmrg	OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */
1397fe8aea9eSmrg	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
1398fe8aea9eSmrg	OUT_BATCH(sna->render.vertex_start);
1399fe8aea9eSmrg	OUT_BATCH(1);	/* single instance */
1400fe8aea9eSmrg	OUT_BATCH(0);	/* start instance location */
1401fe8aea9eSmrg	OUT_BATCH(0);	/* index buffer offset, ignored */
1402fe8aea9eSmrg
1403fe8aea9eSmrg	state->last_primitive = sna->kgem.nbatch;
1404fe8aea9eSmrg	state->ve_dirty = false;
1405fe8aea9eSmrg	return true;
1406fe8aea9eSmrg}
1407fe8aea9eSmrg
/*
 * Reserve a 64-byte, 64-byte-aligned region at the current position of
 * @stream.  The mapping returned is not needed here.
 */
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	(void)sna_static_stream_map(stream, 64, 64);
}
1413fe8aea9eSmrg
1414fe8aea9eSmrgstatic void
1415fe8aea9eSmrgsampler_state_init(struct gen9_sampler_state *sampler_state,
1416fe8aea9eSmrg		   sampler_filter_t filter,
1417fe8aea9eSmrg		   sampler_extend_t extend)
1418fe8aea9eSmrg{
1419fe8aea9eSmrg	COMPILE_TIME_ASSERT(sizeof(*sampler_state) == 4*sizeof(uint32_t));
1420fe8aea9eSmrg
1421fe8aea9eSmrg	sampler_state->ss0.lod_preclamp = 2;	/* GL mode */
1422fe8aea9eSmrg	sampler_state->ss0.default_color_mode = 1;
1423fe8aea9eSmrg
1424fe8aea9eSmrg	switch (filter) {
1425fe8aea9eSmrg	default:
1426fe8aea9eSmrg	case SAMPLER_FILTER_NEAREST:
1427fe8aea9eSmrg		sampler_state->ss0.min_filter = MAPFILTER_NEAREST;
1428fe8aea9eSmrg		sampler_state->ss0.mag_filter = MAPFILTER_NEAREST;
1429fe8aea9eSmrg		break;
1430fe8aea9eSmrg	case SAMPLER_FILTER_BILINEAR:
1431fe8aea9eSmrg		sampler_state->ss0.min_filter = MAPFILTER_LINEAR;
1432fe8aea9eSmrg		sampler_state->ss0.mag_filter = MAPFILTER_LINEAR;
1433fe8aea9eSmrg		break;
1434fe8aea9eSmrg	}
1435fe8aea9eSmrg
1436fe8aea9eSmrg	/* XXX bicubic filter using MAPFILTER_FLEXIBLE */
1437fe8aea9eSmrg
1438fe8aea9eSmrg	switch (extend) {
1439fe8aea9eSmrg	default:
1440fe8aea9eSmrg	case SAMPLER_EXTEND_NONE:
1441fe8aea9eSmrg		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
1442fe8aea9eSmrg		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
1443fe8aea9eSmrg		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
1444fe8aea9eSmrg		break;
1445fe8aea9eSmrg	case SAMPLER_EXTEND_REPEAT:
1446fe8aea9eSmrg		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_WRAP;
1447fe8aea9eSmrg		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_WRAP;
1448fe8aea9eSmrg		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_WRAP;
1449fe8aea9eSmrg		break;
1450fe8aea9eSmrg	case SAMPLER_EXTEND_PAD:
1451fe8aea9eSmrg		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP;
1452fe8aea9eSmrg		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP;
1453fe8aea9eSmrg		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP;
1454fe8aea9eSmrg		break;
1455fe8aea9eSmrg	case SAMPLER_EXTEND_REFLECT:
1456fe8aea9eSmrg		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_MIRROR;
1457fe8aea9eSmrg		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_MIRROR;
1458fe8aea9eSmrg		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_MIRROR;
1459fe8aea9eSmrg		break;
1460fe8aea9eSmrg	}
1461fe8aea9eSmrg}
1462fe8aea9eSmrg
1463fe8aea9eSmrgstatic void
1464fe8aea9eSmrgsampler_copy_init(struct gen9_sampler_state *ss)
1465fe8aea9eSmrg{
1466fe8aea9eSmrg	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1467fe8aea9eSmrg	ss->ss3.non_normalized_coord = 1;
1468fe8aea9eSmrg
1469fe8aea9eSmrg	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1470fe8aea9eSmrg}
1471fe8aea9eSmrg
1472fe8aea9eSmrgstatic void
1473fe8aea9eSmrgsampler_fill_init(struct gen9_sampler_state *ss)
1474fe8aea9eSmrg{
1475fe8aea9eSmrg	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
1476fe8aea9eSmrg	ss->ss3.non_normalized_coord = 1;
1477fe8aea9eSmrg
1478fe8aea9eSmrg	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1479fe8aea9eSmrg}
1480fe8aea9eSmrg
1481fe8aea9eSmrgstatic uint32_t
1482fe8aea9eSmrggen9_tiling_bits(uint32_t tiling)
1483fe8aea9eSmrg{
1484fe8aea9eSmrg	switch (tiling) {
1485fe8aea9eSmrg	default: assert(0);
1486fe8aea9eSmrg	case I915_TILING_NONE: return 0;
1487fe8aea9eSmrg	case I915_TILING_X: return SURFACE_TILED;
1488fe8aea9eSmrg	case I915_TILING_Y: return SURFACE_TILED | SURFACE_TILED_Y;
1489fe8aea9eSmrg	}
1490fe8aea9eSmrg}
1491fe8aea9eSmrg
/*
 * Cache-control selectors, shifted into bits 24 and up of surface-state
 * dword 1 by gen9_bind_bo().  Judging by the names: PTE presumably defers
 * to the page-table caching attributes and WB is write-back -- confirm
 * against the hardware documentation.
 */
#define MOCS_PTE (1 << 1)
#define MOCS_WB (2 << 1)
1494fe8aea9eSmrg
1495fe8aea9eSmrg/**
1496fe8aea9eSmrg * Sets up the common fields for a surface state buffer for the given
1497fe8aea9eSmrg * picture in the given surface state buffer.
1498fe8aea9eSmrg */
1499fe8aea9eSmrgstatic uint32_t
1500fe8aea9eSmrggen9_bind_bo(struct sna *sna,
1501fe8aea9eSmrg	     struct kgem_bo *bo,
1502fe8aea9eSmrg	     uint32_t width,
1503fe8aea9eSmrg	     uint32_t height,
1504fe8aea9eSmrg	     uint32_t format,
1505fe8aea9eSmrg	     bool is_dst)
1506fe8aea9eSmrg{
1507fe8aea9eSmrg	uint32_t *ss;
1508fe8aea9eSmrg	uint32_t domains;
1509fe8aea9eSmrg	int offset;
1510fe8aea9eSmrg	uint32_t is_scanout = is_dst && bo->scanout;
1511fe8aea9eSmrg
1512fe8aea9eSmrg	/* After the first bind, we manage the cache domains within the batch */
1513fe8aea9eSmrg	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
1514fe8aea9eSmrg	if (offset) {
1515fe8aea9eSmrg		if (is_dst)
1516fe8aea9eSmrg			kgem_bo_mark_dirty(bo);
1517fe8aea9eSmrg		assert(offset >= sna->kgem.surface);
1518fe8aea9eSmrg		return offset * sizeof(uint32_t);
1519fe8aea9eSmrg	}
1520fe8aea9eSmrg
1521fe8aea9eSmrg	offset = sna->kgem.surface -= SURFACE_DW;
1522fe8aea9eSmrg	ss = sna->kgem.batch + offset;
1523fe8aea9eSmrg	ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT |
1524fe8aea9eSmrg		 gen9_tiling_bits(bo->tiling) |
1525fe8aea9eSmrg		 format << SURFACE_FORMAT_SHIFT |
1526fe8aea9eSmrg		 SURFACE_VALIGN_4 | SURFACE_HALIGN_4);
1527fe8aea9eSmrg	if (is_dst) {
1528fe8aea9eSmrg		ss[0] |= SURFACE_RC_READ_WRITE;
1529fe8aea9eSmrg		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
1530fe8aea9eSmrg	} else
1531fe8aea9eSmrg		domains = I915_GEM_DOMAIN_SAMPLER << 16;
1532fe8aea9eSmrg	ss[1] = (is_scanout || (is_dst && is_uncached(sna, bo))) ? MOCS_PTE << 24 : MOCS_WB << 24;
1533fe8aea9eSmrg	ss[2] = ((width - 1)  << SURFACE_WIDTH_SHIFT |
1534fe8aea9eSmrg		 (height - 1) << SURFACE_HEIGHT_SHIFT);
1535fe8aea9eSmrg	ss[3] = (bo->pitch - 1) << SURFACE_PITCH_SHIFT;
1536fe8aea9eSmrg	ss[4] = 0;
1537fe8aea9eSmrg	ss[5] = 0;
1538fe8aea9eSmrg	ss[6] = 0;
1539fe8aea9eSmrg	ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
1540fe8aea9eSmrg	*(uint64_t *)(ss+8) = kgem_add_reloc64(&sna->kgem, offset + 8, bo, domains, 0);
1541fe8aea9eSmrg	ss[10] = 0;
1542fe8aea9eSmrg	ss[11] = 0;
1543fe8aea9eSmrg	ss[12] = 0;
1544fe8aea9eSmrg	ss[13] = 0;
1545fe8aea9eSmrg	ss[14] = 0;
1546fe8aea9eSmrg	ss[15] = 0;
1547fe8aea9eSmrg
1548fe8aea9eSmrg	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);
1549fe8aea9eSmrg
1550fe8aea9eSmrg	DBG(("[%x] bind bo(handle=%d, addr=%lx), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
1551fe8aea9eSmrg	     offset, bo->handle, *(uint64_t *)(ss+8),
1552fe8aea9eSmrg	     format, width, height, bo->pitch, bo->tiling,
1553fe8aea9eSmrg	     domains & 0xffff ? "render" : "sampler"));
1554fe8aea9eSmrg
1555fe8aea9eSmrg	return offset * sizeof(uint32_t);
1556fe8aea9eSmrg}
1557fe8aea9eSmrg
1558fe8aea9eSmrgstatic void gen9_emit_vertex_buffer(struct sna *sna,
1559fe8aea9eSmrg				    const struct sna_composite_op *op)
1560fe8aea9eSmrg{
1561fe8aea9eSmrg	int id = GEN9_VERTEX(op->u.gen9.flags);
1562fe8aea9eSmrg
1563fe8aea9eSmrg	OUT_BATCH(GEN9_3DSTATE_VERTEX_BUFFERS | (5 - 2));
1564fe8aea9eSmrg	OUT_BATCH(id << VB_INDEX_SHIFT | VB_MODIFY_ENABLE |
1565fe8aea9eSmrg		  4*op->floats_per_vertex);
1566fe8aea9eSmrg	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
1567fe8aea9eSmrg	OUT_BATCH64(0);
1568fe8aea9eSmrg	OUT_BATCH(~0); /* buffer size: disabled */
1569fe8aea9eSmrg
1570fe8aea9eSmrg	sna->render.vb_id |= 1 << id;
1571fe8aea9eSmrg}
1572fe8aea9eSmrg
1573fe8aea9eSmrgstatic void gen9_emit_primitive(struct sna *sna)
1574fe8aea9eSmrg{
1575fe8aea9eSmrg	if (sna->kgem.nbatch == sna->render_state.gen9.last_primitive) {
1576fe8aea9eSmrg		sna->render.vertex_offset = sna->kgem.nbatch - 5;
1577fe8aea9eSmrg		return;
1578fe8aea9eSmrg	}
1579fe8aea9eSmrg
1580fe8aea9eSmrg	OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2));
1581fe8aea9eSmrg	OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */
1582fe8aea9eSmrg	sna->render.vertex_offset = sna->kgem.nbatch;
1583fe8aea9eSmrg	OUT_BATCH(0);	/* vertex count, to be filled in later */
1584fe8aea9eSmrg	OUT_BATCH(sna->render.vertex_index);
1585fe8aea9eSmrg	OUT_BATCH(1);	/* single instance */
1586fe8aea9eSmrg	OUT_BATCH(0);	/* start instance location */
1587fe8aea9eSmrg	OUT_BATCH(0);	/* index buffer offset, ignored */
1588fe8aea9eSmrg	sna->render.vertex_start = sna->render.vertex_index;
1589fe8aea9eSmrg
1590fe8aea9eSmrg	sna->render_state.gen9.last_primitive = sna->kgem.nbatch;
1591fe8aea9eSmrg	sna->render_state.gen9.ve_dirty = false;
1592fe8aea9eSmrg}
1593fe8aea9eSmrg
1594fe8aea9eSmrgstatic bool gen9_rectangle_begin(struct sna *sna,
1595fe8aea9eSmrg				 const struct sna_composite_op *op)
1596fe8aea9eSmrg{
1597fe8aea9eSmrg	int id = 1 << GEN9_VERTEX(op->u.gen9.flags);
1598fe8aea9eSmrg	int ndwords;
1599fe8aea9eSmrg
1600fe8aea9eSmrg	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
1601fe8aea9eSmrg		return true;
1602fe8aea9eSmrg
1603fe8aea9eSmrg	ndwords = op->need_magic_ca_pass ? 60 : 6;
1604fe8aea9eSmrg	if ((sna->render.vb_id & id) == 0)
1605fe8aea9eSmrg		ndwords += 5;
1606fe8aea9eSmrg	if (!kgem_check_batch(&sna->kgem, ndwords))
1607fe8aea9eSmrg		return false;
1608fe8aea9eSmrg
1609fe8aea9eSmrg	if ((sna->render.vb_id & id) == 0)
1610fe8aea9eSmrg		gen9_emit_vertex_buffer(sna, op);
1611fe8aea9eSmrg
1612fe8aea9eSmrg	gen9_emit_primitive(sna);
1613fe8aea9eSmrg	return true;
1614fe8aea9eSmrg}
1615fe8aea9eSmrg
1616fe8aea9eSmrgstatic int gen9_get_rectangles__flush(struct sna *sna,
1617fe8aea9eSmrg				      const struct sna_composite_op *op)
1618fe8aea9eSmrg{
1619fe8aea9eSmrg	/* Preventing discarding new vbo after lock contention */
1620fe8aea9eSmrg	if (sna_vertex_wait__locked(&sna->render)) {
1621fe8aea9eSmrg		int rem = vertex_space(sna);
1622fe8aea9eSmrg		if (rem > op->floats_per_rect)
1623fe8aea9eSmrg			return rem;
1624fe8aea9eSmrg	}
1625fe8aea9eSmrg
1626fe8aea9eSmrg	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6))
1627fe8aea9eSmrg		return 0;
1628fe8aea9eSmrg	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
1629fe8aea9eSmrg		return 0;
1630fe8aea9eSmrg
1631fe8aea9eSmrg	if (sna->render.vertex_offset) {
1632fe8aea9eSmrg		gen8_vertex_flush(sna);
1633fe8aea9eSmrg		if (gen9_magic_ca_pass(sna, op)) {
1634fe8aea9eSmrg			gen9_emit_pipe_invalidate(sna);
1635fe8aea9eSmrg			gen9_emit_cc(sna, GEN9_BLEND(op->u.gen9.flags));
1636fe8aea9eSmrg			gen9_emit_wm(sna, op->u.gen9.wm_kernel);
1637fe8aea9eSmrg		}
1638fe8aea9eSmrg	}
1639fe8aea9eSmrg
1640fe8aea9eSmrg	return gen8_vertex_finish(sna);
1641fe8aea9eSmrg}
1642fe8aea9eSmrg
1643fe8aea9eSmrginline static int gen9_get_rectangles(struct sna *sna,
1644fe8aea9eSmrg				      const struct sna_composite_op *op,
1645fe8aea9eSmrg				      int want,
1646fe8aea9eSmrg				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
1647fe8aea9eSmrg{
1648fe8aea9eSmrg	int rem;
1649fe8aea9eSmrg
1650fe8aea9eSmrg	assert(want);
1651fe8aea9eSmrg
1652fe8aea9eSmrgstart:
1653fe8aea9eSmrg	rem = vertex_space(sna);
1654fe8aea9eSmrg	if (unlikely(rem < op->floats_per_rect)) {
1655fe8aea9eSmrg		DBG(("flushing vbo for %s: %d < %d\n",
1656fe8aea9eSmrg		     __FUNCTION__, rem, op->floats_per_rect));
1657fe8aea9eSmrg		rem = gen9_get_rectangles__flush(sna, op);
1658fe8aea9eSmrg		if (unlikely(rem == 0))
1659fe8aea9eSmrg			goto flush;
1660fe8aea9eSmrg	}
1661fe8aea9eSmrg
1662fe8aea9eSmrg	if (unlikely(sna->render.vertex_offset == 0)) {
1663fe8aea9eSmrg		if (!gen9_rectangle_begin(sna, op))
1664fe8aea9eSmrg			goto flush;
1665fe8aea9eSmrg		else
1666fe8aea9eSmrg			goto start;
1667fe8aea9eSmrg	}
1668fe8aea9eSmrg
1669fe8aea9eSmrg	assert(rem <= vertex_space(sna));
1670fe8aea9eSmrg	assert(op->floats_per_rect <= rem);
1671fe8aea9eSmrg	if (want > 1 && want * op->floats_per_rect > rem)
1672fe8aea9eSmrg		want = rem / op->floats_per_rect;
1673fe8aea9eSmrg
1674fe8aea9eSmrg	assert(want > 0);
1675fe8aea9eSmrg	sna->render.vertex_index += 3*want;
1676fe8aea9eSmrg	return want;
1677fe8aea9eSmrg
1678fe8aea9eSmrgflush:
1679fe8aea9eSmrg	if (sna->render.vertex_offset) {
1680fe8aea9eSmrg		gen8_vertex_flush(sna);
1681fe8aea9eSmrg		gen9_magic_ca_pass(sna, op);
1682fe8aea9eSmrg	}
1683fe8aea9eSmrg	sna_vertex_wait__locked(&sna->render);
1684fe8aea9eSmrg	_kgem_submit(&sna->kgem);
1685fe8aea9eSmrg	emit_state(sna, op);
1686fe8aea9eSmrg	goto start;
1687fe8aea9eSmrg}
1688fe8aea9eSmrg
1689fe8aea9eSmrginline static uint32_t *gen9_composite_get_binding_table(struct sna *sna,
1690fe8aea9eSmrg							 uint16_t *offset)
1691fe8aea9eSmrg{
1692fe8aea9eSmrg	uint32_t *table;
1693fe8aea9eSmrg
1694fe8aea9eSmrg	assert(sna->kgem.surface <= 16384);
1695fe8aea9eSmrg	sna->kgem.surface -= SURFACE_DW;
1696fe8aea9eSmrg	/* Clear all surplus entries to zero in case of prefetch */
1697fe8aea9eSmrg	table = memset(sna->kgem.batch + sna->kgem.surface, 0, 64);
1698fe8aea9eSmrg
1699fe8aea9eSmrg	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
1700fe8aea9eSmrg
1701fe8aea9eSmrg	*offset = sna->kgem.surface;
1702fe8aea9eSmrg	return table;
1703fe8aea9eSmrg}
1704fe8aea9eSmrg
1705fe8aea9eSmrgstatic void
1706fe8aea9eSmrggen9_get_batch(struct sna *sna, const struct sna_composite_op *op)
1707fe8aea9eSmrg{
1708fe8aea9eSmrg	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
1709fe8aea9eSmrg
1710fe8aea9eSmrg	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 2*(1+3))) {
1711fe8aea9eSmrg		DBG(("%s: flushing batch: %d < %d+%d\n",
1712fe8aea9eSmrg		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
1713fe8aea9eSmrg		     150, 4*8*2));
1714fe8aea9eSmrg		_kgem_submit(&sna->kgem);
1715fe8aea9eSmrg		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1716fe8aea9eSmrg	}
1717fe8aea9eSmrg
1718fe8aea9eSmrg	assert(sna->kgem.mode == KGEM_RENDER);
1719fe8aea9eSmrg	assert(sna->kgem.ring == KGEM_RENDER);
1720fe8aea9eSmrg
1721fe8aea9eSmrg	if (sna->render_state.gen9.needs_invariant)
1722fe8aea9eSmrg		gen9_emit_invariant(sna);
1723fe8aea9eSmrg}
1724fe8aea9eSmrg
1725fe8aea9eSmrgstatic void gen9_emit_composite_state(struct sna *sna,
1726fe8aea9eSmrg				      const struct sna_composite_op *op)
1727fe8aea9eSmrg{
1728fe8aea9eSmrg	uint32_t *binding_table;
1729fe8aea9eSmrg	uint16_t offset, dirty;
1730fe8aea9eSmrg
1731fe8aea9eSmrg	gen9_get_batch(sna, op);
1732fe8aea9eSmrg
1733fe8aea9eSmrg	binding_table = gen9_composite_get_binding_table(sna, &offset);
1734fe8aea9eSmrg
1735fe8aea9eSmrg	dirty = kgem_bo_is_dirty(op->dst.bo);
1736fe8aea9eSmrg
1737fe8aea9eSmrg	binding_table[0] =
1738fe8aea9eSmrg		gen9_bind_bo(sna,
1739fe8aea9eSmrg			    op->dst.bo, op->dst.width, op->dst.height,
1740fe8aea9eSmrg			    gen9_get_dest_format(op->dst.format),
1741fe8aea9eSmrg			    true);
1742fe8aea9eSmrg	binding_table[1] =
1743fe8aea9eSmrg		gen9_bind_bo(sna,
1744fe8aea9eSmrg			     op->src.bo, op->src.width, op->src.height,
1745fe8aea9eSmrg			     op->src.card_format,
1746fe8aea9eSmrg			     false);
1747fe8aea9eSmrg	if (op->mask.bo) {
1748fe8aea9eSmrg		binding_table[2] =
1749fe8aea9eSmrg			gen9_bind_bo(sna,
1750fe8aea9eSmrg				     op->mask.bo,
1751fe8aea9eSmrg				     op->mask.width,
1752fe8aea9eSmrg				     op->mask.height,
1753fe8aea9eSmrg				     op->mask.card_format,
1754fe8aea9eSmrg				     false);
1755fe8aea9eSmrg	}
1756fe8aea9eSmrg
1757fe8aea9eSmrg	if (sna->kgem.surface == offset &&
1758fe8aea9eSmrg	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table &&
1759fe8aea9eSmrg	    (op->mask.bo == NULL ||
1760fe8aea9eSmrg	     sna->kgem.batch[sna->render_state.gen9.surface_table+2] == binding_table[2])) {
1761fe8aea9eSmrg		sna->kgem.surface += SURFACE_DW;
1762fe8aea9eSmrg		offset = sna->render_state.gen9.surface_table;
1763fe8aea9eSmrg	}
1764fe8aea9eSmrg
1765fe8aea9eSmrg	if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0])
1766fe8aea9eSmrg		dirty = 0;
1767fe8aea9eSmrg
1768fe8aea9eSmrg	gen9_emit_state(sna, op, offset | dirty);
1769fe8aea9eSmrg}
1770fe8aea9eSmrg
1771fe8aea9eSmrgstatic void
1772fe8aea9eSmrggen9_align_vertex(struct sna *sna, const struct sna_composite_op *op)
1773fe8aea9eSmrg{
1774fe8aea9eSmrg	if (op->floats_per_vertex != sna->render_state.gen9.floats_per_vertex) {
1775fe8aea9eSmrg		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
1776fe8aea9eSmrg		     sna->render_state.gen9.floats_per_vertex, op->floats_per_vertex));
1777fe8aea9eSmrg		gen8_vertex_align(sna, op);
1778fe8aea9eSmrg		sna->render_state.gen9.floats_per_vertex = op->floats_per_vertex;
1779fe8aea9eSmrg	}
1780fe8aea9eSmrg}
1781fe8aea9eSmrg
1782fe8aea9eSmrgfastcall static void
1783fe8aea9eSmrggen9_render_composite_blt(struct sna *sna,
1784fe8aea9eSmrg			  const struct sna_composite_op *op,
1785fe8aea9eSmrg			  const struct sna_composite_rectangles *r)
1786fe8aea9eSmrg{
1787fe8aea9eSmrg	gen9_get_rectangles(sna, op, 1, gen9_emit_composite_state);
1788fe8aea9eSmrg	op->prim_emit(sna, op, r);
1789fe8aea9eSmrg}
1790fe8aea9eSmrg
1791fe8aea9eSmrgfastcall static void
1792fe8aea9eSmrggen9_render_composite_box(struct sna *sna,
1793fe8aea9eSmrg			  const struct sna_composite_op *op,
1794fe8aea9eSmrg			  const BoxRec *box)
1795fe8aea9eSmrg{
1796fe8aea9eSmrg	struct sna_composite_rectangles r;
1797fe8aea9eSmrg
1798fe8aea9eSmrg	gen9_get_rectangles(sna, op, 1, gen9_emit_composite_state);
1799fe8aea9eSmrg
1800fe8aea9eSmrg	DBG(("  %s: (%d, %d), (%d, %d)\n",
1801fe8aea9eSmrg	     __FUNCTION__,
1802fe8aea9eSmrg	     box->x1, box->y1, box->x2, box->y2));
1803fe8aea9eSmrg
1804fe8aea9eSmrg	r.dst.x = box->x1;
1805fe8aea9eSmrg	r.dst.y = box->y1;
1806fe8aea9eSmrg	r.width  = box->x2 - box->x1;
1807fe8aea9eSmrg	r.height = box->y2 - box->y1;
1808fe8aea9eSmrg	r.src = r.mask = r.dst;
1809fe8aea9eSmrg
1810fe8aea9eSmrg	op->prim_emit(sna, op, &r);
1811fe8aea9eSmrg}
1812fe8aea9eSmrg
1813fe8aea9eSmrgstatic void
1814fe8aea9eSmrggen9_render_composite_boxes__blt(struct sna *sna,
1815fe8aea9eSmrg				 const struct sna_composite_op *op,
1816fe8aea9eSmrg				 const BoxRec *box, int nbox)
1817fe8aea9eSmrg{
1818fe8aea9eSmrg	DBG(("composite_boxes(%d)\n", nbox));
1819fe8aea9eSmrg
1820fe8aea9eSmrg	do {
1821fe8aea9eSmrg		int nbox_this_time;
1822fe8aea9eSmrg
1823fe8aea9eSmrg		nbox_this_time = gen9_get_rectangles(sna, op, nbox,
1824fe8aea9eSmrg						     gen9_emit_composite_state);
1825fe8aea9eSmrg		nbox -= nbox_this_time;
1826fe8aea9eSmrg
1827fe8aea9eSmrg		do {
1828fe8aea9eSmrg			struct sna_composite_rectangles r;
1829fe8aea9eSmrg
1830fe8aea9eSmrg			DBG(("  %s: (%d, %d), (%d, %d)\n",
1831fe8aea9eSmrg			     __FUNCTION__,
1832fe8aea9eSmrg			     box->x1, box->y1, box->x2, box->y2));
1833fe8aea9eSmrg
1834fe8aea9eSmrg			r.dst.x = box->x1;
1835fe8aea9eSmrg			r.dst.y = box->y1;
1836fe8aea9eSmrg			r.width  = box->x2 - box->x1;
1837fe8aea9eSmrg			r.height = box->y2 - box->y1;
1838fe8aea9eSmrg			r.src = r.mask = r.dst;
1839fe8aea9eSmrg
1840fe8aea9eSmrg			op->prim_emit(sna, op, &r);
1841fe8aea9eSmrg			box++;
1842fe8aea9eSmrg		} while (--nbox_this_time);
1843fe8aea9eSmrg	} while (nbox);
1844fe8aea9eSmrg}
1845fe8aea9eSmrg
1846fe8aea9eSmrgstatic void
1847fe8aea9eSmrggen9_render_composite_boxes(struct sna *sna,
1848fe8aea9eSmrg			    const struct sna_composite_op *op,
1849fe8aea9eSmrg			    const BoxRec *box, int nbox)
1850fe8aea9eSmrg{
1851fe8aea9eSmrg	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1852fe8aea9eSmrg
1853fe8aea9eSmrg	do {
1854fe8aea9eSmrg		int nbox_this_time;
1855fe8aea9eSmrg		float *v;
1856fe8aea9eSmrg
1857fe8aea9eSmrg		nbox_this_time = gen9_get_rectangles(sna, op, nbox,
1858fe8aea9eSmrg						     gen9_emit_composite_state);
1859fe8aea9eSmrg		assert(nbox_this_time);
1860fe8aea9eSmrg		nbox -= nbox_this_time;
1861fe8aea9eSmrg
1862fe8aea9eSmrg		v = sna->render.vertices + sna->render.vertex_used;
1863fe8aea9eSmrg		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
1864fe8aea9eSmrg
1865fe8aea9eSmrg		op->emit_boxes(op, box, nbox_this_time, v);
1866fe8aea9eSmrg		box += nbox_this_time;
1867fe8aea9eSmrg	} while (nbox);
1868fe8aea9eSmrg}
1869fe8aea9eSmrg
1870fe8aea9eSmrgstatic void
1871fe8aea9eSmrggen9_render_composite_boxes__thread(struct sna *sna,
1872fe8aea9eSmrg				    const struct sna_composite_op *op,
1873fe8aea9eSmrg				    const BoxRec *box, int nbox)
1874fe8aea9eSmrg{
1875fe8aea9eSmrg	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1876fe8aea9eSmrg
1877fe8aea9eSmrg	sna_vertex_lock(&sna->render);
1878fe8aea9eSmrg	do {
1879fe8aea9eSmrg		int nbox_this_time;
1880fe8aea9eSmrg		float *v;
1881fe8aea9eSmrg
1882fe8aea9eSmrg		nbox_this_time = gen9_get_rectangles(sna, op, nbox,
1883fe8aea9eSmrg						     gen9_emit_composite_state);
1884fe8aea9eSmrg		assert(nbox_this_time);
1885fe8aea9eSmrg		nbox -= nbox_this_time;
1886fe8aea9eSmrg
1887fe8aea9eSmrg		v = sna->render.vertices + sna->render.vertex_used;
1888fe8aea9eSmrg		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
1889fe8aea9eSmrg
1890fe8aea9eSmrg		sna_vertex_acquire__locked(&sna->render);
1891fe8aea9eSmrg		sna_vertex_unlock(&sna->render);
1892fe8aea9eSmrg
1893fe8aea9eSmrg		op->emit_boxes(op, box, nbox_this_time, v);
1894fe8aea9eSmrg		box += nbox_this_time;
1895fe8aea9eSmrg
1896fe8aea9eSmrg		sna_vertex_lock(&sna->render);
1897fe8aea9eSmrg		sna_vertex_release__locked(&sna->render);
1898fe8aea9eSmrg	} while (nbox);
1899fe8aea9eSmrg	sna_vertex_unlock(&sna->render);
1900fe8aea9eSmrg}
1901fe8aea9eSmrg
1902fe8aea9eSmrgstatic uint32_t
1903fe8aea9eSmrggen9_create_blend_state(struct sna_static_stream *stream)
1904fe8aea9eSmrg{
1905fe8aea9eSmrg	char *base, *ptr;
1906fe8aea9eSmrg	int src, dst;
1907fe8aea9eSmrg
1908fe8aea9eSmrg	COMPILE_TIME_ASSERT(((GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT << 4) & (1 << 15)) == 0);
1909fe8aea9eSmrg
1910fe8aea9eSmrg	base = sna_static_stream_map(stream,
1911fe8aea9eSmrg				     GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT * GEN9_BLEND_STATE_PADDED_SIZE,
1912fe8aea9eSmrg				     64);
1913fe8aea9eSmrg
1914fe8aea9eSmrg	ptr = base;
1915fe8aea9eSmrg	for (src = 0; src < GEN9_BLENDFACTOR_COUNT; src++) {
1916fe8aea9eSmrg		for (dst = 0; dst < GEN9_BLENDFACTOR_COUNT; dst++) {
1917fe8aea9eSmrg			struct gen9_blend_state *blend =
1918fe8aea9eSmrg				(struct gen9_blend_state *)ptr;
1919fe8aea9eSmrg
1920fe8aea9eSmrg			assert(((ptr - base) & 63) == 0);
1921fe8aea9eSmrg			COMPILE_TIME_ASSERT(sizeof(blend->common) == 4);
1922fe8aea9eSmrg			COMPILE_TIME_ASSERT(sizeof(blend->rt) == 8);
1923fe8aea9eSmrg			COMPILE_TIME_ASSERT((char *)&blend->rt - (char *)blend == 4);
1924fe8aea9eSmrg
1925fe8aea9eSmrg			blend->rt.post_blend_clamp = 1;
1926fe8aea9eSmrg			blend->rt.pre_blend_clamp = 1;
1927fe8aea9eSmrg
1928fe8aea9eSmrg			blend->rt.color_blend =
1929fe8aea9eSmrg				!(dst == BLENDFACTOR_ZERO && src == BLENDFACTOR_ONE);
1930fe8aea9eSmrg			blend->rt.dest_blend_factor = dst;
1931fe8aea9eSmrg			blend->rt.source_blend_factor = src;
1932fe8aea9eSmrg			blend->rt.color_blend_function = BLENDFUNCTION_ADD;
1933fe8aea9eSmrg
1934fe8aea9eSmrg			blend->rt.dest_alpha_blend_factor = dst;
1935fe8aea9eSmrg			blend->rt.source_alpha_blend_factor = src;
1936fe8aea9eSmrg			blend->rt.alpha_blend_function = BLENDFUNCTION_ADD;
1937fe8aea9eSmrg
1938fe8aea9eSmrg			ptr += GEN9_BLEND_STATE_PADDED_SIZE;
1939fe8aea9eSmrg		}
1940fe8aea9eSmrg	}
1941fe8aea9eSmrg
1942fe8aea9eSmrg	return sna_static_stream_offsetof(stream, base);
1943fe8aea9eSmrg}
1944fe8aea9eSmrg
1945fe8aea9eSmrgstatic int
1946fe8aea9eSmrggen9_composite_picture(struct sna *sna,
1947fe8aea9eSmrg		       PicturePtr picture,
1948fe8aea9eSmrg		       struct sna_composite_channel *channel,
1949fe8aea9eSmrg		       int x, int y,
1950fe8aea9eSmrg		       int w, int h,
1951fe8aea9eSmrg		       int dst_x, int dst_y,
1952fe8aea9eSmrg		       bool precise)
1953fe8aea9eSmrg{
1954fe8aea9eSmrg	PixmapPtr pixmap;
1955fe8aea9eSmrg	uint32_t color;
1956fe8aea9eSmrg	int16_t dx, dy;
1957fe8aea9eSmrg
1958fe8aea9eSmrg	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
1959fe8aea9eSmrg	     __FUNCTION__, x, y, w, h, dst_x, dst_y));
1960fe8aea9eSmrg
1961fe8aea9eSmrg	channel->is_solid = false;
1962fe8aea9eSmrg	channel->card_format = -1;
1963fe8aea9eSmrg
1964fe8aea9eSmrg	if (sna_picture_is_solid(picture, &color))
1965fe8aea9eSmrg		return gen4_channel_init_solid(sna, channel, color);
1966fe8aea9eSmrg
1967fe8aea9eSmrg	if (picture->pDrawable == NULL) {
1968fe8aea9eSmrg		int ret;
1969fe8aea9eSmrg
1970fe8aea9eSmrg		if (picture->pSourcePict->type == SourcePictTypeLinear)
1971fe8aea9eSmrg			return gen4_channel_init_linear(sna, picture, channel,
1972fe8aea9eSmrg							x, y,
1973fe8aea9eSmrg							w, h,
1974fe8aea9eSmrg							dst_x, dst_y);
1975fe8aea9eSmrg
1976fe8aea9eSmrg		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
1977fe8aea9eSmrg		ret = -1;
1978fe8aea9eSmrg		if (!precise)
1979fe8aea9eSmrg			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
1980fe8aea9eSmrg								      x, y, w, h, dst_x, dst_y);
1981fe8aea9eSmrg		if (ret == -1)
1982fe8aea9eSmrg			ret = sna_render_picture_fixup(sna, picture, channel,
1983fe8aea9eSmrg						       x, y, w, h, dst_x, dst_y);
1984fe8aea9eSmrg		return ret;
1985fe8aea9eSmrg	}
1986fe8aea9eSmrg
1987fe8aea9eSmrg	if (picture->alphaMap) {
1988fe8aea9eSmrg		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
1989fe8aea9eSmrg		return sna_render_picture_fixup(sna, picture, channel,
1990fe8aea9eSmrg						x, y, w, h, dst_x, dst_y);
1991fe8aea9eSmrg	}
1992fe8aea9eSmrg
1993fe8aea9eSmrg	if (!gen9_check_repeat(picture))
1994fe8aea9eSmrg		return sna_render_picture_fixup(sna, picture, channel,
1995fe8aea9eSmrg						x, y, w, h, dst_x, dst_y);
1996fe8aea9eSmrg
1997fe8aea9eSmrg	if (!gen9_check_filter(picture))
1998fe8aea9eSmrg		return sna_render_picture_fixup(sna, picture, channel,
1999fe8aea9eSmrg						x, y, w, h, dst_x, dst_y);
2000fe8aea9eSmrg
2001fe8aea9eSmrg	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
2002fe8aea9eSmrg	channel->filter = picture->filter;
2003fe8aea9eSmrg
2004fe8aea9eSmrg	pixmap = get_drawable_pixmap(picture->pDrawable);
2005fe8aea9eSmrg	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
2006fe8aea9eSmrg
2007fe8aea9eSmrg	x += dx + picture->pDrawable->x;
2008fe8aea9eSmrg	y += dy + picture->pDrawable->y;
2009fe8aea9eSmrg
2010fe8aea9eSmrg	channel->is_affine = sna_transform_is_affine(picture->transform);
2011fe8aea9eSmrg	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
2012fe8aea9eSmrg		DBG(("%s: integer translation (%d, %d), removing\n",
2013fe8aea9eSmrg		     __FUNCTION__, dx, dy));
2014fe8aea9eSmrg		x += dx;
2015fe8aea9eSmrg		y += dy;
2016fe8aea9eSmrg		channel->transform = NULL;
2017fe8aea9eSmrg		channel->filter = PictFilterNearest;
2018fe8aea9eSmrg
2019fe8aea9eSmrg		if (channel->repeat ||
2020fe8aea9eSmrg		    (x >= 0 &&
2021fe8aea9eSmrg		     y >= 0 &&
2022fe8aea9eSmrg		     x + w <= pixmap->drawable.width &&
2023fe8aea9eSmrg		     y + h <= pixmap->drawable.height)) {
2024fe8aea9eSmrg			struct sna_pixmap *priv = sna_pixmap(pixmap);
2025fe8aea9eSmrg			if (priv && priv->clear) {
2026fe8aea9eSmrg				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
2027fe8aea9eSmrg				return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color));
2028fe8aea9eSmrg			}
2029fe8aea9eSmrg		}
2030fe8aea9eSmrg	} else
2031fe8aea9eSmrg		channel->transform = picture->transform;
2032fe8aea9eSmrg
2033fe8aea9eSmrg	channel->pict_format = picture->format;
2034fe8aea9eSmrg	channel->card_format = gen9_get_card_format(picture->format);
2035fe8aea9eSmrg	if (channel->card_format == (unsigned)-1)
2036fe8aea9eSmrg		return sna_render_picture_convert(sna, picture, channel, pixmap,
2037fe8aea9eSmrg						  x, y, w, h, dst_x, dst_y,
2038fe8aea9eSmrg						  false);
2039fe8aea9eSmrg
2040fe8aea9eSmrg	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
2041fe8aea9eSmrg		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
2042fe8aea9eSmrg		     pixmap->drawable.width, pixmap->drawable.height));
2043fe8aea9eSmrg		return sna_render_picture_extract(sna, picture, channel,
2044fe8aea9eSmrg						  x, y, w, h, dst_x, dst_y);
2045fe8aea9eSmrg	}
2046fe8aea9eSmrg
2047fe8aea9eSmrg	return sna_render_pixmap_bo(sna, channel, pixmap,
2048fe8aea9eSmrg				    x, y, w, h, dst_x, dst_y);
2049fe8aea9eSmrg}
2050fe8aea9eSmrg
2051fe8aea9eSmrginline static bool gen9_composite_channel_convert(struct sna_composite_channel *channel)
2052fe8aea9eSmrg{
2053fe8aea9eSmrg	if (unaligned(channel->bo, PICT_FORMAT_BPP(channel->pict_format)))
2054fe8aea9eSmrg		return false;
2055fe8aea9eSmrg
2056fe8aea9eSmrg	channel->repeat = gen9_repeat(channel->repeat);
2057fe8aea9eSmrg	channel->filter = gen9_filter(channel->filter);
2058fe8aea9eSmrg	if (channel->card_format == (unsigned)-1)
2059fe8aea9eSmrg		channel->card_format = gen9_get_card_format(channel->pict_format);
2060fe8aea9eSmrg	assert(channel->card_format != (unsigned)-1);
2061fe8aea9eSmrg
2062fe8aea9eSmrg	return true;
2063fe8aea9eSmrg}
2064fe8aea9eSmrg
2065fe8aea9eSmrgstatic void gen9_render_composite_done(struct sna *sna,
2066fe8aea9eSmrg				       const struct sna_composite_op *op)
2067fe8aea9eSmrg{
2068fe8aea9eSmrg	if (sna->render.vertex_offset) {
2069fe8aea9eSmrg		gen8_vertex_flush(sna);
2070fe8aea9eSmrg		gen9_magic_ca_pass(sna, op);
2071fe8aea9eSmrg	}
2072fe8aea9eSmrg
2073fe8aea9eSmrg	if (op->mask.bo)
2074fe8aea9eSmrg		kgem_bo_destroy(&sna->kgem, op->mask.bo);
2075fe8aea9eSmrg	if (op->src.bo)
2076fe8aea9eSmrg		kgem_bo_destroy(&sna->kgem, op->src.bo);
2077fe8aea9eSmrg
2078fe8aea9eSmrg	sna_render_composite_redirect_done(sna, op);
2079fe8aea9eSmrg}
2080fe8aea9eSmrg
2081fe8aea9eSmrginline static bool
2082fe8aea9eSmrggen9_composite_set_target(struct sna *sna,
2083fe8aea9eSmrg			  struct sna_composite_op *op,
2084fe8aea9eSmrg			  PicturePtr dst,
2085fe8aea9eSmrg			  int x, int y, int w, int h,
2086fe8aea9eSmrg			  bool partial)
2087fe8aea9eSmrg{
2088fe8aea9eSmrg	BoxRec box;
2089fe8aea9eSmrg	unsigned int hint;
2090fe8aea9eSmrg
2091fe8aea9eSmrg	DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial));
2092fe8aea9eSmrg
2093fe8aea9eSmrg	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
2094fe8aea9eSmrg	op->dst.format = dst->format;
2095fe8aea9eSmrg	op->dst.width  = op->dst.pixmap->drawable.width;
2096fe8aea9eSmrg	op->dst.height = op->dst.pixmap->drawable.height;
2097fe8aea9eSmrg
2098fe8aea9eSmrg	if (w | h) {
2099fe8aea9eSmrg		assert(w && h);
2100fe8aea9eSmrg		box.x1 = x;
2101fe8aea9eSmrg		box.y1 = y;
2102fe8aea9eSmrg		box.x2 = x + w;
2103fe8aea9eSmrg		box.y2 = y + h;
2104fe8aea9eSmrg	} else
2105fe8aea9eSmrg		sna_render_picture_extents(dst, &box);
2106fe8aea9eSmrg
2107fe8aea9eSmrg	hint = PREFER_GPU | RENDER_GPU;
2108fe8aea9eSmrg	if (!need_tiling(sna, op->dst.width, op->dst.height))
2109fe8aea9eSmrg		hint |= FORCE_GPU;
2110fe8aea9eSmrg	if (!partial) {
2111fe8aea9eSmrg		hint |= IGNORE_DAMAGE;
2112fe8aea9eSmrg		if (w == op->dst.width && h == op->dst.height)
2113fe8aea9eSmrg			hint |= REPLACES;
2114fe8aea9eSmrg	}
2115fe8aea9eSmrg
2116fe8aea9eSmrg	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
2117fe8aea9eSmrg	if (op->dst.bo == NULL)
2118fe8aea9eSmrg		return false;
2119fe8aea9eSmrg
2120fe8aea9eSmrg	assert(!op->damage || !DAMAGE_IS_ALL(*op->damage));
2121fe8aea9eSmrg
2122fe8aea9eSmrg	if (unaligned(op->dst.bo, dst->pDrawable->bitsPerPixel))
2123fe8aea9eSmrg		return false;
2124fe8aea9eSmrg
2125fe8aea9eSmrg	if (hint & REPLACES) {
2126fe8aea9eSmrg		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
2127fe8aea9eSmrg		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
2128fe8aea9eSmrg	}
2129fe8aea9eSmrg
2130fe8aea9eSmrg	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
2131fe8aea9eSmrg			    &op->dst.x, &op->dst.y);
2132fe8aea9eSmrg
2133fe8aea9eSmrg	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
2134fe8aea9eSmrg	     __FUNCTION__,
2135fe8aea9eSmrg	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
2136fe8aea9eSmrg	     op->dst.width, op->dst.height,
2137fe8aea9eSmrg	     op->dst.bo->pitch,
2138fe8aea9eSmrg	     op->dst.x, op->dst.y,
2139fe8aea9eSmrg	     op->damage ? *op->damage : (void *)-1));
2140fe8aea9eSmrg
2141fe8aea9eSmrg	assert(op->dst.bo->proxy == NULL);
2142fe8aea9eSmrg
2143fe8aea9eSmrg	if (too_large(op->dst.width, op->dst.height) &&
2144fe8aea9eSmrg	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
2145fe8aea9eSmrg		return false;
2146fe8aea9eSmrg
2147fe8aea9eSmrg	return true;
2148fe8aea9eSmrg}
2149fe8aea9eSmrg
2150fe8aea9eSmrgstatic bool
2151fe8aea9eSmrgtry_blt(struct sna *sna,
2152fe8aea9eSmrg	uint8_t op,
2153fe8aea9eSmrg	PicturePtr src,
2154fe8aea9eSmrg	PicturePtr mask,
2155fe8aea9eSmrg	PicturePtr dst,
2156fe8aea9eSmrg	int16_t src_x, int16_t src_y,
2157fe8aea9eSmrg	int16_t msk_x, int16_t msk_y,
2158fe8aea9eSmrg	int16_t dst_x, int16_t dst_y,
2159fe8aea9eSmrg	int16_t width, int16_t height,
2160fe8aea9eSmrg	unsigned flags,
2161fe8aea9eSmrg	struct sna_composite_op *tmp)
2162fe8aea9eSmrg{
2163fe8aea9eSmrg	struct kgem_bo *bo;
2164fe8aea9eSmrg
2165fe8aea9eSmrg	if (sna->kgem.mode == KGEM_BLT) {
2166fe8aea9eSmrg		DBG(("%s: already performing BLT\n", __FUNCTION__));
2167fe8aea9eSmrg		goto execute;
2168fe8aea9eSmrg	}
2169fe8aea9eSmrg
2170fe8aea9eSmrg	if (too_large(width, height)) {
2171fe8aea9eSmrg		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
2172fe8aea9eSmrg		     __FUNCTION__, width, height));
2173fe8aea9eSmrg		goto execute;
2174fe8aea9eSmrg	}
2175fe8aea9eSmrg
2176fe8aea9eSmrg	bo = __sna_drawable_peek_bo(dst->pDrawable);
2177fe8aea9eSmrg	if (bo == NULL)
2178fe8aea9eSmrg		goto execute;
2179fe8aea9eSmrg
2180fe8aea9eSmrg	if (untiled_tlb_miss(bo))
2181fe8aea9eSmrg		goto execute;
2182fe8aea9eSmrg
2183fe8aea9eSmrg	if (bo->rq) {
2184fe8aea9eSmrg		if (RQ_IS_BLT(bo->rq))
2185fe8aea9eSmrg			goto execute;
2186fe8aea9eSmrg
2187fe8aea9eSmrg		return false;
2188fe8aea9eSmrg	}
2189fe8aea9eSmrg
2190fe8aea9eSmrg	if (bo->tiling == I915_TILING_Y)
2191fe8aea9eSmrg		goto upload;
2192fe8aea9eSmrg
2193fe8aea9eSmrg	if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0))
2194fe8aea9eSmrg		goto execute;
2195fe8aea9eSmrg
2196fe8aea9eSmrg	if (src->pDrawable == dst->pDrawable &&
2197fe8aea9eSmrg	    (sna->render_state.gt < 3 || width*height < 1024) &&
2198fe8aea9eSmrg	    can_switch_to_blt(sna, bo, 0))
2199fe8aea9eSmrg		goto execute;
2200fe8aea9eSmrg
2201fe8aea9eSmrg	if (src->pDrawable) {
2202fe8aea9eSmrg		struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable);
2203fe8aea9eSmrg		if (s == NULL)
2204fe8aea9eSmrg			goto upload;
2205fe8aea9eSmrg
2206fe8aea9eSmrg		if (prefer_blt_bo(sna, s, bo))
2207fe8aea9eSmrg			goto execute;
2208fe8aea9eSmrg	}
2209fe8aea9eSmrg
2210fe8aea9eSmrg	if (sna->kgem.ring == KGEM_BLT) {
2211fe8aea9eSmrg		DBG(("%s: already performing BLT\n", __FUNCTION__));
2212fe8aea9eSmrg		goto execute;
2213fe8aea9eSmrg	}
2214fe8aea9eSmrg
2215fe8aea9eSmrgupload:
2216fe8aea9eSmrg	flags |= COMPOSITE_UPLOAD;
2217fe8aea9eSmrgexecute:
2218fe8aea9eSmrg	return sna_blt_composite(sna, op,
2219fe8aea9eSmrg				 src, dst,
2220fe8aea9eSmrg				 src_x, src_y,
2221fe8aea9eSmrg				 dst_x, dst_y,
2222fe8aea9eSmrg				 width, height,
2223fe8aea9eSmrg				 flags, tmp);
2224fe8aea9eSmrg}
2225fe8aea9eSmrg
2226fe8aea9eSmrgstatic bool
2227fe8aea9eSmrgcheck_gradient(PicturePtr picture, bool precise)
2228fe8aea9eSmrg{
2229fe8aea9eSmrg	if (picture->pDrawable)
2230fe8aea9eSmrg		return false;
2231fe8aea9eSmrg
2232fe8aea9eSmrg	switch (picture->pSourcePict->type) {
2233fe8aea9eSmrg	case SourcePictTypeSolidFill:
2234fe8aea9eSmrg	case SourcePictTypeLinear:
2235fe8aea9eSmrg		return false;
2236fe8aea9eSmrg	default:
2237fe8aea9eSmrg		return precise;
2238fe8aea9eSmrg	}
2239fe8aea9eSmrg}
2240fe8aea9eSmrg
2241fe8aea9eSmrgstatic bool
2242fe8aea9eSmrghas_alphamap(PicturePtr p)
2243fe8aea9eSmrg{
2244fe8aea9eSmrg	return p->alphaMap != NULL;
2245fe8aea9eSmrg}
2246fe8aea9eSmrg
2247fe8aea9eSmrgstatic bool
2248fe8aea9eSmrgneed_upload(PicturePtr p)
2249fe8aea9eSmrg{
2250fe8aea9eSmrg	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
2251fe8aea9eSmrg}
2252fe8aea9eSmrg
2253fe8aea9eSmrgstatic bool
2254fe8aea9eSmrgsource_is_busy(PixmapPtr pixmap)
2255fe8aea9eSmrg{
2256fe8aea9eSmrg	struct sna_pixmap *priv = sna_pixmap(pixmap);
2257fe8aea9eSmrg	if (priv == NULL || priv->clear)
2258fe8aea9eSmrg		return false;
2259fe8aea9eSmrg
2260fe8aea9eSmrg	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
2261fe8aea9eSmrg		return true;
2262fe8aea9eSmrg
2263fe8aea9eSmrg	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
2264fe8aea9eSmrg		return true;
2265fe8aea9eSmrg
2266fe8aea9eSmrg	return priv->gpu_damage && !priv->cpu_damage;
2267fe8aea9eSmrg}
2268fe8aea9eSmrg
2269fe8aea9eSmrgstatic bool
2270fe8aea9eSmrgsource_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
2271fe8aea9eSmrg{
2272fe8aea9eSmrg	if (sna_picture_is_solid(p, NULL))
2273fe8aea9eSmrg		return false;
2274fe8aea9eSmrg
2275fe8aea9eSmrg	if (p->pSourcePict)
2276fe8aea9eSmrg		return check_gradient(p, precise);
2277fe8aea9eSmrg
2278fe8aea9eSmrg	if (!gen9_check_repeat(p) || !gen9_check_format(p->format))
2279fe8aea9eSmrg		return true;
2280fe8aea9eSmrg
2281fe8aea9eSmrg	if (pixmap && source_is_busy(pixmap))
2282fe8aea9eSmrg		return false;
2283fe8aea9eSmrg
2284fe8aea9eSmrg	return has_alphamap(p) || !gen9_check_filter(p) || need_upload(p);
2285fe8aea9eSmrg}
2286fe8aea9eSmrg
2287fe8aea9eSmrgstatic bool
2288fe8aea9eSmrggen9_composite_fallback(struct sna *sna,
2289fe8aea9eSmrg			PicturePtr src,
2290fe8aea9eSmrg			PicturePtr mask,
2291fe8aea9eSmrg			PicturePtr dst)
2292fe8aea9eSmrg{
2293fe8aea9eSmrg	PixmapPtr src_pixmap;
2294fe8aea9eSmrg	PixmapPtr mask_pixmap;
2295fe8aea9eSmrg	PixmapPtr dst_pixmap;
2296fe8aea9eSmrg	bool src_fallback, mask_fallback;
2297fe8aea9eSmrg
2298fe8aea9eSmrg	if (!gen9_check_dst_format(dst->format)) {
2299fe8aea9eSmrg		DBG(("%s: unknown destination format: %d\n",
2300fe8aea9eSmrg		     __FUNCTION__, dst->format));
2301fe8aea9eSmrg		return true;
2302fe8aea9eSmrg	}
2303fe8aea9eSmrg
2304fe8aea9eSmrg	dst_pixmap = get_drawable_pixmap(dst->pDrawable);
2305fe8aea9eSmrg
2306fe8aea9eSmrg	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
2307fe8aea9eSmrg	src_fallback = source_fallback(src, src_pixmap,
2308fe8aea9eSmrg				       dst->polyMode == PolyModePrecise);
2309fe8aea9eSmrg
2310fe8aea9eSmrg	if (mask) {
2311fe8aea9eSmrg		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
2312fe8aea9eSmrg		mask_fallback = source_fallback(mask, mask_pixmap,
2313fe8aea9eSmrg						dst->polyMode == PolyModePrecise);
2314fe8aea9eSmrg	} else {
2315fe8aea9eSmrg		mask_pixmap = NULL;
2316fe8aea9eSmrg		mask_fallback = false;
2317fe8aea9eSmrg	}
2318fe8aea9eSmrg
2319fe8aea9eSmrg	/* If we are using the destination as a source and need to
2320fe8aea9eSmrg	 * readback in order to upload the source, do it all
2321fe8aea9eSmrg	 * on the cpu.
2322fe8aea9eSmrg	 */
2323fe8aea9eSmrg	if (src_pixmap == dst_pixmap && src_fallback) {
2324fe8aea9eSmrg		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
2325fe8aea9eSmrg		return true;
2326fe8aea9eSmrg	}
2327fe8aea9eSmrg	if (mask_pixmap == dst_pixmap && mask_fallback) {
2328fe8aea9eSmrg		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
2329fe8aea9eSmrg		return true;
2330fe8aea9eSmrg	}
2331fe8aea9eSmrg
2332fe8aea9eSmrg	/* If anything is on the GPU, push everything out to the GPU */
2333fe8aea9eSmrg	if (dst_use_gpu(dst_pixmap)) {
2334fe8aea9eSmrg		DBG(("%s: dst is already on the GPU, try to use GPU\n",
2335fe8aea9eSmrg		     __FUNCTION__));
2336fe8aea9eSmrg		return false;
2337fe8aea9eSmrg	}
2338fe8aea9eSmrg
2339fe8aea9eSmrg	if (src_pixmap && !src_fallback) {
2340fe8aea9eSmrg		DBG(("%s: src is already on the GPU, try to use GPU\n",
2341fe8aea9eSmrg		     __FUNCTION__));
2342fe8aea9eSmrg		return false;
2343fe8aea9eSmrg	}
2344fe8aea9eSmrg	if (mask_pixmap && !mask_fallback) {
2345fe8aea9eSmrg		DBG(("%s: mask is already on the GPU, try to use GPU\n",
2346fe8aea9eSmrg		     __FUNCTION__));
2347fe8aea9eSmrg		return false;
2348fe8aea9eSmrg	}
2349fe8aea9eSmrg
2350fe8aea9eSmrg	/* However if the dst is not on the GPU and we need to
2351fe8aea9eSmrg	 * render one of the sources using the CPU, we may
2352fe8aea9eSmrg	 * as well do the entire operation in place onthe CPU.
2353fe8aea9eSmrg	 */
2354fe8aea9eSmrg	if (src_fallback) {
2355fe8aea9eSmrg		DBG(("%s: dst is on the CPU and src will fallback\n",
2356fe8aea9eSmrg		     __FUNCTION__));
2357fe8aea9eSmrg		return true;
2358fe8aea9eSmrg	}
2359fe8aea9eSmrg
2360fe8aea9eSmrg	if (mask && mask_fallback) {
2361fe8aea9eSmrg		DBG(("%s: dst is on the CPU and mask will fallback\n",
2362fe8aea9eSmrg		     __FUNCTION__));
2363fe8aea9eSmrg		return true;
2364fe8aea9eSmrg	}
2365fe8aea9eSmrg
2366fe8aea9eSmrg	if (too_large(dst_pixmap->drawable.width,
2367fe8aea9eSmrg		      dst_pixmap->drawable.height) &&
2368fe8aea9eSmrg	    dst_is_cpu(dst_pixmap)) {
2369fe8aea9eSmrg		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
2370fe8aea9eSmrg		return true;
2371fe8aea9eSmrg	}
2372fe8aea9eSmrg
2373fe8aea9eSmrg	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
2374fe8aea9eSmrg	     __FUNCTION__));
2375fe8aea9eSmrg	return dst_use_cpu(dst_pixmap);
2376fe8aea9eSmrg}
2377fe8aea9eSmrg
2378fe8aea9eSmrgstatic int
2379fe8aea9eSmrgreuse_source(struct sna *sna,
2380fe8aea9eSmrg	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
2381fe8aea9eSmrg	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
2382fe8aea9eSmrg{
2383fe8aea9eSmrg	uint32_t color;
2384fe8aea9eSmrg
2385fe8aea9eSmrg	if (src_x != msk_x || src_y != msk_y)
2386fe8aea9eSmrg		return false;
2387fe8aea9eSmrg
2388fe8aea9eSmrg	if (src == mask) {
2389fe8aea9eSmrg		DBG(("%s: mask is source\n", __FUNCTION__));
2390fe8aea9eSmrg		*mc = *sc;
2391fe8aea9eSmrg		mc->bo = kgem_bo_reference(mc->bo);
2392fe8aea9eSmrg		return true;
2393fe8aea9eSmrg	}
2394fe8aea9eSmrg
2395fe8aea9eSmrg	if (sna_picture_is_solid(mask, &color))
2396fe8aea9eSmrg		return gen4_channel_init_solid(sna, mc, color);
2397fe8aea9eSmrg
2398fe8aea9eSmrg	if (sc->is_solid)
2399fe8aea9eSmrg		return false;
2400fe8aea9eSmrg
2401fe8aea9eSmrg	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
2402fe8aea9eSmrg		return false;
2403fe8aea9eSmrg
2404fe8aea9eSmrg	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
2405fe8aea9eSmrg
2406fe8aea9eSmrg	if (!sna_transform_equal(src->transform, mask->transform))
2407fe8aea9eSmrg		return false;
2408fe8aea9eSmrg
2409fe8aea9eSmrg	if (!sna_picture_alphamap_equal(src, mask))
2410fe8aea9eSmrg		return false;
2411fe8aea9eSmrg
2412fe8aea9eSmrg	if (!gen9_check_repeat(mask))
2413fe8aea9eSmrg		return false;
2414fe8aea9eSmrg
2415fe8aea9eSmrg	if (!gen9_check_filter(mask))
2416fe8aea9eSmrg		return false;
2417fe8aea9eSmrg
2418fe8aea9eSmrg	if (!gen9_check_format(mask->format))
2419fe8aea9eSmrg		return false;
2420fe8aea9eSmrg
2421fe8aea9eSmrg	DBG(("%s: reusing source channel for mask with a twist\n",
2422fe8aea9eSmrg	     __FUNCTION__));
2423fe8aea9eSmrg
2424fe8aea9eSmrg	*mc = *sc;
2425fe8aea9eSmrg	mc->repeat = gen9_repeat(mask->repeat ? mask->repeatType : RepeatNone);
2426fe8aea9eSmrg	mc->filter = gen9_filter(mask->filter);
2427fe8aea9eSmrg	mc->pict_format = mask->format;
2428fe8aea9eSmrg	mc->card_format = gen9_get_card_format(mask->format);
2429fe8aea9eSmrg	mc->bo = kgem_bo_reference(mc->bo);
2430fe8aea9eSmrg	return true;
2431fe8aea9eSmrg}
2432fe8aea9eSmrg
2433fe8aea9eSmrgstatic bool
2434fe8aea9eSmrggen9_render_composite(struct sna *sna,
2435fe8aea9eSmrg		      uint8_t op,
2436fe8aea9eSmrg		      PicturePtr src,
2437fe8aea9eSmrg		      PicturePtr mask,
2438fe8aea9eSmrg		      PicturePtr dst,
2439fe8aea9eSmrg		      int16_t src_x, int16_t src_y,
2440fe8aea9eSmrg		      int16_t msk_x, int16_t msk_y,
2441fe8aea9eSmrg		      int16_t dst_x, int16_t dst_y,
2442fe8aea9eSmrg		      int16_t width, int16_t height,
2443fe8aea9eSmrg		      unsigned flags,
2444fe8aea9eSmrg		      struct sna_composite_op *tmp)
2445fe8aea9eSmrg{
2446fe8aea9eSmrg	if (op >= ARRAY_SIZE(gen9_blend_op))
2447fe8aea9eSmrg		return false;
2448fe8aea9eSmrg
2449fe8aea9eSmrg	DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__,
2450fe8aea9eSmrg	     width, height, sna->kgem.mode, sna->kgem.ring));
2451fe8aea9eSmrg
2452fe8aea9eSmrg	if (mask == NULL &&
2453fe8aea9eSmrg	    try_blt(sna, op,
2454fe8aea9eSmrg		    src, mask, dst,
2455fe8aea9eSmrg		    src_x, src_y,
2456fe8aea9eSmrg		    msk_x, msk_y,
2457fe8aea9eSmrg		    dst_x, dst_y,
2458fe8aea9eSmrg		    width, height,
2459fe8aea9eSmrg		    flags, tmp))
2460fe8aea9eSmrg		return true;
2461fe8aea9eSmrg
2462fe8aea9eSmrg	if (gen9_composite_fallback(sna, src, mask, dst))
2463fe8aea9eSmrg		goto fallback;
2464fe8aea9eSmrg
2465fe8aea9eSmrg	if (need_tiling(sna, width, height))
2466fe8aea9eSmrg		return sna_tiling_composite(op, src, mask, dst,
2467fe8aea9eSmrg					    src_x, src_y,
2468fe8aea9eSmrg					    msk_x, msk_y,
2469fe8aea9eSmrg					    dst_x, dst_y,
2470fe8aea9eSmrg					    width, height,
2471fe8aea9eSmrg					    tmp);
2472fe8aea9eSmrg
2473fe8aea9eSmrg	if (op == PictOpClear && src == sna->clear)
2474fe8aea9eSmrg		op = PictOpSrc;
2475fe8aea9eSmrg	tmp->op = op;
2476fe8aea9eSmrg	if (!gen9_composite_set_target(sna, tmp, dst,
2477fe8aea9eSmrg				       dst_x, dst_y, width, height,
2478fe8aea9eSmrg				       flags & COMPOSITE_PARTIAL || op > PictOpSrc))
2479fe8aea9eSmrg		goto fallback;
2480fe8aea9eSmrg
2481fe8aea9eSmrg	switch (gen9_composite_picture(sna, src, &tmp->src,
2482fe8aea9eSmrg				       src_x, src_y,
2483fe8aea9eSmrg				       width, height,
2484fe8aea9eSmrg				       dst_x, dst_y,
2485fe8aea9eSmrg				       dst->polyMode == PolyModePrecise)) {
2486fe8aea9eSmrg	case -1:
2487fe8aea9eSmrg		goto cleanup_dst;
2488fe8aea9eSmrg	case 0:
2489fe8aea9eSmrg		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
2490fe8aea9eSmrg			goto cleanup_dst;
2491fe8aea9eSmrg		/* fall through to fixup */
2492fe8aea9eSmrg	case 1:
2493fe8aea9eSmrg		/* Did we just switch rings to prepare the source? */
2494fe8aea9eSmrg		if (mask == NULL &&
2495fe8aea9eSmrg		    (prefer_blt_composite(sna, tmp) ||
2496fe8aea9eSmrg		     unaligned(tmp->src.bo, PICT_FORMAT_BPP(tmp->src.pict_format))) &&
2497fe8aea9eSmrg		    sna_blt_composite__convert(sna,
2498fe8aea9eSmrg					       dst_x, dst_y, width, height,
2499fe8aea9eSmrg					       tmp))
2500fe8aea9eSmrg			return true;
2501fe8aea9eSmrg
2502fe8aea9eSmrg		if (!gen9_composite_channel_convert(&tmp->src))
2503fe8aea9eSmrg			goto cleanup_src;
2504fe8aea9eSmrg
2505fe8aea9eSmrg		break;
2506fe8aea9eSmrg	}
2507fe8aea9eSmrg
2508fe8aea9eSmrg	tmp->is_affine = tmp->src.is_affine;
2509fe8aea9eSmrg	tmp->has_component_alpha = false;
2510fe8aea9eSmrg	tmp->need_magic_ca_pass = false;
2511fe8aea9eSmrg
2512fe8aea9eSmrg	tmp->mask.bo = NULL;
2513fe8aea9eSmrg	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
2514fe8aea9eSmrg	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
2515fe8aea9eSmrg
2516fe8aea9eSmrg	if (mask) {
2517fe8aea9eSmrg		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
2518fe8aea9eSmrg			tmp->has_component_alpha = true;
2519fe8aea9eSmrg
2520fe8aea9eSmrg			/* Check if it's component alpha that relies on a source alpha and on
2521fe8aea9eSmrg			 * the source value.  We can only get one of those into the single
2522fe8aea9eSmrg			 * source value that we get to blend with.
2523fe8aea9eSmrg			 */
2524fe8aea9eSmrg			if (gen9_blend_op[op].src_alpha &&
2525fe8aea9eSmrg			    (gen9_blend_op[op].src_blend != BLENDFACTOR_ZERO)) {
2526fe8aea9eSmrg				if (op != PictOpOver)
2527fe8aea9eSmrg					goto cleanup_src;
2528fe8aea9eSmrg
2529fe8aea9eSmrg				tmp->need_magic_ca_pass = true;
2530fe8aea9eSmrg				tmp->op = PictOpOutReverse;
2531fe8aea9eSmrg			}
2532fe8aea9eSmrg		}
2533fe8aea9eSmrg
2534fe8aea9eSmrg		if (!reuse_source(sna,
2535fe8aea9eSmrg				  src, &tmp->src, src_x, src_y,
2536fe8aea9eSmrg				  mask, &tmp->mask, msk_x, msk_y)) {
2537fe8aea9eSmrg			switch (gen9_composite_picture(sna, mask, &tmp->mask,
2538fe8aea9eSmrg						       msk_x, msk_y,
2539fe8aea9eSmrg						       width, height,
2540fe8aea9eSmrg						       dst_x, dst_y,
2541fe8aea9eSmrg						       dst->polyMode == PolyModePrecise)) {
2542fe8aea9eSmrg			case -1:
2543fe8aea9eSmrg				goto cleanup_src;
2544fe8aea9eSmrg			case 0:
2545fe8aea9eSmrg				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
2546fe8aea9eSmrg					goto cleanup_src;
2547fe8aea9eSmrg				/* fall through to fixup */
2548fe8aea9eSmrg			case 1:
2549fe8aea9eSmrg				if (!gen9_composite_channel_convert(&tmp->mask))
2550fe8aea9eSmrg					goto cleanup_mask;
2551fe8aea9eSmrg				break;
2552fe8aea9eSmrg			}
2553fe8aea9eSmrg		}
2554fe8aea9eSmrg
2555fe8aea9eSmrg		tmp->is_affine &= tmp->mask.is_affine;
2556fe8aea9eSmrg	}
2557fe8aea9eSmrg
2558fe8aea9eSmrg	tmp->u.gen9.flags =
2559fe8aea9eSmrg		GEN9_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
2560fe8aea9eSmrg					      tmp->src.repeat,
2561fe8aea9eSmrg					      tmp->mask.filter,
2562fe8aea9eSmrg					      tmp->mask.repeat),
2563fe8aea9eSmrg			       gen9_get_blend(tmp->op,
2564fe8aea9eSmrg					      tmp->has_component_alpha,
2565fe8aea9eSmrg					      tmp->dst.format),
2566fe8aea9eSmrg			       gen4_choose_composite_emitter(sna, tmp));
2567fe8aea9eSmrg	tmp->u.gen9.wm_kernel = gen9_choose_composite_kernel(tmp->op,
2568fe8aea9eSmrg							     tmp->mask.bo != NULL,
2569fe8aea9eSmrg							     tmp->has_component_alpha,
2570fe8aea9eSmrg							     tmp->is_affine);
2571fe8aea9eSmrg
2572fe8aea9eSmrg	tmp->blt   = gen9_render_composite_blt;
2573fe8aea9eSmrg	tmp->box   = gen9_render_composite_box;
2574fe8aea9eSmrg	tmp->boxes = gen9_render_composite_boxes__blt;
2575fe8aea9eSmrg	if (tmp->emit_boxes){
2576fe8aea9eSmrg		tmp->boxes = gen9_render_composite_boxes;
2577fe8aea9eSmrg		tmp->thread_boxes = gen9_render_composite_boxes__thread;
2578fe8aea9eSmrg	}
2579fe8aea9eSmrg	tmp->done  = gen9_render_composite_done;
2580fe8aea9eSmrg
2581fe8aea9eSmrg	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
2582fe8aea9eSmrg	if (!kgem_check_bo(&sna->kgem,
2583fe8aea9eSmrg			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2584fe8aea9eSmrg			   NULL)) {
2585fe8aea9eSmrg		kgem_submit(&sna->kgem);
2586fe8aea9eSmrg		if (!kgem_check_bo(&sna->kgem,
2587fe8aea9eSmrg				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2588fe8aea9eSmrg				   NULL))
2589fe8aea9eSmrg			goto cleanup_mask;
2590fe8aea9eSmrg		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
2591fe8aea9eSmrg	}
2592fe8aea9eSmrg
2593fe8aea9eSmrg	gen9_align_vertex(sna, tmp);
2594fe8aea9eSmrg	gen9_emit_composite_state(sna, tmp);
2595fe8aea9eSmrg	return true;
2596fe8aea9eSmrg
2597fe8aea9eSmrgcleanup_mask:
2598fe8aea9eSmrg	if (tmp->mask.bo) {
2599fe8aea9eSmrg		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
2600fe8aea9eSmrg		tmp->mask.bo = NULL;
2601fe8aea9eSmrg	}
2602fe8aea9eSmrgcleanup_src:
2603fe8aea9eSmrg	if (tmp->src.bo) {
2604fe8aea9eSmrg		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
2605fe8aea9eSmrg		tmp->src.bo = NULL;
2606fe8aea9eSmrg	}
2607fe8aea9eSmrgcleanup_dst:
2608fe8aea9eSmrg	if (tmp->redirect.real_bo) {
2609fe8aea9eSmrg		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
2610fe8aea9eSmrg		tmp->redirect.real_bo = NULL;
2611fe8aea9eSmrg	}
2612fe8aea9eSmrgfallback:
2613fe8aea9eSmrg	return (mask == NULL &&
2614fe8aea9eSmrg		sna_blt_composite(sna, op,
2615fe8aea9eSmrg				  src, dst,
2616fe8aea9eSmrg				  src_x, src_y,
2617fe8aea9eSmrg				  dst_x, dst_y,
2618fe8aea9eSmrg				  width, height,
2619fe8aea9eSmrg				  flags | COMPOSITE_FALLBACK, tmp));
2620fe8aea9eSmrg}
2621fe8aea9eSmrg
2622fe8aea9eSmrg#if !NO_COMPOSITE_SPANS
2623fe8aea9eSmrgfastcall static void
2624fe8aea9eSmrggen9_render_composite_spans_box(struct sna *sna,
2625fe8aea9eSmrg				const struct sna_composite_spans_op *op,
2626fe8aea9eSmrg				const BoxRec *box, float opacity)
2627fe8aea9eSmrg{
2628fe8aea9eSmrg	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
2629fe8aea9eSmrg	     __FUNCTION__,
2630fe8aea9eSmrg	     op->base.src.offset[0], op->base.src.offset[1],
2631fe8aea9eSmrg	     opacity,
2632fe8aea9eSmrg	     op->base.dst.x, op->base.dst.y,
2633fe8aea9eSmrg	     box->x1, box->y1,
2634fe8aea9eSmrg	     box->x2 - box->x1,
2635fe8aea9eSmrg	     box->y2 - box->y1));
2636fe8aea9eSmrg
2637fe8aea9eSmrg	gen9_get_rectangles(sna, &op->base, 1, gen9_emit_composite_state);
2638fe8aea9eSmrg	op->prim_emit(sna, op, box, opacity);
2639fe8aea9eSmrg}
2640fe8aea9eSmrg
2641fe8aea9eSmrgstatic void
2642fe8aea9eSmrggen9_render_composite_spans_boxes(struct sna *sna,
2643fe8aea9eSmrg				  const struct sna_composite_spans_op *op,
2644fe8aea9eSmrg				  const BoxRec *box, int nbox,
2645fe8aea9eSmrg				  float opacity)
2646fe8aea9eSmrg{
2647fe8aea9eSmrg	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
2648fe8aea9eSmrg	     __FUNCTION__, nbox,
2649fe8aea9eSmrg	     op->base.src.offset[0], op->base.src.offset[1],
2650fe8aea9eSmrg	     opacity,
2651fe8aea9eSmrg	     op->base.dst.x, op->base.dst.y));
2652fe8aea9eSmrg
2653fe8aea9eSmrg	do {
2654fe8aea9eSmrg		int nbox_this_time;
2655fe8aea9eSmrg
2656fe8aea9eSmrg		nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox,
2657fe8aea9eSmrg						     gen9_emit_composite_state);
2658fe8aea9eSmrg		nbox -= nbox_this_time;
2659fe8aea9eSmrg
2660fe8aea9eSmrg		do {
2661fe8aea9eSmrg			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
2662fe8aea9eSmrg			     box->x1, box->y1,
2663fe8aea9eSmrg			     box->x2 - box->x1,
2664fe8aea9eSmrg			     box->y2 - box->y1));
2665fe8aea9eSmrg
2666fe8aea9eSmrg			op->prim_emit(sna, op, box++, opacity);
2667fe8aea9eSmrg		} while (--nbox_this_time);
2668fe8aea9eSmrg	} while (nbox);
2669fe8aea9eSmrg}
2670fe8aea9eSmrg
2671fe8aea9eSmrgfastcall static void
2672fe8aea9eSmrggen9_render_composite_spans_boxes__thread(struct sna *sna,
2673fe8aea9eSmrg					  const struct sna_composite_spans_op *op,
2674fe8aea9eSmrg					  const struct sna_opacity_box *box,
2675fe8aea9eSmrg					  int nbox)
2676fe8aea9eSmrg{
2677fe8aea9eSmrg	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
2678fe8aea9eSmrg	     __FUNCTION__, nbox,
2679fe8aea9eSmrg	     op->base.src.offset[0], op->base.src.offset[1],
2680fe8aea9eSmrg	     op->base.dst.x, op->base.dst.y));
2681fe8aea9eSmrg
2682fe8aea9eSmrg	sna_vertex_lock(&sna->render);
2683fe8aea9eSmrg	do {
2684fe8aea9eSmrg		int nbox_this_time;
2685fe8aea9eSmrg		float *v;
2686fe8aea9eSmrg
2687fe8aea9eSmrg		nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox,
2688fe8aea9eSmrg						     gen9_emit_composite_state);
2689fe8aea9eSmrg		assert(nbox_this_time);
2690fe8aea9eSmrg		nbox -= nbox_this_time;
2691fe8aea9eSmrg
2692fe8aea9eSmrg		v = sna->render.vertices + sna->render.vertex_used;
2693fe8aea9eSmrg		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
2694fe8aea9eSmrg
2695fe8aea9eSmrg		sna_vertex_acquire__locked(&sna->render);
2696fe8aea9eSmrg		sna_vertex_unlock(&sna->render);
2697fe8aea9eSmrg
2698fe8aea9eSmrg		op->emit_boxes(op, box, nbox_this_time, v);
2699fe8aea9eSmrg		box += nbox_this_time;
2700fe8aea9eSmrg
2701fe8aea9eSmrg		sna_vertex_lock(&sna->render);
2702fe8aea9eSmrg		sna_vertex_release__locked(&sna->render);
2703fe8aea9eSmrg	} while (nbox);
2704fe8aea9eSmrg	sna_vertex_unlock(&sna->render);
2705fe8aea9eSmrg}
2706fe8aea9eSmrg
2707fe8aea9eSmrgfastcall static void
2708fe8aea9eSmrggen9_render_composite_spans_done(struct sna *sna,
2709fe8aea9eSmrg				 const struct sna_composite_spans_op *op)
2710fe8aea9eSmrg{
2711fe8aea9eSmrg	if (sna->render.vertex_offset)
2712fe8aea9eSmrg		gen8_vertex_flush(sna);
2713fe8aea9eSmrg
2714fe8aea9eSmrg	DBG(("%s()\n", __FUNCTION__));
2715fe8aea9eSmrg
2716fe8aea9eSmrg	if (op->base.src.bo)
2717fe8aea9eSmrg		kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2718fe8aea9eSmrg
2719fe8aea9eSmrg	sna_render_composite_redirect_done(sna, &op->base);
2720fe8aea9eSmrg}
2721fe8aea9eSmrg
2722fe8aea9eSmrgstatic bool
2723fe8aea9eSmrggen9_check_composite_spans(struct sna *sna,
2724fe8aea9eSmrg			   uint8_t op, PicturePtr src, PicturePtr dst,
2725fe8aea9eSmrg			   int16_t width, int16_t height, unsigned flags)
2726fe8aea9eSmrg{
2727fe8aea9eSmrg	if (op >= ARRAY_SIZE(gen9_blend_op))
2728fe8aea9eSmrg		return false;
2729fe8aea9eSmrg
2730fe8aea9eSmrg	if (gen9_composite_fallback(sna, src, NULL, dst))
2731fe8aea9eSmrg		return false;
2732fe8aea9eSmrg
2733fe8aea9eSmrg	if (need_tiling(sna, width, height) &&
2734fe8aea9eSmrg	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
2735fe8aea9eSmrg		DBG(("%s: fallback, tiled operation not on GPU\n",
2736fe8aea9eSmrg		     __FUNCTION__));
2737fe8aea9eSmrg		return false;
2738fe8aea9eSmrg	}
2739fe8aea9eSmrg
2740fe8aea9eSmrg	return true;
2741fe8aea9eSmrg}
2742fe8aea9eSmrg
2743fe8aea9eSmrgstatic bool
2744fe8aea9eSmrggen9_render_composite_spans(struct sna *sna,
2745fe8aea9eSmrg			    uint8_t op,
2746fe8aea9eSmrg			    PicturePtr src,
2747fe8aea9eSmrg			    PicturePtr dst,
2748fe8aea9eSmrg			    int16_t src_x,  int16_t src_y,
2749fe8aea9eSmrg			    int16_t dst_x,  int16_t dst_y,
2750fe8aea9eSmrg			    int16_t width,  int16_t height,
2751fe8aea9eSmrg			    unsigned flags,
2752fe8aea9eSmrg			    struct sna_composite_spans_op *tmp)
2753fe8aea9eSmrg{
2754fe8aea9eSmrg	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
2755fe8aea9eSmrg	     width, height, flags, sna->kgem.ring));
2756fe8aea9eSmrg
2757fe8aea9eSmrg	assert(gen9_check_composite_spans(sna, op, src, dst, width, height, flags));
2758fe8aea9eSmrg
2759fe8aea9eSmrg	if (need_tiling(sna, width, height)) {
2760fe8aea9eSmrg		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
2761fe8aea9eSmrg		     __FUNCTION__, width, height));
2762fe8aea9eSmrg		return sna_tiling_composite_spans(op, src, dst,
2763fe8aea9eSmrg						  src_x, src_y, dst_x, dst_y,
2764fe8aea9eSmrg						  width, height, flags, tmp);
2765fe8aea9eSmrg	}
2766fe8aea9eSmrg
2767fe8aea9eSmrg	tmp->base.op = op;
2768fe8aea9eSmrg	if (!gen9_composite_set_target(sna, &tmp->base, dst,
2769fe8aea9eSmrg				       dst_x, dst_y, width, height, true))
2770fe8aea9eSmrg		return false;
2771fe8aea9eSmrg
2772fe8aea9eSmrg	switch (gen9_composite_picture(sna, src, &tmp->base.src,
2773fe8aea9eSmrg				       src_x, src_y,
2774fe8aea9eSmrg				       width, height,
2775fe8aea9eSmrg				       dst_x, dst_y,
2776fe8aea9eSmrg				       dst->polyMode == PolyModePrecise)) {
2777fe8aea9eSmrg	case -1:
2778fe8aea9eSmrg		goto cleanup_dst;
2779fe8aea9eSmrg	case 0:
2780fe8aea9eSmrg		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
2781fe8aea9eSmrg			goto cleanup_dst;
2782fe8aea9eSmrg		/* fall through to fixup */
2783fe8aea9eSmrg	case 1:
2784fe8aea9eSmrg		if (!gen9_composite_channel_convert(&tmp->base.src))
2785fe8aea9eSmrg			goto cleanup_src;
2786fe8aea9eSmrg		break;
2787fe8aea9eSmrg	}
2788fe8aea9eSmrg	tmp->base.mask.bo = NULL;
2789fe8aea9eSmrg
2790fe8aea9eSmrg	tmp->base.is_affine = tmp->base.src.is_affine;
2791fe8aea9eSmrg	tmp->base.need_magic_ca_pass = false;
2792fe8aea9eSmrg
2793fe8aea9eSmrg	tmp->base.u.gen9.flags =
2794fe8aea9eSmrg		GEN9_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
2795fe8aea9eSmrg					      tmp->base.src.repeat,
2796fe8aea9eSmrg					      SAMPLER_FILTER_NEAREST,
2797fe8aea9eSmrg					      SAMPLER_EXTEND_PAD),
2798fe8aea9eSmrg			       gen9_get_blend(tmp->base.op, false, tmp->base.dst.format),
2799fe8aea9eSmrg			       gen4_choose_spans_emitter(sna, tmp));
2800fe8aea9eSmrg	tmp->base.u.gen9.wm_kernel =
2801fe8aea9eSmrg		GEN9_WM_KERNEL_OPACITY | !tmp->base.is_affine;
2802fe8aea9eSmrg
2803fe8aea9eSmrg	tmp->box   = gen9_render_composite_spans_box;
2804fe8aea9eSmrg	tmp->boxes = gen9_render_composite_spans_boxes;
2805fe8aea9eSmrg	if (tmp->emit_boxes)
2806fe8aea9eSmrg		tmp->thread_boxes = gen9_render_composite_spans_boxes__thread;
2807fe8aea9eSmrg	tmp->done  = gen9_render_composite_spans_done;
2808fe8aea9eSmrg
2809fe8aea9eSmrg	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
2810fe8aea9eSmrg	if (!kgem_check_bo(&sna->kgem,
2811fe8aea9eSmrg			   tmp->base.dst.bo, tmp->base.src.bo,
2812fe8aea9eSmrg			   NULL)) {
2813fe8aea9eSmrg		kgem_submit(&sna->kgem);
2814fe8aea9eSmrg		if (!kgem_check_bo(&sna->kgem,
2815fe8aea9eSmrg				   tmp->base.dst.bo, tmp->base.src.bo,
2816fe8aea9eSmrg				   NULL))
2817fe8aea9eSmrg			goto cleanup_src;
2818fe8aea9eSmrg		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
2819fe8aea9eSmrg	}
2820fe8aea9eSmrg
2821fe8aea9eSmrg	gen9_align_vertex(sna, &tmp->base);
2822fe8aea9eSmrg	gen9_emit_composite_state(sna, &tmp->base);
2823fe8aea9eSmrg	return true;
2824fe8aea9eSmrg
2825fe8aea9eSmrgcleanup_src:
2826fe8aea9eSmrg	if (tmp->base.src.bo)
2827fe8aea9eSmrg		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
2828fe8aea9eSmrgcleanup_dst:
2829fe8aea9eSmrg	if (tmp->base.redirect.real_bo)
2830fe8aea9eSmrg		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
2831fe8aea9eSmrg	return false;
2832fe8aea9eSmrg}
2833fe8aea9eSmrg#endif
2834fe8aea9eSmrg
2835fe8aea9eSmrgstatic void
2836fe8aea9eSmrggen9_emit_copy_state(struct sna *sna,
2837fe8aea9eSmrg		     const struct sna_composite_op *op)
2838fe8aea9eSmrg{
2839fe8aea9eSmrg	uint32_t *binding_table;
2840fe8aea9eSmrg	uint16_t offset, dirty;
2841fe8aea9eSmrg
2842fe8aea9eSmrg	gen9_get_batch(sna, op);
2843fe8aea9eSmrg
2844fe8aea9eSmrg	binding_table = gen9_composite_get_binding_table(sna, &offset);
2845fe8aea9eSmrg
2846fe8aea9eSmrg	dirty = kgem_bo_is_dirty(op->dst.bo);
2847fe8aea9eSmrg
2848fe8aea9eSmrg	binding_table[0] =
2849fe8aea9eSmrg		gen9_bind_bo(sna,
2850fe8aea9eSmrg			     op->dst.bo, op->dst.width, op->dst.height,
2851fe8aea9eSmrg			     gen9_get_dest_format(op->dst.format),
2852fe8aea9eSmrg			     true);
2853fe8aea9eSmrg	binding_table[1] =
2854fe8aea9eSmrg		gen9_bind_bo(sna,
2855fe8aea9eSmrg			     op->src.bo, op->src.width, op->src.height,
2856fe8aea9eSmrg			     op->src.card_format,
2857fe8aea9eSmrg			     false);
2858fe8aea9eSmrg
2859fe8aea9eSmrg	if (sna->kgem.surface == offset &&
2860fe8aea9eSmrg	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table) {
2861fe8aea9eSmrg		sna->kgem.surface += SURFACE_DW;
2862fe8aea9eSmrg		offset = sna->render_state.gen9.surface_table;
2863fe8aea9eSmrg	}
2864fe8aea9eSmrg
2865fe8aea9eSmrg	if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0])
2866fe8aea9eSmrg		dirty = 0;
2867fe8aea9eSmrg
2868fe8aea9eSmrg	assert(!GEN9_READS_DST(op->u.gen9.flags));
2869fe8aea9eSmrg	gen9_emit_state(sna, op, offset | dirty);
2870fe8aea9eSmrg}
2871fe8aea9eSmrg
2872fe8aea9eSmrgstatic inline bool
2873fe8aea9eSmrgprefer_blt_copy(struct sna *sna,
2874fe8aea9eSmrg		struct kgem_bo *src_bo,
2875fe8aea9eSmrg		struct kgem_bo *dst_bo,
2876fe8aea9eSmrg		unsigned flags)
2877fe8aea9eSmrg{
2878fe8aea9eSmrg	if (sna->kgem.mode == KGEM_BLT)
2879fe8aea9eSmrg		return true;
2880fe8aea9eSmrg
2881fe8aea9eSmrg	assert((flags & COPY_SYNC) == 0);
2882fe8aea9eSmrg
2883fe8aea9eSmrg	if (untiled_tlb_miss(src_bo) ||
2884fe8aea9eSmrg	    untiled_tlb_miss(dst_bo))
2885fe8aea9eSmrg		return true;
2886fe8aea9eSmrg
2887fe8aea9eSmrg	if (flags & COPY_DRI && !sna->kgem.has_semaphores)
2888fe8aea9eSmrg		return false;
2889fe8aea9eSmrg
2890fe8aea9eSmrg	if (force_blt_ring(sna, dst_bo))
2891fe8aea9eSmrg		return true;
2892fe8aea9eSmrg
2893fe8aea9eSmrg	if ((flags & COPY_SMALL ||
2894fe8aea9eSmrg	     (sna->render_state.gt < 3 && src_bo == dst_bo)) &&
2895fe8aea9eSmrg	    can_switch_to_blt(sna, dst_bo, flags))
2896fe8aea9eSmrg		return true;
2897fe8aea9eSmrg
2898fe8aea9eSmrg	if (kgem_bo_is_render(dst_bo) ||
2899fe8aea9eSmrg	    kgem_bo_is_render(src_bo))
2900fe8aea9eSmrg		return false;
2901fe8aea9eSmrg
2902fe8aea9eSmrg	if (flags & COPY_LAST &&
2903fe8aea9eSmrg	    sna->render_state.gt < 3 &&
2904fe8aea9eSmrg            can_switch_to_blt(sna, dst_bo, flags))
2905fe8aea9eSmrg		return true;
2906fe8aea9eSmrg
2907fe8aea9eSmrg	if (prefer_render_ring(sna, dst_bo))
2908fe8aea9eSmrg		return false;
2909fe8aea9eSmrg
2910fe8aea9eSmrg	if (!prefer_blt_ring(sna, dst_bo, flags))
2911fe8aea9eSmrg		return false;
2912fe8aea9eSmrg
2913fe8aea9eSmrg	return prefer_blt_bo(sna, src_bo, dst_bo);
2914fe8aea9eSmrg}
2915fe8aea9eSmrg
2916fe8aea9eSmrgstatic bool
2917fe8aea9eSmrggen9_render_copy_boxes(struct sna *sna, uint8_t alu,
2918fe8aea9eSmrg		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
2919fe8aea9eSmrg		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
2920fe8aea9eSmrg		       const BoxRec *box, int n, unsigned flags)
2921fe8aea9eSmrg{
2922fe8aea9eSmrg	struct sna_composite_op tmp;
2923fe8aea9eSmrg	BoxRec extents;
2924fe8aea9eSmrg
2925fe8aea9eSmrg	DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? %d\n",
2926fe8aea9eSmrg	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags,
2927fe8aea9eSmrg	     src_bo == dst_bo,
2928fe8aea9eSmrg	     overlaps(sna,
2929fe8aea9eSmrg		      src_bo, src_dx, src_dy,
2930fe8aea9eSmrg		      dst_bo, dst_dx, dst_dy,
2931fe8aea9eSmrg		      box, n, flags, &extents)));
2932fe8aea9eSmrg
2933fe8aea9eSmrg	if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
2934fe8aea9eSmrg	    sna_blt_compare_depth(src, dst) &&
2935fe8aea9eSmrg	    sna_blt_copy_boxes(sna, alu,
2936fe8aea9eSmrg			       src_bo, src_dx, src_dy,
2937fe8aea9eSmrg			       dst_bo, dst_dx, dst_dy,
2938fe8aea9eSmrg			       dst->bitsPerPixel,
2939fe8aea9eSmrg			       box, n))
2940fe8aea9eSmrg		return true;
2941fe8aea9eSmrg
2942fe8aea9eSmrg	if (!(alu == GXcopy || alu == GXclear) ||
2943fe8aea9eSmrg	    unaligned(src_bo, src->bitsPerPixel) ||
2944fe8aea9eSmrg	    unaligned(dst_bo, dst->bitsPerPixel)) {
2945fe8aea9eSmrgfallback_blt:
2946fe8aea9eSmrg		DBG(("%s: fallback blt\n", __FUNCTION__));
2947fe8aea9eSmrg		if (!sna_blt_compare_depth(src, dst))
2948fe8aea9eSmrg			return false;
2949fe8aea9eSmrg
2950fe8aea9eSmrg		return sna_blt_copy_boxes_fallback(sna, alu,
2951fe8aea9eSmrg						   src, src_bo, src_dx, src_dy,
2952fe8aea9eSmrg						   dst, dst_bo, dst_dx, dst_dy,
2953fe8aea9eSmrg						   box, n);
2954fe8aea9eSmrg	}
2955fe8aea9eSmrg
2956fe8aea9eSmrg	if (overlaps(sna,
2957fe8aea9eSmrg		     src_bo, src_dx, src_dy,
2958fe8aea9eSmrg		     dst_bo, dst_dx, dst_dy,
2959fe8aea9eSmrg		     box, n, flags,
2960fe8aea9eSmrg		     &extents)) {
2961fe8aea9eSmrg		bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1);
2962fe8aea9eSmrg
2963fe8aea9eSmrg		if ((big || !prefer_render_ring(sna, dst_bo)) &&
2964fe8aea9eSmrg		    sna_blt_copy_boxes(sna, alu,
2965fe8aea9eSmrg				       src_bo, src_dx, src_dy,
2966fe8aea9eSmrg				       dst_bo, dst_dx, dst_dy,
2967fe8aea9eSmrg				       dst->bitsPerPixel,
2968fe8aea9eSmrg				       box, n))
2969fe8aea9eSmrg			return true;
2970fe8aea9eSmrg
2971fe8aea9eSmrg		if (big)
2972fe8aea9eSmrg			goto fallback_blt;
2973fe8aea9eSmrg
2974fe8aea9eSmrg		assert(src_bo == dst_bo);
2975fe8aea9eSmrg		assert(src->depth == dst->depth);
2976fe8aea9eSmrg		assert(src->width == dst->width);
2977fe8aea9eSmrg		assert(src->height == dst->height);
2978fe8aea9eSmrg		return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo,
2979fe8aea9eSmrg						      src_dx, src_dy,
2980fe8aea9eSmrg						      dst_dx, dst_dy,
2981fe8aea9eSmrg						      box, n, &extents);
2982fe8aea9eSmrg	}
2983fe8aea9eSmrg
2984fe8aea9eSmrg	if (dst->depth == src->depth) {
2985fe8aea9eSmrg		tmp.dst.format = sna_render_format_for_depth(dst->depth);
2986fe8aea9eSmrg		tmp.src.pict_format = tmp.dst.format;
2987fe8aea9eSmrg	} else {
2988fe8aea9eSmrg		tmp.dst.format = sna_format_for_depth(dst->depth);
2989fe8aea9eSmrg		tmp.src.pict_format = sna_format_for_depth(src->depth);
2990fe8aea9eSmrg	}
2991fe8aea9eSmrg	if (!gen9_check_format(tmp.src.pict_format))
2992fe8aea9eSmrg		goto fallback_blt;
2993fe8aea9eSmrg
2994fe8aea9eSmrg	tmp.dst.pixmap = (PixmapPtr)dst;
2995fe8aea9eSmrg	tmp.dst.width  = dst->width;
2996fe8aea9eSmrg	tmp.dst.height = dst->height;
2997fe8aea9eSmrg	tmp.dst.bo = dst_bo;
2998fe8aea9eSmrg	tmp.dst.x = tmp.dst.y = 0;
2999fe8aea9eSmrg	tmp.damage = NULL;
3000fe8aea9eSmrg
3001fe8aea9eSmrg	sna_render_composite_redirect_init(&tmp);
3002fe8aea9eSmrg	if (too_large(tmp.dst.width, tmp.dst.height)) {
3003fe8aea9eSmrg		int i;
3004fe8aea9eSmrg
3005fe8aea9eSmrg		extents = box[0];
3006fe8aea9eSmrg		for (i = 1; i < n; i++) {
3007fe8aea9eSmrg			if (box[i].x1 < extents.x1)
3008fe8aea9eSmrg				extents.x1 = box[i].x1;
3009fe8aea9eSmrg			if (box[i].y1 < extents.y1)
3010fe8aea9eSmrg				extents.y1 = box[i].y1;
3011fe8aea9eSmrg
3012fe8aea9eSmrg			if (box[i].x2 > extents.x2)
3013fe8aea9eSmrg				extents.x2 = box[i].x2;
3014fe8aea9eSmrg			if (box[i].y2 > extents.y2)
3015fe8aea9eSmrg				extents.y2 = box[i].y2;
3016fe8aea9eSmrg		}
3017fe8aea9eSmrg
3018fe8aea9eSmrg		if (!sna_render_composite_redirect(sna, &tmp,
3019fe8aea9eSmrg						   extents.x1 + dst_dx,
3020fe8aea9eSmrg						   extents.y1 + dst_dy,
3021fe8aea9eSmrg						   extents.x2 - extents.x1,
3022fe8aea9eSmrg						   extents.y2 - extents.y1,
3023fe8aea9eSmrg						   n > 1))
3024fe8aea9eSmrg			goto fallback_tiled;
3025fe8aea9eSmrg	}
3026fe8aea9eSmrg
3027fe8aea9eSmrg	tmp.src.card_format = gen9_get_card_format(tmp.src.pict_format);
3028fe8aea9eSmrg	if (too_large(src->width, src->height)) {
3029fe8aea9eSmrg		int i;
3030fe8aea9eSmrg
3031fe8aea9eSmrg		extents = box[0];
3032fe8aea9eSmrg		for (i = 1; i < n; i++) {
3033fe8aea9eSmrg			if (box[i].x1 < extents.x1)
3034fe8aea9eSmrg				extents.x1 = box[i].x1;
3035fe8aea9eSmrg			if (box[i].y1 < extents.y1)
3036fe8aea9eSmrg				extents.y1 = box[i].y1;
3037fe8aea9eSmrg
3038fe8aea9eSmrg			if (box[i].x2 > extents.x2)
3039fe8aea9eSmrg				extents.x2 = box[i].x2;
3040fe8aea9eSmrg			if (box[i].y2 > extents.y2)
3041fe8aea9eSmrg				extents.y2 = box[i].y2;
3042fe8aea9eSmrg		}
3043fe8aea9eSmrg
3044fe8aea9eSmrg		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
3045fe8aea9eSmrg					       extents.x1 + src_dx,
3046fe8aea9eSmrg					       extents.y1 + src_dy,
3047fe8aea9eSmrg					       extents.x2 - extents.x1,
3048fe8aea9eSmrg					       extents.y2 - extents.y1))
3049fe8aea9eSmrg			goto fallback_tiled_dst;
3050fe8aea9eSmrg	} else {
3051fe8aea9eSmrg		tmp.src.bo = src_bo;
3052fe8aea9eSmrg		tmp.src.width  = src->width;
3053fe8aea9eSmrg		tmp.src.height = src->height;
3054fe8aea9eSmrg		tmp.src.offset[0] = tmp.src.offset[1] = 0;
3055fe8aea9eSmrg	}
3056fe8aea9eSmrg
3057fe8aea9eSmrg	tmp.mask.bo = NULL;
3058fe8aea9eSmrg
3059fe8aea9eSmrg	tmp.floats_per_vertex = 2;
3060fe8aea9eSmrg	tmp.floats_per_rect = 6;
3061fe8aea9eSmrg	tmp.need_magic_ca_pass = 0;
3062fe8aea9eSmrg
3063fe8aea9eSmrg	tmp.u.gen9.flags = COPY_FLAGS(alu);
3064fe8aea9eSmrg	tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;
3065fe8aea9eSmrg
3066fe8aea9eSmrg	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
3067fe8aea9eSmrg	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
3068fe8aea9eSmrg		kgem_submit(&sna->kgem);
3069fe8aea9eSmrg		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
3070fe8aea9eSmrg			if (tmp.src.bo != src_bo)
3071fe8aea9eSmrg				kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3072fe8aea9eSmrg			if (tmp.redirect.real_bo)
3073fe8aea9eSmrg				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
3074fe8aea9eSmrg			goto fallback_blt;
3075fe8aea9eSmrg		}
3076fe8aea9eSmrg		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
3077fe8aea9eSmrg	}
3078fe8aea9eSmrg
3079fe8aea9eSmrg	src_dx += tmp.src.offset[0];
3080fe8aea9eSmrg	src_dy += tmp.src.offset[1];
3081fe8aea9eSmrg
3082fe8aea9eSmrg	dst_dx += tmp.dst.x;
3083fe8aea9eSmrg	dst_dy += tmp.dst.y;
3084fe8aea9eSmrg
3085fe8aea9eSmrg	tmp.dst.x = tmp.dst.y = 0;
3086fe8aea9eSmrg
3087fe8aea9eSmrg	gen9_align_vertex(sna, &tmp);
3088fe8aea9eSmrg	gen9_emit_copy_state(sna, &tmp);
3089fe8aea9eSmrg
3090fe8aea9eSmrg	do {
3091fe8aea9eSmrg		int16_t *v;
3092fe8aea9eSmrg		int n_this_time;
3093fe8aea9eSmrg
3094fe8aea9eSmrg		n_this_time = gen9_get_rectangles(sna, &tmp, n,
3095fe8aea9eSmrg						  gen9_emit_copy_state);
3096fe8aea9eSmrg		n -= n_this_time;
3097fe8aea9eSmrg
3098fe8aea9eSmrg		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
3099fe8aea9eSmrg		sna->render.vertex_used += 6 * n_this_time;
3100fe8aea9eSmrg		assert(sna->render.vertex_used <= sna->render.vertex_size);
3101fe8aea9eSmrg		do {
3102fe8aea9eSmrg
3103fe8aea9eSmrg			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
3104fe8aea9eSmrg			     box->x1 + src_dx, box->y1 + src_dy,
3105fe8aea9eSmrg			     box->x1 + dst_dx, box->y1 + dst_dy,
3106fe8aea9eSmrg			     box->x2 - box->x1, box->y2 - box->y1));
3107fe8aea9eSmrg			v[0] = box->x2 + dst_dx;
3108fe8aea9eSmrg			v[2] = box->x2 + src_dx;
3109fe8aea9eSmrg			v[1]  = v[5] = box->y2 + dst_dy;
3110fe8aea9eSmrg			v[3]  = v[7] = box->y2 + src_dy;
3111fe8aea9eSmrg			v[8]  = v[4] = box->x1 + dst_dx;
3112fe8aea9eSmrg			v[10] = v[6] = box->x1 + src_dx;
3113fe8aea9eSmrg			v[9]  = box->y1 + dst_dy;
3114fe8aea9eSmrg			v[11] = box->y1 + src_dy;
3115fe8aea9eSmrg			v += 12; box++;
3116fe8aea9eSmrg		} while (--n_this_time);
3117fe8aea9eSmrg	} while (n);
3118fe8aea9eSmrg
3119fe8aea9eSmrg	gen8_vertex_flush(sna);
3120fe8aea9eSmrg	sna_render_composite_redirect_done(sna, &tmp);
3121fe8aea9eSmrg	if (tmp.src.bo != src_bo)
3122fe8aea9eSmrg		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3123fe8aea9eSmrg	return true;
3124fe8aea9eSmrg
3125fe8aea9eSmrgfallback_tiled_dst:
3126fe8aea9eSmrg	if (tmp.redirect.real_bo)
3127fe8aea9eSmrg		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
3128fe8aea9eSmrgfallback_tiled:
3129fe8aea9eSmrg	DBG(("%s: fallback tiled\n", __FUNCTION__));
3130fe8aea9eSmrg	if (sna_blt_compare_depth(src, dst) &&
3131fe8aea9eSmrg	    sna_blt_copy_boxes(sna, alu,
3132fe8aea9eSmrg			       src_bo, src_dx, src_dy,
3133fe8aea9eSmrg			       dst_bo, dst_dx, dst_dy,
3134fe8aea9eSmrg			       dst->bitsPerPixel,
3135fe8aea9eSmrg			       box, n))
3136fe8aea9eSmrg		return true;
3137fe8aea9eSmrg
3138fe8aea9eSmrg	return sna_tiling_copy_boxes(sna, alu,
3139fe8aea9eSmrg				     src, src_bo, src_dx, src_dy,
3140fe8aea9eSmrg				     dst, dst_bo, dst_dx, dst_dy,
3141fe8aea9eSmrg				     box, n);
3142fe8aea9eSmrg}
3143fe8aea9eSmrg
3144fe8aea9eSmrgstatic void
3145fe8aea9eSmrggen9_render_copy_blt(struct sna *sna,
3146fe8aea9eSmrg		     const struct sna_copy_op *op,
3147fe8aea9eSmrg		     int16_t sx, int16_t sy,
3148fe8aea9eSmrg		     int16_t w,  int16_t h,
3149fe8aea9eSmrg		     int16_t dx, int16_t dy)
3150fe8aea9eSmrg{
3151fe8aea9eSmrg	int16_t *v;
3152fe8aea9eSmrg
3153fe8aea9eSmrg	gen9_get_rectangles(sna, &op->base, 1, gen9_emit_copy_state);
3154fe8aea9eSmrg
3155fe8aea9eSmrg	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3156fe8aea9eSmrg	sna->render.vertex_used += 6;
3157fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
3158fe8aea9eSmrg
3159fe8aea9eSmrg	v[0]  = dx+w; v[1]  = dy+h;
3160fe8aea9eSmrg	v[2]  = sx+w; v[3]  = sy+h;
3161fe8aea9eSmrg	v[4]  = dx;   v[5]  = dy+h;
3162fe8aea9eSmrg	v[6]  = sx;   v[7]  = sy+h;
3163fe8aea9eSmrg	v[8]  = dx;   v[9]  = dy;
3164fe8aea9eSmrg	v[10] = sx;   v[11] = sy;
3165fe8aea9eSmrg}
3166fe8aea9eSmrg
3167fe8aea9eSmrgstatic void
3168fe8aea9eSmrggen9_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
3169fe8aea9eSmrg{
3170fe8aea9eSmrg	if (sna->render.vertex_offset)
3171fe8aea9eSmrg		gen8_vertex_flush(sna);
3172fe8aea9eSmrg}
3173fe8aea9eSmrg
3174fe8aea9eSmrgstatic bool
3175fe8aea9eSmrggen9_render_copy(struct sna *sna, uint8_t alu,
3176fe8aea9eSmrg		 PixmapPtr src, struct kgem_bo *src_bo,
3177fe8aea9eSmrg		 PixmapPtr dst, struct kgem_bo *dst_bo,
3178fe8aea9eSmrg		 struct sna_copy_op *op)
3179fe8aea9eSmrg{
3180fe8aea9eSmrg	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
3181fe8aea9eSmrg	     __FUNCTION__, alu,
3182fe8aea9eSmrg	     src->drawable.width, src->drawable.height,
3183fe8aea9eSmrg	     dst->drawable.width, dst->drawable.height));
3184fe8aea9eSmrg
3185fe8aea9eSmrg	if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
3186fe8aea9eSmrg	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
3187fe8aea9eSmrg	    sna_blt_copy(sna, alu,
3188fe8aea9eSmrg			 src_bo, dst_bo,
3189fe8aea9eSmrg			 dst->drawable.bitsPerPixel,
3190fe8aea9eSmrg			 op))
3191fe8aea9eSmrg		return true;
3192fe8aea9eSmrg
3193fe8aea9eSmrg	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
3194fe8aea9eSmrg	    too_large(src->drawable.width, src->drawable.height) ||
3195fe8aea9eSmrg	    too_large(dst->drawable.width, dst->drawable.height) ||
3196fe8aea9eSmrg	    unaligned(src_bo, src->drawable.bitsPerPixel) ||
3197fe8aea9eSmrg	    unaligned(dst_bo, dst->drawable.bitsPerPixel)) {
3198fe8aea9eSmrgfallback:
3199fe8aea9eSmrg		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
3200fe8aea9eSmrg			return false;
3201fe8aea9eSmrg
3202fe8aea9eSmrg		return sna_blt_copy(sna, alu, src_bo, dst_bo,
3203fe8aea9eSmrg				    dst->drawable.bitsPerPixel,
3204fe8aea9eSmrg				    op);
3205fe8aea9eSmrg	}
3206fe8aea9eSmrg
3207fe8aea9eSmrg	if (dst->drawable.depth == src->drawable.depth) {
3208fe8aea9eSmrg		op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
3209fe8aea9eSmrg		op->base.src.pict_format = op->base.dst.format;
3210fe8aea9eSmrg	} else {
3211fe8aea9eSmrg		op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
3212fe8aea9eSmrg		op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
3213fe8aea9eSmrg	}
3214fe8aea9eSmrg	if (!gen9_check_format(op->base.src.pict_format))
3215fe8aea9eSmrg		goto fallback;
3216fe8aea9eSmrg
3217fe8aea9eSmrg	op->base.dst.pixmap = dst;
3218fe8aea9eSmrg	op->base.dst.width  = dst->drawable.width;
3219fe8aea9eSmrg	op->base.dst.height = dst->drawable.height;
3220fe8aea9eSmrg	op->base.dst.bo = dst_bo;
3221fe8aea9eSmrg
3222fe8aea9eSmrg	op->base.src.bo = src_bo;
3223fe8aea9eSmrg	op->base.src.card_format =
3224fe8aea9eSmrg		gen9_get_card_format(op->base.src.pict_format);
3225fe8aea9eSmrg	op->base.src.width  = src->drawable.width;
3226fe8aea9eSmrg	op->base.src.height = src->drawable.height;
3227fe8aea9eSmrg
3228fe8aea9eSmrg	op->base.mask.bo = NULL;
3229fe8aea9eSmrg
3230fe8aea9eSmrg	op->base.floats_per_vertex = 2;
3231fe8aea9eSmrg	op->base.floats_per_rect = 6;
3232fe8aea9eSmrg
3233fe8aea9eSmrg	op->base.u.gen9.flags = COPY_FLAGS(alu);
3234fe8aea9eSmrg	op->base.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;
3235fe8aea9eSmrg
3236fe8aea9eSmrg	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
3237fe8aea9eSmrg	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
3238fe8aea9eSmrg		kgem_submit(&sna->kgem);
3239fe8aea9eSmrg		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
3240fe8aea9eSmrg			goto fallback;
3241fe8aea9eSmrg		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
3242fe8aea9eSmrg	}
3243fe8aea9eSmrg
3244fe8aea9eSmrg	gen9_align_vertex(sna, &op->base);
3245fe8aea9eSmrg	gen9_emit_copy_state(sna, &op->base);
3246fe8aea9eSmrg
3247fe8aea9eSmrg	op->blt  = gen9_render_copy_blt;
3248fe8aea9eSmrg	op->done = gen9_render_copy_done;
3249fe8aea9eSmrg	return true;
3250fe8aea9eSmrg}
3251fe8aea9eSmrg
3252fe8aea9eSmrgstatic void
3253fe8aea9eSmrggen9_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
3254fe8aea9eSmrg{
3255fe8aea9eSmrg	uint32_t *binding_table;
3256fe8aea9eSmrg	uint16_t offset, dirty;
3257fe8aea9eSmrg
3258fe8aea9eSmrg	/* XXX Render Target Fast Clear
3259fe8aea9eSmrg	 * Set RTFC Enable in PS and render a rectangle.
3260fe8aea9eSmrg	 * Limited to a clearing the full MSC surface only with a
3261fe8aea9eSmrg	 * specific kernel.
3262fe8aea9eSmrg	 */
3263fe8aea9eSmrg
3264fe8aea9eSmrg	gen9_get_batch(sna, op);
3265fe8aea9eSmrg
3266fe8aea9eSmrg	binding_table = gen9_composite_get_binding_table(sna, &offset);
3267fe8aea9eSmrg
3268fe8aea9eSmrg	dirty = kgem_bo_is_dirty(op->dst.bo);
3269fe8aea9eSmrg
3270fe8aea9eSmrg	binding_table[0] =
3271fe8aea9eSmrg		gen9_bind_bo(sna,
3272fe8aea9eSmrg			     op->dst.bo, op->dst.width, op->dst.height,
3273fe8aea9eSmrg			     gen9_get_dest_format(op->dst.format),
3274fe8aea9eSmrg			     true);
3275fe8aea9eSmrg	binding_table[1] =
3276fe8aea9eSmrg		gen9_bind_bo(sna,
3277fe8aea9eSmrg			     op->src.bo, 1, 1,
3278fe8aea9eSmrg			     SURFACEFORMAT_B8G8R8A8_UNORM,
3279fe8aea9eSmrg			     false);
3280fe8aea9eSmrg
3281fe8aea9eSmrg	if (sna->kgem.surface == offset &&
3282fe8aea9eSmrg	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table) {
3283fe8aea9eSmrg		sna->kgem.surface += SURFACE_DW;
3284fe8aea9eSmrg		offset = sna->render_state.gen9.surface_table;
3285fe8aea9eSmrg	}
3286fe8aea9eSmrg
3287fe8aea9eSmrg	if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0])
3288fe8aea9eSmrg		dirty = 0;
3289fe8aea9eSmrg
3290fe8aea9eSmrg	gen9_emit_state(sna, op, offset | dirty);
3291fe8aea9eSmrg}
3292fe8aea9eSmrg
3293fe8aea9eSmrgstatic bool
3294fe8aea9eSmrggen9_render_fill_boxes(struct sna *sna,
3295fe8aea9eSmrg		       CARD8 op,
3296fe8aea9eSmrg		       PictFormat format,
3297fe8aea9eSmrg		       const xRenderColor *color,
3298fe8aea9eSmrg		       const DrawableRec *dst, struct kgem_bo *dst_bo,
3299fe8aea9eSmrg		       const BoxRec *box, int n)
3300fe8aea9eSmrg{
3301fe8aea9eSmrg	struct sna_composite_op tmp;
3302fe8aea9eSmrg	uint32_t pixel;
3303fe8aea9eSmrg
3304fe8aea9eSmrg	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
3305fe8aea9eSmrg	     __FUNCTION__, op,
3306fe8aea9eSmrg	     color->red, color->green, color->blue, color->alpha, (int)format));
3307fe8aea9eSmrg
3308fe8aea9eSmrg	if (op >= ARRAY_SIZE(gen9_blend_op)) {
3309fe8aea9eSmrg		DBG(("%s: fallback due to unhandled blend op: %d\n",
3310fe8aea9eSmrg		     __FUNCTION__, op));
3311fe8aea9eSmrg		return false;
3312fe8aea9eSmrg	}
3313fe8aea9eSmrg
3314fe8aea9eSmrg	if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) ||
3315fe8aea9eSmrg	    !gen9_check_dst_format(format) ||
3316fe8aea9eSmrg	    unaligned(dst_bo, PICT_FORMAT_BPP(format))) {
3317fe8aea9eSmrg		uint8_t alu = GXinvalid;
3318fe8aea9eSmrg
3319fe8aea9eSmrg		if (op <= PictOpSrc) {
3320fe8aea9eSmrg			pixel = 0;
3321fe8aea9eSmrg			if (op == PictOpClear)
3322fe8aea9eSmrg				alu = GXclear;
3323fe8aea9eSmrg			else if (sna_get_pixel_from_rgba(&pixel,
3324fe8aea9eSmrg							 color->red,
3325fe8aea9eSmrg							 color->green,
3326fe8aea9eSmrg							 color->blue,
3327fe8aea9eSmrg							 color->alpha,
3328fe8aea9eSmrg							 format))
3329fe8aea9eSmrg				alu = GXcopy;
3330fe8aea9eSmrg		}
3331fe8aea9eSmrg
3332fe8aea9eSmrg		if (alu != GXinvalid &&
3333fe8aea9eSmrg		    sna_blt_fill_boxes(sna, alu,
3334fe8aea9eSmrg				       dst_bo, dst->bitsPerPixel,
3335fe8aea9eSmrg				       pixel, box, n))
3336fe8aea9eSmrg			return true;
3337fe8aea9eSmrg
3338fe8aea9eSmrg		if (!gen9_check_dst_format(format))
3339fe8aea9eSmrg			return false;
3340fe8aea9eSmrg	}
3341fe8aea9eSmrg
3342fe8aea9eSmrg	if (op == PictOpClear) {
3343fe8aea9eSmrg		pixel = 0;
3344fe8aea9eSmrg		op = PictOpSrc;
3345fe8aea9eSmrg	} else if (!sna_get_pixel_from_rgba(&pixel,
3346fe8aea9eSmrg					    color->red,
3347fe8aea9eSmrg					    color->green,
3348fe8aea9eSmrg					    color->blue,
3349fe8aea9eSmrg					    color->alpha,
3350fe8aea9eSmrg					    PICT_a8r8g8b8))
3351fe8aea9eSmrg		return false;
3352fe8aea9eSmrg
3353fe8aea9eSmrg	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
3354fe8aea9eSmrg	     __FUNCTION__, pixel, n,
3355fe8aea9eSmrg	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));
3356fe8aea9eSmrg
3357fe8aea9eSmrg	tmp.dst.pixmap = (PixmapPtr)dst;
3358fe8aea9eSmrg	tmp.dst.width  = dst->width;
3359fe8aea9eSmrg	tmp.dst.height = dst->height;
3360fe8aea9eSmrg	tmp.dst.format = format;
3361fe8aea9eSmrg	tmp.dst.bo = dst_bo;
3362fe8aea9eSmrg	tmp.dst.x = tmp.dst.y = 0;
3363fe8aea9eSmrg	tmp.damage = NULL;
3364fe8aea9eSmrg
3365fe8aea9eSmrg	sna_render_composite_redirect_init(&tmp);
3366fe8aea9eSmrg	if (too_large(dst->width, dst->height)) {
3367fe8aea9eSmrg		BoxRec extents;
3368fe8aea9eSmrg
3369fe8aea9eSmrg		boxes_extents(box, n, &extents);
3370fe8aea9eSmrg		if (!sna_render_composite_redirect(sna, &tmp,
3371fe8aea9eSmrg						   extents.x1, extents.y1,
3372fe8aea9eSmrg						   extents.x2 - extents.x1,
3373fe8aea9eSmrg						   extents.y2 - extents.y1,
3374fe8aea9eSmrg						   n > 1))
3375fe8aea9eSmrg			return sna_tiling_fill_boxes(sna, op, format, color,
3376fe8aea9eSmrg						     dst, dst_bo, box, n);
3377fe8aea9eSmrg	}
3378fe8aea9eSmrg
3379fe8aea9eSmrg	tmp.src.bo = sna_render_get_solid(sna, pixel);
3380fe8aea9eSmrg	tmp.mask.bo = NULL;
3381fe8aea9eSmrg
3382fe8aea9eSmrg	tmp.floats_per_vertex = 2;
3383fe8aea9eSmrg	tmp.floats_per_rect = 6;
3384fe8aea9eSmrg	tmp.need_magic_ca_pass = false;
3385fe8aea9eSmrg
3386fe8aea9eSmrg	tmp.u.gen9.flags = FILL_FLAGS(op, format);
3387fe8aea9eSmrg	tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;
3388fe8aea9eSmrg
3389fe8aea9eSmrg	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
3390fe8aea9eSmrg	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
3391fe8aea9eSmrg		kgem_submit(&sna->kgem);
3392fe8aea9eSmrg		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
3393fe8aea9eSmrg			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3394fe8aea9eSmrg			tmp.src.bo = NULL;
3395fe8aea9eSmrg
3396fe8aea9eSmrg			if (tmp.redirect.real_bo) {
3397fe8aea9eSmrg				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
3398fe8aea9eSmrg				tmp.redirect.real_bo = NULL;
3399fe8aea9eSmrg			}
3400fe8aea9eSmrg
3401fe8aea9eSmrg			return false;
3402fe8aea9eSmrg		}
3403fe8aea9eSmrg		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
3404fe8aea9eSmrg	}
3405fe8aea9eSmrg
3406fe8aea9eSmrg	gen9_align_vertex(sna, &tmp);
3407fe8aea9eSmrg	gen9_emit_fill_state(sna, &tmp);
3408fe8aea9eSmrg
3409fe8aea9eSmrg	do {
3410fe8aea9eSmrg		int n_this_time;
3411fe8aea9eSmrg		int16_t *v;
3412fe8aea9eSmrg
3413fe8aea9eSmrg		n_this_time = gen9_get_rectangles(sna, &tmp, n,
3414fe8aea9eSmrg						  gen9_emit_fill_state);
3415fe8aea9eSmrg		n -= n_this_time;
3416fe8aea9eSmrg
3417fe8aea9eSmrg		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
3418fe8aea9eSmrg		sna->render.vertex_used += 6 * n_this_time;
3419fe8aea9eSmrg		assert(sna->render.vertex_used <= sna->render.vertex_size);
3420fe8aea9eSmrg		do {
3421fe8aea9eSmrg			DBG(("	(%d, %d), (%d, %d)\n",
3422fe8aea9eSmrg			     box->x1, box->y1, box->x2, box->y2));
3423fe8aea9eSmrg
3424fe8aea9eSmrg			v[0] = box->x2;
3425fe8aea9eSmrg			v[5] = v[1] = box->y2;
3426fe8aea9eSmrg			v[8] = v[4] = box->x1;
3427fe8aea9eSmrg			v[9] = box->y1;
3428fe8aea9eSmrg			v[2] = v[3]  = v[7]  = 1;
3429fe8aea9eSmrg			v[6] = v[10] = v[11] = 0;
3430fe8aea9eSmrg			v += 12; box++;
3431fe8aea9eSmrg		} while (--n_this_time);
3432fe8aea9eSmrg	} while (n);
3433fe8aea9eSmrg
3434fe8aea9eSmrg	gen8_vertex_flush(sna);
3435fe8aea9eSmrg	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3436fe8aea9eSmrg	sna_render_composite_redirect_done(sna, &tmp);
3437fe8aea9eSmrg	return true;
3438fe8aea9eSmrg}
3439fe8aea9eSmrg
3440fe8aea9eSmrgstatic void
3441fe8aea9eSmrggen9_render_fill_op_blt(struct sna *sna,
3442fe8aea9eSmrg			const struct sna_fill_op *op,
3443fe8aea9eSmrg			int16_t x, int16_t y, int16_t w, int16_t h)
3444fe8aea9eSmrg{
3445fe8aea9eSmrg	int16_t *v;
3446fe8aea9eSmrg
3447fe8aea9eSmrg	DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
3448fe8aea9eSmrg
3449fe8aea9eSmrg	gen9_get_rectangles(sna, &op->base, 1, gen9_emit_fill_state);
3450fe8aea9eSmrg
3451fe8aea9eSmrg	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3452fe8aea9eSmrg	sna->render.vertex_used += 6;
3453fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
3454fe8aea9eSmrg
3455fe8aea9eSmrg	v[0] = x+w;
3456fe8aea9eSmrg	v[4] = v[8] = x;
3457fe8aea9eSmrg	v[1] = v[5] = y+h;
3458fe8aea9eSmrg	v[9] = y;
3459fe8aea9eSmrg
3460fe8aea9eSmrg	v[2] = v[3]  = v[7]  = 1;
3461fe8aea9eSmrg	v[6] = v[10] = v[11] = 0;
3462fe8aea9eSmrg}
3463fe8aea9eSmrg
3464fe8aea9eSmrgfastcall static void
3465fe8aea9eSmrggen9_render_fill_op_box(struct sna *sna,
3466fe8aea9eSmrg			const struct sna_fill_op *op,
3467fe8aea9eSmrg			const BoxRec *box)
3468fe8aea9eSmrg{
3469fe8aea9eSmrg	int16_t *v;
3470fe8aea9eSmrg
3471fe8aea9eSmrg	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
3472fe8aea9eSmrg	     box->x1, box->y1, box->x2, box->y2));
3473fe8aea9eSmrg
3474fe8aea9eSmrg	gen9_get_rectangles(sna, &op->base, 1, gen9_emit_fill_state);
3475fe8aea9eSmrg
3476fe8aea9eSmrg	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3477fe8aea9eSmrg	sna->render.vertex_used += 6;
3478fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
3479fe8aea9eSmrg
3480fe8aea9eSmrg	v[0] = box->x2;
3481fe8aea9eSmrg	v[8] = v[4] = box->x1;
3482fe8aea9eSmrg	v[5] = v[1] = box->y2;
3483fe8aea9eSmrg	v[9] = box->y1;
3484fe8aea9eSmrg
3485fe8aea9eSmrg	v[7] = v[2]  = v[3]  = 1;
3486fe8aea9eSmrg	v[6] = v[10] = v[11] = 0;
3487fe8aea9eSmrg}
3488fe8aea9eSmrg
3489fe8aea9eSmrgfastcall static void
3490fe8aea9eSmrggen9_render_fill_op_boxes(struct sna *sna,
3491fe8aea9eSmrg			  const struct sna_fill_op *op,
3492fe8aea9eSmrg			  const BoxRec *box,
3493fe8aea9eSmrg			  int nbox)
3494fe8aea9eSmrg{
3495fe8aea9eSmrg	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
3496fe8aea9eSmrg	     box->x1, box->y1, box->x2, box->y2, nbox));
3497fe8aea9eSmrg
3498fe8aea9eSmrg	do {
3499fe8aea9eSmrg		int nbox_this_time;
3500fe8aea9eSmrg		int16_t *v;
3501fe8aea9eSmrg
3502fe8aea9eSmrg		nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox,
3503fe8aea9eSmrg						     gen9_emit_fill_state);
3504fe8aea9eSmrg		nbox -= nbox_this_time;
3505fe8aea9eSmrg
3506fe8aea9eSmrg		v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3507fe8aea9eSmrg		sna->render.vertex_used += 6 * nbox_this_time;
3508fe8aea9eSmrg		assert(sna->render.vertex_used <= sna->render.vertex_size);
3509fe8aea9eSmrg
3510fe8aea9eSmrg		do {
3511fe8aea9eSmrg			v[0] = box->x2;
3512fe8aea9eSmrg			v[8] = v[4] = box->x1;
3513fe8aea9eSmrg			v[5] = v[1] = box->y2;
3514fe8aea9eSmrg			v[9] = box->y1;
3515fe8aea9eSmrg			v[7] = v[2]  = v[3]  = 1;
3516fe8aea9eSmrg			v[6] = v[10] = v[11] = 0;
3517fe8aea9eSmrg			box++; v += 12;
3518fe8aea9eSmrg		} while (--nbox_this_time);
3519fe8aea9eSmrg	} while (nbox);
3520fe8aea9eSmrg}
3521fe8aea9eSmrg
3522fe8aea9eSmrgstatic void
3523fe8aea9eSmrggen9_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
3524fe8aea9eSmrg{
3525fe8aea9eSmrg	if (sna->render.vertex_offset)
3526fe8aea9eSmrg		gen8_vertex_flush(sna);
3527fe8aea9eSmrg	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
3528fe8aea9eSmrg}
3529fe8aea9eSmrg
3530fe8aea9eSmrgstatic bool
3531fe8aea9eSmrggen9_render_fill(struct sna *sna, uint8_t alu,
3532fe8aea9eSmrg		 PixmapPtr dst, struct kgem_bo *dst_bo,
3533fe8aea9eSmrg		 uint32_t color, unsigned flags,
3534fe8aea9eSmrg		 struct sna_fill_op *op)
3535fe8aea9eSmrg{
3536fe8aea9eSmrg	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));
3537fe8aea9eSmrg
3538fe8aea9eSmrg	if (prefer_blt_fill(sna, dst_bo, flags) &&
3539fe8aea9eSmrg	    sna_blt_fill(sna, alu,
3540fe8aea9eSmrg			 dst_bo, dst->drawable.bitsPerPixel,
3541fe8aea9eSmrg			 color,
3542fe8aea9eSmrg			 op))
3543fe8aea9eSmrg		return true;
3544fe8aea9eSmrg
3545fe8aea9eSmrg	if (!(alu == GXcopy || alu == GXclear) ||
3546fe8aea9eSmrg	    too_large(dst->drawable.width, dst->drawable.height) ||
3547fe8aea9eSmrg	    unaligned(dst_bo, dst->drawable.bitsPerPixel))
3548fe8aea9eSmrg		return sna_blt_fill(sna, alu,
3549fe8aea9eSmrg				    dst_bo, dst->drawable.bitsPerPixel,
3550fe8aea9eSmrg				    color,
3551fe8aea9eSmrg				    op);
3552fe8aea9eSmrg
3553fe8aea9eSmrg	if (alu == GXclear)
3554fe8aea9eSmrg		color = 0;
3555fe8aea9eSmrg
3556fe8aea9eSmrg	op->base.dst.pixmap = dst;
3557fe8aea9eSmrg	op->base.dst.width  = dst->drawable.width;
3558fe8aea9eSmrg	op->base.dst.height = dst->drawable.height;
3559fe8aea9eSmrg	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
3560fe8aea9eSmrg	op->base.dst.bo = dst_bo;
3561fe8aea9eSmrg	op->base.dst.x = op->base.dst.y = 0;
3562fe8aea9eSmrg
3563fe8aea9eSmrg	op->base.src.bo =
3564fe8aea9eSmrg		sna_render_get_solid(sna,
3565fe8aea9eSmrg				     sna_rgba_for_color(color,
3566fe8aea9eSmrg							dst->drawable.depth));
3567fe8aea9eSmrg	op->base.mask.bo = NULL;
3568fe8aea9eSmrg
3569fe8aea9eSmrg	op->base.need_magic_ca_pass = false;
3570fe8aea9eSmrg	op->base.floats_per_vertex = 2;
3571fe8aea9eSmrg	op->base.floats_per_rect = 6;
3572fe8aea9eSmrg
3573fe8aea9eSmrg	op->base.u.gen9.flags = FILL_FLAGS_NOBLEND;
3574fe8aea9eSmrg	op->base.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;
3575fe8aea9eSmrg
3576fe8aea9eSmrg	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
3577fe8aea9eSmrg	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
3578fe8aea9eSmrg		kgem_submit(&sna->kgem);
3579fe8aea9eSmrg		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
3580fe8aea9eSmrg			kgem_bo_destroy(&sna->kgem, op->base.src.bo);
3581fe8aea9eSmrg			return false;
3582fe8aea9eSmrg		}
3583fe8aea9eSmrg
3584fe8aea9eSmrg		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
3585fe8aea9eSmrg	}
3586fe8aea9eSmrg
3587fe8aea9eSmrg	gen9_align_vertex(sna, &op->base);
3588fe8aea9eSmrg	gen9_emit_fill_state(sna, &op->base);
3589fe8aea9eSmrg
3590fe8aea9eSmrg	op->blt   = gen9_render_fill_op_blt;
3591fe8aea9eSmrg	op->box   = gen9_render_fill_op_box;
3592fe8aea9eSmrg	op->boxes = gen9_render_fill_op_boxes;
3593fe8aea9eSmrg	op->points = NULL;
3594fe8aea9eSmrg	op->done  = gen9_render_fill_op_done;
3595fe8aea9eSmrg	return true;
3596fe8aea9eSmrg}
3597fe8aea9eSmrg
3598fe8aea9eSmrgstatic bool
3599fe8aea9eSmrggen9_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3600fe8aea9eSmrg			     uint32_t color,
3601fe8aea9eSmrg			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
3602fe8aea9eSmrg			     uint8_t alu)
3603fe8aea9eSmrg{
3604fe8aea9eSmrg	BoxRec box;
3605fe8aea9eSmrg
3606fe8aea9eSmrg	box.x1 = x1;
3607fe8aea9eSmrg	box.y1 = y1;
3608fe8aea9eSmrg	box.x2 = x2;
3609fe8aea9eSmrg	box.y2 = y2;
3610fe8aea9eSmrg
3611fe8aea9eSmrg	return sna_blt_fill_boxes(sna, alu,
3612fe8aea9eSmrg				  bo, dst->drawable.bitsPerPixel,
3613fe8aea9eSmrg				  color, &box, 1);
3614fe8aea9eSmrg}
3615fe8aea9eSmrg
3616fe8aea9eSmrgstatic bool
3617fe8aea9eSmrggen9_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3618fe8aea9eSmrg		     uint32_t color,
3619fe8aea9eSmrg		     int16_t x1, int16_t y1,
3620fe8aea9eSmrg		     int16_t x2, int16_t y2,
3621fe8aea9eSmrg		     uint8_t alu)
3622fe8aea9eSmrg{
3623fe8aea9eSmrg	struct sna_composite_op tmp;
3624fe8aea9eSmrg	int16_t *v;
3625fe8aea9eSmrg
3626fe8aea9eSmrg	/* Prefer to use the BLT if already engaged */
3627fe8aea9eSmrg	if (prefer_blt_fill(sna, bo, FILL_BOXES) &&
3628fe8aea9eSmrg	    gen9_render_fill_one_try_blt(sna, dst, bo, color,
3629fe8aea9eSmrg					 x1, y1, x2, y2, alu))
3630fe8aea9eSmrg		return true;
3631fe8aea9eSmrg
3632fe8aea9eSmrg	/* Must use the BLT if we can't RENDER... */
3633fe8aea9eSmrg	if (!(alu == GXcopy || alu == GXclear) ||
3634fe8aea9eSmrg	    too_large(dst->drawable.width, dst->drawable.height) ||
3635fe8aea9eSmrg	    unaligned(bo, dst->drawable.bitsPerPixel))
3636fe8aea9eSmrg		return gen9_render_fill_one_try_blt(sna, dst, bo, color,
3637fe8aea9eSmrg						    x1, y1, x2, y2, alu);
3638fe8aea9eSmrg
3639fe8aea9eSmrg	if (alu == GXclear)
3640fe8aea9eSmrg		color = 0;
3641fe8aea9eSmrg
3642fe8aea9eSmrg	tmp.dst.pixmap = dst;
3643fe8aea9eSmrg	tmp.dst.width  = dst->drawable.width;
3644fe8aea9eSmrg	tmp.dst.height = dst->drawable.height;
3645fe8aea9eSmrg	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
3646fe8aea9eSmrg	tmp.dst.bo = bo;
3647fe8aea9eSmrg	tmp.dst.x = tmp.dst.y = 0;
3648fe8aea9eSmrg
3649fe8aea9eSmrg	tmp.src.bo =
3650fe8aea9eSmrg		sna_render_get_solid(sna,
3651fe8aea9eSmrg				     sna_rgba_for_color(color,
3652fe8aea9eSmrg							dst->drawable.depth));
3653fe8aea9eSmrg	tmp.mask.bo = NULL;
3654fe8aea9eSmrg
3655fe8aea9eSmrg	tmp.floats_per_vertex = 2;
3656fe8aea9eSmrg	tmp.floats_per_rect = 6;
3657fe8aea9eSmrg	tmp.need_magic_ca_pass = false;
3658fe8aea9eSmrg
3659fe8aea9eSmrg	tmp.u.gen9.flags = FILL_FLAGS_NOBLEND;
3660fe8aea9eSmrg	tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;
3661fe8aea9eSmrg
3662fe8aea9eSmrg	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
3663fe8aea9eSmrg	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3664fe8aea9eSmrg		kgem_submit(&sna->kgem);
3665fe8aea9eSmrg		if (kgem_check_bo(&sna->kgem, bo, NULL)) {
3666fe8aea9eSmrg			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3667fe8aea9eSmrg			return false;
3668fe8aea9eSmrg		}
3669fe8aea9eSmrg		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
3670fe8aea9eSmrg	}
3671fe8aea9eSmrg
3672fe8aea9eSmrg	gen9_align_vertex(sna, &tmp);
3673fe8aea9eSmrg	gen9_emit_fill_state(sna, &tmp);
3674fe8aea9eSmrg
3675fe8aea9eSmrg	gen9_get_rectangles(sna, &tmp, 1, gen9_emit_fill_state);
3676fe8aea9eSmrg
3677fe8aea9eSmrg	DBG(("	(%d, %d), (%d, %d)\n", x1, y1, x2, y2));
3678fe8aea9eSmrg
3679fe8aea9eSmrg	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3680fe8aea9eSmrg	sna->render.vertex_used += 6;
3681fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
3682fe8aea9eSmrg
3683fe8aea9eSmrg	v[0] = x2;
3684fe8aea9eSmrg	v[8] = v[4] = x1;
3685fe8aea9eSmrg	v[5] = v[1] = y2;
3686fe8aea9eSmrg	v[9] = y1;
3687fe8aea9eSmrg	v[7] = v[2]  = v[3]  = 1;
3688fe8aea9eSmrg	v[6] = v[10] = v[11] = 0;
3689fe8aea9eSmrg
3690fe8aea9eSmrg	gen8_vertex_flush(sna);
3691fe8aea9eSmrg	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3692fe8aea9eSmrg
3693fe8aea9eSmrg	return true;
3694fe8aea9eSmrg}
3695fe8aea9eSmrg
3696fe8aea9eSmrgstatic bool
3697fe8aea9eSmrggen9_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
3698fe8aea9eSmrg{
3699fe8aea9eSmrg	BoxRec box;
3700fe8aea9eSmrg
3701fe8aea9eSmrg	box.x1 = 0;
3702fe8aea9eSmrg	box.y1 = 0;
3703fe8aea9eSmrg	box.x2 = dst->drawable.width;
3704fe8aea9eSmrg	box.y2 = dst->drawable.height;
3705fe8aea9eSmrg
3706fe8aea9eSmrg	return sna_blt_fill_boxes(sna, GXclear,
3707fe8aea9eSmrg				  bo, dst->drawable.bitsPerPixel,
3708fe8aea9eSmrg				  0, &box, 1);
3709fe8aea9eSmrg}
3710fe8aea9eSmrg
3711fe8aea9eSmrgstatic bool
3712fe8aea9eSmrggen9_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
3713fe8aea9eSmrg{
3714fe8aea9eSmrg	struct sna_composite_op tmp;
3715fe8aea9eSmrg	int16_t *v;
3716fe8aea9eSmrg
3717fe8aea9eSmrg	DBG(("%s: %dx%d\n",
3718fe8aea9eSmrg	     __FUNCTION__,
3719fe8aea9eSmrg	     dst->drawable.width,
3720fe8aea9eSmrg	     dst->drawable.height));
3721fe8aea9eSmrg
3722fe8aea9eSmrg	/* Prefer to use the BLT if already engaged */
3723fe8aea9eSmrg	if (sna->kgem.mode == KGEM_BLT &&
3724fe8aea9eSmrg	    gen9_render_clear_try_blt(sna, dst, bo))
3725fe8aea9eSmrg		return true;
3726fe8aea9eSmrg
3727fe8aea9eSmrg	/* Must use the BLT if we can't RENDER... */
3728fe8aea9eSmrg	if (too_large(dst->drawable.width, dst->drawable.height) ||
3729fe8aea9eSmrg	    unaligned(bo, dst->drawable.bitsPerPixel))
3730fe8aea9eSmrg		return gen9_render_clear_try_blt(sna, dst, bo);
3731fe8aea9eSmrg
3732fe8aea9eSmrg	tmp.dst.pixmap = dst;
3733fe8aea9eSmrg	tmp.dst.width  = dst->drawable.width;
3734fe8aea9eSmrg	tmp.dst.height = dst->drawable.height;
3735fe8aea9eSmrg	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
3736fe8aea9eSmrg	tmp.dst.bo = bo;
3737fe8aea9eSmrg	tmp.dst.x = tmp.dst.y = 0;
3738fe8aea9eSmrg
3739fe8aea9eSmrg	tmp.src.bo = sna_render_get_solid(sna, 0);
3740fe8aea9eSmrg	tmp.mask.bo = NULL;
3741fe8aea9eSmrg
3742fe8aea9eSmrg	tmp.floats_per_vertex = 2;
3743fe8aea9eSmrg	tmp.floats_per_rect = 6;
3744fe8aea9eSmrg	tmp.need_magic_ca_pass = false;
3745fe8aea9eSmrg
3746fe8aea9eSmrg	tmp.u.gen9.flags = FILL_FLAGS_NOBLEND;
3747fe8aea9eSmrg	tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;
3748fe8aea9eSmrg
3749fe8aea9eSmrg	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
3750fe8aea9eSmrg	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3751fe8aea9eSmrg		kgem_submit(&sna->kgem);
3752fe8aea9eSmrg		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3753fe8aea9eSmrg			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3754fe8aea9eSmrg			return false;
3755fe8aea9eSmrg		}
3756fe8aea9eSmrg		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
3757fe8aea9eSmrg	}
3758fe8aea9eSmrg
3759fe8aea9eSmrg	gen9_align_vertex(sna, &tmp);
3760fe8aea9eSmrg	gen9_emit_fill_state(sna, &tmp);
3761fe8aea9eSmrg
3762fe8aea9eSmrg	gen9_get_rectangles(sna, &tmp, 1, gen9_emit_fill_state);
3763fe8aea9eSmrg
3764fe8aea9eSmrg	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3765fe8aea9eSmrg	sna->render.vertex_used += 6;
3766fe8aea9eSmrg	assert(sna->render.vertex_used <= sna->render.vertex_size);
3767fe8aea9eSmrg
3768fe8aea9eSmrg	v[0] = dst->drawable.width;
3769fe8aea9eSmrg	v[5] = v[1] = dst->drawable.height;
3770fe8aea9eSmrg	v[8] = v[4] = 0;
3771fe8aea9eSmrg	v[9] = 0;
3772fe8aea9eSmrg
3773fe8aea9eSmrg	v[7] = v[2]  = v[3]  = 1;
3774fe8aea9eSmrg	v[6] = v[10] = v[11] = 0;
3775fe8aea9eSmrg
3776fe8aea9eSmrg	gen8_vertex_flush(sna);
3777fe8aea9eSmrg	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3778fe8aea9eSmrg
3779fe8aea9eSmrg	return true;
3780fe8aea9eSmrg}
3781fe8aea9eSmrg
3782fe8aea9eSmrg#if !NO_VIDEO
3783fe8aea9eSmrgstatic uint32_t gen9_bind_video_source(struct sna *sna,
3784fe8aea9eSmrg				       struct kgem_bo *bo,
3785fe8aea9eSmrg				       uint32_t delta,
3786fe8aea9eSmrg				       int width,
3787fe8aea9eSmrg				       int height,
3788fe8aea9eSmrg				       int pitch,
3789fe8aea9eSmrg				       uint32_t format)
3790fe8aea9eSmrg{
3791fe8aea9eSmrg	uint32_t *ss;
3792fe8aea9eSmrg	int offset;
3793fe8aea9eSmrg
3794fe8aea9eSmrg	offset = sna->kgem.surface -= SURFACE_DW;
3795fe8aea9eSmrg	ss = sna->kgem.batch + offset;
3796fe8aea9eSmrg	ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT |
3797fe8aea9eSmrg		 gen9_tiling_bits(bo->tiling) |
3798fe8aea9eSmrg		 format << SURFACE_FORMAT_SHIFT |
3799fe8aea9eSmrg		 SURFACE_VALIGN_4 | SURFACE_HALIGN_4);
3800fe8aea9eSmrg	ss[1] = 0;
3801fe8aea9eSmrg	ss[2] = ((width - 1)  << SURFACE_WIDTH_SHIFT |
3802fe8aea9eSmrg		 (height - 1) << SURFACE_HEIGHT_SHIFT);
3803fe8aea9eSmrg	ss[3] = (pitch - 1) << SURFACE_PITCH_SHIFT;
3804fe8aea9eSmrg	ss[4] = 0;
3805fe8aea9eSmrg	ss[5] = 0;
3806fe8aea9eSmrg	ss[6] = 0;
3807fe8aea9eSmrg	ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
3808fe8aea9eSmrg	*(uint64_t *)(ss+8) =
3809fe8aea9eSmrg		kgem_add_reloc64(&sna->kgem, offset + 8, bo,
3810fe8aea9eSmrg				 I915_GEM_DOMAIN_SAMPLER << 16,
3811fe8aea9eSmrg				 delta);
3812fe8aea9eSmrg	ss[10] = 0;
3813fe8aea9eSmrg	ss[11] = 0;
3814fe8aea9eSmrg	ss[12] = 0;
3815fe8aea9eSmrg	ss[13] = 0;
3816fe8aea9eSmrg	ss[14] = 0;
3817fe8aea9eSmrg	ss[15] = 0;
3818fe8aea9eSmrg
3819fe8aea9eSmrg	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> sampler\n",
3820fe8aea9eSmrg	     offset, bo->handle, ss[1],
3821fe8aea9eSmrg	     format, width, height, bo->pitch, bo->tiling));
3822fe8aea9eSmrg
3823fe8aea9eSmrg	return offset * sizeof(uint32_t);
3824fe8aea9eSmrg}
3825fe8aea9eSmrg
3826fe8aea9eSmrgstatic void gen9_emit_video_state(struct sna *sna,
3827fe8aea9eSmrg				  const struct sna_composite_op *op)
3828fe8aea9eSmrg{
3829fe8aea9eSmrg	struct sna_video_frame *frame = op->priv;
3830fe8aea9eSmrg	uint32_t src_surf_format[6];
3831fe8aea9eSmrg	uint32_t src_surf_base[6];
3832fe8aea9eSmrg	int src_width[6];
3833fe8aea9eSmrg	int src_height[6];
3834fe8aea9eSmrg	int src_pitch[6];
3835fe8aea9eSmrg	uint32_t *binding_table;
3836fe8aea9eSmrg	uint16_t offset;
3837fe8aea9eSmrg	int n_src, n;
3838fe8aea9eSmrg
3839fe8aea9eSmrg	/* XXX VeBox, bicubic */
3840fe8aea9eSmrg
3841fe8aea9eSmrg	gen9_get_batch(sna, op);
3842fe8aea9eSmrg
3843fe8aea9eSmrg	src_surf_base[0] = 0;
3844fe8aea9eSmrg	src_surf_base[1] = 0;
3845fe8aea9eSmrg	src_surf_base[2] = frame->VBufOffset;
3846fe8aea9eSmrg	src_surf_base[3] = frame->VBufOffset;
3847fe8aea9eSmrg	src_surf_base[4] = frame->UBufOffset;
3848fe8aea9eSmrg	src_surf_base[5] = frame->UBufOffset;
3849fe8aea9eSmrg
3850fe8aea9eSmrg	if (is_planar_fourcc(frame->id)) {
3851fe8aea9eSmrg		for (n = 0; n < 2; n++) {
3852fe8aea9eSmrg			src_surf_format[n] = SURFACEFORMAT_R8_UNORM;
3853fe8aea9eSmrg			src_width[n]  = frame->width;
3854fe8aea9eSmrg			src_height[n] = frame->height;
3855fe8aea9eSmrg			src_pitch[n]  = frame->pitch[1];
3856fe8aea9eSmrg		}
3857fe8aea9eSmrg		for (; n < 6; n++) {
3858fe8aea9eSmrg			if (is_nv12_fourcc(frame->id))
3859fe8aea9eSmrg				src_surf_format[n] = SURFACEFORMAT_R8G8_UNORM;
3860fe8aea9eSmrg			else
3861fe8aea9eSmrg				src_surf_format[n] = SURFACEFORMAT_R8_UNORM;
3862fe8aea9eSmrg			src_width[n]  = frame->width / 2;
3863fe8aea9eSmrg			src_height[n] = frame->height / 2;
3864fe8aea9eSmrg			src_pitch[n]  = frame->pitch[0];
3865fe8aea9eSmrg		}
3866fe8aea9eSmrg		n_src = 6;
3867fe8aea9eSmrg	} else {
3868fe8aea9eSmrg		if (frame->id == FOURCC_RGB888)
3869fe8aea9eSmrg			src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM;
3870fe8aea9eSmrg		else if (frame->id == FOURCC_UYVY)
3871fe8aea9eSmrg			src_surf_format[0] = SURFACEFORMAT_YCRCB_SWAPY;
3872fe8aea9eSmrg		else if (is_ayuv_fourcc(frame->id))
3873fe8aea9eSmrg			src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM;
3874fe8aea9eSmrg		else
3875fe8aea9eSmrg			src_surf_format[0] = SURFACEFORMAT_YCRCB_NORMAL;
3876fe8aea9eSmrg
3877fe8aea9eSmrg		src_width[0]  = frame->width;
3878fe8aea9eSmrg		src_height[0] = frame->height;
3879fe8aea9eSmrg		src_pitch[0]  = frame->pitch[0];
3880fe8aea9eSmrg		n_src = 1;
3881fe8aea9eSmrg	}
3882fe8aea9eSmrg
3883fe8aea9eSmrg	binding_table = gen9_composite_get_binding_table(sna, &offset);
3884fe8aea9eSmrg
3885fe8aea9eSmrg	binding_table[0] =
3886fe8aea9eSmrg		gen9_bind_bo(sna,
3887fe8aea9eSmrg			     op->dst.bo, op->dst.width, op->dst.height,
3888fe8aea9eSmrg			     gen9_get_dest_format(op->dst.format),
3889fe8aea9eSmrg			     true);
3890fe8aea9eSmrg	for (n = 0; n < n_src; n++) {
3891fe8aea9eSmrg		binding_table[1+n] =
3892fe8aea9eSmrg			gen9_bind_video_source(sna,
3893fe8aea9eSmrg					       frame->bo,
3894fe8aea9eSmrg					       src_surf_base[n],
3895fe8aea9eSmrg					       src_width[n],
3896fe8aea9eSmrg					       src_height[n],
3897fe8aea9eSmrg					       src_pitch[n],
3898fe8aea9eSmrg					       src_surf_format[n]);
3899fe8aea9eSmrg	}
3900fe8aea9eSmrg
3901fe8aea9eSmrg	gen9_emit_state(sna, op, offset);
3902fe8aea9eSmrg}
3903fe8aea9eSmrg
3904fe8aea9eSmrgstatic unsigned select_video_kernel(const struct sna_video *video,
3905fe8aea9eSmrg				    const struct sna_video_frame *frame)
3906fe8aea9eSmrg{
3907fe8aea9eSmrg	switch (frame->id) {
3908fe8aea9eSmrg	case FOURCC_YV12:
3909fe8aea9eSmrg	case FOURCC_I420:
3910fe8aea9eSmrg	case FOURCC_XVMC:
3911fe8aea9eSmrg		return video->colorspace ?
3912fe8aea9eSmrg			GEN9_WM_KERNEL_VIDEO_PLANAR_BT709 :
3913fe8aea9eSmrg			GEN9_WM_KERNEL_VIDEO_PLANAR_BT601;
3914fe8aea9eSmrg
3915fe8aea9eSmrg	case FOURCC_NV12:
3916fe8aea9eSmrg		return video->colorspace ?
3917fe8aea9eSmrg			GEN9_WM_KERNEL_VIDEO_NV12_BT709 :
3918fe8aea9eSmrg			GEN9_WM_KERNEL_VIDEO_NV12_BT601;
3919fe8aea9eSmrg
3920fe8aea9eSmrg	case FOURCC_RGB888:
3921fe8aea9eSmrg	case FOURCC_RGB565:
3922fe8aea9eSmrg		return GEN9_WM_KERNEL_VIDEO_RGB;
3923fe8aea9eSmrg
3924fe8aea9eSmrg	case FOURCC_AYUV:
3925fe8aea9eSmrg		return video->colorspace ?
3926fe8aea9eSmrg			GEN9_WM_KERNEL_VIDEO_AYUV_BT709 :
3927fe8aea9eSmrg			GEN9_WM_KERNEL_VIDEO_AYUV_BT601;
3928fe8aea9eSmrg
3929fe8aea9eSmrg	default:
3930fe8aea9eSmrg		return video->colorspace ?
3931fe8aea9eSmrg			GEN9_WM_KERNEL_VIDEO_PACKED_BT709 :
3932fe8aea9eSmrg			GEN9_WM_KERNEL_VIDEO_PACKED_BT601;
3933fe8aea9eSmrg	}
3934fe8aea9eSmrg}
3935fe8aea9eSmrg
3936fe8aea9eSmrgstatic bool
3937fe8aea9eSmrggen9_render_video(struct sna *sna,
3938fe8aea9eSmrg		  struct sna_video *video,
3939fe8aea9eSmrg		  struct sna_video_frame *frame,
3940fe8aea9eSmrg		  RegionPtr dstRegion,
3941fe8aea9eSmrg		  PixmapPtr pixmap)
3942fe8aea9eSmrg{
3943fe8aea9eSmrg	struct sna_composite_op tmp;
3944fe8aea9eSmrg	struct sna_pixmap *priv = sna_pixmap(pixmap);
3945fe8aea9eSmrg	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
3946fe8aea9eSmrg	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
3947fe8aea9eSmrg	int src_width = frame->src.x2 - frame->src.x1;
3948fe8aea9eSmrg	int src_height = frame->src.y2 - frame->src.y1;
3949fe8aea9eSmrg	float src_offset_x, src_offset_y;
3950fe8aea9eSmrg	float src_scale_x, src_scale_y;
3951fe8aea9eSmrg	unsigned filter;
3952fe8aea9eSmrg	const BoxRec *box;
3953fe8aea9eSmrg	int nbox;
3954fe8aea9eSmrg
3955fe8aea9eSmrg	DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
3956fe8aea9eSmrg	     __FUNCTION__,
3957fe8aea9eSmrg	     src_width, src_height, dst_width, dst_height,
3958fe8aea9eSmrg	     region_num_rects(dstRegion),
3959fe8aea9eSmrg	     REGION_EXTENTS(NULL, dstRegion)->x1,
3960fe8aea9eSmrg	     REGION_EXTENTS(NULL, dstRegion)->y1,
3961fe8aea9eSmrg	     REGION_EXTENTS(NULL, dstRegion)->x2,
3962fe8aea9eSmrg	     REGION_EXTENTS(NULL, dstRegion)->y2));
3963fe8aea9eSmrg
3964fe8aea9eSmrg	assert(priv->gpu_bo);
3965fe8aea9eSmrg	assert(!too_large(pixmap->drawable.width, pixmap->drawable.height));
3966fe8aea9eSmrg	assert(!unaligned(priv->gpu_bo, pixmap->drawable.bitsPerPixel));
3967fe8aea9eSmrg
3968fe8aea9eSmrg	memset(&tmp, 0, sizeof(tmp));
3969fe8aea9eSmrg
3970fe8aea9eSmrg	tmp.dst.pixmap = pixmap;
3971fe8aea9eSmrg	tmp.dst.width  = pixmap->drawable.width;
3972fe8aea9eSmrg	tmp.dst.height = pixmap->drawable.height;
3973fe8aea9eSmrg	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
3974fe8aea9eSmrg	tmp.dst.bo = priv->gpu_bo;
3975fe8aea9eSmrg
3976fe8aea9eSmrg	tmp.src.bo = frame->bo;
3977fe8aea9eSmrg	tmp.mask.bo = NULL;
3978fe8aea9eSmrg
3979fe8aea9eSmrg	tmp.floats_per_vertex = 3;
3980fe8aea9eSmrg	tmp.floats_per_rect = 9;
3981fe8aea9eSmrg
3982fe8aea9eSmrg	DBG(("%s: scaling?=%d, planar?=%d [%x]\n",
3983fe8aea9eSmrg	     __FUNCTION__,
3984fe8aea9eSmrg	     src_width != dst_width || src_height != dst_height,
3985fe8aea9eSmrg	     is_planar_fourcc(frame->id), frame->id));
3986fe8aea9eSmrg
3987fe8aea9eSmrg	if (src_width == dst_width && src_height == dst_height)
3988fe8aea9eSmrg		filter = SAMPLER_FILTER_NEAREST;
3989fe8aea9eSmrg	else
3990fe8aea9eSmrg		filter = SAMPLER_FILTER_BILINEAR;
3991fe8aea9eSmrg
3992fe8aea9eSmrg	tmp.u.gen9.flags =
3993fe8aea9eSmrg		GEN9_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
3994fe8aea9eSmrg					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
3995fe8aea9eSmrg			       NO_BLEND,
3996fe8aea9eSmrg			       2);
3997fe8aea9eSmrg	tmp.u.gen9.wm_kernel = select_video_kernel(video, frame);
3998fe8aea9eSmrg	tmp.priv = frame;
3999fe8aea9eSmrg
4000fe8aea9eSmrg	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
4001fe8aea9eSmrg	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
4002fe8aea9eSmrg		kgem_submit(&sna->kgem);
4003fe8aea9eSmrg		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
4004fe8aea9eSmrg			return false;
4005fe8aea9eSmrg
4006fe8aea9eSmrg		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
4007fe8aea9eSmrg	}
4008fe8aea9eSmrg
4009fe8aea9eSmrg	gen9_align_vertex(sna, &tmp);
4010fe8aea9eSmrg	gen9_emit_video_state(sna, &tmp);
4011fe8aea9eSmrg
4012fe8aea9eSmrg	DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
4013fe8aea9eSmrg	     __FUNCTION__,
4014fe8aea9eSmrg	     frame->src.x1, frame->src.y1,
4015fe8aea9eSmrg	     src_width, src_height,
4016fe8aea9eSmrg	     dst_width, dst_height,
4017fe8aea9eSmrg	     frame->width, frame->height));
4018fe8aea9eSmrg
4019fe8aea9eSmrg	src_scale_x = (float)src_width / dst_width / frame->width;
4020fe8aea9eSmrg	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
4021fe8aea9eSmrg
4022fe8aea9eSmrg	src_scale_y = (float)src_height / dst_height / frame->height;
4023fe8aea9eSmrg	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
4024fe8aea9eSmrg
4025fe8aea9eSmrg	DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
4026fe8aea9eSmrg	     __FUNCTION__,
4027fe8aea9eSmrg	     src_scale_x, src_scale_y,
4028fe8aea9eSmrg	     src_offset_x, src_offset_y));
4029fe8aea9eSmrg
4030fe8aea9eSmrg	box = region_rects(dstRegion);
4031fe8aea9eSmrg	nbox = region_num_rects(dstRegion);
4032fe8aea9eSmrg	while (nbox--) {
4033fe8aea9eSmrg		DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n",
4034fe8aea9eSmrg		     __FUNCTION__,
4035fe8aea9eSmrg		     box->x1, box->y1,
4036fe8aea9eSmrg		     box->x2, box->y2,
4037fe8aea9eSmrg		     box->x1 * src_scale_x + src_offset_x,
4038fe8aea9eSmrg		     box->y1 * src_scale_y + src_offset_y,
4039fe8aea9eSmrg		     box->x2 * src_scale_x + src_offset_x,
4040fe8aea9eSmrg		     box->y2 * src_scale_y + src_offset_y));
4041fe8aea9eSmrg
4042fe8aea9eSmrg		gen9_get_rectangles(sna, &tmp, 1, gen9_emit_video_state);
4043fe8aea9eSmrg
4044fe8aea9eSmrg		OUT_VERTEX(box->x2, box->y2);
4045fe8aea9eSmrg		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
4046fe8aea9eSmrg		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
4047fe8aea9eSmrg
4048fe8aea9eSmrg		OUT_VERTEX(box->x1, box->y2);
4049fe8aea9eSmrg		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
4050fe8aea9eSmrg		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
4051fe8aea9eSmrg
4052fe8aea9eSmrg		OUT_VERTEX(box->x1, box->y1);
4053fe8aea9eSmrg		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
4054fe8aea9eSmrg		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
4055fe8aea9eSmrg
4056fe8aea9eSmrg		box++;
4057fe8aea9eSmrg	}
4058fe8aea9eSmrg	gen8_vertex_flush(sna);
4059fe8aea9eSmrg
4060fe8aea9eSmrg	if (!DAMAGE_IS_ALL(priv->gpu_damage))
4061fe8aea9eSmrg		sna_damage_add(&priv->gpu_damage, dstRegion);
4062fe8aea9eSmrg
4063fe8aea9eSmrg	return true;
4064fe8aea9eSmrg}
4065fe8aea9eSmrg#endif
4066fe8aea9eSmrg
4067fe8aea9eSmrgstatic void gen9_render_flush(struct sna *sna)
4068fe8aea9eSmrg{
4069fe8aea9eSmrg	gen8_vertex_close(sna);
4070fe8aea9eSmrg
4071fe8aea9eSmrg	assert(sna->render.vb_id == 0);
4072fe8aea9eSmrg	assert(sna->render.vertex_offset == 0);
4073fe8aea9eSmrg}
4074fe8aea9eSmrg
4075fe8aea9eSmrgstatic void gen9_render_reset(struct sna *sna)
4076fe8aea9eSmrg{
4077fe8aea9eSmrg	sna->render_state.gen9.emit_flush = false;
4078fe8aea9eSmrg	sna->render_state.gen9.needs_invariant = true;
4079fe8aea9eSmrg	sna->render_state.gen9.ve_id = 3 << 2;
4080fe8aea9eSmrg	sna->render_state.gen9.ve_dirty = false;
4081fe8aea9eSmrg	sna->render_state.gen9.last_primitive = -1;
4082fe8aea9eSmrg
4083fe8aea9eSmrg	sna->render_state.gen9.num_sf_outputs = 0;
4084fe8aea9eSmrg	sna->render_state.gen9.samplers = -1;
4085fe8aea9eSmrg	sna->render_state.gen9.blend = -1;
4086fe8aea9eSmrg	sna->render_state.gen9.kernel = -1;
4087fe8aea9eSmrg	sna->render_state.gen9.drawrect_offset = -1;
4088fe8aea9eSmrg	sna->render_state.gen9.drawrect_limit = -1;
4089fe8aea9eSmrg	sna->render_state.gen9.surface_table = 0;
4090fe8aea9eSmrg
4091fe8aea9eSmrg	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
4092fe8aea9eSmrg		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
4093fe8aea9eSmrg		discard_vbo(sna);
4094fe8aea9eSmrg	}
4095fe8aea9eSmrg
4096fe8aea9eSmrg	sna->render.vertex_offset = 0;
4097fe8aea9eSmrg	sna->render.nvertex_reloc = 0;
4098fe8aea9eSmrg	sna->render.vb_id = 0;
4099fe8aea9eSmrg}
4100fe8aea9eSmrg
4101fe8aea9eSmrgstatic void gen9_render_fini(struct sna *sna)
4102fe8aea9eSmrg{
4103fe8aea9eSmrg	kgem_bo_destroy(&sna->kgem, sna->render_state.gen9.general_bo);
4104fe8aea9eSmrg}
4105fe8aea9eSmrg
4106fe8aea9eSmrgstatic bool gen9_render_setup(struct sna *sna)
4107fe8aea9eSmrg{
4108fe8aea9eSmrg	struct gen9_render_state *state = &sna->render_state.gen9;
4109fe8aea9eSmrg	struct sna_static_stream general;
4110fe8aea9eSmrg	struct gen9_sampler_state *ss;
4111fe8aea9eSmrg	int i, j, k, l, m;
4112fe8aea9eSmrg	uint32_t devid;
4113fe8aea9eSmrg
4114fe8aea9eSmrg	devid = intel_get_device_id(sna->dev);
4115fe8aea9eSmrg	if (devid & 0xf)
4116fe8aea9eSmrg		state->gt = GEN9_GT_BIAS + ((devid >> 4) & 0xf) + 1;
4117fe8aea9eSmrg	DBG(("%s: gt=%d\n", __FUNCTION__, state->gt));
4118fe8aea9eSmrg
4119fe8aea9eSmrg	state->info = &min_gt_info;
4120fe8aea9eSmrg	if (is_skl(sna))
4121fe8aea9eSmrg		state->info = &skl_gt_info;
4122fe8aea9eSmrg	if (is_bxt(sna))
4123fe8aea9eSmrg		state->info = &bxt_gt_info;
4124fe8aea9eSmrg	if (is_kbl(sna))
4125fe8aea9eSmrg		state->info = &kbl_gt_info;
4126fe8aea9eSmrg	if (is_glk(sna))
4127fe8aea9eSmrg		state->info = &glk_gt_info;
4128fe8aea9eSmrg	if (is_cfl(sna))
4129fe8aea9eSmrg		state->info = &cfl_gt_info;
4130fe8aea9eSmrg
4131fe8aea9eSmrg	sna_static_stream_init(&general);
4132fe8aea9eSmrg
4133fe8aea9eSmrg	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
4134fe8aea9eSmrg	 * dumps, you know it points to zero.
4135fe8aea9eSmrg	 */
4136fe8aea9eSmrg	null_create(&general);
4137fe8aea9eSmrg
4138fe8aea9eSmrg	for (m = 0; m < ARRAY_SIZE(wm_kernels); m++) {
4139fe8aea9eSmrg		if (wm_kernels[m].size) {
4140fe8aea9eSmrg			state->wm_kernel[m][1] =
4141fe8aea9eSmrg				sna_static_stream_add(&general,
4142fe8aea9eSmrg						      wm_kernels[m].data,
4143fe8aea9eSmrg						      wm_kernels[m].size,
4144fe8aea9eSmrg						      64);
4145fe8aea9eSmrg		} else {
4146fe8aea9eSmrg			if (USE_8_PIXEL_DISPATCH) {
4147fe8aea9eSmrg				state->wm_kernel[m][0] =
4148fe8aea9eSmrg					sna_static_stream_compile_wm(sna, &general,
4149fe8aea9eSmrg								     wm_kernels[m].data, 8);
4150fe8aea9eSmrg			}
4151fe8aea9eSmrg
4152fe8aea9eSmrg			if (USE_16_PIXEL_DISPATCH) {
4153fe8aea9eSmrg				state->wm_kernel[m][1] =
4154fe8aea9eSmrg					sna_static_stream_compile_wm(sna, &general,
4155fe8aea9eSmrg								     wm_kernels[m].data, 16);
4156fe8aea9eSmrg			}
4157fe8aea9eSmrg
4158fe8aea9eSmrg			if (USE_32_PIXEL_DISPATCH) {
4159fe8aea9eSmrg				state->wm_kernel[m][2] =
4160fe8aea9eSmrg					sna_static_stream_compile_wm(sna, &general,
4161fe8aea9eSmrg								     wm_kernels[m].data, 32);
4162fe8aea9eSmrg			}
4163fe8aea9eSmrg		}
4164fe8aea9eSmrg		assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
4165fe8aea9eSmrg	}
4166fe8aea9eSmrg
4167fe8aea9eSmrg	COMPILE_TIME_ASSERT(GEN9_WM_KERNEL_COUNT <=
4168fe8aea9eSmrg			    1 << (sizeof(((struct sna_composite_op *)NULL)->u.gen9.wm_kernel) * 8));
4169fe8aea9eSmrg
4170fe8aea9eSmrg	COMPILE_TIME_ASSERT(SAMPLER_OFFSET(FILTER_COUNT, EXTEND_COUNT, FILTER_COUNT, EXTEND_COUNT) <= 0x7ff);
4171fe8aea9eSmrg	ss = sna_static_stream_map(&general,
4172fe8aea9eSmrg				   2 * sizeof(*ss) *
4173fe8aea9eSmrg				   (2 +
4174fe8aea9eSmrg				    FILTER_COUNT * EXTEND_COUNT *
4175fe8aea9eSmrg				    FILTER_COUNT * EXTEND_COUNT),
4176fe8aea9eSmrg				   32);
4177fe8aea9eSmrg	state->wm_state = sna_static_stream_offsetof(&general, ss);
4178fe8aea9eSmrg	sampler_copy_init(ss); ss += 2;
4179fe8aea9eSmrg	sampler_fill_init(ss); ss += 2;
4180fe8aea9eSmrg	for (i = 0; i < FILTER_COUNT; i++) {
4181fe8aea9eSmrg		for (j = 0; j < EXTEND_COUNT; j++) {
4182fe8aea9eSmrg			for (k = 0; k < FILTER_COUNT; k++) {
4183fe8aea9eSmrg				for (l = 0; l < EXTEND_COUNT; l++) {
4184fe8aea9eSmrg					sampler_state_init(ss++, i, j);
4185fe8aea9eSmrg					sampler_state_init(ss++, k, l);
4186fe8aea9eSmrg				}
4187fe8aea9eSmrg			}
4188fe8aea9eSmrg		}
4189fe8aea9eSmrg	}
4190fe8aea9eSmrg
4191fe8aea9eSmrg	state->cc_blend = gen9_create_blend_state(&general);
4192fe8aea9eSmrg
4193fe8aea9eSmrg	state->general_bo = sna_static_stream_fini(sna, &general);
4194fe8aea9eSmrg	return state->general_bo != NULL;
4195fe8aea9eSmrg}
4196fe8aea9eSmrg
4197fe8aea9eSmrgconst char *gen9_render_init(struct sna *sna, const char *backend)
4198fe8aea9eSmrg{
4199fe8aea9eSmrg	if (!gen9_render_setup(sna))
4200fe8aea9eSmrg		return backend;
4201fe8aea9eSmrg
4202fe8aea9eSmrg	sna->kgem.context_switch = gen6_render_context_switch;
4203fe8aea9eSmrg	sna->kgem.retire = gen6_render_retire;
4204fe8aea9eSmrg	sna->kgem.expire = gen4_render_expire;
4205fe8aea9eSmrg
4206fe8aea9eSmrg#if !NO_COMPOSITE
4207fe8aea9eSmrg	sna->render.composite = gen9_render_composite;
4208fe8aea9eSmrg	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
4209fe8aea9eSmrg#endif
4210fe8aea9eSmrg#if !NO_COMPOSITE_SPANS
4211fe8aea9eSmrg	sna->render.check_composite_spans = gen9_check_composite_spans;
4212fe8aea9eSmrg	sna->render.composite_spans = gen9_render_composite_spans;
4213fe8aea9eSmrg	sna->render.prefer_gpu |= PREFER_GPU_SPANS;
4214fe8aea9eSmrg#endif
4215fe8aea9eSmrg#if !NO_VIDEO
4216fe8aea9eSmrg	sna->render.video = gen9_render_video;
4217fe8aea9eSmrg#endif
4218fe8aea9eSmrg
4219fe8aea9eSmrg#if !NO_COPY_BOXES
4220fe8aea9eSmrg	sna->render.copy_boxes = gen9_render_copy_boxes;
4221fe8aea9eSmrg#endif
4222fe8aea9eSmrg#if !NO_COPY
4223fe8aea9eSmrg	sna->render.copy = gen9_render_copy;
4224fe8aea9eSmrg#endif
4225fe8aea9eSmrg
4226fe8aea9eSmrg#if !NO_FILL_BOXES
4227fe8aea9eSmrg	sna->render.fill_boxes = gen9_render_fill_boxes;
4228fe8aea9eSmrg#endif
4229fe8aea9eSmrg#if !NO_FILL
4230fe8aea9eSmrg	sna->render.fill = gen9_render_fill;
4231fe8aea9eSmrg#endif
4232fe8aea9eSmrg#if !NO_FILL_ONE
4233fe8aea9eSmrg	sna->render.fill_one = gen9_render_fill_one;
4234fe8aea9eSmrg#endif
4235fe8aea9eSmrg#if !NO_FILL_CLEAR
4236fe8aea9eSmrg	sna->render.clear = gen9_render_clear;
4237fe8aea9eSmrg#endif
4238fe8aea9eSmrg
4239fe8aea9eSmrg	sna->render.flush = gen9_render_flush;
4240fe8aea9eSmrg	sna->render.reset = gen9_render_reset;
4241fe8aea9eSmrg	sna->render.fini = gen9_render_fini;
4242fe8aea9eSmrg
4243fe8aea9eSmrg	sna->render.max_3d_size = GEN9_MAX_SIZE;
4244fe8aea9eSmrg	sna->render.max_3d_pitch = 1 << 18;
4245fe8aea9eSmrg	return sna->render_state.gen9.info->name;
4246fe8aea9eSmrg}
4247