/* gen9_render.c revision fe8aea9e */
1/*
2 * Copyright © 2012,2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Chris Wilson <chris@chris-wilson.co.uk>
25 *
26 */
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "sna.h"
33#include "sna_reg.h"
34#include "sna_render.h"
35#include "sna_render_inline.h"
36#include "sna_video.h"
37
38#include "gen9_render.h"
39#include "gen8_eu.h"
40#include "gen4_common.h"
41#include "gen4_source.h"
42#include "gen4_vertex.h"
43#include "gen6_common.h"
44#include "gen8_vertex.h"
45
46#define SIM 1
47
48#define ALWAYS_INVALIDATE 0
49#define ALWAYS_FLUSH 0
50#define ALWAYS_STALL 0
51
52#define NO_COMPOSITE 0
53#define NO_COMPOSITE_SPANS 0
54#define NO_COPY 0
55#define NO_COPY_BOXES 0
56#define NO_FILL 0
57#define NO_FILL_BOXES 0
58#define NO_FILL_ONE 0
59#define NO_FILL_CLEAR 0
60#define NO_VIDEO 0
61
62#define USE_8_PIXEL_DISPATCH 1
63#define USE_16_PIXEL_DISPATCH 1
64#define USE_32_PIXEL_DISPATCH 0
65
66#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
67#error "Must select at least 8, 16 or 32 pixel dispatch"
68#endif
69
70#define GEN9_MAX_SIZE 16384
71#define GEN9_GT_BIAS 1 /* Each GT is bigger than previous gen */
72
73/* XXX Todo
74 *
75 * STR (software tiled rendering) mode. No, really.
76 * 64x32 pixel blocks align with the rendering cache. Worth considering.
77 */
78
79#define is_aligned(x, y) (((x) & ((y) - 1)) == 0)
80
81/* Pipeline stages:
82 *  1. Command Streamer (CS)
83 *  2. Vertex Fetch (VF)
84 *  3. Vertex Shader (VS)
85 *  4. Hull Shader (HS)
86 *  5. Tesselation Engine (TE)
87 *  6. Domain Shader (DS)
88 *  7. Geometry Shader (GS)
89 *  8. Stream Output Logic (SOL)
90 *  9. Clipper (CLIP)
91 * 10. Strip/Fan (SF)
92 * 11. Windower/Masker (WM)
93 * 12. Color Calculator (CC)
94 */
95
#if !NO_VIDEO
/* Pre-assembled pixel-shader kernels for textured video, spliced together
 * from the shared EXA shader fragments (.g8b = gen8+ binary, also used on
 * gen9).  Each variant pairs a sampling stage (packed/planar/NV12/AYUV)
 * with a BT.601 or BT.709 YUV->RGB colorspace conversion.
 */
static const uint32_t ps_kernel_packed_bt601[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_argb.g8b"
#include "exa_wm_yuv_rgb_bt601.g8b"
#include "exa_wm_write.g8b"
};

static const uint32_t ps_kernel_planar_bt601[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_planar.g8b"
#include "exa_wm_yuv_rgb_bt601.g8b"
#include "exa_wm_write.g8b"
};

static const uint32_t ps_kernel_nv12_bt601[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_nv12.g8b"
#include "exa_wm_yuv_rgb_bt601.g8b"
#include "exa_wm_write.g8b"
};

static const uint32_t ps_kernel_packed_bt709[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_argb.g8b"
#include "exa_wm_yuv_rgb_bt709.g8b"
#include "exa_wm_write.g8b"
};

static const uint32_t ps_kernel_planar_bt709[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_planar.g8b"
#include "exa_wm_yuv_rgb_bt709.g8b"
#include "exa_wm_write.g8b"
};

static const uint32_t ps_kernel_ayuv_bt601[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_argb_ayuv.g8b"
#include "exa_wm_yuv_rgb_bt601.g8b"
#include "exa_wm_write.g8b"
};

static const uint32_t ps_kernel_ayuv_bt709[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_argb_ayuv.g8b"
#include "exa_wm_yuv_rgb_bt709.g8b"
#include "exa_wm_write.g8b"
};

static const uint32_t ps_kernel_nv12_bt709[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_nv12.g8b"
#include "exa_wm_yuv_rgb_bt709.g8b"
#include "exa_wm_write.g8b"
};

/* RGB passthrough: sample and write, no colorspace conversion. */
static const uint32_t ps_kernel_rgb[][4] = {
#include "exa_wm_src_affine.g8b"
#include "exa_wm_src_sample_argb.g8b"
#include "exa_wm_write.g8b"
};
#endif
159
/* Number of uint32_t dwords in one 64-byte surface-state entry.
 * NOTE: no trailing semicolon -- the previous definition ended in ';',
 * which injects a stray empty statement (or a syntax error, inside an
 * expression) wherever the macro is expanded.
 */
#define SURFACE_DW (64 / sizeof(uint32_t))
161
/* KERNEL: a pre-assembled shader blob (data + size known at compile time).
 * NOKERNEL: a generator function; size 0 marks it for runtime assembly.
 */
#define KERNEL(kernel_enum, kernel, num_surfaces) \
    [GEN9_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces}
#define NOKERNEL(kernel_enum, func, num_surfaces) \
    [GEN9_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces}
/* Catalogue of every pixel-shader kernel the backend can bind, indexed by
 * the GEN9_WM_KERNEL_* enum.  num_surfaces is the binding-table entry
 * count the kernel expects (dest + source [+ mask] or video planes).
 */
static const struct wm_kernel_info {
	const char *name;
	const void *data;
	unsigned int size;
	int num_surfaces;
} wm_kernels[] = {
	NOKERNEL(NOMASK, gen8_wm_kernel__affine, 2),
	NOKERNEL(NOMASK_P, gen8_wm_kernel__projective, 2),

	NOKERNEL(MASK, gen8_wm_kernel__affine_mask, 3),
	NOKERNEL(MASK_P, gen8_wm_kernel__projective_mask, 3),

	NOKERNEL(MASKCA, gen8_wm_kernel__affine_mask_ca, 3),
	NOKERNEL(MASKCA_P, gen8_wm_kernel__projective_mask_ca, 3),

	NOKERNEL(MASKSA, gen8_wm_kernel__affine_mask_sa, 3),
	NOKERNEL(MASKSA_P, gen8_wm_kernel__projective_mask_sa, 3),

	NOKERNEL(OPACITY, gen8_wm_kernel__affine_opacity, 2),
	NOKERNEL(OPACITY_P, gen8_wm_kernel__projective_opacity, 2),

#if !NO_VIDEO
	KERNEL(VIDEO_PLANAR_BT601, ps_kernel_planar_bt601, 7),
	KERNEL(VIDEO_NV12_BT601, ps_kernel_nv12_bt601, 7),
	KERNEL(VIDEO_PACKED_BT601, ps_kernel_packed_bt601, 2),
	KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7),
	KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7),
	KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2),
	KERNEL(VIDEO_AYUV_BT601, ps_kernel_ayuv_bt601, 2),
	KERNEL(VIDEO_AYUV_BT709, ps_kernel_ayuv_bt709, 2),
	KERNEL(VIDEO_RGB, ps_kernel_rgb, 2),
#endif
};
#undef KERNEL
200
/* Hardware blend factors for each Render (Porter-Duff) compositing
 * operator, indexed by the PictOp* value.  src_alpha records whether the
 * operator consumes source alpha as a blend factor -- used by
 * gen9_get_blend() to rewrite factors for component-alpha rendering.
 */
static const struct blendinfo {
	uint8_t src_alpha;
	uint8_t src_blend;
	uint8_t dst_blend;
} gen9_blend_op[] = {
	/* Clear */	{0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO},
	/* Src */	{0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO},
	/* Dst */	{0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE},
	/* Over */	{1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE},
	/* In */	{0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO},
	/* InReverse */	{1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA},
	/* Out */	{0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO},
	/* OutReverse */ {1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	{1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA},
	/* Xor */	{1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	{0, BLENDFACTOR_ONE, BLENDFACTOR_ONE},
};
220
/**
 * Highest-valued BLENDFACTOR used in gen9_blend_op.
 *
 * This leaves out GEN9_BLENDFACTOR_INV_DST_COLOR,
 * GEN9_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN9_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN9_BLENDFACTOR_COUNT (BLENDFACTOR_INV_DST_ALPHA + 1)

/* Each cached blend state starts on a 64-byte boundary. */
#define GEN9_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen9_blend_state), 64)

/* Pack a (src, dst) factor pair into the composite-op flags word:
 * bit 15 records whether the destination is read (dst != ZERO, see
 * GEN9_READS_DST), bits 4+ index the precomputed blend-state table.
 * Fix: 'd' is now parenthesized in the first term as well -- the old
 * '(d != BLENDFACTOR_ZERO)' would mis-parse a compound argument such
 * as 'a|b' because != binds tighter than |.
 */
#define BLEND_OFFSET(s, d) \
	(((d) != BLENDFACTOR_ZERO) << 15 | ((s) * GEN9_BLENDFACTOR_COUNT + (d)) << 4)

#define NO_BLEND BLEND_OFFSET(BLENDFACTOR_ONE, BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(BLENDFACTOR_ZERO, BLENDFACTOR_ZERO)
237
/* Index into the premade sampler-state table for a (src filter, src
 * extend, mask filter, mask extend) tuple; the first two slots are
 * reserved (see COPY_SAMPLER/FILL_SAMPLER below).
 */
#define SAMPLER_OFFSET(sf, se, mf, me) \
	(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2)

/* Vertex layout id: two pairs of 16-bit signed coordinates. */
#define VERTEX_2s2s 0

/* Canned flag words for the blit-style fast paths. */
#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN9_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, COPY_VERTEX)

#define FILL_SAMPLER 1
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN9_SET_FLAGS(FILL_SAMPLER, gen9_get_blend((op), false, (format)), FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN9_SET_FLAGS(FILL_SAMPLER, NO_BLEND, FILL_VERTEX)

/* Field extractors for the packed op->u.gen9.flags word:
 * [31:20] sampler index, [15] reads-dst, [14:4] blend offset,
 * [3:0] vertex layout id.
 */
#define GEN9_SAMPLER(f) (((f) >> 20) & 0xfff)
#define GEN9_BLEND(f) (((f) >> 4) & 0x7ff)
#define GEN9_READS_DST(f) (((f) >> 15) & 1)
#define GEN9_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN9_SET_FLAGS(S, B, V)  ((S) << 20 | (B) | (V))

/* Batch/vertex emission shorthands. */
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_BATCH64(v) batch_emit64(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
262
/* Per-SKU tuning data; currently only the URB VS-entry budget differs
 * between the gen9 variants.
 */
struct gt_info {
	const char *name;
	struct {
		int max_vs_entries;
	} urb;
};

/* Conservative fallback used when the exact SKU is unrecognised. */
static const struct gt_info min_gt_info = {
	.name = "Skylake (gen9)",
	.urb = { .max_vs_entries = 240 },
};

static const struct gt_info skl_gt_info = {
	.name = "Skylake (gen9)",
	.urb = { .max_vs_entries = 960 },
};

static const struct gt_info bxt_gt_info = {
	.name = "Broxton (gen9)",
	.urb = { .max_vs_entries = 320 },
};

static const struct gt_info kbl_gt_info = {
	.name = "Kabylake (gen9)",
	.urb = { .max_vs_entries = 960 },
};

static const struct gt_info glk_gt_info = {
	.name = "Geminilake (gen9)",
	.urb = { .max_vs_entries = 320 },
};

static const struct gt_info cfl_gt_info = {
	.name = "Coffeelake (gen9)",
	.urb = { .max_vs_entries = 960 },
};
299
/* SKU predicates on kgem.gen.  The values are octal; the leading digits
 * appear to encode the generation (011x == gen9 family) with the low
 * digit selecting the variant -- NOTE(review): confirm against the
 * kgem gen-detection code.
 */
static bool is_skl(struct sna *sna)
{
	return sna->kgem.gen == 0110;
}

static bool is_bxt(struct sna *sna)
{
	return sna->kgem.gen == 0111;
}

static bool is_kbl(struct sna *sna)
{
	return sna->kgem.gen == 0112;
}

static bool is_glk(struct sna *sna)
{
	return sna->kgem.gen == 0113;
}

static bool is_cfl(struct sna *sna)
{
	return sna->kgem.gen == 0114;
}
324
325static inline bool too_large(int width, int height)
326{
327	return width > GEN9_MAX_SIZE || height > GEN9_MAX_SIZE;
328}
329
/* Would this bo violate the sampler's H_ALIGN/V_ALIGN requirements?
 * Currently always reports false: the stricter proxy-offset check below
 * is compiled out (#if 0) pending clarification of the real gen9
 * alignment rules.
 */
static inline bool unaligned(struct kgem_bo *bo, int bpp)
{
	/* XXX What exactly do we need to meet H_ALIGN and V_ALIGN? */
#if 0
	int x, y;

	if (bo->proxy == NULL)
		return false;

	/* Assume that all tiled proxies are constructed correctly. */
	if (bo->tiling)
		return false;

	DBG(("%s: checking alignment of a linear proxy, offset=%d, pitch=%d, bpp=%d: => (%d, %d)\n",
	     __FUNCTION__, bo->delta, bo->pitch, bpp,
	     8 * (bo->delta % bo->pitch) / bpp, bo->delta / bo->pitch));

	/* This may be a random userptr map, check that it meets the
	 * render alignment of SURFACE_VALIGN_4 | SURFACE_HALIGN_4.
	 */
	y = bo->delta / bo->pitch;
	if (y & 3)
		return true;

	x = 8 * (bo->delta - y * bo->pitch);
	if (x & (4*bpp - 1))
	    return true;

	return false;
#else
	return false;
#endif
}
363
364static uint32_t gen9_get_blend(int op,
365			       bool has_component_alpha,
366			       uint32_t dst_format)
367{
368	uint32_t src, dst;
369
370	COMPILE_TIME_ASSERT(BLENDFACTOR_INV_DST_ALPHA*GEN9_BLENDFACTOR_COUNT + BLENDFACTOR_INV_DST_ALPHA <= 0x7ff);
371
372	src = gen9_blend_op[op].src_blend;
373	dst = gen9_blend_op[op].dst_blend;
374
375	/* If there's no dst alpha channel, adjust the blend op so that
376	 * we'll treat it always as 1.
377	 */
378	if (PICT_FORMAT_A(dst_format) == 0) {
379		if (src == BLENDFACTOR_DST_ALPHA)
380			src = BLENDFACTOR_ONE;
381		else if (src == BLENDFACTOR_INV_DST_ALPHA)
382			src = BLENDFACTOR_ZERO;
383	}
384
385	/* If the source alpha is being used, then we should only be in a
386	 * case where the source blend factor is 0, and the source blend
387	 * value is the mask channels multiplied by the source picture's alpha.
388	 */
389	if (has_component_alpha && gen9_blend_op[op].src_alpha) {
390		if (dst == BLENDFACTOR_SRC_ALPHA)
391			dst = BLENDFACTOR_SRC_COLOR;
392		else if (dst == BLENDFACTOR_INV_SRC_ALPHA)
393			dst = BLENDFACTOR_INV_SRC_COLOR;
394	}
395
396	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
397	     op, dst_format, PICT_FORMAT_A(dst_format),
398	     src, dst, (int)(BLEND_OFFSET(src, dst)>>4)));
399	assert(BLEND_OFFSET(src, dst) >> 4 <= 0xfff);
400	return BLEND_OFFSET(src, dst);
401}
402
403static uint32_t gen9_get_card_format(PictFormat format)
404{
405	switch (format) {
406	default:
407		return -1;
408	case PICT_a8r8g8b8:
409		return SURFACEFORMAT_B8G8R8A8_UNORM;
410	case PICT_x8r8g8b8:
411		return SURFACEFORMAT_B8G8R8X8_UNORM;
412	case PICT_a8b8g8r8:
413		return SURFACEFORMAT_R8G8B8A8_UNORM;
414	case PICT_x8b8g8r8:
415		return SURFACEFORMAT_R8G8B8X8_UNORM;
416#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
417	case PICT_a2r10g10b10:
418		return SURFACEFORMAT_B10G10R10A2_UNORM;
419	case PICT_x2r10g10b10:
420		return SURFACEFORMAT_B10G10R10X2_UNORM;
421#endif
422	case PICT_r8g8b8:
423		return SURFACEFORMAT_R8G8B8_UNORM;
424	case PICT_r5g6b5:
425		return SURFACEFORMAT_B5G6R5_UNORM;
426	case PICT_a1r5g5b5:
427		return SURFACEFORMAT_B5G5R5A1_UNORM;
428	case PICT_a8:
429		return SURFACEFORMAT_A8_UNORM;
430	case PICT_a4r4g4b4:
431		return SURFACEFORMAT_B4G4R4A4_UNORM;
432	}
433}
434
435static uint32_t gen9_get_dest_format(PictFormat format)
436{
437	switch (format) {
438	default:
439		return -1;
440	case PICT_a8r8g8b8:
441	case PICT_x8r8g8b8:
442		return SURFACEFORMAT_B8G8R8A8_UNORM;
443	case PICT_a8b8g8r8:
444	case PICT_x8b8g8r8:
445		return SURFACEFORMAT_R8G8B8A8_UNORM;
446#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
447	case PICT_a2r10g10b10:
448	case PICT_x2r10g10b10:
449		return SURFACEFORMAT_B10G10R10A2_UNORM;
450#endif
451	case PICT_r5g6b5:
452		return SURFACEFORMAT_B5G6R5_UNORM;
453	case PICT_x1r5g5b5:
454	case PICT_a1r5g5b5:
455		return SURFACEFORMAT_B5G5R5A1_UNORM;
456	case PICT_a8:
457		return SURFACEFORMAT_A8_UNORM;
458	case PICT_a4r4g4b4:
459	case PICT_x4r4g4b4:
460		return SURFACEFORMAT_B4G4R4A4_UNORM;
461	}
462}
463
464static bool gen9_check_dst_format(PictFormat format)
465{
466	if (gen9_get_dest_format(format) != -1)
467		return true;
468
469	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
470	return false;
471}
472
/* Can we sample from this PictFormat? */
static bool gen9_check_format(uint32_t format)
{
	if (gen9_get_card_format(format) == -1) {
		DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
		return false;
	}
	return true;
}
481
482static uint32_t gen9_filter(uint32_t filter)
483{
484	switch (filter) {
485	default:
486		assert(0);
487	case PictFilterNearest:
488		return SAMPLER_FILTER_NEAREST;
489	case PictFilterBilinear:
490		return SAMPLER_FILTER_BILINEAR;
491	}
492}
493
494static uint32_t gen9_check_filter(PicturePtr picture)
495{
496	switch (picture->filter) {
497	case PictFilterNearest:
498	case PictFilterBilinear:
499		return true;
500	default:
501		return false;
502	}
503}
504
505static uint32_t gen9_repeat(uint32_t repeat)
506{
507	switch (repeat) {
508	default:
509		assert(0);
510	case RepeatNone:
511		return SAMPLER_EXTEND_NONE;
512	case RepeatNormal:
513		return SAMPLER_EXTEND_REPEAT;
514	case RepeatPad:
515		return SAMPLER_EXTEND_PAD;
516	case RepeatReflect:
517		return SAMPLER_EXTEND_REFLECT;
518	}
519}
520
521static bool gen9_check_repeat(PicturePtr picture)
522{
523	if (!picture->repeat)
524		return true;
525
526	switch (picture->repeatType) {
527	case RepeatNone:
528	case RepeatNormal:
529	case RepeatPad:
530	case RepeatReflect:
531		return true;
532	default:
533		return false;
534	}
535}
536
537static int
538gen9_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
539{
540	int base;
541
542	if (has_mask) {
543		if (is_ca) {
544			if (gen9_blend_op[op].src_alpha)
545				base = GEN9_WM_KERNEL_MASKSA;
546			else
547				base = GEN9_WM_KERNEL_MASKCA;
548		} else
549			base = GEN9_WM_KERNEL_MASK;
550	} else
551		base = GEN9_WM_KERNEL_NOMASK;
552
553	return base + !is_affine;
554}
555
/* Zero the push-constant allocation for every shader stage; all shader
 * inputs are delivered through the URB instead.  Only emitted for the
 * simulator-strict (SIM) build.
 */
static void
gen9_emit_push_constants(struct sna *sna)
{
#if SIM
	OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
	OUT_BATCH(0);
#endif
}

/* Partition the URB: the pass-through VS receives the per-SKU maximum
 * number of entries; HS/DS/GS are inactive and get none.
 */
static void
gen9_emit_urb(struct sna *sna)
{
	/* num of VS entries must be divisible by 8 if size < 9 */
	OUT_BATCH(GEN9_3DSTATE_URB_VS | (2 - 2));
	OUT_BATCH(sna->render_state.gen9.info->urb.max_vs_entries << URB_ENTRY_NUMBER_SHIFT |
		  (2 - 1) << URB_ENTRY_SIZE_SHIFT |
		  4 << URB_STARTING_ADDRESS_SHIFT);

	OUT_BATCH(GEN9_3DSTATE_URB_HS | (2 - 2));
	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
		  4 << URB_STARTING_ADDRESS_SHIFT);

	OUT_BATCH(GEN9_3DSTATE_URB_DS | (2 - 2));
	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
		  4 << URB_STARTING_ADDRESS_SHIFT);

	OUT_BATCH(GEN9_3DSTATE_URB_GS | (2 - 2));
	OUT_BATCH(0 << URB_ENTRY_SIZE_SHIFT |
		  4 << URB_STARTING_ADDRESS_SHIFT);
}
598
/* Program STATE_BASE_ADDRESS.  Surface state is allocated from the top
 * of the batch buffer itself (NULL reloc target == the batch bo), while
 * dynamic (sampler/blend) and instruction (kernel) state live in the
 * persistent general_bo.
 */
static void
gen9_emit_state_base_address(struct sna *sna)
{
	uint32_t num_pages;

	/* Surface state is indexed with 16-bit offsets from the batch start. */
	assert(sna->kgem.surface - sna->kgem.nbatch <= 16384);

	/* WaBindlessSurfaceStateModifyEnable:skl,bxt */
	OUT_BATCH(GEN9_STATE_BASE_ADDRESS | (19 - 1 - 2));
	OUT_BATCH64(0); /* general */
	OUT_BATCH(0); /* stateless dataport */
	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* surface */
				     sna->kgem.nbatch,
				     NULL,
				     I915_GEM_DOMAIN_INSTRUCTION << 16,
				     BASE_ADDRESS_MODIFY));
	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* dynamic */
				     sna->kgem.nbatch,
				     sna->render_state.gen9.general_bo,
				     I915_GEM_DOMAIN_INSTRUCTION << 16,
				     BASE_ADDRESS_MODIFY));
	OUT_BATCH64(0); /* indirect */
	OUT_BATCH64(kgem_add_reloc64(&sna->kgem, /* instruction */
				     sna->kgem.nbatch,
				     sna->render_state.gen9.general_bo,
				     I915_GEM_DOMAIN_INSTRUCTION << 16,
				     BASE_ADDRESS_MODIFY));
	/* upper bounds: buffer sizes in 4KiB pages, low bit = modify-enable */
	num_pages = sna->render_state.gen9.general_bo->size.pages.count;
	OUT_BATCH(0); /* general */
	OUT_BATCH(num_pages << 12 | 1); /* dynamic */
	OUT_BATCH(0); /* indirect */
	OUT_BATCH(num_pages << 12 | 1); /* instruction */

	/* Bindless */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
638
/* Disable the vertex shader: vertices pass straight through to the SBE.
 * The SIM-only commands additionally zero the constant/binding/sampler
 * pointers for the stage.
 */
static void
gen9_emit_vs_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_VS | (9 - 2));
	OUT_BATCH64(0); /* no VS kernel */
	OUT_BATCH(0);
	OUT_BATCH64(0); /* scratch */
	OUT_BATCH(0);
	OUT_BATCH(1 << 1); /* pass-through */
	OUT_BATCH(1 << 16 | 1 << 21); /* urb write to SBE */

#if SIM
	OUT_BATCH(GEN9_3DSTATE_CONSTANT_VS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

	OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
	OUT_BATCH(0);
#endif
}

/* Disable the hull shader (no tessellation). */
static void
gen9_emit_hs_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_HS | (9 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0); /* no HS kernel */
	OUT_BATCH64(0); /* scratch */
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */

#if SIM
	OUT_BATCH(GEN9_3DSTATE_CONSTANT_HS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

#if 1
	OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
	OUT_BATCH(0);
#endif
#endif
}

/* Disable the tessellation engine. */
static void
gen9_emit_te_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_TE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
705
/* Disable the domain shader (no tessellation). */
static void
gen9_emit_ds_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_DS | (11 - 2));
	OUT_BATCH64(0); /* no kernel */
	OUT_BATCH(0);
	OUT_BATCH64(0); /* scratch */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN9_3DSTATE_CONSTANT_DS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

#if 1
	OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
	OUT_BATCH(0);
#endif
#endif
}

/* Disable the geometry shader. */
static void
gen9_emit_gs_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_GS | (10 - 2));
	OUT_BATCH64(0); /* no GS kernel */
	OUT_BATCH(0);
	OUT_BATCH64(0); /* scratch */
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN9_3DSTATE_CONSTANT_GS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);

#if 1
	OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
	OUT_BATCH(0);
#endif
#endif
}

/* Disable stream output (transform feedback). */
static void
gen9_emit_sol_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_STREAMOUT | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

/* Strip/fan unit: all-default state. */
static void
gen9_emit_sf_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_SF | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
787
/* Put the clipper into pass-through mode and zero the viewport state
 * pointers (no viewport transform is used for 2D rendering).
 */
static void
gen9_emit_clip_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_CLIP | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
	OUT_BATCH(0);
}

/* Bind a null depth buffer and disable the depth/stencil/HiZ paths;
 * 2D composite rendering never uses the depth pipeline.
 */
static void
gen9_emit_null_depth_buffer(struct sna *sna)
{
	OUT_BATCH(GEN9_3DSTATE_DEPTH_BUFFER | (8 - 2));
#if 1
	OUT_BATCH(SURFACE_NULL << DEPTH_BUFFER_TYPE_SHIFT |
		  DEPTHFORMAT_D32_FLOAT << DEPTH_BUFFER_FORMAT_SHIFT);
#else
	OUT_BATCH(SURFACE_2D << DEPTH_BUFFER_TYPE_SHIFT |
		  DEPTHFORMAT_D16_UNORM << DEPTH_BUFFER_FORMAT_SHIFT);
#endif
	OUT_BATCH64(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN9_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH(0);
#endif

#if SIM
	OUT_BATCH(GEN9_3DSTATE_STENCIL_BUFFER | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH(0);
#endif

#if SIM
	OUT_BATCH(GEN9_3DSTATE_WM_DEPTH_STENCIL | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif

#if SIM
	OUT_BATCH(GEN9_3DSTATE_CLEAR_PARAMS | (3 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif
}
847
/* One-time windower/pixel-shader setup: null depth buffer, barycentric
 * mode, raster state, and a zeroed SBE swizzle table.
 */
static void
gen9_emit_wm_invariant(struct sna *sna)
{
	gen9_emit_null_depth_buffer(sna);

#if SIM
	OUT_BATCH(GEN9_3DSTATE_SCISSOR_STATE_POINTERS | (2 - 2));
	OUT_BATCH(0);
#endif

	OUT_BATCH(GEN9_3DSTATE_WM | (2 - 2));
	//OUT_BATCH(WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC); /* XXX */
	OUT_BATCH(WM_PERSPECTIVE_PIXEL_BARYCENTRIC);

#if SIM
	OUT_BATCH(GEN9_3DSTATE_WM_CHROMAKEY | (2 - 2));
	OUT_BATCH(0);
#endif

#if 0
	OUT_BATCH(GEN9_3DSTATE_WM_HZ_OP | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif

	OUT_BATCH(GEN9_3DSTATE_PS_EXTRA | (2 - 2));
	OUT_BATCH(PSX_PIXEL_SHADER_VALID |
		  PSX_ATTRIBUTE_ENABLE);

	OUT_BATCH(GEN9_3DSTATE_RASTER | (5 - 2));
	OUT_BATCH(RASTER_FRONT_WINDING_CCW |
		  RASTER_CULL_NONE);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	/* Identity swizzles for all attributes. */
	OUT_BATCH(GEN9_3DSTATE_SBE_SWIZ | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if SIM
	OUT_BATCH(GEN9_3DSTATE_CONSTANT_PS | (11 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
#endif
}

/* No invariant color-calculator state; per-op CC state is emitted by
 * gen9_emit_cc().
 */
static void
gen9_emit_cc_invariant(struct sna *sna)
{
}

/* One-time vertex-fetch setup: RECTLIST topology, no instancing, no
 * system-generated vertex values.
 */
static void
gen9_emit_vf_invariant(struct sna *sna)
{
	int n;

#if 1
	OUT_BATCH(GEN9_3DSTATE_VF | (2 - 2));
	OUT_BATCH(0);
#endif

	OUT_BATCH(GEN9_3DSTATE_VF_SGVS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN9_3DSTATE_VF_TOPOLOGY | (2 - 2));
	OUT_BATCH(RECTLIST);

	OUT_BATCH(GEN9_3DSTATE_VF_STATISTICS | 0);

	/* Non-instanced fetch for each of the (up to 3) vertex elements. */
	for (n = 1; n <= 3; n++) {
		OUT_BATCH(GEN9_3DSTATE_VF_INSTANCING | (3 - 2));
		OUT_BATCH(n);
		OUT_BATCH(0);
	}
}
938
/* Emit the full once-per-context invariant state: select the 3D pipe,
 * then program every fixed-function stage into its 2D-rendering
 * configuration.  Clears render_state.gen9.needs_invariant when done.
 */
static void
gen9_emit_invariant(struct sna *sna)
{
	OUT_BATCH(GEN9_PIPELINE_SELECT |
		  PIPELINE_SELECTION_MASK |
		  PIPELINE_SELECT_3D);

#if SIM
	OUT_BATCH(GEN9_STATE_SIP | (3 - 2));
	OUT_BATCH64(0);
#endif

	OUT_BATCH(GEN9_3DSTATE_MULTISAMPLE | (2 - 2));
	OUT_BATCH(MULTISAMPLE_PIXEL_LOCATION_CENTER |
		  MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */

	OUT_BATCH(GEN9_3DSTATE_SAMPLE_MASK | (2 - 2));
	OUT_BATCH(1);

#if SIM
	OUT_BATCH(GEN9_3DSTATE_SAMPLE_PATTERN | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	//OUT_BATCH(8<<20 | 8<<16);
	OUT_BATCH(0);
#endif

	gen9_emit_push_constants(sna);
	gen9_emit_urb(sna);

	gen9_emit_state_base_address(sna);

	gen9_emit_vf_invariant(sna);
	gen9_emit_vs_invariant(sna);
	gen9_emit_hs_invariant(sna);
	gen9_emit_te_invariant(sna);
	gen9_emit_ds_invariant(sna);
	gen9_emit_gs_invariant(sna);
	gen9_emit_sol_invariant(sna);
	gen9_emit_clip_invariant(sna);
	gen9_emit_sf_invariant(sna);
	gen9_emit_wm_invariant(sna);
	gen9_emit_cc_invariant(sna);

	sna->render_state.gen9.needs_invariant = false;
}
986
/* Program the blend unit for the packed blend word produced by
 * BLEND_OFFSET()/gen9_get_blend().  Cached: re-emitted only when the
 * blend mode changes.
 */
static void
gen9_emit_cc(struct sna *sna, uint32_t blend)
{
	struct gen9_render_state *render = &sna->render_state.gen9;

	if (render->blend == blend)
		return;

	DBG(("%s: blend=%x (current=%x), src=%d, dst=%d\n",
	     __FUNCTION__, blend, render->blend,
	     blend / GEN9_BLENDFACTOR_COUNT,
	     blend % GEN9_BLENDFACTOR_COUNT));

	assert(blend < GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT);
	assert(blend / GEN9_BLENDFACTOR_COUNT > 0);
	assert(blend % GEN9_BLENDFACTOR_COUNT > 0);

	/* XXX can have up to 8 blend states preload, selectable via
	 * Render Target Index. What other side-effects of Render Target Index?
	 */

	/* Fast PS blend path; src/dst factors are recovered from the
	 * packed word by division/modulo on GEN9_BLENDFACTOR_COUNT.
	 */
	OUT_BATCH(GEN9_3DSTATE_PS_BLEND | (2 - 2));
	if (blend != GEN9_BLEND(NO_BLEND)) {
		uint32_t src = blend / GEN9_BLENDFACTOR_COUNT;
		uint32_t dst = blend % GEN9_BLENDFACTOR_COUNT;
		OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT |
			  PS_BLEND_COLOR_BLEND_ENABLE |
			  src << PS_BLEND_SRC_ALPHA_SHIFT |
			  dst << PS_BLEND_DST_ALPHA_SHIFT |
			  src << PS_BLEND_SRC_SHIFT |
			  dst << PS_BLEND_DST_SHIFT);
	} else
		OUT_BATCH(PS_BLEND_HAS_WRITEABLE_RT);

	/* Point at the matching pre-built BLEND_STATE entry (64B aligned). */
	assert(is_aligned(render->cc_blend + blend * GEN9_BLEND_STATE_PADDED_SIZE, 64));
	OUT_BATCH(GEN9_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
	OUT_BATCH((render->cc_blend + blend * GEN9_BLEND_STATE_PADDED_SIZE) | 1);

	/* Force a CC_STATE pointer change to improve blend performance */
	OUT_BATCH(GEN9_3DSTATE_CC_STATE_POINTERS | (2 - 2));
	OUT_BATCH(0);

	render->blend = blend;
}

/* Point the PS sampler-state at the premade entry for this filter/extend
 * combination (see SAMPLER_OFFSET).  Cached on render_state.samplers.
 */
static void
gen9_emit_sampler(struct sna *sna, uint32_t state)
{
	if (sna->render_state.gen9.samplers == state)
		return;

	sna->render_state.gen9.samplers = state;

	DBG(("%s: sampler = %x\n", __FUNCTION__, state));

	/* Each entry is a (src, mask) pair of sampler states. */
	assert(2 * sizeof(struct gen9_sampler_state) == 32);
	OUT_BATCH(GEN9_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
	OUT_BATCH(sna->render_state.gen9.wm_state + state * 2 * sizeof(struct gen9_sampler_state));
}
1046
1047static void
1048gen9_emit_sf(struct sna *sna, bool has_mask)
1049{
1050	int num_sf_outputs = has_mask ? 2 : 1;
1051
1052	if (sna->render_state.gen9.num_sf_outputs == num_sf_outputs)
1053		return;
1054
1055	DBG(("%s: num_sf_outputs=%d\n", __FUNCTION__, num_sf_outputs));
1056
1057	sna->render_state.gen9.num_sf_outputs = num_sf_outputs;
1058
1059	OUT_BATCH(GEN9_3DSTATE_SBE | (6 - 2));
1060	OUT_BATCH(num_sf_outputs << SBE_NUM_OUTPUTS_SHIFT |
1061		  SBE_FORCE_VERTEX_URB_READ_LENGTH | /* forced is faster */
1062		  SBE_FORCE_VERTEX_URB_READ_OFFSET |
1063		  1 << SBE_URB_ENTRY_READ_LENGTH_SHIFT |
1064		  1 << SBE_URB_ENTRY_READ_OFFSET_SHIFT);
1065	OUT_BATCH(0);
1066	OUT_BATCH(0);
1067        OUT_BATCH(SBE_ACTIVE_COMPONENT_XYZW << 0 |
1068		  SBE_ACTIVE_COMPONENT_XYZW << 1);
1069        OUT_BATCH(0);
1070}
1071
/* Bind the pixel-shader kernel for this operation.  wm_kernel[kernel]
 * holds the 8/16/32-wide variants' instruction offsets (0 == variant not
 * built); dispatch enables and start-GRF values are derived from which
 * variants exist.  Cached on render_state.kernel.
 */
static void
gen9_emit_wm(struct sna *sna, int kernel)
{
	const uint32_t *kernels;

	assert(kernel < ARRAY_SIZE(wm_kernels));
	if (sna->render_state.gen9.kernel == kernel)
		return;

	sna->render_state.gen9.kernel = kernel;
	kernels = sna->render_state.gen9.wm_kernel[kernel];

	DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
	     __FUNCTION__,
	     wm_kernels[kernel].name,
	     wm_kernels[kernel].num_surfaces,
	     kernels[0], kernels[1], kernels[2]));
	assert(is_aligned(kernels[0], 64));
	assert(is_aligned(kernels[1], 64));
	assert(is_aligned(kernels[2], 64));

	OUT_BATCH(GEN9_3DSTATE_PS | (12 - 2));
	/* kernel pointer 0: first available variant (GNU ?: extension) */
	OUT_BATCH64(kernels[0] ?: kernels[1] ?: kernels[2]);
	OUT_BATCH(1 << PS_SAMPLER_COUNT_SHIFT |
		  PS_VECTOR_MASK_ENABLE |
		  wm_kernels[kernel].num_surfaces << PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
	OUT_BATCH64(0); /* scratch address */
	OUT_BATCH(PS_MAX_THREADS |
		  (kernels[0] ? PS_8_DISPATCH_ENABLE : 0) |
		  (kernels[1] ? PS_16_DISPATCH_ENABLE : 0) |
		  (kernels[2] ? PS_32_DISPATCH_ENABLE : 0));
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << PS_DISPATCH_START_GRF_SHIFT_0 |
		  8 << PS_DISPATCH_START_GRF_SHIFT_1 |
		  6 << PS_DISPATCH_START_GRF_SHIFT_2);
	OUT_BATCH64(kernels[2]);
	OUT_BATCH64(kernels[1]);
}
1109
/* Point the PS at a new binding table (offset in dwords within surface
 * state).  Returns true if a new pointer was actually emitted, false if
 * the cached value already matched.
 */
static bool
gen9_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen9.surface_table == offset)
		return false;

	/* Binding table pointers */
	assert(is_aligned(4*offset, 32));
	OUT_BATCH(GEN9_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
	OUT_BATCH(offset*4); /* byte offset */

	sna->render_state.gen9.surface_table = offset;
	return true;
}
1124
/* Program the drawing rectangle (clip extents + destination origin).
 * Returns true when the cached state already matched and nothing was
 * emitted; false after emitting a new rectangle.
 */
static bool
gen9_emit_drawing_rectangle(struct sna *sna,
			    const struct sna_composite_op *op)
{
	/* ymax:xmax packed inclusive bounds, ymin:xmin origin offset */
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(abs(op->dst.x), abs(op->dst.y)));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen9.drawrect_limit == limit &&
	    sna->render_state.gen9.drawrect_offset == offset)
		return true;

	sna->render_state.gen9.drawrect_offset = offset;
	sna->render_state.gen9.drawrect_limit = limit;

	OUT_BATCH(GEN9_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}
1148
static void
gen9_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * Emit 3DSTATE_VERTEX_ELEMENTS describing how the VF unit decodes
	 * our packed vertex buffer.
	 *
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 *
	 * The layout id encodes the channel widths: bits 0-1 are the number
	 * of floats for the source channel (0 meaning 16-bit sscaled pairs),
	 * bits 2-3 likewise for the mask channel (0 == no mask).
	 */
	struct gen9_render_state *render = &sna->render_state.gen9;
	uint32_t src_format, dw;
	int id = GEN9_VERTEX(op->u.gen9.flags);
	bool has_mask;

	DBG(("%s: setup id=%d\n", __FUNCTION__, id));

	/* Vertex-element state is cached; only re-emit on layout change */
	if (render->ve_id == id)
		return;
	render->ve_id = id;

	if (render->ve_dirty) {
		/* dummy primitive to flush vertex before change?
		 * NOTE(review): a zero-count 3DPRIMITIVE appears to be used
		 * here as a workaround to drain in-flight vertices before
		 * reprogramming the elements — confirm against hw docs.
		 */
		OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2));
		OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */
		OUT_BATCH(0);
		OUT_BATCH(0);
		OUT_BATCH(1);	/* single instance */
		OUT_BATCH(0);	/* start instance location */
		OUT_BATCH(0);	/* index buffer offset, ignored */
	}

	/* The VUE layout
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
	 *    dword 4-7: position (x, y, 1.0, 1.0),
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
	 *
	 * dword 4-15 are fetched from vertex buffer
	 */
	has_mask = (id >> 2) != 0;
	OUT_BATCH(GEN9_3DSTATE_VERTEX_ELEMENTS |
		((2 * (3 + has_mask)) + 1 - 2));

	/* element 0: constant zero pad for VUE dwords 0-3 */
	OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
		  SURFACEFORMAT_R32G32B32A32_FLOAT << VE_FORMAT_SHIFT |
		  0 << VE_OFFSET_SHIFT);
	OUT_BATCH(COMPONENT_STORE_0 << VE_COMPONENT_0_SHIFT |
		  COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT |
		  COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT |
		  COMPONENT_STORE_0 << VE_COMPONENT_3_SHIFT);

	/* x,y (16-bit signed scaled pair at buffer offset 0) */
	OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
		  SURFACEFORMAT_R16G16_SSCALED << VE_FORMAT_SHIFT |
		  0 << VE_OFFSET_SHIFT);
	OUT_BATCH(COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT |
		  COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT |
		  COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT |
		  COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT);

	/* u0, v0, w0 — source coordinates; width picked by (id & 3) */
	DBG(("%s: first channel %d floats, offset=4\n", __FUNCTION__, id & 3));
	dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
		/* fallthrough (NDEBUG): treat unknown layout as the pair case */
	case 0:
		src_format = SURFACEFORMAT_R16G16_SSCALED;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
		dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = SURFACEFORMAT_R32_FLOAT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
		dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT;
		dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = SURFACEFORMAT_R32G32_FLOAT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
		dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
		dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
		  src_format << VE_FORMAT_SHIFT |
		  4 << VE_OFFSET_SHIFT);
	OUT_BATCH(dw);

	/* u1, v1, w1 — mask coordinates, placed right after the source
	 * channel; a 0-float source channel still occupies one float
	 * (the 16-bit pair), hence the `?: 1`.
	 */
	if (has_mask) {
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset));
		dw = COMPONENT_STORE_1_FLT << VE_COMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = SURFACEFORMAT_R32_FLOAT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
			dw |= COMPONENT_STORE_0 << VE_COMPONENT_1_SHIFT;
			dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
			/* fallthrough (NDEBUG): treat unknown as two floats */
		case 2:
			src_format = SURFACEFORMAT_R32G32_FLOAT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
			dw |= COMPONENT_STORE_0 << VE_COMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_0_SHIFT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_1_SHIFT;
			dw |= COMPONENT_STORE_SRC << VE_COMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE_INDEX_SHIFT | VE_VALID |
			  src_format << VE_FORMAT_SHIFT |
			  offset << VE_OFFSET_SHIFT);
		OUT_BATCH(dw);
	}

	render->ve_dirty = true;
}
1281
inline static void
gen9_emit_pipe_invalidate(struct sna *sna)
{
	/* Heavyweight PIPE_CONTROL: flush the render-target write cache and
	 * the texture cache, and stall the command streamer until done.
	 * Used when a source/mask bo may carry writes from earlier in this
	 * batch (see gen9_emit_state).
	 */
	OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2));
	OUT_BATCH(PIPE_CONTROL_WC_FLUSH |
		  PIPE_CONTROL_TC_FLUSH |
		  PIPE_CONTROL_CS_STALL);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
}
1292
1293inline static void
1294gen9_emit_pipe_flush(struct sna *sna, bool need_stall)
1295{
1296	unsigned stall;
1297
1298	stall = 0;
1299	if (need_stall)
1300		stall = (PIPE_CONTROL_CS_STALL |
1301			 PIPE_CONTROL_STALL_AT_SCOREBOARD);
1302
1303	OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2));
1304	OUT_BATCH(PIPE_CONTROL_WC_FLUSH | stall);
1305	OUT_BATCH64(0);
1306	OUT_BATCH64(0);
1307}
1308
inline static void
gen9_emit_pipe_stall(struct sna *sna)
{
	/* Stall-only PIPE_CONTROL: halt the command streamer at the
	 * scoreboard until outstanding pixel work drains (no cache
	 * invalidation beyond PIPE_CONTROL_FLUSH).
	 */
	OUT_BATCH(GEN9_PIPE_CONTROL | (6 - 2));
	OUT_BATCH(PIPE_CONTROL_CS_STALL |
		  PIPE_CONTROL_FLUSH |
		  PIPE_CONTROL_STALL_AT_SCOREBOARD);
	OUT_BATCH64(0);
	OUT_BATCH64(0);
}
1319
static void
gen9_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	/* Emit all per-operation render state, inserting the cheapest
	 * sufficient PIPE_CONTROL first: invalidate > flush > stall.
	 * Bit 0 of wm_binding_table is borrowed as a "dst was dirty"
	 * flag by the callers; it is masked off before use as an offset.
	 */
	bool need_invalidate;
	bool need_flush;
	bool need_stall;

	assert(op->dst.bo->exec);

	/* Flush if the destination changed under a kernel that reads dst */
	need_flush = wm_binding_table & 1 ||
		(sna->render_state.gen9.emit_flush && GEN9_READS_DST(op->u.gen9.flags));
	if (ALWAYS_FLUSH)
		need_flush = true;

	wm_binding_table &= ~1;

	/* A new surface table implies new surfaces; plan a stall */
	need_stall = sna->render_state.gen9.surface_table != wm_binding_table;

	/* Invalidate caches if a source/mask bo has pending GPU writes */
	need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo);
	if (ALWAYS_INVALIDATE)
		need_invalidate = true;

	/* Drawing-rectangle emission cancels the stall when it re-emits
	 * (returns false), since that itself serialises enough state.
	 */
	need_stall &= gen9_emit_drawing_rectangle(sna, op);
	if (ALWAYS_STALL)
		need_stall = true;

	if (need_invalidate) {
		/* Strongest barrier; subsumes flush and stall */
		gen9_emit_pipe_invalidate(sna);
		kgem_clear_dirty(&sna->kgem);
		assert(op->dst.bo->exec);
		kgem_bo_mark_dirty(op->dst.bo);

		need_flush = false;
		need_stall = false;
	}
	if (need_flush) {
		/* Flush may fold in the stall bits; subsumes plain stall */
		gen9_emit_pipe_flush(sna, need_stall);
		need_stall = false;
	}
	if (need_stall)
		gen9_emit_pipe_stall(sna);

	gen9_emit_cc(sna, GEN9_BLEND(op->u.gen9.flags));
	gen9_emit_sampler(sna, GEN9_SAMPLER(op->u.gen9.flags));
	gen9_emit_sf(sna, GEN9_VERTEX(op->u.gen9.flags) >> 2);
	gen9_emit_wm(sna, op->u.gen9.wm_kernel);
	gen9_emit_vertex_elements(sna, op);
	gen9_emit_binding_table(sna, wm_binding_table);

	/* Remember whether the next op must flush before reading dst */
	sna->render_state.gen9.emit_flush = GEN9_READS_DST(op->u.gen9.flags);
}
1373
static bool gen9_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	/* Second pass for component-alpha rendering: replay the vertices
	 * already emitted for this op with a PictOpAdd blend and the
	 * component-alpha kernel, completing the two-pass CA composite.
	 * Returns true if a pass was emitted (caller must then restore
	 * the original cc/wm state before reusing the op).
	 */
	struct gen9_render_state *state = &sna->render_state.gen9;

	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
	     sna->render.vertex_start, sna->render.vertex_index));

	/* Serialise against the first pass before switching blend state */
	gen9_emit_pipe_stall(sna);

	gen9_emit_cc(sna,
		     GEN9_BLEND(gen9_get_blend(PictOpAdd, true,
					       op->dst.format)));
	gen9_emit_wm(sna,
		     gen9_choose_composite_kernel(PictOpAdd,
						  true, true,
						  op->is_affine));

	/* Re-draw the same vertex range used by the first pass */
	OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2));
	OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	state->ve_dirty = false;
	return true;
}
1407
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil:
	 * reserves a 64-byte, 64-byte-aligned block at the head of the
	 * static state stream (the stream contents start zeroed).
	 */
	sna_static_stream_map(stream, 64, 64);
}
1413
1414static void
1415sampler_state_init(struct gen9_sampler_state *sampler_state,
1416		   sampler_filter_t filter,
1417		   sampler_extend_t extend)
1418{
1419	COMPILE_TIME_ASSERT(sizeof(*sampler_state) == 4*sizeof(uint32_t));
1420
1421	sampler_state->ss0.lod_preclamp = 2;	/* GL mode */
1422	sampler_state->ss0.default_color_mode = 1;
1423
1424	switch (filter) {
1425	default:
1426	case SAMPLER_FILTER_NEAREST:
1427		sampler_state->ss0.min_filter = MAPFILTER_NEAREST;
1428		sampler_state->ss0.mag_filter = MAPFILTER_NEAREST;
1429		break;
1430	case SAMPLER_FILTER_BILINEAR:
1431		sampler_state->ss0.min_filter = MAPFILTER_LINEAR;
1432		sampler_state->ss0.mag_filter = MAPFILTER_LINEAR;
1433		break;
1434	}
1435
1436	/* XXX bicubic filter using MAPFILTER_FLEXIBLE */
1437
1438	switch (extend) {
1439	default:
1440	case SAMPLER_EXTEND_NONE:
1441		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
1442		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
1443		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
1444		break;
1445	case SAMPLER_EXTEND_REPEAT:
1446		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_WRAP;
1447		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_WRAP;
1448		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_WRAP;
1449		break;
1450	case SAMPLER_EXTEND_PAD:
1451		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_CLAMP;
1452		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_CLAMP;
1453		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_CLAMP;
1454		break;
1455	case SAMPLER_EXTEND_REFLECT:
1456		sampler_state->ss3.r_wrap_mode = TEXCOORDMODE_MIRROR;
1457		sampler_state->ss3.s_wrap_mode = TEXCOORDMODE_MIRROR;
1458		sampler_state->ss3.t_wrap_mode = TEXCOORDMODE_MIRROR;
1459		break;
1460	}
1461}
1462
1463static void
1464sampler_copy_init(struct gen9_sampler_state *ss)
1465{
1466	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1467	ss->ss3.non_normalized_coord = 1;
1468
1469	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1470}
1471
1472static void
1473sampler_fill_init(struct gen9_sampler_state *ss)
1474{
1475	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
1476	ss->ss3.non_normalized_coord = 1;
1477
1478	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1479}
1480
1481static uint32_t
1482gen9_tiling_bits(uint32_t tiling)
1483{
1484	switch (tiling) {
1485	default: assert(0);
1486	case I915_TILING_NONE: return 0;
1487	case I915_TILING_X: return SURFACE_TILED;
1488	case I915_TILING_Y: return SURFACE_TILED | SURFACE_TILED_Y;
1489	}
1490}
1491
1492#define MOCS_PTE (1 << 1)
1493#define MOCS_WB (2 << 1)
1494
1495/**
1496 * Sets up the common fields for a surface state buffer for the given
1497 * picture in the given surface state buffer.
1498 */
static uint32_t
gen9_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	/* Build (or reuse) a 16-dword RENDER_SURFACE_STATE for @bo at the
	 * tail of the batch and return its byte offset.  The binding is
	 * cached on the bo keyed by format/is_dst/is_scanout so repeated
	 * binds within one batch are free.
	 */
	uint32_t *ss;
	uint32_t domains;
	int offset;
	uint32_t is_scanout = is_dst && bo->scanout;

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
	if (offset) {
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		assert(offset >= sna->kgem.surface);
		return offset * sizeof(uint32_t);
	}

	/* Surface state is allocated downwards from the end of the batch */
	offset = sna->kgem.surface -= SURFACE_DW;
	ss = sna->kgem.batch + offset;
	ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT |
		 gen9_tiling_bits(bo->tiling) |
		 format << SURFACE_FORMAT_SHIFT |
		 SURFACE_VALIGN_4 | SURFACE_HALIGN_4);
	if (is_dst) {
		ss[0] |= SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	/* MOCS: route scanout/uncached destinations through the PTE
	 * (respecting the display engine's caching), else write-back.
	 */
	ss[1] = (is_scanout || (is_dst && is_uncached(sna, bo))) ? MOCS_PTE << 24 : MOCS_WB << 24;
	ss[2] = ((width - 1)  << SURFACE_WIDTH_SHIFT |
		 (height - 1) << SURFACE_HEIGHT_SHIFT);
	ss[3] = (bo->pitch - 1) << SURFACE_PITCH_SHIFT;
	ss[4] = 0;
	ss[5] = 0;
	ss[6] = 0;
	ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
	/* dwords 8-9: 48-bit surface base address, filled by relocation */
	*(uint64_t *)(ss+8) = kgem_add_reloc64(&sna->kgem, offset + 8, bo, domains, 0);
	ss[10] = 0;
	ss[11] = 0;
	ss[12] = 0;
	ss[13] = 0;
	ss[14] = 0;
	ss[15] = 0;

	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%lx), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, *(uint64_t *)(ss+8),
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
1557
static void gen9_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	/* Emit 3DSTATE_VERTEX_BUFFERS for this vertex layout.  The 64-bit
	 * buffer address is left as 0 and its batch position recorded in
	 * vertex_reloc[], to be fixed up when the vbo is finalised.
	 */
	int id = GEN9_VERTEX(op->u.gen9.flags);

	OUT_BATCH(GEN9_3DSTATE_VERTEX_BUFFERS | (5 - 2));
	OUT_BATCH(id << VB_INDEX_SHIFT | VB_MODIFY_ENABLE |
		  4*op->floats_per_vertex);
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH64(0);
	OUT_BATCH(~0); /* buffer size: disabled */

	/* Mark this layout's buffer as bound for the rest of the batch */
	sna->render.vb_id |= 1 << id;
}
1572
static void gen9_emit_primitive(struct sna *sna)
{
	/* Start a 3DPRIMITIVE with its vertex count left blank; the count
	 * dword (at vertex_offset) is filled in by gen8_vertex_flush().
	 */
	if (sna->kgem.nbatch == sna->render_state.gen9.last_primitive) {
		/* The previous command in the batch is our own primitive:
		 * reopen it and keep appending vertices instead of emitting
		 * a new command (count dword is 5 back from the end).
		 */
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN9_3DPRIMITIVE | (7 - 2));
	OUT_BATCH(0); /* ignored, see VF_TOPOLOGY */
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;

	sna->render_state.gen9.last_primitive = sna->kgem.nbatch;
	sna->render_state.gen9.ve_dirty = false;
}
1593
static bool gen9_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	/* Ensure a primitive (and, if needed, its vertex buffer) is open
	 * before rectangles are emitted.  Returns false if the batch lacks
	 * room, signalling the caller to flush and retry.
	 */
	int id = 1 << GEN9_VERTEX(op->u.gen9.flags);
	int ndwords;

	/* A concurrent writer may have opened the primitive while we
	 * waited for the vertex lock.
	 */
	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* Reserve batch space: worst case includes the CA fixup pass */
	ndwords = op->need_magic_ca_pass ? 60 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen9_emit_vertex_buffer(sna, op);

	gen9_emit_primitive(sna);
	return true;
}
1615
static int gen9_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Slow path when the vbo is full: wait for other threads, close the
	 * current primitive (running the CA pass if required) and obtain a
	 * fresh vertex buffer.  Returns the number of floats now free, or 0
	 * if the batch itself must be submitted first.
	 */

	/* Preventing discarding new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen8_vertex_flush(sna);
		if (gen9_magic_ca_pass(sna, op)) {
			/* CA pass changed cc/wm; restore this op's state */
			gen9_emit_pipe_invalidate(sna);
			gen9_emit_cc(sna, GEN9_BLEND(op->u.gen9.flags));
			gen9_emit_wm(sna, op->u.gen9.wm_kernel);
		}
	}

	return gen8_vertex_finish(sna);
}
1642
inline static int gen9_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	/* Reserve vertex space for up to @want rectangles (3 vertices each)
	 * and ensure a primitive is open.  Returns how many rectangles were
	 * actually reserved (>= 1); may submit the batch and re-emit state
	 * via @emit_state on the way.
	 */
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen9_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen9_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	/* Clamp the request to what fits in the remaining vertex space */
	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	assert(want > 0);
	sna->render.vertex_index += 3*want;
	return want;

flush:
	/* Out of batch/vbo space: close the primitive, submit, re-emit
	 * state into the fresh batch and retry from the top.
	 */
	if (sna->render.vertex_offset) {
		gen8_vertex_flush(sna);
		gen9_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}
1688
inline static uint32_t *gen9_composite_get_binding_table(struct sna *sna,
							 uint16_t *offset)
{
	/* Allocate a zeroed binding table at the tail of the batch and
	 * return a pointer for the caller to fill with surface offsets;
	 * *offset receives its dword position in the batch.
	 */
	uint32_t *table;

	assert(sna->kgem.surface <= 16384);
	sna->kgem.surface -= SURFACE_DW;
	/* Clear all surplus entries to zero in case of prefetch */
	table = memset(sna->kgem.batch + sna->kgem.surface, 0, 64);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}
1704
static void
gen9_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	/* Switch to the render ring and guarantee enough batch space for a
	 * full state emission (~150 dwords) plus two binding tables with up
	 * to three surfaces each; submit and restart the batch if not.
	 */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 2*(1+3))) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8*2));
		_kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	assert(sna->kgem.mode == KGEM_RENDER);
	assert(sna->kgem.ring == KGEM_RENDER);

	/* A fresh batch needs the one-time invariant state re-emitted */
	if (sna->render_state.gen9.needs_invariant)
		gen9_emit_invariant(sna);
}
1724
static void gen9_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Bind dst/src/mask surfaces into a fresh binding table, dedupe it
	 * against the previously-emitted table, then emit the remaining
	 * render state.  The dst-dirty flag rides in bit 0 of the offset
	 * passed to gen9_emit_state().
	 */
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen9_get_batch(sna, op);

	binding_table = gen9_composite_get_binding_table(sna, &offset);

	/* Sample dirtiness before binding marks the dst dirty itself */
	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen9_bind_bo(sna,
			    op->dst.bo, op->dst.width, op->dst.height,
			    gen9_get_dest_format(op->dst.format),
			    true);
	binding_table[1] =
		gen9_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		binding_table[2] =
			gen9_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	/* If the new table matches the last one emitted, discard it and
	 * reuse the old offset (reclaiming the batch space).
	 */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen9.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += SURFACE_DW;
		offset = sna->render_state.gen9.surface_table;
	}

	/* Same destination surface as before => no flush needed for it */
	if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0])
		dirty = 0;

	gen9_emit_state(sna, op, offset | dirty);
}
1770
1771static void
1772gen9_align_vertex(struct sna *sna, const struct sna_composite_op *op)
1773{
1774	if (op->floats_per_vertex != sna->render_state.gen9.floats_per_vertex) {
1775		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
1776		     sna->render_state.gen9.floats_per_vertex, op->floats_per_vertex));
1777		gen8_vertex_align(sna, op);
1778		sna->render_state.gen9.floats_per_vertex = op->floats_per_vertex;
1779	}
1780}
1781
fastcall static void
gen9_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	/* Emit a single composite rectangle: reserve vertex space for one
	 * rect, then hand off to the op's specialised vertex emitter.
	 */
	gen9_get_rectangles(sna, op, 1, gen9_emit_composite_state);
	op->prim_emit(sna, op, r);
}
1790
1791fastcall static void
1792gen9_render_composite_box(struct sna *sna,
1793			  const struct sna_composite_op *op,
1794			  const BoxRec *box)
1795{
1796	struct sna_composite_rectangles r;
1797
1798	gen9_get_rectangles(sna, op, 1, gen9_emit_composite_state);
1799
1800	DBG(("  %s: (%d, %d), (%d, %d)\n",
1801	     __FUNCTION__,
1802	     box->x1, box->y1, box->x2, box->y2));
1803
1804	r.dst.x = box->x1;
1805	r.dst.y = box->y1;
1806	r.width  = box->x2 - box->x1;
1807	r.height = box->y2 - box->y1;
1808	r.src = r.mask = r.dst;
1809
1810	op->prim_emit(sna, op, &r);
1811}
1812
static void
gen9_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	/* Composite a batch of boxes one rectangle at a time via
	 * prim_emit.  Caller guarantees nbox > 0 (do/while entry).
	 */
	DBG(("composite_boxes(%d)\n", nbox));

	do {
		int nbox_this_time;

		/* Reserve as many rects as fit in the current vbo */
		nbox_this_time = gen9_get_rectangles(sna, op, nbox,
						     gen9_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}
1845
static void
gen9_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	/* Fast path: hand whole runs of boxes to the op's vectorised
	 * emit_boxes callback, writing vertices directly into the vbo.
	 * Caller guarantees nbox > 0 (do/while entry).
	 */
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen9_get_rectangles(sna, op, nbox,
						     gen9_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim the vertex span up front; emit_boxes fills it */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}
1869
static void
gen9_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	/* Threaded variant of composite_boxes: the vertex span is claimed
	 * under the vertex lock, then the lock is dropped while emit_boxes
	 * fills it so other threads can claim their own spans concurrently.
	 * Caller guarantees nbox > 0 (do/while entry).
	 */
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen9_get_rectangles(sna, op, nbox,
						     gen9_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		/* Pin the claimed span, then emit outside the lock */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
1901
static uint32_t
gen9_create_blend_state(struct sna_static_stream *stream)
{
	/* Precompute one BLEND_STATE entry for every (src, dst) blend
	 * factor pair in the static state stream, indexed at runtime as
	 * src * GEN9_BLENDFACTOR_COUNT + dst (each entry padded to 64
	 * bytes).  Returns the stream offset of the table.
	 */
	char *base, *ptr;
	int src, dst;

	/* The packed table index must fit the blend-state offset field */
	COMPILE_TIME_ASSERT(((GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT << 4) & (1 << 15)) == 0);

	base = sna_static_stream_map(stream,
				     GEN9_BLENDFACTOR_COUNT * GEN9_BLENDFACTOR_COUNT * GEN9_BLEND_STATE_PADDED_SIZE,
				     64);

	ptr = base;
	for (src = 0; src < GEN9_BLENDFACTOR_COUNT; src++) {
		for (dst = 0; dst < GEN9_BLENDFACTOR_COUNT; dst++) {
			struct gen9_blend_state *blend =
				(struct gen9_blend_state *)ptr;

			assert(((ptr - base) & 63) == 0);
			COMPILE_TIME_ASSERT(sizeof(blend->common) == 4);
			COMPILE_TIME_ASSERT(sizeof(blend->rt) == 8);
			COMPILE_TIME_ASSERT((char *)&blend->rt - (char *)blend == 4);

			blend->rt.post_blend_clamp = 1;
			blend->rt.pre_blend_clamp = 1;

			/* ONE/ZERO is a plain copy; disable blending then */
			blend->rt.color_blend =
				!(dst == BLENDFACTOR_ZERO && src == BLENDFACTOR_ONE);
			blend->rt.dest_blend_factor = dst;
			blend->rt.source_blend_factor = src;
			blend->rt.color_blend_function = BLENDFUNCTION_ADD;

			blend->rt.dest_alpha_blend_factor = dst;
			blend->rt.source_alpha_blend_factor = src;
			blend->rt.alpha_blend_function = BLENDFUNCTION_ADD;

			ptr += GEN9_BLEND_STATE_PADDED_SIZE;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}
1944
static int
gen9_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	/* Prepare a source or mask channel from a Picture, choosing the
	 * cheapest viable path: solid colour, gradient fixup, format
	 * conversion, extraction of an oversized pixmap, or a direct bo.
	 * Returns <= 0 on failure, > 0 on success (gen4 channel-init
	 * convention).
	 */
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	/* 1x1 repeats and constant pictures collapse to a solid colour */
	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		int ret;

		/* Source-only pictures: linear gradients have a dedicated
		 * path; others are rendered to a temporary by pixman.
		 */
		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	/* Unsupported features force a software fixup into a temporary */
	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen9_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen9_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		/* A pure integer translation can be folded into the sample
		 * coordinates, dropping the transform entirely.
		 */
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat ||
		    (x >= 0 &&
		     y >= 0 &&
		     x + w <= pixmap->drawable.width &&
		     y + h <= pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color));
			}
		}
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen9_get_card_format(picture->format);
	if (channel->card_format == (unsigned)-1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	/* Pixmaps beyond the 3D pipe limits are sampled via extraction */
	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}
2050
2051inline static bool gen9_composite_channel_convert(struct sna_composite_channel *channel)
2052{
2053	if (unaligned(channel->bo, PICT_FORMAT_BPP(channel->pict_format)))
2054		return false;
2055
2056	channel->repeat = gen9_repeat(channel->repeat);
2057	channel->filter = gen9_filter(channel->filter);
2058	if (channel->card_format == (unsigned)-1)
2059		channel->card_format = gen9_get_card_format(channel->pict_format);
2060	assert(channel->card_format != (unsigned)-1);
2061
2062	return true;
2063}
2064
static void gen9_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	/* Finish a composite op: close the open primitive (running the
	 * component-alpha second pass if needed), drop the channel bo
	 * references and undo any destination redirection.
	 */
	if (sna->render.vertex_offset) {
		gen8_vertex_flush(sna);
		gen9_magic_ca_pass(sna, op);
	}

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	sna_render_composite_redirect_done(sna, op);
}
2080
inline static bool
gen9_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	/* Resolve the destination drawable to a GPU bo, filling in
	 * op->dst.*.  Returns false if no usable bo is available (caller
	 * falls back) — e.g. misaligned bo or failed redirection.
	 */
	BoxRec box;
	unsigned int hint;

	DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial));

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width  = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	/* w == h == 0 means "unknown extents": use the picture's region */
	if (w | h) {
		assert(w && h);
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	hint = PREFER_GPU | RENDER_GPU;
	if (!need_tiling(sna, op->dst.width, op->dst.height))
		hint |= FORCE_GPU;
	if (!partial) {
		hint |= IGNORE_DAMAGE;
		/* Writing the full pixmap lets us discard its old contents */
		if (w == op->dst.width && h == op->dst.height)
			hint |= REPLACES;
	}

	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	assert(!op->damage || !DAMAGE_IS_ALL(*op->damage));

	if (unaligned(op->dst.bo, dst->pDrawable->bitsPerPixel))
		return false;

	if (hint & REPLACES) {
		/* Cancel any pending writes that the replace makes moot */
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
	}

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	/* Oversized destinations are handled by redirecting rendering
	 * through a proxy of supported size.
	 */
	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}
2149
/* Heuristically decide whether a mask-less composite should be handed
 * to the BLT engine instead of the 3D pipeline.
 *
 * Falls through to sna_blt_composite() either directly ("execute") or
 * with COMPOSITE_UPLOAD added ("upload") when the source must first be
 * brought into a BLT-compatible form.  Returns false to let the caller
 * try the render pipeline instead.
 */
static bool
try_blt(struct sna *sna,
	uint8_t op,
	PicturePtr src,
	PicturePtr mask,
	PicturePtr dst,
	int16_t src_x, int16_t src_y,
	int16_t msk_x, int16_t msk_y,
	int16_t dst_x, int16_t dst_y,
	int16_t width, int16_t height,
	unsigned flags,
	struct sna_composite_op *tmp)
{
	struct kgem_bo *bo;

	/* Avoid a ring switch if we are already emitting BLT commands. */
	if (sna->kgem.mode == KGEM_BLT) {
		DBG(("%s: already performing BLT\n", __FUNCTION__));
		goto execute;
	}

	if (too_large(width, height)) {
		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
		     __FUNCTION__, width, height));
		goto execute;
	}

	bo = __sna_drawable_peek_bo(dst->pDrawable);
	if (bo == NULL)
		goto execute;

	if (untiled_tlb_miss(bo))
		goto execute;

	/* If the destination is busy, only use the BLT if that is the
	 * ring it is already active on; otherwise avoid stalling.
	 */
	if (bo->rq) {
		if (RQ_IS_BLT(bo->rq))
			goto execute;

		return false;
	}

	/* The BLT cannot address Y-tiled surfaces directly. */
	if (bo->tiling == I915_TILING_Y)
		goto upload;

	if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0))
		goto execute;

	if (src->pDrawable == dst->pDrawable &&
	    (sna->render_state.gt < 3 || width*height < 1024) &&
	    can_switch_to_blt(sna, bo, 0))
		goto execute;

	if (src->pDrawable) {
		struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable);
		if (s == NULL)
			goto upload;

		if (prefer_blt_bo(sna, s, bo))
			goto execute;
	}

	if (sna->kgem.ring == KGEM_BLT) {
		DBG(("%s: already performing BLT\n", __FUNCTION__));
		goto execute;
	}

upload:
	flags |= COMPOSITE_UPLOAD;
execute:
	return sna_blt_composite(sna, op,
				 src, dst,
				 src_x, src_y,
				 dst_x, dst_y,
				 width, height,
				 flags, tmp);
}
2225
2226static bool
2227check_gradient(PicturePtr picture, bool precise)
2228{
2229	if (picture->pDrawable)
2230		return false;
2231
2232	switch (picture->pSourcePict->type) {
2233	case SourcePictTypeSolidFill:
2234	case SourcePictTypeLinear:
2235		return false;
2236	default:
2237		return precise;
2238	}
2239}
2240
2241static bool
2242has_alphamap(PicturePtr p)
2243{
2244	return p->alphaMap != NULL;
2245}
2246
2247static bool
2248need_upload(PicturePtr p)
2249{
2250	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
2251}
2252
2253static bool
2254source_is_busy(PixmapPtr pixmap)
2255{
2256	struct sna_pixmap *priv = sna_pixmap(pixmap);
2257	if (priv == NULL || priv->clear)
2258		return false;
2259
2260	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
2261		return true;
2262
2263	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
2264		return true;
2265
2266	return priv->gpu_damage && !priv->cpu_damage;
2267}
2268
2269static bool
2270source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
2271{
2272	if (sna_picture_is_solid(p, NULL))
2273		return false;
2274
2275	if (p->pSourcePict)
2276		return check_gradient(p, precise);
2277
2278	if (!gen9_check_repeat(p) || !gen9_check_format(p->format))
2279		return true;
2280
2281	if (pixmap && source_is_busy(pixmap))
2282		return false;
2283
2284	return has_alphamap(p) || !gen9_check_filter(p) || need_upload(p);
2285}
2286
/* Decide whether the whole composite operation should be performed on
 * the CPU.  Returns true to fall back, false to attempt the GPU path.
 *
 * The core policy: if anything involved is already on the GPU, push
 * everything to the GPU; if the destination is on the CPU and a source
 * would have to fall back anyway, do it all on the CPU.
 */
static bool
gen9_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen9_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask && mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	     __FUNCTION__));
	return dst_use_cpu(dst_pixmap);
}
2377
/* Try to initialise the mask channel by reusing the already-prepared
 * source channel, avoiding a second texture setup when the mask samples
 * the same drawable at the same offset with compatible state.
 *
 * Returns true (non-zero) when *mc has been initialised, false when the
 * caller must set up the mask channel from scratch.
 */
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	if (src_x != msk_x || src_y != msk_y)
		return false;

	/* Identical pictures: share the channel and take a bo reference. */
	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	if (sna_picture_is_solid(mask, &color))
		return gen4_channel_init_solid(sna, mc, color);

	if (sc->is_solid)
		return false;

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen9_check_repeat(mask))
		return false;

	if (!gen9_check_filter(mask))
		return false;

	if (!gen9_check_format(mask->format))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	/* Same drawable but possibly different sampler state: copy the
	 * channel and override repeat/filter/format for the mask.
	 */
	*mc = *sc;
	mc->repeat = gen9_repeat(mask->repeat ? mask->repeatType : RepeatNone);
	mc->filter = gen9_filter(mask->filter);
	mc->pict_format = mask->format;
	mc->card_format = gen9_get_card_format(mask->format);
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}
2432
/* Prepare a composite operation for the gen9 render pipeline.
 *
 * Chooses between the BLT engine, tiled rendering and the 3D pipe,
 * sets up the destination, source and (optional) mask channels, picks
 * the WM kernel and blend state, and installs the emission callbacks
 * in tmp.  On success the batch is in render mode with the initial
 * state emitted; on failure the BLT fallback is attempted.
 */
static bool
gen9_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      unsigned flags,
		      struct sna_composite_op *tmp)
{
	if (op >= ARRAY_SIZE(gen9_blend_op))
		return false;

	DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__,
	     width, height, sna->kgem.mode, sna->kgem.ring));

	/* Mask-less operations may be cheaper on the BLT engine. */
	if (mask == NULL &&
	    try_blt(sna, op,
		    src, mask, dst,
		    src_x, src_y,
		    msk_x, msk_y,
		    dst_x, dst_y,
		    width, height,
		    flags, tmp))
		return true;

	if (gen9_composite_fallback(sna, src, mask, dst))
		goto fallback;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	/* Clearing with the canonical clear picture is a plain Src copy. */
	if (op == PictOpClear && src == sna->clear)
		op = PictOpSrc;
	tmp->op = op;
	if (!gen9_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       flags & COMPOSITE_PARTIAL || op > PictOpSrc))
		goto fallback;

	switch (gen9_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		/* Source reduced to nothing: substitute transparent black. */
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		/* Did we just switch rings to prepare the source? */
		if (mask == NULL &&
		    (prefer_blt_composite(sna, tmp) ||
		     unaligned(tmp->src.bo, PICT_FORMAT_BPP(tmp->src.pict_format))) &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		if (!gen9_composite_channel_convert(&tmp->src))
			goto cleanup_src;

		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.bo = NULL;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value.  We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen9_blend_op[op].src_alpha &&
			    (gen9_blend_op[op].src_blend != BLENDFACTOR_ZERO)) {
				if (op != PictOpOver)
					goto cleanup_src;

				/* Component-alpha Over needs two passes:
				 * OutReverse now, the "magic" pass later.
				 */
				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen9_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				if (!gen9_composite_channel_convert(&tmp->mask))
					goto cleanup_mask;
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	/* Pack sampler, blend and emitter choices into the state word. */
	tmp->u.gen9.flags =
		GEN9_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen9_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
			       gen4_choose_composite_emitter(sna, tmp));
	tmp->u.gen9.wm_kernel = gen9_choose_composite_kernel(tmp->op,
							     tmp->mask.bo != NULL,
							     tmp->has_component_alpha,
							     tmp->is_affine);

	tmp->blt   = gen9_render_composite_blt;
	tmp->box   = gen9_render_composite_box;
	tmp->boxes = gen9_render_composite_boxes__blt;
	if (tmp->emit_boxes){
		tmp->boxes = gen9_render_composite_boxes;
		tmp->thread_boxes = gen9_render_composite_boxes__thread;
	}
	tmp->done  = gen9_render_composite_done;

	/* Make sure all three bo fit in this batch, flushing if needed. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen9_align_vertex(sna, tmp);
	gen9_emit_composite_state(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
		tmp->mask.bo = NULL;
	}
cleanup_src:
	if (tmp->src.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
		tmp->src.bo = NULL;
	}
cleanup_dst:
	if (tmp->redirect.real_bo) {
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
		tmp->redirect.real_bo = NULL;
	}
fallback:
	return (mask == NULL &&
		sna_blt_composite(sna, op,
				  src, dst,
				  src_x, src_y,
				  dst_x, dst_y,
				  width, height,
				  flags | COMPOSITE_FALLBACK, tmp));
}
2621
2622#if !NO_COMPOSITE_SPANS
/* Emit a single opacity-weighted box for a spans composite operation,
 * reserving space for one rectangle and delegating vertex emission to
 * the operation's prim_emit callback.
 */
fastcall static void
gen9_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	gen9_get_rectangles(sna, &op->base, 1, gen9_emit_composite_state);
	op->prim_emit(sna, op, box, opacity);
}
2640
/* Emit a batch of boxes, all sharing one opacity, for a spans
 * composite operation.  gen9_get_rectangles() may grant fewer
 * rectangles than requested (batch space), so loop until all boxes
 * have been emitted.
 */
static void
gen9_render_composite_spans_boxes(struct sna *sna,
				  const struct sna_composite_spans_op *op,
				  const BoxRec *box, int nbox,
				  float opacity)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y));

	do {
		int nbox_this_time;

		nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox,
						     gen9_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			op->prim_emit(sna, op, box++, opacity);
		} while (--nbox_this_time);
	} while (nbox);
}
2670
/* Threaded variant of spans box emission: reserve vertex space while
 * holding the vertex lock, then drop the lock (keeping a reference via
 * sna_vertex_acquire) so emit_boxes can fill the vertices concurrently
 * with other threads reserving their own space.
 */
fastcall static void
gen9_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox,
						     gen9_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim the vertex range under the lock... */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* ...and fill it outside the lock. */
		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
2706
/* Finish a spans composite operation: flush any pending vertices,
 * release the source bo reference and undo any render redirection.
 */
fastcall static void
gen9_render_composite_spans_done(struct sna *sna,
				 const struct sna_composite_spans_op *op)
{
	if (sna->render.vertex_offset)
		gen8_vertex_flush(sna);

	DBG(("%s()\n", __FUNCTION__));

	if (op->base.src.bo)
		kgem_bo_destroy(&sna->kgem, op->base.src.bo);

	sna_render_composite_redirect_done(sna, &op->base);
}
2721
2722static bool
2723gen9_check_composite_spans(struct sna *sna,
2724			   uint8_t op, PicturePtr src, PicturePtr dst,
2725			   int16_t width, int16_t height, unsigned flags)
2726{
2727	if (op >= ARRAY_SIZE(gen9_blend_op))
2728		return false;
2729
2730	if (gen9_composite_fallback(sna, src, NULL, dst))
2731		return false;
2732
2733	if (need_tiling(sna, width, height) &&
2734	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
2735		DBG(("%s: fallback, tiled operation not on GPU\n",
2736		     __FUNCTION__));
2737		return false;
2738	}
2739
2740	return true;
2741}
2742
/* Prepare a spans composite operation (variable per-box opacity) for
 * the gen9 render pipeline.  Sets up the destination and source
 * channels, selects the opacity WM kernel, installs the emission
 * callbacks and emits the initial state.  Returns false on failure
 * after releasing any acquired resources.
 */
static bool
gen9_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.ring));

	assert(gen9_check_composite_spans(sna, op, src, dst, width, height, flags));

	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	if (!gen9_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height, true))
		return false;

	switch (gen9_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		/* Source reduced to nothing: substitute transparent black. */
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		if (!gen9_composite_channel_convert(&tmp->base.src))
			goto cleanup_src;
		break;
	}
	tmp->base.mask.bo = NULL;

	tmp->base.is_affine = tmp->base.src.is_affine;
	tmp->base.need_magic_ca_pass = false;

	/* The opacity is fed through the "mask" sampler slot. */
	tmp->base.u.gen9.flags =
		GEN9_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
					      tmp->base.src.repeat,
					      SAMPLER_FILTER_NEAREST,
					      SAMPLER_EXTEND_PAD),
			       gen9_get_blend(tmp->base.op, false, tmp->base.dst.format),
			       gen4_choose_spans_emitter(sna, tmp));
	tmp->base.u.gen9.wm_kernel =
		GEN9_WM_KERNEL_OPACITY | !tmp->base.is_affine;

	tmp->box   = gen9_render_composite_spans_box;
	tmp->boxes = gen9_render_composite_spans_boxes;
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen9_render_composite_spans_boxes__thread;
	tmp->done  = gen9_render_composite_spans_done;

	/* Ensure both bo fit in the current batch, flushing if needed. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen9_align_vertex(sna, &tmp->base);
	gen9_emit_composite_state(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
2833#endif
2834
/* Emit the surface/binding state for a render-pipeline copy: slot 0 is
 * the destination render target, slot 1 the source texture.  If the new
 * binding table matches the previous one, the just-allocated surface
 * space is reclaimed and the old table reused.
 */
static void
gen9_emit_copy_state(struct sna *sna,
		     const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen9_get_batch(sna, op);

	binding_table = gen9_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen9_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen9_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen9_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	/* Identical to the previous binding table? Reuse it and give the
	 * surface space back.
	 */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += SURFACE_DW;
		offset = sna->render_state.gen9.surface_table;
	}

	/* Same render target as before: no flush needed for dirtiness. */
	if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0])
		dirty = 0;

	assert(!GEN9_READS_DST(op->u.gen9.flags));
	gen9_emit_state(sna, op, offset | dirty);
}
2871
2872static inline bool
2873prefer_blt_copy(struct sna *sna,
2874		struct kgem_bo *src_bo,
2875		struct kgem_bo *dst_bo,
2876		unsigned flags)
2877{
2878	if (sna->kgem.mode == KGEM_BLT)
2879		return true;
2880
2881	assert((flags & COPY_SYNC) == 0);
2882
2883	if (untiled_tlb_miss(src_bo) ||
2884	    untiled_tlb_miss(dst_bo))
2885		return true;
2886
2887	if (flags & COPY_DRI && !sna->kgem.has_semaphores)
2888		return false;
2889
2890	if (force_blt_ring(sna, dst_bo))
2891		return true;
2892
2893	if ((flags & COPY_SMALL ||
2894	     (sna->render_state.gt < 3 && src_bo == dst_bo)) &&
2895	    can_switch_to_blt(sna, dst_bo, flags))
2896		return true;
2897
2898	if (kgem_bo_is_render(dst_bo) ||
2899	    kgem_bo_is_render(src_bo))
2900		return false;
2901
2902	if (flags & COPY_LAST &&
2903	    sna->render_state.gt < 3 &&
2904            can_switch_to_blt(sna, dst_bo, flags))
2905		return true;
2906
2907	if (prefer_render_ring(sna, dst_bo))
2908		return false;
2909
2910	if (!prefer_blt_ring(sna, dst_bo, flags))
2911		return false;
2912
2913	return prefer_blt_bo(sna, src_bo, dst_bo);
2914}
2915
/* Copy an array of boxes between two bo using whichever engine suits:
 * the BLT when preferred, the render pipeline otherwise, with tiled or
 * CPU-assisted fallbacks when the surfaces exceed 3D pipe limits or
 * overlap in ways the pipeline cannot handle.
 */
static bool
gen9_render_copy_boxes(struct sna *sna, uint8_t alu,
		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
		       const BoxRec *box, int n, unsigned flags)
{
	struct sna_composite_op tmp;
	BoxRec extents;

	DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? %d\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags,
	     src_bo == dst_bo,
	     overlaps(sna,
		      src_bo, src_dx, src_dy,
		      dst_bo, dst_dx, dst_dy,
		      box, n, flags, &extents)));

	/* First choice: a straight BLT copy when the heuristics like it. */
	if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
	    sna_blt_compare_depth(src, dst) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->bitsPerPixel,
			       box, n))
		return true;

	/* The render path only implements copy/clear on aligned bo. */
	if (!(alu == GXcopy || alu == GXclear) ||
	    unaligned(src_bo, src->bitsPerPixel) ||
	    unaligned(dst_bo, dst->bitsPerPixel)) {
fallback_blt:
		DBG(("%s: fallback blt\n", __FUNCTION__));
		if (!sna_blt_compare_depth(src, dst))
			return false;

		return sna_blt_copy_boxes_fallback(sna, alu,
						   src, src_bo, src_dx, src_dy,
						   dst, dst_bo, dst_dx, dst_dy,
						   box, n);
	}

	/* Overlapping source/destination cannot go through the sampler. */
	if (overlaps(sna,
		     src_bo, src_dx, src_dy,
		     dst_bo, dst_dx, dst_dy,
		     box, n, flags,
		     &extents)) {
		bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1);

		if ((big || !prefer_render_ring(sna, dst_bo)) &&
		    sna_blt_copy_boxes(sna, alu,
				       src_bo, src_dx, src_dy,
				       dst_bo, dst_dx, dst_dy,
				       dst->bitsPerPixel,
				       box, n))
			return true;

		if (big)
			goto fallback_blt;

		assert(src_bo == dst_bo);
		assert(src->depth == dst->depth);
		assert(src->width == dst->width);
		assert(src->height == dst->height);
		return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo,
						      src_dx, src_dy,
						      dst_dx, dst_dy,
						      box, n, &extents);
	}

	/* Pick matching pixel formats for the blit-through-render trick. */
	if (dst->depth == src->depth) {
		tmp.dst.format = sna_render_format_for_depth(dst->depth);
		tmp.src.pict_format = tmp.dst.format;
	} else {
		tmp.dst.format = sna_format_for_depth(dst->depth);
		tmp.src.pict_format = sna_format_for_depth(src->depth);
	}
	if (!gen9_check_format(tmp.src.pict_format))
		goto fallback_blt;

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width  = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);
	/* Oversized destination: redirect rendering to the bounding box
	 * of all the boxes via a proxy bo.
	 */
	if (too_large(tmp.dst.width, tmp.dst.height)) {
		int i;

		extents = box[0];
		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}

		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1 + dst_dx,
						   extents.y1 + dst_dy,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			goto fallback_tiled;
	}

	tmp.src.card_format = gen9_get_card_format(tmp.src.pict_format);
	/* Oversized source: sample from a partial view of the pixmap. */
	if (too_large(src->width, src->height)) {
		int i;

		extents = box[0];
		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}

		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
					       extents.x1 + src_dx,
					       extents.y1 + src_dy,
					       extents.x2 - extents.x1,
					       extents.y2 - extents.y1))
			goto fallback_tiled_dst;
	} else {
		tmp.src.bo = src_bo;
		tmp.src.width  = src->width;
		tmp.src.height = src->height;
		tmp.src.offset[0] = tmp.src.offset[1] = 0;
	}

	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = 0;

	tmp.u.gen9.flags = COPY_FLAGS(alu);
	tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;

	/* Ensure both bo fit in this batch, cleaning up on failure. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
			if (tmp.src.bo != src_bo)
				kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			if (tmp.redirect.real_bo)
				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
			goto fallback_blt;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	/* Fold the proxy offsets into the per-box deltas. */
	src_dx += tmp.src.offset[0];
	src_dy += tmp.src.offset[1];

	dst_dx += tmp.dst.x;
	dst_dy += tmp.dst.y;

	tmp.dst.x = tmp.dst.y = 0;

	gen9_align_vertex(sna, &tmp);
	gen9_emit_copy_state(sna, &tmp);

	/* Emit each box as a rectangle of three int16 coordinate pairs
	 * (dst then src per vertex).
	 */
	do {
		int16_t *v;
		int n_this_time;

		n_this_time = gen9_get_rectangles(sna, &tmp, n,
						  gen9_emit_copy_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {

			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
			     box->x1 + src_dx, box->y1 + src_dy,
			     box->x1 + dst_dx, box->y1 + dst_dy,
			     box->x2 - box->x1, box->y2 - box->y1));
			v[0] = box->x2 + dst_dx;
			v[2] = box->x2 + src_dx;
			v[1]  = v[5] = box->y2 + dst_dy;
			v[3]  = v[7] = box->y2 + src_dy;
			v[8]  = v[4] = box->x1 + dst_dx;
			v[10] = v[6] = box->x1 + src_dx;
			v[9]  = box->y1 + dst_dy;
			v[11] = box->y1 + src_dy;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen8_vertex_flush(sna);
	sna_render_composite_redirect_done(sna, &tmp);
	if (tmp.src.bo != src_bo)
		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	return true;

fallback_tiled_dst:
	if (tmp.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
	DBG(("%s: fallback tiled\n", __FUNCTION__));
	if (sna_blt_compare_depth(src, dst) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->bitsPerPixel,
			       box, n))
		return true;

	return sna_tiling_copy_boxes(sna, alu,
				     src, src_bo, src_dx, src_dy,
				     dst, dst_bo, dst_dx, dst_dy,
				     box, n);
}
3143
/* Emit one w x h rectangle for a prepared copy operation: three
 * vertices, each an int16 (dst.x, dst.y, src.x, src.y) tuple, wound
 * bottom-right, bottom-left, top-left.
 */
static void
gen9_render_copy_blt(struct sna *sna,
		     const struct sna_copy_op *op,
		     int16_t sx, int16_t sy,
		     int16_t w,  int16_t h,
		     int16_t dx, int16_t dy)
{
	int16_t *v;

	gen9_get_rectangles(sna, &op->base, 1, gen9_emit_copy_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0]  = dx+w; v[1]  = dy+h;
	v[2]  = sx+w; v[3]  = sy+h;
	v[4]  = dx;   v[5]  = dy+h;
	v[6]  = sx;   v[7]  = sy+h;
	v[8]  = dx;   v[9]  = dy;
	v[10] = sx;   v[11] = sy;
}
3166
3167static void
3168gen9_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
3169{
3170	if (sna->render.vertex_offset)
3171		gen8_vertex_flush(sna);
3172}
3173
/* Prepare a reusable pixmap-to-pixmap copy operation.  Prefers the BLT
 * engine; otherwise configures the render pipeline (formats, surfaces,
 * copy kernel) and installs the blt/done callbacks in op.  Returns
 * false when neither path is available.
 */
static bool
gen9_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *op)
{
	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
	     __FUNCTION__, alu,
	     src->drawable.width, src->drawable.height,
	     dst->drawable.width, dst->drawable.height));

	if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy(sna, alu,
			 src_bo, dst_bo,
			 dst->drawable.bitsPerPixel,
			 op))
		return true;

	/* The render path cannot handle self-copies, other alu modes,
	 * oversized or misaligned surfaces.
	 */
	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
	    too_large(src->drawable.width, src->drawable.height) ||
	    too_large(dst->drawable.width, dst->drawable.height) ||
	    unaligned(src_bo, src->drawable.bitsPerPixel) ||
	    unaligned(dst_bo, dst->drawable.bitsPerPixel)) {
fallback:
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
			return false;

		return sna_blt_copy(sna, alu, src_bo, dst_bo,
				    dst->drawable.bitsPerPixel,
				    op);
	}

	if (dst->drawable.depth == src->drawable.depth) {
		op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = op->base.dst.format;
	} else {
		op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
	}
	if (!gen9_check_format(op->base.src.pict_format))
		goto fallback;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.bo = dst_bo;

	op->base.src.bo = src_bo;
	op->base.src.card_format =
		gen9_get_card_format(op->base.src.pict_format);
	op->base.src.width  = src->drawable.width;
	op->base.src.height = src->drawable.height;

	op->base.mask.bo = NULL;

	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen9.flags = COPY_FLAGS(alu);
	op->base.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;

	/* Ensure both bo fit in the current batch, flushing if needed. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen9_align_vertex(sna, &op->base);
	gen9_emit_copy_state(sna, &op->base);

	op->blt  = gen9_render_copy_blt;
	op->done = gen9_render_copy_done;
	return true;
}
3251
static void
gen9_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
	/* Emit the binding table and pipeline state for a solid fill:
	 * surface 0 is the render target, surface 1 the 1x1 solid colour.
	 */
	uint32_t *binding_table;
	uint16_t offset, dirty;

	/* XXX Render Target Fast Clear
	 * Set RTFC Enable in PS and render a rectangle.
	 * Limited to a clearing the full MSC surface only with a
	 * specific kernel.
	 */

	gen9_get_batch(sna, op);

	binding_table = gen9_composite_get_binding_table(sna, &offset);

	/* Record whether the target has pending GPU writes. */
	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen9_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen9_get_dest_format(op->dst.format),
			     true);
	/* The fill colour is sampled from a cached 1x1 solid surface. */
	binding_table[1] =
		gen9_bind_bo(sna,
			     op->src.bo, 1, 1,
			     SURFACEFORMAT_B8G8R8A8_UNORM,
			     false);

	/* If this table matches the previous one, reuse the old table and
	 * reclaim the space just reserved for the duplicate.
	 */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen9.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += SURFACE_DW;
		offset = sna->render_state.gen9.surface_table;
	}

	/* Same render target as the previous op: no flush required. */
	if (sna->kgem.batch[sna->render_state.gen9.surface_table] == binding_table[0])
		dirty = 0;

	gen9_emit_state(sna, op, offset | dirty);
}
3292
static bool
gen9_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       const DrawableRec *dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	/* Fill n boxes with the given colour using Render operator op.
	 * Simple cases are diverted to the blitter; oversized targets are
	 * redirected to a proxy surface or tiled.
	 */
	struct sna_composite_op tmp;
	uint32_t pixel;

	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
	     __FUNCTION__, op,
	     color->red, color->green, color->blue, color->alpha, (int)format));

	if (op >= ARRAY_SIZE(gen9_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* Try the blitter when preferred, or when RENDER cannot express the
	 * destination format or alignment.
	 */
	if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) ||
	    !gen9_check_dst_format(format) ||
	    unaligned(dst_bo, PICT_FORMAT_BPP(format))) {
		uint8_t alu = GXinvalid;

		/* Only Clear/Src-style operators map directly onto a blit. */
		if (op <= PictOpSrc) {
			pixel = 0;
			if (op == PictOpClear)
				alu = GXclear;
			else if (sna_get_pixel_from_rgba(&pixel,
							 color->red,
							 color->green,
							 color->blue,
							 color->alpha,
							 format))
				alu = GXcopy;
		}

		if (alu != GXinvalid &&
		    sna_blt_fill_boxes(sna, alu,
				       dst_bo, dst->bitsPerPixel,
				       pixel, box, n))
			return true;

		if (!gen9_check_dst_format(format))
			return false;
	}

	/* Normalise the colour to a8r8g8b8; Clear becomes Src with pixel 0. */
	if (op == PictOpClear) {
		pixel = 0;
		op = PictOpSrc;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    PICT_a8r8g8b8))
		return false;

	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
	     __FUNCTION__, pixel, n,
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width  = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	/* Targets beyond the 3D limits are redirected to a proxy covering
	 * the boxes' extents, or handed to the tiling fallback.
	 */
	sna_render_composite_redirect_init(&tmp);
	if (too_large(dst->width, dst->height)) {
		BoxRec extents;

		boxes_extents(box, n, &extents);
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1, extents.y1,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	/* Fill colour is sampled from a cached 1x1 solid bo. */
	tmp.src.bo = sna_render_get_solid(sna, pixel);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen9.flags = FILL_FLAGS(op, format);
	tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
			/* Still does not fit in a fresh batch: clean up the
			 * solid bo and any redirection proxy before failing.
			 */
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			tmp.src.bo = NULL;

			if (tmp.redirect.real_bo) {
				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
				tmp.redirect.real_bo = NULL;
			}

			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen9_align_vertex(sna, &tmp);
	gen9_emit_fill_state(sna, &tmp);

	do {
		int n_this_time;
		int16_t *v;

		/* Reserve as many rects as fit in the current vbo. */
		n_this_time = gen9_get_rectangles(sna, &tmp, n,
						  gen9_emit_fill_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {
			DBG(("	(%d, %d), (%d, %d)\n",
			     box->x1, box->y1, box->x2, box->y2));

			/* 3 vertices per box: int16 position + texcoord. */
			v[0] = box->x2;
			v[5] = v[1] = box->y2;
			v[8] = v[4] = box->x1;
			v[9] = box->y1;
			v[2] = v[3]  = v[7]  = 1;
			v[6] = v[10] = v[11] = 0;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen8_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}
3439
3440static void
3441gen9_render_fill_op_blt(struct sna *sna,
3442			const struct sna_fill_op *op,
3443			int16_t x, int16_t y, int16_t w, int16_t h)
3444{
3445	int16_t *v;
3446
3447	DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
3448
3449	gen9_get_rectangles(sna, &op->base, 1, gen9_emit_fill_state);
3450
3451	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3452	sna->render.vertex_used += 6;
3453	assert(sna->render.vertex_used <= sna->render.vertex_size);
3454
3455	v[0] = x+w;
3456	v[4] = v[8] = x;
3457	v[1] = v[5] = y+h;
3458	v[9] = y;
3459
3460	v[2] = v[3]  = v[7]  = 1;
3461	v[6] = v[10] = v[11] = 0;
3462}
3463
3464fastcall static void
3465gen9_render_fill_op_box(struct sna *sna,
3466			const struct sna_fill_op *op,
3467			const BoxRec *box)
3468{
3469	int16_t *v;
3470
3471	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
3472	     box->x1, box->y1, box->x2, box->y2));
3473
3474	gen9_get_rectangles(sna, &op->base, 1, gen9_emit_fill_state);
3475
3476	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3477	sna->render.vertex_used += 6;
3478	assert(sna->render.vertex_used <= sna->render.vertex_size);
3479
3480	v[0] = box->x2;
3481	v[8] = v[4] = box->x1;
3482	v[5] = v[1] = box->y2;
3483	v[9] = box->y1;
3484
3485	v[7] = v[2]  = v[3]  = 1;
3486	v[6] = v[10] = v[11] = 0;
3487}
3488
3489fastcall static void
3490gen9_render_fill_op_boxes(struct sna *sna,
3491			  const struct sna_fill_op *op,
3492			  const BoxRec *box,
3493			  int nbox)
3494{
3495	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
3496	     box->x1, box->y1, box->x2, box->y2, nbox));
3497
3498	do {
3499		int nbox_this_time;
3500		int16_t *v;
3501
3502		nbox_this_time = gen9_get_rectangles(sna, &op->base, nbox,
3503						     gen9_emit_fill_state);
3504		nbox -= nbox_this_time;
3505
3506		v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3507		sna->render.vertex_used += 6 * nbox_this_time;
3508		assert(sna->render.vertex_used <= sna->render.vertex_size);
3509
3510		do {
3511			v[0] = box->x2;
3512			v[8] = v[4] = box->x1;
3513			v[5] = v[1] = box->y2;
3514			v[9] = box->y1;
3515			v[7] = v[2]  = v[3]  = 1;
3516			v[6] = v[10] = v[11] = 0;
3517			box++; v += 12;
3518		} while (--nbox_this_time);
3519	} while (nbox);
3520}
3521
3522static void
3523gen9_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
3524{
3525	if (sna->render.vertex_offset)
3526		gen8_vertex_flush(sna);
3527	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
3528}
3529
static bool
gen9_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *op)
{
	/* Prepare a streamed solid fill: on success *op is set up so the
	 * caller can emit rectangles via op->blt/box/boxes and finish with
	 * op->done().  Falls back to the blitter when preferred or when
	 * RENDER cannot handle the target.
	 */
	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));

	if (prefer_blt_fill(sna, dst_bo, flags) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

	/* RENDER handles only copy/clear within size/alignment limits. */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height) ||
	    unaligned(dst_bo, dst->drawable.bitsPerPixel))
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    op);

	if (alu == GXclear)
		color = 0;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	op->base.dst.bo = dst_bo;
	op->base.dst.x = op->base.dst.y = 0;

	/* The colour is sampled from a cached 1x1 solid surface. */
	op->base.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	op->base.mask.bo = NULL;

	op->base.need_magic_ca_pass = false;
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen9.flags = FILL_FLAGS_NOBLEND;
	op->base.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;

	/* Ensure the target fits in the batch, flushing once if needed;
	 * on failure release the solid bo obtained above.
	 */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, op->base.src.bo);
			return false;
		}

		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen9_align_vertex(sna, &op->base);
	gen9_emit_fill_state(sna, &op->base);

	op->blt   = gen9_render_fill_op_blt;
	op->box   = gen9_render_fill_op_box;
	op->boxes = gen9_render_fill_op_boxes;
	op->points = NULL;
	op->done  = gen9_render_fill_op_done;
	return true;
}
3597
3598static bool
3599gen9_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3600			     uint32_t color,
3601			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
3602			     uint8_t alu)
3603{
3604	BoxRec box;
3605
3606	box.x1 = x1;
3607	box.y1 = y1;
3608	box.x2 = x2;
3609	box.y2 = y2;
3610
3611	return sna_blt_fill_boxes(sna, alu,
3612				  bo, dst->drawable.bitsPerPixel,
3613				  color, &box, 1);
3614}
3615
3616static bool
3617gen9_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3618		     uint32_t color,
3619		     int16_t x1, int16_t y1,
3620		     int16_t x2, int16_t y2,
3621		     uint8_t alu)
3622{
3623	struct sna_composite_op tmp;
3624	int16_t *v;
3625
3626	/* Prefer to use the BLT if already engaged */
3627	if (prefer_blt_fill(sna, bo, FILL_BOXES) &&
3628	    gen9_render_fill_one_try_blt(sna, dst, bo, color,
3629					 x1, y1, x2, y2, alu))
3630		return true;
3631
3632	/* Must use the BLT if we can't RENDER... */
3633	if (!(alu == GXcopy || alu == GXclear) ||
3634	    too_large(dst->drawable.width, dst->drawable.height) ||
3635	    unaligned(bo, dst->drawable.bitsPerPixel))
3636		return gen9_render_fill_one_try_blt(sna, dst, bo, color,
3637						    x1, y1, x2, y2, alu);
3638
3639	if (alu == GXclear)
3640		color = 0;
3641
3642	tmp.dst.pixmap = dst;
3643	tmp.dst.width  = dst->drawable.width;
3644	tmp.dst.height = dst->drawable.height;
3645	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
3646	tmp.dst.bo = bo;
3647	tmp.dst.x = tmp.dst.y = 0;
3648
3649	tmp.src.bo =
3650		sna_render_get_solid(sna,
3651				     sna_rgba_for_color(color,
3652							dst->drawable.depth));
3653	tmp.mask.bo = NULL;
3654
3655	tmp.floats_per_vertex = 2;
3656	tmp.floats_per_rect = 6;
3657	tmp.need_magic_ca_pass = false;
3658
3659	tmp.u.gen9.flags = FILL_FLAGS_NOBLEND;
3660	tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;
3661
3662	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
3663	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3664		kgem_submit(&sna->kgem);
3665		if (kgem_check_bo(&sna->kgem, bo, NULL)) {
3666			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3667			return false;
3668		}
3669		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
3670	}
3671
3672	gen9_align_vertex(sna, &tmp);
3673	gen9_emit_fill_state(sna, &tmp);
3674
3675	gen9_get_rectangles(sna, &tmp, 1, gen9_emit_fill_state);
3676
3677	DBG(("	(%d, %d), (%d, %d)\n", x1, y1, x2, y2));
3678
3679	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3680	sna->render.vertex_used += 6;
3681	assert(sna->render.vertex_used <= sna->render.vertex_size);
3682
3683	v[0] = x2;
3684	v[8] = v[4] = x1;
3685	v[5] = v[1] = y2;
3686	v[9] = y1;
3687	v[7] = v[2]  = v[3]  = 1;
3688	v[6] = v[10] = v[11] = 0;
3689
3690	gen8_vertex_flush(sna);
3691	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3692
3693	return true;
3694}
3695
3696static bool
3697gen9_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
3698{
3699	BoxRec box;
3700
3701	box.x1 = 0;
3702	box.y1 = 0;
3703	box.x2 = dst->drawable.width;
3704	box.y2 = dst->drawable.height;
3705
3706	return sna_blt_fill_boxes(sna, GXclear,
3707				  bo, dst->drawable.bitsPerPixel,
3708				  0, &box, 1);
3709}
3710
static bool
gen9_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	/* Clear the entire pixmap to zero, using the blitter when it is
	 * already engaged or when RENDER cannot handle the target.
	 */
	struct sna_composite_op tmp;
	int16_t *v;

	DBG(("%s: %dx%d\n",
	     __FUNCTION__,
	     dst->drawable.width,
	     dst->drawable.height));

	/* Prefer to use the BLT if already engaged */
	if (sna->kgem.mode == KGEM_BLT &&
	    gen9_render_clear_try_blt(sna, dst, bo))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height) ||
	    unaligned(bo, dst->drawable.bitsPerPixel))
		return gen9_render_clear_try_blt(sna, dst, bo);

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Black (0) sampled from the cached solid-colour cache. */
	tmp.src.bo = sna_render_get_solid(sna, 0);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen9.flags = FILL_FLAGS_NOBLEND;
	tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;

	/* Ensure the target fits in the batch, flushing once if needed. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen9_align_vertex(sna, &tmp);
	gen9_emit_fill_state(sna, &tmp);

	gen9_get_rectangles(sna, &tmp, 1, gen9_emit_fill_state);

	/* One full-surface rectangle: int16 position + texcoord pairs. */
	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = dst->drawable.width;
	v[5] = v[1] = dst->drawable.height;
	v[8] = v[4] = 0;
	v[9] = 0;

	v[7] = v[2]  = v[3]  = 1;
	v[6] = v[10] = v[11] = 0;

	gen8_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
3781
3782#if !NO_VIDEO
static uint32_t gen9_bind_video_source(struct sna *sna,
				       struct kgem_bo *bo,
				       uint32_t delta,
				       int width,
				       int height,
				       int pitch,
				       uint32_t format)
{
	/* Emit a 16-dword SURFACE_STATE for one plane of a video frame
	 * (sampler access only) and return its byte offset in the batch for
	 * the binding table.  delta is the plane's byte offset within bo.
	 */
	uint32_t *ss;
	int offset;

	/* Surface state is allocated downwards from the top of the batch. */
	offset = sna->kgem.surface -= SURFACE_DW;
	ss = sna->kgem.batch + offset;
	ss[0] = (SURFACE_2D << SURFACE_TYPE_SHIFT |
		 gen9_tiling_bits(bo->tiling) |
		 format << SURFACE_FORMAT_SHIFT |
		 SURFACE_VALIGN_4 | SURFACE_HALIGN_4);
	ss[1] = 0;
	ss[2] = ((width - 1)  << SURFACE_WIDTH_SHIFT |
		 (height - 1) << SURFACE_HEIGHT_SHIFT);
	ss[3] = (pitch - 1) << SURFACE_PITCH_SHIFT;
	ss[4] = 0;
	ss[5] = 0;
	ss[6] = 0;
	ss[7] = SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
	/* 64-bit surface base address in dwords 8-9, fixed up at exec. */
	*(uint64_t *)(ss+8) =
		kgem_add_reloc64(&sna->kgem, offset + 8, bo,
				 I915_GEM_DOMAIN_SAMPLER << 16,
				 delta);
	ss[10] = 0;
	ss[11] = 0;
	ss[12] = 0;
	ss[13] = 0;
	ss[14] = 0;
	ss[15] = 0;

	/* NOTE(review): ss[1] is always 0 here, so the logged "addr" is
	 * meaningless — the base address lives in dwords 8-9 on gen9.
	 */
	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> sampler\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling));

	return offset * sizeof(uint32_t);
}
3825
static void gen9_emit_video_state(struct sna *sna,
				  const struct sna_composite_op *op)
{
	/* Emit the binding table for a video blit: surface 0 is the render
	 * target, followed by one sampler surface per source plane (six for
	 * planar formats, one for packed/RGB).
	 */
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format[6];
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	int n_src, n;

	/* XXX VeBox, bicubic */

	gen9_get_batch(sna, op);

	/* Plane offsets within the frame bo: two Y surfaces, then the V
	 * and U chroma planes (each bound twice).
	 */
	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		/* NOTE(review): pitch[1] is used for luma and pitch[0] for
		 * the half-resolution chroma planes; assumed to match the
		 * sna_video_frame layout — confirm against sna_video.c.
		 */
		for (n = 0; n < 2; n++) {
			src_surf_format[n] = SURFACEFORMAT_R8_UNORM;
			src_width[n]  = frame->width;
			src_height[n] = frame->height;
			src_pitch[n]  = frame->pitch[1];
		}
		for (; n < 6; n++) {
			if (is_nv12_fourcc(frame->id))
				src_surf_format[n] = SURFACEFORMAT_R8G8_UNORM;
			else
				src_surf_format[n] = SURFACEFORMAT_R8_UNORM;
			src_width[n]  = frame->width / 2;
			src_height[n] = frame->height / 2;
			src_pitch[n]  = frame->pitch[0];
		}
		n_src = 6;
	} else {
		/* Packed and RGB formats need only a single surface. */
		if (frame->id == FOURCC_RGB888)
			src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM;
		else if (frame->id == FOURCC_UYVY)
			src_surf_format[0] = SURFACEFORMAT_YCRCB_SWAPY;
		else if (is_ayuv_fourcc(frame->id))
			src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM;
		else
			src_surf_format[0] = SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	binding_table = gen9_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen9_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen9_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen9_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format[n]);
	}

	gen9_emit_state(sna, op, offset);
}
3903
3904static unsigned select_video_kernel(const struct sna_video *video,
3905				    const struct sna_video_frame *frame)
3906{
3907	switch (frame->id) {
3908	case FOURCC_YV12:
3909	case FOURCC_I420:
3910	case FOURCC_XVMC:
3911		return video->colorspace ?
3912			GEN9_WM_KERNEL_VIDEO_PLANAR_BT709 :
3913			GEN9_WM_KERNEL_VIDEO_PLANAR_BT601;
3914
3915	case FOURCC_NV12:
3916		return video->colorspace ?
3917			GEN9_WM_KERNEL_VIDEO_NV12_BT709 :
3918			GEN9_WM_KERNEL_VIDEO_NV12_BT601;
3919
3920	case FOURCC_RGB888:
3921	case FOURCC_RGB565:
3922		return GEN9_WM_KERNEL_VIDEO_RGB;
3923
3924	case FOURCC_AYUV:
3925		return video->colorspace ?
3926			GEN9_WM_KERNEL_VIDEO_AYUV_BT709 :
3927			GEN9_WM_KERNEL_VIDEO_AYUV_BT601;
3928
3929	default:
3930		return video->colorspace ?
3931			GEN9_WM_KERNEL_VIDEO_PACKED_BT709 :
3932			GEN9_WM_KERNEL_VIDEO_PACKED_BT601;
3933	}
3934}
3935
static bool
gen9_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	/* Blit the video frame onto the pixmap, one textured rectangle per
	 * clip box of dstRegion, scaling from the frame's src window to the
	 * destination extents.
	 */
	struct sna_composite_op tmp;
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	unsigned filter;
	const BoxRec *box;
	int nbox;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
	     __FUNCTION__,
	     src_width, src_height, dst_width, dst_height,
	     region_num_rects(dstRegion),
	     REGION_EXTENTS(NULL, dstRegion)->x1,
	     REGION_EXTENTS(NULL, dstRegion)->y1,
	     REGION_EXTENTS(NULL, dstRegion)->x2,
	     REGION_EXTENTS(NULL, dstRegion)->y2));

	/* The caller must have validated the target for RENDER use. */
	assert(priv->gpu_bo);
	assert(!too_large(pixmap->drawable.width, pixmap->drawable.height));
	assert(!unaligned(priv->gpu_bo, pixmap->drawable.bitsPerPixel));

	memset(&tmp, 0, sizeof(tmp));

	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;

	/* 3 floats per vertex: int16 position pair + float (s, t). */
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;

	DBG(("%s: scaling?=%d, planar?=%d [%x]\n",
	     __FUNCTION__,
	     src_width != dst_width || src_height != dst_height,
	     is_planar_fourcc(frame->id), frame->id));

	/* Only filter bilinearly when actually scaling. */
	if (src_width == dst_width && src_height == dst_height)
		filter = SAMPLER_FILTER_NEAREST;
	else
		filter = SAMPLER_FILTER_BILINEAR;

	tmp.u.gen9.flags =
		GEN9_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
			       NO_BLEND,
			       2);
	tmp.u.gen9.wm_kernel = select_video_kernel(video, frame);
	tmp.priv = frame;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
			return false;

		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen9_align_vertex(sna, &tmp);
	gen9_emit_video_state(sna, &tmp);

	DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
	     __FUNCTION__,
	     frame->src.x1, frame->src.y1,
	     src_width, src_height,
	     dst_width, dst_height,
	     frame->width, frame->height));

	/* Affine mapping from destination pixels to normalised source
	 * texcoords: s = x * scale + offset (and likewise for t).
	 */
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
	     __FUNCTION__,
	     src_scale_x, src_scale_y,
	     src_offset_x, src_offset_y));

	/* One rectangle (3 vertices) per clip box. */
	box = region_rects(dstRegion);
	nbox = region_num_rects(dstRegion);
	while (nbox--) {
		DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n",
		     __FUNCTION__,
		     box->x1, box->y1,
		     box->x2, box->y2,
		     box->x1 * src_scale_x + src_offset_x,
		     box->y1 * src_scale_y + src_offset_y,
		     box->x2 * src_scale_x + src_offset_x,
		     box->y2 * src_scale_y + src_offset_y));

		gen9_get_rectangles(sna, &tmp, 1, gen9_emit_video_state);

		OUT_VERTEX(box->x2, box->y2);
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(box->x1, box->y2);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(box->x1, box->y1);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

		box++;
	}
	gen8_vertex_flush(sna);

	/* Record the GPU writes for damage tracking. */
	if (!DAMAGE_IS_ALL(priv->gpu_damage))
		sna_damage_add(&priv->gpu_damage, dstRegion);

	return true;
}
4065#endif
4066
static void gen9_render_flush(struct sna *sna)
{
	/* Close out the vertex buffer before the batch is submitted. */
	gen8_vertex_close(sna);

	assert(sna->render.vb_id == 0);
	assert(sna->render.vertex_offset == 0);
}
4074
4075static void gen9_render_reset(struct sna *sna)
4076{
4077	sna->render_state.gen9.emit_flush = false;
4078	sna->render_state.gen9.needs_invariant = true;
4079	sna->render_state.gen9.ve_id = 3 << 2;
4080	sna->render_state.gen9.ve_dirty = false;
4081	sna->render_state.gen9.last_primitive = -1;
4082
4083	sna->render_state.gen9.num_sf_outputs = 0;
4084	sna->render_state.gen9.samplers = -1;
4085	sna->render_state.gen9.blend = -1;
4086	sna->render_state.gen9.kernel = -1;
4087	sna->render_state.gen9.drawrect_offset = -1;
4088	sna->render_state.gen9.drawrect_limit = -1;
4089	sna->render_state.gen9.surface_table = 0;
4090
4091	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
4092		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
4093		discard_vbo(sna);
4094	}
4095
4096	sna->render.vertex_offset = 0;
4097	sna->render.nvertex_reloc = 0;
4098	sna->render.vb_id = 0;
4099}
4100
static void gen9_render_fini(struct sna *sna)
{
	/* Release the static-state bo (kernels, samplers, blend states). */
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen9.general_bo);
}
4105
static bool gen9_render_setup(struct sna *sna)
{
	/* Build the static (immutable) state stream: null surface, WM
	 * kernels for every operation, the sampler matrix and the blend
	 * states.  Returns false if the backing bo cannot be created.
	 */
	struct gen9_render_state *state = &sna->render_state.gen9;
	struct sna_static_stream general;
	struct gen9_sampler_state *ss;
	int i, j, k, l, m;
	uint32_t devid;

	/* NOTE(review): assumes a nibble of the device id encodes the GT
	 * level — confirm against the PCI-ID tables.
	 */
	devid = intel_get_device_id(sna->dev);
	if (devid & 0xf)
		state->gt = GEN9_GT_BIAS + ((devid >> 4) & 0xf) + 1;
	DBG(("%s: gt=%d\n", __FUNCTION__, state->gt));

	/* Select the per-SKU info table, defaulting to the minimum. */
	state->info = &min_gt_info;
	if (is_skl(sna))
		state->info = &skl_gt_info;
	if (is_bxt(sna))
		state->info = &bxt_gt_info;
	if (is_kbl(sna))
		state->info = &kbl_gt_info;
	if (is_glk(sna))
		state->info = &glk_gt_info;
	if (is_cfl(sna))
		state->info = &cfl_gt_info;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);

	/* Upload prebuilt kernels verbatim; compile the rest at the enabled
	 * dispatch widths (slots 0/1/2 = 8/16/32 pixels).
	 */
	for (m = 0; m < ARRAY_SIZE(wm_kernels); m++) {
		if (wm_kernels[m].size) {
			state->wm_kernel[m][1] =
				sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			if (USE_8_PIXEL_DISPATCH) {
				state->wm_kernel[m][0] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 8);
			}

			if (USE_16_PIXEL_DISPATCH) {
				state->wm_kernel[m][1] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 16);
			}

			if (USE_32_PIXEL_DISPATCH) {
				state->wm_kernel[m][2] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 32);
			}
		}
		assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
	}

	COMPILE_TIME_ASSERT(GEN9_WM_KERNEL_COUNT <=
			    1 << (sizeof(((struct sna_composite_op *)NULL)->u.gen9.wm_kernel) * 8));

	/* Sampler matrix: the dedicated copy/fill pairs, then one pair per
	 * (src filter, src extend, mask filter, mask extend) combination.
	 */
	COMPILE_TIME_ASSERT(SAMPLER_OFFSET(FILTER_COUNT, EXTEND_COUNT, FILTER_COUNT, EXTEND_COUNT) <= 0x7ff);
	ss = sna_static_stream_map(&general,
				   2 * sizeof(*ss) *
				   (2 +
				    FILTER_COUNT * EXTEND_COUNT *
				    FILTER_COUNT * EXTEND_COUNT),
				   32);
	state->wm_state = sna_static_stream_offsetof(&general, ss);
	sampler_copy_init(ss); ss += 2;
	sampler_fill_init(ss); ss += 2;
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					sampler_state_init(ss++, i, j);
					sampler_state_init(ss++, k, l);
				}
			}
		}
	}

	state->cc_blend = gen9_create_blend_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}
4196
const char *gen9_render_init(struct sna *sna, const char *backend)
{
	/* Entry point for the gen9 backend: build the static state and wire
	 * up the render vtable.  On failure the previously selected backend
	 * name is returned unchanged.
	 */
	if (!gen9_render_setup(sna))
		return backend;

	/* Batch lifecycle hooks shared with the older gen backends. */
	sna->kgem.context_switch = gen6_render_context_switch;
	sna->kgem.retire = gen6_render_retire;
	sna->kgem.expire = gen4_render_expire;

#if !NO_COMPOSITE
	sna->render.composite = gen9_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen9_check_composite_spans;
	sna->render.composite_spans = gen9_render_composite_spans;
	sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
#if !NO_VIDEO
	sna->render.video = gen9_render_video;
#endif

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen9_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen9_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen9_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen9_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen9_render_fill_one;
#endif
#if !NO_FILL_CLEAR
	sna->render.clear = gen9_render_clear;
#endif

	sna->render.flush = gen9_render_flush;
	sna->render.reset = gen9_render_reset;
	sna->render.fini = gen9_render_fini;

	/* 3D pipeline limits used by the too_large()/unaligned() checks. */
	sna->render.max_3d_size = GEN9_MAX_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->render_state.gen9.info->name;
}
4247