gen7_render.c revision 42542f5f
1/*
2 * Copyright © 2006,2008,2011 Intel Corporation
3 * Copyright © 2007 Red Hat, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 *    Wang Zhenyu <zhenyu.z.wang@sna.com>
26 *    Eric Anholt <eric@anholt.net>
27 *    Carl Worth <cworth@redhat.com>
28 *    Keith Packard <keithp@keithp.com>
29 *    Chris Wilson <chris@chris-wilson.co.uk>
30 *
31 */
32
33#ifdef HAVE_CONFIG_H
34#include "config.h"
35#endif
36
37#include "sna.h"
38#include "sna_reg.h"
39#include "sna_render.h"
40#include "sna_render_inline.h"
41#include "sna_video.h"
42
43#include "brw/brw.h"
44#include "gen7_render.h"
45#include "gen4_common.h"
46#include "gen4_source.h"
47#include "gen4_vertex.h"
48#include "gen6_common.h"
49
50#define ALWAYS_INVALIDATE 0
51#define ALWAYS_FLUSH 0
52#define ALWAYS_STALL 0
53
54#define NO_COMPOSITE 0
55#define NO_COMPOSITE_SPANS 0
56#define NO_COPY 0
57#define NO_COPY_BOXES 0
58#define NO_FILL 0
59#define NO_FILL_BOXES 0
60#define NO_FILL_ONE 0
61#define NO_FILL_CLEAR 0
62
63#define NO_RING_SWITCH 0
64
65#define USE_8_PIXEL_DISPATCH 1
66#define USE_16_PIXEL_DISPATCH 1
67#define USE_32_PIXEL_DISPATCH 0
68
69#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
70#error "Must select at least 8, 16 or 32 pixel dispatch"
71#endif
72
73#define GEN7_MAX_SIZE 16384
74
75/* XXX Todo
76 *
77 * STR (software tiled rendering) mode. No, really.
78 * 64x32 pixel blocks align with the rendering cache. Worth considering.
79 */
80
81#define is_aligned(x, y) (((x) & ((y) - 1)) == 0)
82
/* Per-SKU hardware limits used to program the gen7 3D pipeline.
 * One static instance below is selected at init time according to
 * the chipset (Ivybridge / Baytrail / Haswell) and GT level.
 */
struct gt_info {
	const char *name;		/* human-readable chipset name, for logging */
	uint32_t max_vs_threads;	/* maximum vertex-shader thread count */
	uint32_t max_gs_threads;	/* maximum geometry-shader thread count */
	uint32_t max_wm_threads;	/* pre-shifted max PS threads field for 3DSTATE_PS */
	struct {
		int size;		/* total URB size, in KB */
		int max_vs_entries;	/* URB entries handed to the VS */
		int max_gs_entries;	/* URB entries handed to the GS */
		int push_ps_size; /* in 1KBs */
	} urb;
	int gt;				/* GT level (0 == baseline/unknown variant) */
	uint32_t mocs;			/* memory object control state for state bases */
};
97
/* Conservative Ivybridge defaults for when the exact GT level is
 * unknown: minimal thread counts and URB allocation.
 * Note: .mocs is omitted here (zero-initialized), unlike the gt1/gt2
 * variants below.
 */
static const struct gt_info ivb_gt_info = {
	.name = "Ivybridge (gen7)",
	.max_vs_threads = 16,
	.max_gs_threads = 16,
	.max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT,
	.urb = { 128, 64, 64, 8 },
	.gt = 0,
};

static const struct gt_info ivb_gt1_info = {
	.name = "Ivybridge (gen7, gt1)",
	.max_vs_threads = 36,
	.max_gs_threads = 36,
	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
	.urb = { 128, 512, 192, 8 },
	.gt = 1,
	.mocs = 3,
};

static const struct gt_info ivb_gt2_info = {
	.name = "Ivybridge (gen7, gt2)",
	.max_vs_threads = 128,
	.max_gs_threads = 128,
	.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT,
	.urb = { 256, 704, 320, 8 },
	.gt = 2,
	.mocs = 3,
};

/* Baytrail (Valleyview) shares the IVB gt1 limits but leaves mocs 0. */
static const struct gt_info byt_gt_info = {
	.name = "Baytrail (gen7)",
	.max_vs_threads = 36,
	.max_gs_threads = 36,
	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
	.urb = { 128, 512, 192, 8 },
	.gt = 1,
};

/* Haswell packs an extra sample-mask field into max_wm_threads. */
static const struct gt_info hsw_gt_info = {
	.name = "Haswell (gen7.5)",
	.max_vs_threads = 8,
	.max_gs_threads = 8,
	.max_wm_threads =
		(8 - 1) << HSW_PS_MAX_THREADS_SHIFT |
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
	.urb = { 128, 64, 64, 8 },
	.gt = 0,
};

static const struct gt_info hsw_gt1_info = {
	.name = "Haswell (gen7.5, gt1)",
	.max_vs_threads = 70,
	.max_gs_threads = 70,
	.max_wm_threads =
		(102 - 1) << HSW_PS_MAX_THREADS_SHIFT |
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
	.urb = { 128, 640, 256, 8 },
	.gt = 1,
	.mocs = 5,
};

static const struct gt_info hsw_gt2_info = {
	.name = "Haswell (gen7.5, gt2)",
	.max_vs_threads = 140,
	.max_gs_threads = 140,
	.max_wm_threads =
		(140 - 1) << HSW_PS_MAX_THREADS_SHIFT |
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
	.urb = { 256, 1664, 640, 8 },
	.gt = 2,
	.mocs = 5,
};

static const struct gt_info hsw_gt3_info = {
	.name = "Haswell (gen7.5, gt3)",
	.max_vs_threads = 280,
	.max_gs_threads = 280,
	.max_wm_threads =
		(280 - 1) << HSW_PS_MAX_THREADS_SHIFT |
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
	.urb = { 512, 3328, 1280, 16 },
	.gt = 3,
	.mocs = 5,
};
182
183inline static bool is_ivb(struct sna *sna)
184{
185	return sna->kgem.gen == 070;
186}
187
188inline static bool is_byt(struct sna *sna)
189{
190	return sna->kgem.gen == 071;
191}
192
193inline static bool is_hsw(struct sna *sna)
194{
195	return sna->kgem.gen == 075;
196}
197
/* Precompiled pixel-shader program (assembled .g7b fragments) for
 * textured video: affine sampling of a packed YUV surface, YUV->RGB
 * conversion, then write-out.
 */
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_yuv_rgb.g7b"
#include "exa_wm_write.g7b"
};

/* As above, but sampling the separate Y/U/V planes of planar video. */
static const uint32_t ps_kernel_planar[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_planar.g7b"
#include "exa_wm_yuv_rgb.g7b"
#include "exa_wm_write.g7b"
};
211
/* Pixel-shader kernel table, indexed by GEN7_WM_KERNEL_*.
 * KERNEL() entries carry a precompiled program (size != 0);
 * NOKERNEL() entries carry a brw_wm_kernel__* generator function in
 * .data with size == 0, to be assembled at runtime.
 */
#define KERNEL(kernel_enum, kernel, num_surfaces) \
    [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces}
#define NOKERNEL(kernel_enum, func, num_surfaces) \
    [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces}
static const struct wm_kernel_info {
	const char *name;	/* enum name, for debug output */
	const void *data;	/* program bytes, or generator function */
	unsigned int size;	/* program size in bytes; 0 => generator */
	int num_surfaces;	/* binding-table entries the kernel uses */
} wm_kernels[] = {
	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),

	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),

	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),

	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),

	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),

	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
};
#undef KERNEL
241
/* Source/destination blend factors for each Render PictOp, in PictOp
 * order.  src_alpha marks ops whose destination factor consumes the
 * source alpha and therefore needs the component-alpha fixup applied
 * in gen7_get_blend().
 */
static const struct blendinfo {
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen7_blend_op[] = {
	/* Clear */	{0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO},
	/* Src */	{0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO},
	/* Dst */	{0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ONE},
	/* Over */	{1, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ONE},
	/* In */	{0, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
	/* InReverse */	{1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_SRC_ALPHA},
	/* Out */	{0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
	/* OutReverse */ {1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	{1, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_SRC_ALPHA},
	/* Xor */	{1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	{0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ONE},
};
261
/**
 * Highest-valued BLENDFACTOR used in gen7_blend_op.
 *
 * This leaves out GEN7_BLENDFACTOR_INV_DST_COLOR,
 * GEN7_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN7_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN7_BLENDFACTOR_COUNT (GEN7_BLENDFACTOR_INV_DST_ALPHA + 1)

#define GEN7_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen7_blend_state), 64)

/* Byte offset of the pre-baked blend state for the (src, dst) factor
 * pair within the static blend-state array; bit 15 additionally
 * records whether the blend reads the destination (dst != ZERO),
 * which GEN7_READS_DST() extracts later.
 */
#define BLEND_OFFSET(s, d) \
	((d != GEN7_BLENDFACTOR_ZERO) << 15 | \
	 (((s) * GEN7_BLENDFACTOR_COUNT + (d)) * GEN7_BLEND_STATE_PADDED_SIZE))

#define NO_BLEND BLEND_OFFSET(GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO)

/* Byte offset of the (src, mask) sampler-state pair for the given
 * filter/extend combination; the "+ 2" skips the two special pairs
 * used by the copy (offset 0) and fill paths below.
 */
#define SAMPLER_OFFSET(sf, se, mf, me) \
	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) * 2 * sizeof(struct gen7_sampler_state))

#define VERTEX_2s2s 0

/* Pre-baked state tuples for the copy and fill fast paths. */
#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN7_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN7_WM_KERNEL_NOMASK, COPY_VERTEX)

#define FILL_SAMPLER (2 * sizeof(struct gen7_sampler_state))
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)

/* Pack/unpack the (sampler, blend, kernel, vertex) selectors into the
 * single 32-bit op->u.gen7.flags word: sampler | kernel in the high
 * half, blend | vertex in the low half.
 */
#define GEN7_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN7_BLEND(f) (((f) >> 0) & 0x7ff0)
#define GEN7_READS_DST(f) (((f) >> 15) & 1)
#define GEN7_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN7_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN7_SET_FLAGS(S, B, K, V)  (((S) | (K)) << 16 | ((B) | (V)))

/* Convenience wrappers around the batch and vertex emitters. */
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
304
305static inline bool too_large(int width, int height)
306{
307	return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE;
308}
309
310static uint32_t gen7_get_blend(int op,
311			       bool has_component_alpha,
312			       uint32_t dst_format)
313{
314	uint32_t src, dst;
315
316	src = gen7_blend_op[op].src_blend;
317	dst = gen7_blend_op[op].dst_blend;
318
319	/* If there's no dst alpha channel, adjust the blend op so that
320	 * we'll treat it always as 1.
321	 */
322	if (PICT_FORMAT_A(dst_format) == 0) {
323		if (src == GEN7_BLENDFACTOR_DST_ALPHA)
324			src = GEN7_BLENDFACTOR_ONE;
325		else if (src == GEN7_BLENDFACTOR_INV_DST_ALPHA)
326			src = GEN7_BLENDFACTOR_ZERO;
327	}
328
329	/* If the source alpha is being used, then we should only be in a
330	 * case where the source blend factor is 0, and the source blend
331	 * value is the mask channels multiplied by the source picture's alpha.
332	 */
333	if (has_component_alpha && gen7_blend_op[op].src_alpha) {
334		if (dst == GEN7_BLENDFACTOR_SRC_ALPHA)
335			dst = GEN7_BLENDFACTOR_SRC_COLOR;
336		else if (dst == GEN7_BLENDFACTOR_INV_SRC_ALPHA)
337			dst = GEN7_BLENDFACTOR_INV_SRC_COLOR;
338	}
339
340	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
341	     op, dst_format, PICT_FORMAT_A(dst_format),
342	     src, dst, (int)BLEND_OFFSET(src, dst)));
343	return BLEND_OFFSET(src, dst);
344}
345
346static uint32_t gen7_get_card_format(PictFormat format)
347{
348	switch (format) {
349	default:
350		return -1;
351	case PICT_a8r8g8b8:
352		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
353	case PICT_x8r8g8b8:
354		return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM;
355	case PICT_a8b8g8r8:
356		return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
357	case PICT_x8b8g8r8:
358		return GEN7_SURFACEFORMAT_R8G8B8X8_UNORM;
359#ifdef PICT_a2r10g10b10
360	case PICT_a2r10g10b10:
361		return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
362	case PICT_x2r10g10b10:
363		return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM;
364#endif
365	case PICT_r8g8b8:
366		return GEN7_SURFACEFORMAT_R8G8B8_UNORM;
367	case PICT_r5g6b5:
368		return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
369	case PICT_a1r5g5b5:
370		return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
371	case PICT_a8:
372		return GEN7_SURFACEFORMAT_A8_UNORM;
373	case PICT_a4r4g4b4:
374		return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
375	}
376}
377
378static uint32_t gen7_get_dest_format(PictFormat format)
379{
380	switch (format) {
381	default:
382		return -1;
383	case PICT_a8r8g8b8:
384	case PICT_x8r8g8b8:
385		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
386	case PICT_a8b8g8r8:
387	case PICT_x8b8g8r8:
388		return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
389#ifdef PICT_a2r10g10b10
390	case PICT_a2r10g10b10:
391	case PICT_x2r10g10b10:
392		return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
393#endif
394	case PICT_r5g6b5:
395		return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
396	case PICT_x1r5g5b5:
397	case PICT_a1r5g5b5:
398		return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
399	case PICT_a8:
400		return GEN7_SURFACEFORMAT_A8_UNORM;
401	case PICT_a4r4g4b4:
402	case PICT_x4r4g4b4:
403		return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
404	}
405}
406
407static bool gen7_check_dst_format(PictFormat format)
408{
409	if (gen7_get_dest_format(format) != -1)
410		return true;
411
412	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
413	return false;
414}
415
/* Report whether we can sample from the given source format. */
static bool gen7_check_format(uint32_t format)
{
	if (gen7_get_card_format(format) == -1) {
		DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
		return false;
	}

	return true;
}
424
425static uint32_t gen7_filter(uint32_t filter)
426{
427	switch (filter) {
428	default:
429		assert(0);
430	case PictFilterNearest:
431		return SAMPLER_FILTER_NEAREST;
432	case PictFilterBilinear:
433		return SAMPLER_FILTER_BILINEAR;
434	}
435}
436
437static uint32_t gen7_check_filter(PicturePtr picture)
438{
439	switch (picture->filter) {
440	case PictFilterNearest:
441	case PictFilterBilinear:
442		return true;
443	default:
444		return false;
445	}
446}
447
448static uint32_t gen7_repeat(uint32_t repeat)
449{
450	switch (repeat) {
451	default:
452		assert(0);
453	case RepeatNone:
454		return SAMPLER_EXTEND_NONE;
455	case RepeatNormal:
456		return SAMPLER_EXTEND_REPEAT;
457	case RepeatPad:
458		return SAMPLER_EXTEND_PAD;
459	case RepeatReflect:
460		return SAMPLER_EXTEND_REFLECT;
461	}
462}
463
464static bool gen7_check_repeat(PicturePtr picture)
465{
466	if (!picture->repeat)
467		return true;
468
469	switch (picture->repeatType) {
470	case RepeatNone:
471	case RepeatNormal:
472	case RepeatPad:
473	case RepeatReflect:
474		return true;
475	default:
476		return false;
477	}
478}
479
480static int
481gen7_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
482{
483	int base;
484
485	if (has_mask) {
486		if (is_ca) {
487			if (gen7_blend_op[op].src_alpha)
488				base = GEN7_WM_KERNEL_MASKSA;
489			else
490				base = GEN7_WM_KERNEL_MASKCA;
491		} else
492			base = GEN7_WM_KERNEL_MASK;
493	} else
494		base = GEN7_WM_KERNEL_NOMASK;
495
496	return base + !is_affine;
497}
498
/* Partition the URB: allocate the PS push-constant space, hand all
 * usable entries to the VS, and give the unused HS/DS/GS zero-sized
 * allocations at distinct start addresses.
 */
static void
gen7_emit_urb(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
	OUT_BATCH(sna->render_state.gen7.info->urb.push_ps_size);

	/* num of VS entries must be divisible by 8 if size < 9 */
	OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
	OUT_BATCH((sna->render_state.gen7.info->urb.max_vs_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
		  (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));

	OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2));
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
		  (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));

	OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2));
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
		  (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));

	OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2));
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
}
523
/* Program STATE_BASE_ADDRESS so later packets can use short relative
 * offsets: surface state is based on the batch itself (NULL reloc
 * target), while dynamic and instruction state live in general_bo.
 * The upper-bound registers are written with only BASE_ADDRESS_MODIFY,
 * i.e. disabled.
 */
static void
gen7_emit_state_base_address(struct sna *sna)
{
	uint32_t mocs = sna->render_state.gen7.info->mocs << 8;

	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
	OUT_BATCH(0); /* general */
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic */
				 sna->kgem.nbatch,
				 sna->render_state.gen7.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 mocs | BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* indirect */
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
				 sna->kgem.nbatch,
				 sna->render_state.gen7.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 mocs | BASE_ADDRESS_MODIFY));

	/* upper bounds, disable */
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
}
554
/* Disable the vertex-shader unit: no kernel, pass-through. */
static void
gen7_disable_vs(struct sna *sna)
{
	/* For future reference:
	 * A PIPE_CONTROL with post-sync op set to 1 and a depth stall needs
	 * to be emitted just prior to change VS state, i.e. 3DSTATE_VS,
	 * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS,
	 * 3DSTATE_BINDING_TABLE_POINTER_VS, 3DSTATE_SAMPLER_STATE_POINTER_VS.
	 *
	 * Here we are saved by the full flush incurred when emitting
	 * the batchbuffer.
	 */
	OUT_BATCH(GEN7_3DSTATE_VS | (6 - 2));
	OUT_BATCH(0); /* no VS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */

#if 0
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_VS | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
	OUT_BATCH(0);
#endif
}
590
/* Disable the hull-shader unit: no kernel, pass-through. */
static void
gen7_disable_hs(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2));
	OUT_BATCH(0); /* no HS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */

#if 0
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
	OUT_BATCH(0);
#endif
}
618
/* Disable the tessellation engine. */
static void
gen7_disable_te(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
627
/* Disable the domain-shader unit. */
static void
gen7_disable_ds(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if 0
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
	OUT_BATCH(0);
#endif
}
654
/* Disable the geometry-shader unit: no kernel, pass-through. */
static void
gen7_disable_gs(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_GS | (7 - 2));
	OUT_BATCH(0); /* no GS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */

#if 0
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_GS | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
	OUT_BATCH(0);
#endif
}
682
/* Disable stream output (transform feedback). */
static void
gen7_disable_streamout(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
}
690
/* Static strip/fan setup: no culling, last-vertex provoking for
 * trifans; everything else left at hardware defaults.
 */
static void
gen7_emit_sf_invariant(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_SF | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(GEN7_3DSTATE_SF_CULL_NONE);
	OUT_BATCH(2 << GEN7_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
702
/* Static color-calculator setup: only the CC viewport pointer is
 * cleared; the CC/depth-stencil pointers are untouched (unused).
 */
static void
gen7_emit_cc_invariant(struct sna *sna)
{
#if 0 /* unused, no change */
	OUT_BATCH(GEN7_3DSTATE_CC_STATE_POINTERS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
	OUT_BATCH(0);
#endif

	/* XXX clear to be safe */
	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
	OUT_BATCH(0);
}
718
/* Disable the clipper (pass-through) and clear the SF/CLIP viewport
 * pointer.
 */
static void
gen7_disable_clip(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_CLIP | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
	OUT_BATCH(0);
}
730
/* Static windower setup: enable dispatch with perspective-correct
 * pixel barycentrics; per-operation PS state is emitted elsewhere.
 */
static void
gen7_emit_wm_invariant(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_WM | (3 - 2));
	OUT_BATCH(GEN7_WM_DISPATCH_ENABLE |
		  GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
	OUT_BATCH(0);

#if 0
	/* XXX length bias of 7 in old spec? */
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_PS | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif
}
750
/* Bind a null depth buffer: 2D rendering never uses depth, stencil
 * or hierarchical-Z, but the state must still be programmed.
 */
static void
gen7_emit_null_depth_buffer(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
	OUT_BATCH(GEN7_SURFACE_NULL << GEN7_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
		  GEN7_DEPTHFORMAT_D32_FLOAT << GEN7_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
	OUT_BATCH(0); /* disable depth, stencil and hiz */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if 0
	OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif
}
769
/* Emit the once-per-batch pipeline setup: select the 3D pipe,
 * single-sample multisampling, the URB layout and state bases, then
 * disable every fixed-function stage 2D rendering does not use.
 * Marks needs_invariant false when done.
 */
static void
gen7_emit_invariant(struct sna *sna)
{
	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE | (4 - 2));
	OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
		  GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_SAMPLE_MASK | (2 - 2));
	OUT_BATCH(1);

	gen7_emit_urb(sna);

	gen7_emit_state_base_address(sna);

	gen7_disable_vs(sna);
	gen7_disable_hs(sna);
	gen7_disable_te(sna);
	gen7_disable_ds(sna);
	gen7_disable_gs(sna);
	gen7_disable_clip(sna);
	gen7_emit_sf_invariant(sna);
	gen7_emit_wm_invariant(sna);
	gen7_emit_cc_invariant(sna);
	gen7_disable_streamout(sna);
	gen7_emit_null_depth_buffer(sna);

	sna->render_state.gen7.needs_invariant = false;
}
802
/* Select the pre-baked blend state at cc_blend + blend_offset,
 * skipping the packet if that state is already current.
 */
static void
gen7_emit_cc(struct sna *sna, uint32_t blend_offset)
{
	struct gen7_render_state *render = &sna->render_state.gen7;

	if (render->blend == blend_offset)
		return;

	DBG(("%s: blend = %x\n", __FUNCTION__, blend_offset));

	/* XXX can have upto 8 blend states preload, selectable via
	 * Render Target Index. What other side-effects of Render Target Index?
	 */

	assert (is_aligned(render->cc_blend + blend_offset, 64));
	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
	/* NOTE(review): bit0 appears to mark the pointer as modified/valid
	 * — confirm against the PRM description of this packet. */
	OUT_BATCH((render->cc_blend + blend_offset) | 1);

	render->blend = blend_offset;
}
823
/* Point the PS at the pre-baked sampler-state pair for this
 * operation (offset within wm_state), if it changed.
 */
static void
gen7_emit_sampler(struct sna *sna, uint32_t state)
{
	if (sna->render_state.gen7.samplers == state)
		return;

	sna->render_state.gen7.samplers = state;

	DBG(("%s: sampler = %x\n", __FUNCTION__, state));

	assert (is_aligned(sna->render_state.gen7.wm_state + state, 32));
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
	OUT_BATCH(sna->render_state.gen7.wm_state + state);
}
838
/* Program the setup backend (SBE) with the number of attributes to
 * forward to the PS: one texcoord for source-only operations, two
 * when a mask channel is present.  Skipped if unchanged.
 */
static void
gen7_emit_sf(struct sna *sna, bool has_mask)
{
	int num_sf_outputs = has_mask ? 2 : 1;

	if (sna->render_state.gen7.num_sf_outputs == num_sf_outputs)
		return;

	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
	     __FUNCTION__, num_sf_outputs, 1, 0));

	sna->render_state.gen7.num_sf_outputs = num_sf_outputs;

	OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
	OUT_BATCH(num_sf_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT |
		  1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
		  1 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(0); /* dw4 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* dw8 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* dw12 */
	OUT_BATCH(0);
	OUT_BATCH(0);
}
869
/* Program 3DSTATE_PS for the selected WM kernel.  wm_kernel[kernel]
 * holds up to three kernel start offsets (SIMD8/16/32); a zero entry
 * means that dispatch width is absent.  Skipped if unchanged.
 */
static void
gen7_emit_wm(struct sna *sna, int kernel)
{
	const uint32_t *kernels;

	if (sna->render_state.gen7.kernel == kernel)
		return;

	sna->render_state.gen7.kernel = kernel;
	kernels = sna->render_state.gen7.wm_kernel[kernel];

	DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
	     __FUNCTION__,
	     wm_kernels[kernel].name,
	     wm_kernels[kernel].num_surfaces,
	     kernels[0], kernels[1], kernels[2]));

	OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2));
	/* first non-zero kernel offset (GNU ?: elides the middle operand) */
	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
	OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT |
		  wm_kernels[kernel].num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
	OUT_BATCH(0); /* scratch address */
	OUT_BATCH(sna->render_state.gen7.info->max_wm_threads |
		  (kernels[0] ? GEN7_PS_8_DISPATCH_ENABLE : 0) |
		  (kernels[1] ? GEN7_PS_16_DISPATCH_ENABLE : 0) |
		  (kernels[2] ? GEN7_PS_32_DISPATCH_ENABLE : 0) |
		  GEN7_PS_ATTRIBUTE_ENABLE);
	/* start GRF per dispatch width; choice depends on which are enabled */
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
		  8 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
		  6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
	OUT_BATCH(kernels[2]);
	OUT_BATCH(kernels[1]);
}
903
/* Point the PS at the binding table at 4*offset bytes; returns true
 * if a new pointer was actually emitted, false if already current.
 */
static bool
gen7_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen7.surface_table == offset)
		return false;

	/* Binding table pointers */
	assert(is_aligned(4*offset, 32));
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
	OUT_BATCH(offset*4);

	sna->render_state.gen7.surface_table = offset;
	return true;
}
918
/* Program the drawing rectangle for the destination.  Returns true
 * if the rectangle was already up to date (nothing emitted), false
 * after emitting a new one — the caller uses this to decide whether
 * a stall is still required.
 */
static bool
gen7_emit_drawing_rectangle(struct sna *sna,
			    const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(op->dst.x, op->dst.y));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen7.drawrect_limit == limit &&
	    sna->render_state.gen7.drawrect_offset == offset)
		return true;

	sna->render_state.gen7.drawrect_offset = offset;
	sna->render_state.gen7.drawrect_limit = limit;

	OUT_BATCH(GEN7_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}
942
/* Program 3DSTATE_VERTEX_ELEMENTS for the current vertex layout.
 * The layout id encodes, in its low two bits, the number of floats in
 * the source texcoord (0 => two 16-bit sscaled ints), and in the
 * remaining bits the float count of the optional mask texcoord.
 */
static void
gen7_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen7_render_state *render = &sna->render_state.gen7;
	uint32_t src_format, dw;
	int id = GEN7_VERTEX(op->u.gen7.flags);
	bool has_mask;

	DBG(("%s: setup id=%d\n", __FUNCTION__, id));

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
	 *    dword 4-7: position (x, y, 1.0, 1.0),
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
	 *
	 * dword 4-15 are fetched from vertex buffer
	 */
	has_mask = (id >> 2) != 0;
	OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS |
		((2 * (3 + has_mask)) + 1 - 2));

	/* pad element: four stored zeros for dwords 0-3 of the VUE */
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
		  GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT |
		  0 << GEN7_VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT |
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT |
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT);

	/* x,y */
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
		  GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
		  0 << GEN7_VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
		  GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
		  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
		/* fallthrough: treat unknown layouts as the 2s case */
	case 0:
		src_format = GEN7_SURFACEFORMAT_R16G16_SSCALED;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
		  src_format << GEN7_VE0_FORMAT_SHIFT |
		  4 << GEN7_VE0_OFFSET_SHIFT);
	OUT_BATCH(dw);

	/* u1, v1, w1 */
	if (has_mask) {
		/* mask coords follow the source coords in each vertex */
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
		dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
			/* fallthrough: treat unknown layouts as two floats */
		case 2:
			src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
			  src_format << GEN7_VE0_FORMAT_SHIFT |
			  offset << GEN7_VE0_OFFSET_SHIFT);
		OUT_BATCH(dw);
	}
}
1062
1063inline static void
1064gen7_emit_pipe_invalidate(struct sna *sna)
1065{
1066	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
1067	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH |
1068		  GEN7_PIPE_CONTROL_TC_FLUSH |
1069		  GEN7_PIPE_CONTROL_CS_STALL);
1070	OUT_BATCH(0);
1071	OUT_BATCH(0);
1072}
1073
1074inline static void
1075gen7_emit_pipe_flush(struct sna *sna, bool need_stall)
1076{
1077	unsigned stall;
1078
1079	stall = 0;
1080	if (need_stall)
1081		stall = (GEN7_PIPE_CONTROL_CS_STALL |
1082			 GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
1083
1084	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
1085	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | stall);
1086	OUT_BATCH(0);
1087	OUT_BATCH(0);
1088}
1089
1090inline static void
1091gen7_emit_pipe_stall(struct sna *sna)
1092{
1093	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
1094	OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL |
1095		  GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
1096	OUT_BATCH(0);
1097	OUT_BATCH(0);
1098}
1099
static void
gen7_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	/* Emit the per-op pipeline state (blend, sampler, SF, WM kernel,
	 * vertex elements, binding table), preceded by the strongest
	 * PIPE_CONTROL required: invalidate > flush > stall.
	 *
	 * Bit 0 of wm_binding_table carries the caller's "destination was
	 * dirty" flag and is stripped off before use as an offset. */
	bool need_invalidate;
	bool need_flush;
	bool need_stall;

	assert(op->dst.bo->exec);

	/* Flush if the destination was dirty, or if the previous op read
	 * its destination and this one does too (see emit_flush below). */
	need_flush = wm_binding_table & 1 ||
		(sna->render_state.gen7.emit_flush && GEN7_READS_DST(op->u.gen7.flags));
	if (ALWAYS_FLUSH)
		need_flush = true;

	wm_binding_table &= ~1;

	/* Switching to a different binding table requires a stall. */
	need_stall = sna->render_state.gen7.surface_table != wm_binding_table;

	/* Sampling from a bo still marked dirty in this batch requires a
	 * full texture-cache invalidate. */
	need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo);
	if (ALWAYS_INVALIDATE)
		need_invalidate = true;

	need_stall &= gen7_emit_drawing_rectangle(sna, op);
	if (ALWAYS_STALL)
		need_stall = true;

	if (need_invalidate) {
		/* The invalidate subsumes both the flush and the stall. */
		gen7_emit_pipe_invalidate(sna);
		kgem_clear_dirty(&sna->kgem);
		assert(op->dst.bo->exec);
		kgem_bo_mark_dirty(op->dst.bo);

		need_flush = false;
		need_stall = false;
	}
	if (need_flush) {
		gen7_emit_pipe_flush(sna, need_stall);
		need_stall = false;
	}
	if (need_stall)
		gen7_emit_pipe_stall(sna);

	gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
	gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
	gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
	gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
	gen7_emit_vertex_elements(sna, op);
	gen7_emit_binding_table(sna, wm_binding_table);

	/* Record whether this op read its destination, so the next op can
	 * decide whether it needs a flush first. */
	sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
}
1153
static bool gen7_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	/* Second pass of the two-pass component-alpha emulation: replay
	 * the rectangles emitted since vertex_start with an ADD blend and
	 * the component-alpha kernel.  Returns true iff a pass was emitted. */
	struct gen7_render_state *state = &sna->render_state.gen7;

	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
	     sna->render.vertex_start, sna->render.vertex_index));

	/* Let the first pass land before re-reading the destination. */
	gen7_emit_pipe_stall(sna);

	gen7_emit_cc(sna,
		     GEN7_BLEND(gen7_get_blend(PictOpAdd, true,
					       op->dst.format)));
	gen7_emit_wm(sna,
		     gen7_choose_composite_kernel(PictOpAdd,
						  true, true,
						  op->is_affine));

	/* Re-issue the same vertex range as a fresh RECTLIST primitive. */
	OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}
1186
/* Reserve a zero-filled 64-byte block in the static state stream. */
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}
1192
1193static void
1194sampler_state_init(struct gen7_sampler_state *sampler_state,
1195		   sampler_filter_t filter,
1196		   sampler_extend_t extend)
1197{
1198	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
1199
1200	/* We use the legacy mode to get the semantics specified by
1201	 * the Render extension. */
1202	sampler_state->ss0.default_color_mode = GEN7_BORDER_COLOR_MODE_LEGACY;
1203
1204	switch (filter) {
1205	default:
1206	case SAMPLER_FILTER_NEAREST:
1207		sampler_state->ss0.min_filter = GEN7_MAPFILTER_NEAREST;
1208		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_NEAREST;
1209		break;
1210	case SAMPLER_FILTER_BILINEAR:
1211		sampler_state->ss0.min_filter = GEN7_MAPFILTER_LINEAR;
1212		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_LINEAR;
1213		break;
1214	}
1215
1216	switch (extend) {
1217	default:
1218	case SAMPLER_EXTEND_NONE:
1219		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1220		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1221		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1222		break;
1223	case SAMPLER_EXTEND_REPEAT:
1224		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1225		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1226		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1227		break;
1228	case SAMPLER_EXTEND_PAD:
1229		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1230		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1231		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1232		break;
1233	case SAMPLER_EXTEND_REFLECT:
1234		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1235		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1236		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1237		break;
1238	}
1239}
1240
1241static void
1242sampler_copy_init(struct gen7_sampler_state *ss)
1243{
1244	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1245	ss->ss3.non_normalized_coord = 1;
1246
1247	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1248}
1249
1250static void
1251sampler_fill_init(struct gen7_sampler_state *ss)
1252{
1253	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
1254	ss->ss3.non_normalized_coord = 1;
1255
1256	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1257}
1258
1259static uint32_t
1260gen7_tiling_bits(uint32_t tiling)
1261{
1262	switch (tiling) {
1263	default: assert(0);
1264	case I915_TILING_NONE: return 0;
1265	case I915_TILING_X: return GEN7_SURFACE_TILED;
1266	case I915_TILING_Y: return GEN7_SURFACE_TILED | GEN7_SURFACE_TILED_Y;
1267	}
1268}
1269
1270/**
1271 * Sets up the common fields for a surface state buffer for the given
1272 * picture in the given surface state buffer.
1273 */
static uint32_t
gen7_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	/* Write (or reuse) a SURFACE_STATE for bo at the top of the batch
	 * and return its byte offset for the binding table. */
	uint32_t *ss;
	uint32_t domains;
	int offset;
	uint32_t is_scanout = is_dst && bo->scanout;

	COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);

	/* After the first bind, we manage the cache domains within the batch */
	/* The binding cache key mixes format with the dst/scanout flags so
	 * a bo bound differently gets a distinct surface entry. */
	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
	if (offset) {
		assert(offset >= sna->kgem.surface);
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	/* Surface state grows downwards from the end of the batch. */
	offset = sna->kgem.surface -=
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
		 gen7_tiling_bits(bo->tiling) |
		 format << GEN7_SURFACE_FORMAT_SHIFT);
	if (bo->tiling == I915_TILING_Y)
		ss[0] |= GEN7_SURFACE_VALIGN_4;
	if (is_dst) {
		ss[0] |= GEN7_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	/* ss[1] is the surface base address, patched via relocation. */
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
	ss[2] = ((width - 1)  << GEN7_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
	ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
	ss[4] = 0;
	/* Scanout and I/O buffers must remain uncached (MOCS 0). */
	ss[5] = (is_scanout || bo->io) ? 0 : sna->render_state.gen7.info->mocs << 16;
	ss[6] = 0;
	ss[7] = 0;
	if (is_hsw(sna))
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);

	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
1331
static void gen7_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	/* Emit 3DSTATE_VERTEX_BUFFERS for the op's vertex layout slot.
	 * The buffer address dword is left 0 and recorded in vertex_reloc
	 * so it can be patched once the vbo is finalised. */
	int id = GEN7_VERTEX(op->u.gen7.flags);

	OUT_BATCH(GEN7_3DSTATE_VERTEX_BUFFERS | (5 - 2));
	OUT_BATCH(id << GEN7_VB0_BUFFER_INDEX_SHIFT |
		  GEN7_VB0_VERTEXDATA |
		  GEN7_VB0_ADDRESS_MODIFY_ENABLE |
		  4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT);
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(~0); /* max address: disabled */
	OUT_BATCH(0);

	/* Mark this layout as bound for the rest of the batch. */
	sna->render.vb_id |= 1 << id;
}
1349
static void gen7_emit_primitive(struct sna *sna)
{
	/* Begin a RECTLIST primitive.  If the previous batch command was
	 * also our 3DPRIMITIVE, reuse it by pointing vertex_offset back at
	 * its (still unfilled) vertex-count dword instead of emitting a
	 * new packet. */
	if (sna->kgem.nbatch == sna->render_state.gen7.last_primitive) {
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;

	sna->render_state.gen7.last_primitive = sna->kgem.nbatch;
}
1369
1370static bool gen7_rectangle_begin(struct sna *sna,
1371				 const struct sna_composite_op *op)
1372{
1373	int id = 1 << GEN7_VERTEX(op->u.gen7.flags);
1374	int ndwords;
1375
1376	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
1377		return true;
1378
1379	ndwords = op->need_magic_ca_pass ? 60 : 6;
1380	if ((sna->render.vb_id & id) == 0)
1381		ndwords += 5;
1382	if (!kgem_check_batch(&sna->kgem, ndwords))
1383		return false;
1384
1385	if ((sna->render.vb_id & id) == 0)
1386		gen7_emit_vertex_buffer(sna, op);
1387
1388	gen7_emit_primitive(sna);
1389	return true;
1390}
1391
static int gen7_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Slow path when the vbo is full: wait for concurrent users, and
	 * if space is still short, flush the current vertices (including
	 * the CA fixup pass) and start a new vbo.  Returns the number of
	 * floats now available, or 0 if the batch must be submitted. */

	/* Preventing discarding new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen7_magic_ca_pass(sna, op)) {
			/* The CA pass changed blend/kernel state; restore
			 * the op's own state before continuing. */
			gen7_emit_pipe_stall(sna);
			gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
			gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
		}
	}

	return gen4_vertex_finish(sna);
}
1418
inline static int gen7_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	/* Reserve vertex space for up to 'want' rectangles and return how
	 * many actually fit (always at least one).  On any shortage the
	 * batch is flushed/submitted, state is re-emitted via emit_state,
	 * and the attempt restarts. */
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen7_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen7_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	/* Trim the request to what fits in the remaining vbo space. */
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	assert(want > 0);
	sna->render.vertex_index += 3*want;
	return want;

flush:
	/* Out of batch space: complete the pending primitive, submit and
	 * rebuild state in the fresh batch, then retry. */
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen7_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}
1464
1465inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna,
1466							 uint16_t *offset)
1467{
1468	uint32_t *table;
1469
1470	sna->kgem.surface -=
1471		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1472	/* Clear all surplus entries to zero in case of prefetch */
1473	table = memset(sna->kgem.batch + sna->kgem.surface,
1474		       0, sizeof(struct gen7_surface_state));
1475
1476	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
1477
1478	*offset = sna->kgem.surface;
1479	return table;
1480}
1481
static void
gen7_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	/* Prepare the batch for a render operation: switch to the render
	 * ring, make room for the state + surfaces (submitting if needed),
	 * and emit the once-per-batch invariant state. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		_kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	assert(sna->kgem.mode == KGEM_RENDER);
	assert(sna->kgem.ring == KGEM_RENDER);

	if (sna->render_state.gen7.needs_invariant)
		gen7_emit_invariant(sna);
}
1501
static void gen7_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Build the binding table (dst, src, optional mask) for op and
	 * emit the full pipeline state.  If the freshly written table is
	 * identical to the one already in use, reclaim its space and
	 * reuse the previous table offset. */
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen7_get_batch(sna, op);

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			    op->dst.bo, op->dst.width, op->dst.height,
			    gen7_get_dest_format(op->dst.format),
			    true);
	binding_table[1] =
		gen7_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		binding_table[2] =
			gen7_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	/* Compare (dst,src) as one 64-bit load, and the mask entry if
	 * present; on a match, give back the slot we just wrote. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen7.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
		offset = sna->render_state.gen7.surface_table;
	}

	/* Destination unchanged from the current table: no flush needed. */
	if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0])
		dirty = 0;

	/* The dirty flag rides in bit 0 of the offset (stripped later). */
	gen7_emit_state(sna, op, offset | dirty);
}
1547
1548static void
1549gen7_align_vertex(struct sna *sna, const struct sna_composite_op *op)
1550{
1551	if (op->floats_per_vertex != sna->render_state.gen7.floats_per_vertex) {
1552		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
1553		     sna->render_state.gen7.floats_per_vertex, op->floats_per_vertex));
1554		gen4_vertex_align(sna, op);
1555		sna->render_state.gen7.floats_per_vertex = op->floats_per_vertex;
1556	}
1557}
1558
fastcall static void
gen7_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	/* Reserve vertex space for one rectangle and emit its vertices. */
	gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);
	op->prim_emit(sna, op, r);
}
1567
1568fastcall static void
1569gen7_render_composite_box(struct sna *sna,
1570			  const struct sna_composite_op *op,
1571			  const BoxRec *box)
1572{
1573	struct sna_composite_rectangles r;
1574
1575	gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);
1576
1577	DBG(("  %s: (%d, %d), (%d, %d)\n",
1578	     __FUNCTION__,
1579	     box->x1, box->y1, box->x2, box->y2));
1580
1581	r.dst.x = box->x1;
1582	r.dst.y = box->y1;
1583	r.width  = box->x2 - box->x1;
1584	r.height = box->y2 - box->y1;
1585	r.src = r.mask = r.dst;
1586
1587	op->prim_emit(sna, op, &r);
1588}
1589
1590static void
1591gen7_render_composite_boxes__blt(struct sna *sna,
1592				 const struct sna_composite_op *op,
1593				 const BoxRec *box, int nbox)
1594{
1595	DBG(("composite_boxes(%d)\n", nbox));
1596
1597	do {
1598		int nbox_this_time;
1599
1600		nbox_this_time = gen7_get_rectangles(sna, op, nbox,
1601						     gen7_emit_composite_state);
1602		nbox -= nbox_this_time;
1603
1604		do {
1605			struct sna_composite_rectangles r;
1606
1607			DBG(("  %s: (%d, %d), (%d, %d)\n",
1608			     __FUNCTION__,
1609			     box->x1, box->y1, box->x2, box->y2));
1610
1611			r.dst.x = box->x1;
1612			r.dst.y = box->y1;
1613			r.width  = box->x2 - box->x1;
1614			r.height = box->y2 - box->y1;
1615			r.src = r.mask = r.dst;
1616
1617			op->prim_emit(sna, op, &r);
1618			box++;
1619		} while (--nbox_this_time);
1620	} while (nbox);
1621}
1622
1623static void
1624gen7_render_composite_boxes(struct sna *sna,
1625			    const struct sna_composite_op *op,
1626			    const BoxRec *box, int nbox)
1627{
1628	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1629
1630	do {
1631		int nbox_this_time;
1632		float *v;
1633
1634		nbox_this_time = gen7_get_rectangles(sna, op, nbox,
1635						     gen7_emit_composite_state);
1636		assert(nbox_this_time);
1637		nbox -= nbox_this_time;
1638
1639		v = sna->render.vertices + sna->render.vertex_used;
1640		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
1641
1642		op->emit_boxes(op, box, nbox_this_time, v);
1643		box += nbox_this_time;
1644	} while (nbox);
1645}
1646
static void
gen7_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	/* Threaded variant of the bulk path: reserve the vbo range under
	 * the vertex lock, then drop the lock while emit_boxes fills it,
	 * so other threads can reserve their own ranges concurrently. */
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen7_get_rectangles(sna, op, nbox,
						     gen7_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		/* Hold a reference on the vbo while writing unlocked. */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
1678
1679#ifndef MAX
1680#define MAX(a,b) ((a) > (b) ? (a) : (b))
1681#endif
1682
1683static uint32_t
1684gen7_composite_create_blend_state(struct sna_static_stream *stream)
1685{
1686	char *base, *ptr;
1687	int src, dst;
1688
1689	base = sna_static_stream_map(stream,
1690				     GEN7_BLENDFACTOR_COUNT * GEN7_BLENDFACTOR_COUNT * GEN7_BLEND_STATE_PADDED_SIZE,
1691				     64);
1692
1693	ptr = base;
1694	for (src = 0; src < GEN7_BLENDFACTOR_COUNT; src++) {
1695		for (dst= 0; dst < GEN7_BLENDFACTOR_COUNT; dst++) {
1696			struct gen7_blend_state *blend =
1697				(struct gen7_blend_state *)ptr;
1698
1699			blend->blend0.dest_blend_factor = dst;
1700			blend->blend0.source_blend_factor = src;
1701			blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
1702			blend->blend0.blend_enable =
1703				!(dst == GEN7_BLENDFACTOR_ZERO && src == GEN7_BLENDFACTOR_ONE);
1704
1705			blend->blend1.post_blend_clamp_enable = 1;
1706			blend->blend1.pre_blend_clamp_enable = 1;
1707
1708			ptr += GEN7_BLEND_STATE_PADDED_SIZE;
1709		}
1710	}
1711
1712	return sna_static_stream_offsetof(stream, base);
1713}
1714
static uint32_t gen7_bind_video_source(struct sna *sna,
				       struct kgem_bo *bo,
				       uint32_t offset,
				       int width,
				       int height,
				       int pitch,
				       uint32_t format)
{
	/* Like gen7_bind_bo, but for a (sub-)plane of a video frame:
	 * the surface starts at 'offset' bytes into bo, is always
	 * sampler-read-only and linear (untiled), and is never cached in
	 * the bo's binding table. */
	uint32_t *ss, bind;

	bind = sna->kgem.surface -=
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);

	assert(bo->tiling == I915_TILING_NONE);

	ss = sna->kgem.batch + bind;
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
		 format << GEN7_SURFACE_FORMAT_SHIFT);
	/* Base address relocation, displaced by the plane offset. */
	ss[1] = kgem_add_reloc(&sna->kgem, bind + 1, bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       offset);
	ss[2] = ((width - 1)  << GEN7_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
	ss[3] = (pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
	ss[4] = 0;
	ss[5] = 0;
	ss[6] = 0;
	ss[7] = 0;
	if (is_hsw(sna))
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, offset=%d\n",
	     bind, bo->handle, ss[1],
	     format, width, height, pitch, offset));

	return bind * sizeof(uint32_t);
}
1752
static void gen7_emit_video_state(struct sna *sna,
				  const struct sna_composite_op *op)
{
	/* Build the binding table for a video blit: one dst surface plus
	 * one source surface per plane sample (six for planar YUV — Y, Y,
	 * V, V, U, U — one for packed), then emit the pipeline state. */
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset, dirty;
	int n_src, n;

	gen7_get_batch(sna, op);

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		/* Each plane is sampled as a single-channel R8 surface;
		 * the chroma planes are half-size.  NOTE(review): Y uses
		 * pitch[1] and chroma pitch[0] — confirm against the
		 * sna_video_frame pitch layout. */
		src_surf_format = GEN7_SURFACEFORMAT_R8_UNORM;
		src_width[1]  = src_width[0]  = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		/* Packed YUV: one surface covering the whole frame. */
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen7_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen7_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}

	/* The dirty flag rides in bit 0 of the offset (stripped later). */
	gen7_emit_state(sna, op, offset | dirty);
}
1821
static bool
gen7_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	/* Render a video frame onto pixmap, clipped to dstRegion, scaling
	 * from the frame's src rectangle to the region extents.  Returns
	 * false if the bos cannot fit in a batch even after submitting. */
	struct sna_composite_op tmp;
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	int nbox, pix_xoff, pix_yoff;
	unsigned filter;
	const BoxRec *box;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
	     __FUNCTION__,
	     src_width, src_height, dst_width, dst_height,
	     region_num_rects(dstRegion),
	     REGION_EXTENTS(NULL, dstRegion)->x1,
	     REGION_EXTENTS(NULL, dstRegion)->y1,
	     REGION_EXTENTS(NULL, dstRegion)->x2,
	     REGION_EXTENTS(NULL, dstRegion)->y2));

	assert(priv->gpu_bo);
	memset(&tmp, 0, sizeof(tmp));

	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;

	/* Each vertex is (x, y) + (u, v, 1): 3 floats, 9 per rectangle. */
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;

	/* Only filter when actually scaling. */
	if (src_width == dst_width && src_height == dst_height)
		filter = SAMPLER_FILTER_NEAREST;
	else
		filter = SAMPLER_FILTER_BILINEAR;

	tmp.u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
			       NO_BLEND,
			       is_planar_fourcc(frame->id) ?
			       GEN7_WM_KERNEL_VIDEO_PLANAR :
			       GEN7_WM_KERNEL_VIDEO_PACKED,
			       2);
	tmp.priv = frame;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
			return false;

		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_video_state(sna, &tmp);

	/* Set up the offset for translating from the given region (in screen
	 * coordinates) to the backing pixmap.
	 */
#ifdef COMPOSITE
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
	pix_xoff = 0;
	pix_yoff = 0;
#endif

	DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
	     __FUNCTION__,
	     frame->src.x1, frame->src.y1,
	     src_width, src_height,
	     dst_width, dst_height,
	     frame->width, frame->height));

	/* Map destination pixels to normalised source texture coords:
	 * u = x * scale + offset, with the region origin anchored at the
	 * frame's src rectangle. */
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
	     __FUNCTION__,
	     src_scale_x, src_scale_y,
	     src_offset_x, src_offset_y));

	box = region_rects(dstRegion);
	nbox = region_num_rects(dstRegion);
	while (nbox--) {
		BoxRec r;

		DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n",
		     __FUNCTION__,
		     box->x1, box->y1,
		     box->x2, box->y2,
		     pix_xoff, pix_yoff,
		     box->x1 * src_scale_x + src_offset_x,
		     box->y1 * src_scale_y + src_offset_y,
		     box->x2 * src_scale_x + src_offset_x,
		     box->y2 * src_scale_y + src_offset_y));

		r.x1 = box->x1 + pix_xoff;
		r.x2 = box->x2 + pix_xoff;
		r.y1 = box->y1 + pix_yoff;
		r.y2 = box->y2 + pix_yoff;

		gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state);

		/* RECTLIST: three corners per rectangle, (x,y) + (u,v). */
		OUT_VERTEX(r.x2, r.y2);
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(r.x1, r.y2);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(r.x1, r.y1);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

		/* Track which parts of the pixmap now live on the GPU. */
		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
			sna_damage_add_box(&priv->gpu_damage, &r);
			sna_damage_subtract_box(&priv->cpu_damage, &r);
		}
		box++;
	}

	gen4_vertex_flush(sna);
	return true;
}
1965
static int
gen7_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	/* Prepare a Render picture as a composite source/mask channel,
	 * choosing between a solid channel, gradient fixups, software
	 * fallbacks and direct pixmap sampling.  Returns the channel-init
	 * result (<=0 on failure, per the gen4 channel helpers). */
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	/* Solid colours bypass sampling entirely. */
	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		int ret;

		/* Source-only pictures: linear gradients are handled
		 * natively, others are rasterised into a temporary. */
		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	/* Alpha maps and unsupported repeat/filter modes fall back to a
	 * software fixup pass. */
	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen7_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen7_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	assert(picture->pDrawable);
	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	/* A pure integer translation can be folded into (x, y), dropping
	 * the transform and downgrading the filter to nearest. */
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat ||
		    (x >= 0 &&
		     y >= 0 &&
		     x + w < pixmap->drawable.width &&
		     y + h < pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen4_channel_init_solid(sna, channel, priv->clear_color);
			}
		}
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen7_get_card_format(picture->format);
	/* Formats the sampler cannot read are converted in software. */
	if (channel->card_format == (unsigned)-1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	/* Oversized pixmaps are copied out into a smaller bo first. */
	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	DBG(("%s: pixmap, repeat=%d, filter=%d, transform?=%d [affine? %d], format=%08x\n",
	     __FUNCTION__,
	     channel->repeat, channel->filter,
	     channel->transform != NULL, channel->is_affine,
	     channel->pict_format));
	if (channel->transform) {
		DBG(("%s: transform=[%f %f %f, %f %f %f, %f %f %f]\n",
		     __FUNCTION__,
		     channel->transform->matrix[0][0] / 65536.,
		     channel->transform->matrix[0][1] / 65536.,
		     channel->transform->matrix[0][2] / 65536.,
		     channel->transform->matrix[1][0] / 65536.,
		     channel->transform->matrix[1][1] / 65536.,
		     channel->transform->matrix[1][2] / 65536.,
		     channel->transform->matrix[2][0] / 65536.,
		     channel->transform->matrix[2][1] / 65536.,
		     channel->transform->matrix[2][2] / 65536.));
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}
2091
2092inline static void gen7_composite_channel_convert(struct sna_composite_channel *channel)
2093{
2094	channel->repeat = gen7_repeat(channel->repeat);
2095	channel->filter = gen7_filter(channel->filter);
2096	if (channel->card_format == (unsigned)-1)
2097		channel->card_format = gen7_get_card_format(channel->pict_format);
2098	assert(channel->card_format != (unsigned)-1);
2099}
2100
2101static void gen7_render_composite_done(struct sna *sna,
2102				       const struct sna_composite_op *op)
2103{
2104	if (sna->render.vertex_offset) {
2105		gen4_vertex_flush(sna);
2106		gen7_magic_ca_pass(sna, op);
2107	}
2108
2109	if (op->mask.bo)
2110		kgem_bo_destroy(&sna->kgem, op->mask.bo);
2111	if (op->src.bo)
2112		kgem_bo_destroy(&sna->kgem, op->src.bo);
2113
2114	sna_render_composite_redirect_done(sna, op);
2115}
2116
2117inline static bool
2118gen7_composite_set_target(struct sna *sna,
2119			  struct sna_composite_op *op,
2120			  PicturePtr dst,
2121			  int x, int y, int w, int h,
2122			  bool partial)
2123{
2124	BoxRec box;
2125	unsigned int hint;
2126
2127	DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial));
2128
2129	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
2130	op->dst.format = dst->format;
2131	op->dst.width  = op->dst.pixmap->drawable.width;
2132	op->dst.height = op->dst.pixmap->drawable.height;
2133
2134	if (w | h) {
2135		assert(w && h);
2136		box.x1 = x;
2137		box.y1 = y;
2138		box.x2 = x + w;
2139		box.y2 = y + h;
2140	} else
2141		sna_render_picture_extents(dst, &box);
2142
2143	hint = PREFER_GPU | FORCE_GPU | RENDER_GPU;
2144	if (!partial) {
2145		hint |= IGNORE_DAMAGE;
2146		if (w == op->dst.width && h == op->dst.height)
2147			hint |= REPLACES;
2148	}
2149
2150	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
2151	if (op->dst.bo == NULL)
2152		return false;
2153
2154	if (hint & REPLACES) {
2155		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
2156		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
2157	}
2158
2159	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
2160			    &op->dst.x, &op->dst.y);
2161
2162	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
2163	     __FUNCTION__,
2164	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
2165	     op->dst.width, op->dst.height,
2166	     op->dst.bo->pitch,
2167	     op->dst.x, op->dst.y,
2168	     op->damage ? *op->damage : (void *)-1));
2169
2170	assert(op->dst.bo->proxy == NULL);
2171
2172	if (too_large(op->dst.width, op->dst.height) &&
2173	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
2174		return false;
2175
2176	return true;
2177}
2178
2179static bool
2180try_blt(struct sna *sna,
2181	PicturePtr dst, PicturePtr src,
2182	int width, int height)
2183{
2184	struct kgem_bo *bo;
2185
2186	if (sna->kgem.mode == KGEM_BLT) {
2187		DBG(("%s: already performing BLT\n", __FUNCTION__));
2188		return true;
2189	}
2190
2191	if (too_large(width, height)) {
2192		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
2193		     __FUNCTION__, width, height));
2194		return true;
2195	}
2196
2197	bo = __sna_drawable_peek_bo(dst->pDrawable);
2198	if (bo == NULL)
2199		return true;
2200	if (bo->rq)
2201		return RQ_IS_BLT(bo->rq);
2202
2203	if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0))
2204		return true;
2205
2206	if (src->pDrawable) {
2207		bo = __sna_drawable_peek_bo(src->pDrawable);
2208		if (bo == NULL)
2209			return true;
2210
2211		if (prefer_blt_bo(sna, bo))
2212			return true;
2213	}
2214
2215	if (sna->kgem.ring == KGEM_BLT) {
2216		DBG(("%s: already performing BLT\n", __FUNCTION__));
2217		return true;
2218	}
2219
2220	return false;
2221}
2222
2223static bool
2224check_gradient(PicturePtr picture, bool precise)
2225{
2226	if (picture->pDrawable)
2227		return false;
2228
2229	switch (picture->pSourcePict->type) {
2230	case SourcePictTypeSolidFill:
2231	case SourcePictTypeLinear:
2232		return false;
2233	default:
2234		return precise;
2235	}
2236}
2237
/* Does the picture carry a separate alpha map? */
static bool
has_alphamap(PicturePtr p)
{
	return p->alphaMap != NULL;
}
2243
2244static bool
2245need_upload(PicturePtr p)
2246{
2247	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
2248}
2249
2250static bool
2251source_is_busy(PixmapPtr pixmap)
2252{
2253	struct sna_pixmap *priv = sna_pixmap(pixmap);
2254	if (priv == NULL || priv->clear)
2255		return false;
2256
2257	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
2258		return true;
2259
2260	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
2261		return true;
2262
2263	return priv->gpu_damage && !priv->cpu_damage;
2264}
2265
2266static bool
2267source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
2268{
2269	if (sna_picture_is_solid(p, NULL))
2270		return false;
2271
2272	if (p->pSourcePict)
2273		return check_gradient(p, precise);
2274
2275	if (!gen7_check_repeat(p) || !gen7_check_format(p->format))
2276		return true;
2277
2278	if (pixmap && source_is_busy(pixmap))
2279		return false;
2280
2281	return has_alphamap(p) || !gen7_check_filter(p) || need_upload(p);
2282}
2283
/* Decide whether a composite must be performed in software.  Returns
 * true to force the CPU fallback: the destination format cannot be
 * rendered, a source would be read back from the destination, or the
 * destination lives on the CPU while a source also needs CPU
 * preparation.
 */
static bool
gen7_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen7_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask && mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	/* A destination too large for the 3D pipe that is CPU-resident
	 * is cheaper to composite in software than to tile through the GPU.
	 */
	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	     __FUNCTION__));
	return dst_use_cpu(dst_pixmap);
}
2374
/* Try to initialise the mask channel *mc by reusing the already
 * prepared source channel *sc, taking an extra reference on its bo
 * instead of preparing the mask from scratch.  Returns true on
 * success; false means the caller must set up the mask normally.
 */
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	/* Sampling offsets must coincide for the channels to be shared. */
	if (src_x != msk_x || src_y != msk_y)
		return false;

	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	/* A solid mask is cheaper to set up directly. */
	if (sna_picture_is_solid(mask, &color))
		return gen4_channel_init_solid(sna, mc, color);

	if (sc->is_solid)
		return false;

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	/* Same drawable: sharing is valid only if the sampling state
	 * (transform, alpha map, repeat, filter, format) also matches
	 * what the hardware can express.
	 */
	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen7_check_repeat(mask))
		return false;

	if (!gen7_check_filter(mask))
		return false;

	if (!gen7_check_format(mask->format))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	/* Copy the channel but re-derive the mask's own sampler state. */
	*mc = *sc;
	mc->repeat = gen7_repeat(mask->repeat ? mask->repeatType : RepeatNone);
	mc->filter = gen7_filter(mask->filter);
	mc->pict_format = mask->format;
	mc->card_format = gen7_get_card_format(mask->format);
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}
2429
/* Prepare a gen7 render-pipeline composite (src IN mask OP dst) into
 * *tmp.  Returns true with the state emitted and the blt/box/done
 * callbacks installed; otherwise falls back to the blitter, to tiled
 * compositing, or to software.
 */
static bool
gen7_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      unsigned flags,
		      struct sna_composite_op *tmp)
{
	/* Only operators present in the gen7 blend table are supported. */
	if (op >= ARRAY_SIZE(gen7_blend_op))
		return false;

	DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__,
	     width, height, sna->kgem.mode, sna->kgem.ring));

	/* An unmasked composite may be faster on the blitter. */
	if (mask == NULL &&
	    try_blt(sna, dst, src, width, height) &&
	    sna_blt_composite(sna, op,
			      src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      flags, tmp))
		return true;

	if (gen7_composite_fallback(sna, src, mask, dst))
		goto fallback;

	/* Operations too large for the 3D pipe are split into tiles. */
	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	/* Clearing with the clear picture is just a solid source copy. */
	if (op == PictOpClear && src == sna->clear)
		op = PictOpSrc;
	tmp->op = op;
	if (!gen7_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       flags & COMPOSITE_PARTIAL || op > PictOpSrc))
		goto fallback;

	switch (gen7_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		/* Did we just switch rings to prepare the source? */
		if (mask == NULL &&
		    prefer_blt_composite(sna, tmp) &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen7_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.bo = NULL;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value.  We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen7_blend_op[op].src_alpha &&
			    (gen7_blend_op[op].src_blend != GEN7_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver)
					goto cleanup_src;

				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		/* Share the source channel for the mask when possible. */
		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen7_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				gen7_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	/* Pack sampler, blend, kernel and vertex-emitter selection. */
	tmp->u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen7_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
			       gen7_choose_composite_kernel(tmp->op,
							    tmp->mask.bo != NULL,
							    tmp->has_component_alpha,
							    tmp->is_affine),
			       gen4_choose_composite_emitter(sna, tmp));

	tmp->blt   = gen7_render_composite_blt;
	tmp->box   = gen7_render_composite_box;
	tmp->boxes = gen7_render_composite_boxes__blt;
	if (tmp->emit_boxes){
		tmp->boxes = gen7_render_composite_boxes;
		tmp->thread_boxes = gen7_render_composite_boxes__thread;
	}
	tmp->done  = gen7_render_composite_done;

	/* Make sure all three bo fit in the batch, flushing if needed. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, tmp);
	gen7_emit_composite_state(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
		tmp->mask.bo = NULL;
	}
cleanup_src:
	if (tmp->src.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
		tmp->src.bo = NULL;
	}
cleanup_dst:
	if (tmp->redirect.real_bo) {
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
		tmp->redirect.real_bo = NULL;
	}
fallback:
	return (mask == NULL &&
		sna_blt_composite(sna, op,
				  src, dst,
				  src_x, src_y,
				  dst_x, dst_y,
				  width, height,
				  flags | COMPOSITE_FALLBACK, tmp));
}
2614
2615#if !NO_COMPOSITE_SPANS
/* Emit a single opacity-weighted box for a spans composite, reserving
 * vertex space (and re-emitting state if the batch was flushed) first.
 */
fastcall static void
gen7_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	gen7_get_rectangles(sna, &op->base, 1, gen7_emit_composite_state);
	op->prim_emit(sna, op, box, opacity);
}
2633
2634static void
2635gen7_render_composite_spans_boxes(struct sna *sna,
2636				  const struct sna_composite_spans_op *op,
2637				  const BoxRec *box, int nbox,
2638				  float opacity)
2639{
2640	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
2641	     __FUNCTION__, nbox,
2642	     op->base.src.offset[0], op->base.src.offset[1],
2643	     opacity,
2644	     op->base.dst.x, op->base.dst.y));
2645
2646	do {
2647		int nbox_this_time;
2648
2649		nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
2650						     gen7_emit_composite_state);
2651		nbox -= nbox_this_time;
2652
2653		do {
2654			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
2655			     box->x1, box->y1,
2656			     box->x2 - box->x1,
2657			     box->y2 - box->y1));
2658
2659			op->prim_emit(sna, op, box++, opacity);
2660		} while (--nbox_this_time);
2661	} while (nbox);
2662}
2663
/* Thread-safe variant: reserve vertex space under the vertex lock,
 * then fill the reserved region outside the lock so multiple threads
 * can generate vertex data concurrently.
 */
fastcall static void
gen7_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
						     gen7_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim the vertex range while still holding the lock. */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		/* Keep the buffer pinned while we write outside the lock. */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
2699
/* Finish a spans composite: flush pending vertices, release the source
 * bo reference and undo any destination redirection.
 */
fastcall static void
gen7_render_composite_spans_done(struct sna *sna,
				 const struct sna_composite_spans_op *op)
{
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);

	DBG(("%s()\n", __FUNCTION__));

	if (op->base.src.bo)
		kgem_bo_destroy(&sna->kgem, op->base.src.bo);

	sna_render_composite_redirect_done(sna, &op->base);
}
2714
/* Can this spans composite be handled by the gen7 render pipeline?
 * Rejects unsupported operators, operations that would fall back to
 * software anyway, and tiled operations whose target is not on the GPU.
 */
static bool
gen7_check_composite_spans(struct sna *sna,
			   uint8_t op, PicturePtr src, PicturePtr dst,
			   int16_t width, int16_t height, unsigned flags)
{
	if (op >= ARRAY_SIZE(gen7_blend_op))
		return false;

	if (gen7_composite_fallback(sna, src, NULL, dst))
		return false;

	if (need_tiling(sna, width, height) &&
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
		DBG(("%s: fallback, tiled operation not on GPU\n",
		     __FUNCTION__));
		return false;
	}

	return true;
}
2735
/* Prepare a spans composite (opacity-weighted boxes) in *tmp.  The
 * caller must have validated the operation with
 * gen7_check_composite_spans() first (asserted below).
 */
static bool
gen7_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d/%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.mode, sna->kgem.ring));

	assert(gen7_check_composite_spans(sna, op, src, dst, width, height, flags));

	/* Operations too wide for the pipeline are split into tiles. */
	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	if (!gen7_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height, true))
		return false;

	switch (gen7_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		gen7_composite_channel_convert(&tmp->base.src);
		break;
	}
	tmp->base.mask.bo = NULL;

	tmp->base.is_affine = tmp->base.src.is_affine;
	tmp->base.need_magic_ca_pass = false;

	/* The opacity kernel supplies the per-span alpha in place of a mask. */
	tmp->base.u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
					      tmp->base.src.repeat,
					      SAMPLER_FILTER_NEAREST,
					      SAMPLER_EXTEND_PAD),
			       gen7_get_blend(tmp->base.op, false, tmp->base.dst.format),
			       GEN7_WM_KERNEL_OPACITY | !tmp->base.is_affine,
			       gen4_choose_spans_emitter(sna, tmp));

	tmp->box   = gen7_render_composite_spans_box;
	tmp->boxes = gen7_render_composite_spans_boxes;
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen7_render_composite_spans_boxes__thread;
	tmp->done  = gen7_render_composite_spans_done;

	/* Make sure both bo fit in the batch, flushing if necessary. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp->base);
	gen7_emit_composite_state(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
2824#endif
2825
/* Emit the surface-state/binding-table setup for a render copy, with
 * destination in slot 0 and source in slot 1.
 */
static void
gen7_emit_copy_state(struct sna *sna,
		     const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen7_get_batch(sna, op);

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	/* Record whether the destination has pending GPU writes. */
	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen7_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen7_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	/* If this binding table matches the previous one, roll back the
	 * allocation we just made and reuse the old table.
	 */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
		offset = sna->render_state.gen7.surface_table;
	}

	/* An unchanged destination surface needs no flush. */
	if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0])
		dirty = 0;

	assert(!GEN7_READS_DST(op->u.gen7.flags));
	gen7_emit_state(sna, op, offset | dirty);
}
2862
2863static inline bool
2864prefer_blt_copy(struct sna *sna,
2865		struct kgem_bo *src_bo,
2866		struct kgem_bo *dst_bo,
2867		unsigned flags)
2868{
2869	if (sna->kgem.mode == KGEM_BLT)
2870		return true;
2871
2872	assert((flags & COPY_SYNC) == 0);
2873
2874	if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags))
2875		return true;
2876
2877	if (untiled_tlb_miss(src_bo) ||
2878	    untiled_tlb_miss(dst_bo))
2879		return true;
2880
2881	if (force_blt_ring(sna))
2882		return true;
2883
2884	if (kgem_bo_is_render(dst_bo) ||
2885	    kgem_bo_is_render(src_bo))
2886		return false;
2887
2888	if (prefer_render_ring(sna, dst_bo))
2889		return false;
2890
2891	if (!prefer_blt_ring(sna, dst_bo, flags))
2892		return false;
2893
2894	return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo);
2895}
2896
2897static bool
2898gen7_render_copy_boxes(struct sna *sna, uint8_t alu,
2899		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
2900		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
2901		       const BoxRec *box, int n, unsigned flags)
2902{
2903	struct sna_composite_op tmp;
2904	BoxRec extents;
2905
2906	DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? %d\n",
2907	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags,
2908	     src_bo == dst_bo,
2909	     overlaps(sna,
2910		      src_bo, src_dx, src_dy,
2911		      dst_bo, dst_dx, dst_dy,
2912		      box, n, flags, &extents)));
2913
2914	if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
2915	    sna_blt_compare_depth(src, dst) &&
2916	    sna_blt_copy_boxes(sna, alu,
2917			       src_bo, src_dx, src_dy,
2918			       dst_bo, dst_dx, dst_dy,
2919			       dst->bitsPerPixel,
2920			       box, n))
2921		return true;
2922
2923	if (!(alu == GXcopy || alu == GXclear)) {
2924fallback_blt:
2925		DBG(("%s: fallback blt\n", __FUNCTION__));
2926		if (!sna_blt_compare_depth(src, dst))
2927			return false;
2928
2929		return sna_blt_copy_boxes_fallback(sna, alu,
2930						   src, src_bo, src_dx, src_dy,
2931						   dst, dst_bo, dst_dx, dst_dy,
2932						   box, n);
2933	}
2934
2935	if (overlaps(sna,
2936		     src_bo, src_dx, src_dy,
2937		     dst_bo, dst_dx, dst_dy,
2938		     box, n, flags,
2939		     &extents)) {
2940		bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1);
2941
2942		if ((big || can_switch_to_blt(sna, dst_bo, flags)) &&
2943		    sna_blt_copy_boxes(sna, alu,
2944				       src_bo, src_dx, src_dy,
2945				       dst_bo, dst_dx, dst_dy,
2946				       dst->bitsPerPixel,
2947				       box, n))
2948			return true;
2949
2950		if (big)
2951			goto fallback_blt;
2952
2953		assert(src_bo == dst_bo);
2954		assert(src->depth == dst->depth);
2955		assert(src->width == dst->width);
2956		assert(src->height == dst->height);
2957		return sna_render_copy_boxes__overlap(sna, alu,
2958						      src, src_bo,
2959						      src_dx, src_dy,
2960						      dst_dx, dst_dy,
2961						      box, n, &extents);
2962	}
2963
2964	if (dst->depth == src->depth) {
2965		tmp.dst.format = sna_render_format_for_depth(dst->depth);
2966		tmp.src.pict_format = tmp.dst.format;
2967	} else {
2968		tmp.dst.format = sna_format_for_depth(dst->depth);
2969		tmp.src.pict_format = sna_format_for_depth(src->depth);
2970	}
2971	if (!gen7_check_format(tmp.src.pict_format))
2972		goto fallback_blt;
2973
2974	tmp.dst.pixmap = (PixmapPtr)dst;
2975	tmp.dst.width  = dst->width;
2976	tmp.dst.height = dst->height;
2977	tmp.dst.bo = dst_bo;
2978	tmp.dst.x = tmp.dst.y = 0;
2979	tmp.damage = NULL;
2980
2981	sna_render_composite_redirect_init(&tmp);
2982	if (too_large(tmp.dst.width, tmp.dst.height)) {
2983		int i;
2984
2985		extents = box[0];
2986		for (i = 1; i < n; i++) {
2987			if (box[i].x1 < extents.x1)
2988				extents.x1 = box[i].x1;
2989			if (box[i].y1 < extents.y1)
2990				extents.y1 = box[i].y1;
2991
2992			if (box[i].x2 > extents.x2)
2993				extents.x2 = box[i].x2;
2994			if (box[i].y2 > extents.y2)
2995				extents.y2 = box[i].y2;
2996		}
2997
2998		if (!sna_render_composite_redirect(sna, &tmp,
2999						   extents.x1 + dst_dx,
3000						   extents.y1 + dst_dy,
3001						   extents.x2 - extents.x1,
3002						   extents.y2 - extents.y1,
3003						   n > 1))
3004			goto fallback_tiled;
3005	}
3006
3007	tmp.src.card_format = gen7_get_card_format(tmp.src.pict_format);
3008	if (too_large(src->width, src->height)) {
3009		int i;
3010
3011		extents = box[0];
3012		for (i = 1; i < n; i++) {
3013			if (box[i].x1 < extents.x1)
3014				extents.x1 = box[i].x1;
3015			if (box[i].y1 < extents.y1)
3016				extents.y1 = box[i].y1;
3017
3018			if (box[i].x2 > extents.x2)
3019				extents.x2 = box[i].x2;
3020			if (box[i].y2 > extents.y2)
3021				extents.y2 = box[i].y2;
3022		}
3023
3024		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
3025					       extents.x1 + src_dx,
3026					       extents.y1 + src_dy,
3027					       extents.x2 - extents.x1,
3028					       extents.y2 - extents.y1))
3029			goto fallback_tiled_dst;
3030	} else {
3031		tmp.src.bo = src_bo;
3032		tmp.src.width  = src->width;
3033		tmp.src.height = src->height;
3034		tmp.src.offset[0] = tmp.src.offset[1] = 0;
3035	}
3036
3037	tmp.mask.bo = NULL;
3038
3039	tmp.floats_per_vertex = 2;
3040	tmp.floats_per_rect = 6;
3041	tmp.need_magic_ca_pass = 0;
3042
3043	tmp.u.gen7.flags = COPY_FLAGS(alu);
3044
3045	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
3046	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
3047		kgem_submit(&sna->kgem);
3048		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
3049			if (tmp.src.bo != src_bo)
3050				kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3051			if (tmp.redirect.real_bo)
3052				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
3053			goto fallback_blt;
3054		}
3055		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
3056	}
3057
3058	src_dx += tmp.src.offset[0];
3059	src_dy += tmp.src.offset[1];
3060
3061	dst_dx += tmp.dst.x;
3062	dst_dy += tmp.dst.y;
3063
3064	tmp.dst.x = tmp.dst.y = 0;
3065
3066	gen7_align_vertex(sna, &tmp);
3067	gen7_emit_copy_state(sna, &tmp);
3068
3069	do {
3070		int16_t *v;
3071		int n_this_time;
3072
3073		n_this_time = gen7_get_rectangles(sna, &tmp, n,
3074						  gen7_emit_copy_state);
3075		n -= n_this_time;
3076
3077		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
3078		sna->render.vertex_used += 6 * n_this_time;
3079		assert(sna->render.vertex_used <= sna->render.vertex_size);
3080		do {
3081
3082			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
3083			     box->x1 + src_dx, box->y1 + src_dy,
3084			     box->x1 + dst_dx, box->y1 + dst_dy,
3085			     box->x2 - box->x1, box->y2 - box->y1));
3086			v[0] = box->x2 + dst_dx;
3087			v[2] = box->x2 + src_dx;
3088			v[1]  = v[5] = box->y2 + dst_dy;
3089			v[3]  = v[7] = box->y2 + src_dy;
3090			v[8]  = v[4] = box->x1 + dst_dx;
3091			v[10] = v[6] = box->x1 + src_dx;
3092			v[9]  = box->y1 + dst_dy;
3093			v[11] = box->y1 + src_dy;
3094			v += 12; box++;
3095		} while (--n_this_time);
3096	} while (n);
3097
3098	gen4_vertex_flush(sna);
3099	sna_render_composite_redirect_done(sna, &tmp);
3100	if (tmp.src.bo != src_bo)
3101		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3102	return true;
3103
3104fallback_tiled_dst:
3105	if (tmp.redirect.real_bo)
3106		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
3107fallback_tiled:
3108	DBG(("%s: fallback tiled\n", __FUNCTION__));
3109	if (sna_blt_compare_depth(src, dst) &&
3110	    sna_blt_copy_boxes(sna, alu,
3111			       src_bo, src_dx, src_dy,
3112			       dst_bo, dst_dx, dst_dy,
3113			       dst->bitsPerPixel,
3114			       box, n))
3115		return true;
3116
3117	return sna_tiling_copy_boxes(sna, alu,
3118				     src, src_bo, src_dx, src_dy,
3119				     dst, dst_bo, dst_dx, dst_dy,
3120				     box, n);
3121}
3122
3123static void
3124gen7_render_copy_blt(struct sna *sna,
3125		     const struct sna_copy_op *op,
3126		     int16_t sx, int16_t sy,
3127		     int16_t w,  int16_t h,
3128		     int16_t dx, int16_t dy)
3129{
3130	int16_t *v;
3131
3132	gen7_get_rectangles(sna, &op->base, 1, gen7_emit_copy_state);
3133
3134	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3135	sna->render.vertex_used += 6;
3136	assert(sna->render.vertex_used <= sna->render.vertex_size);
3137
3138	v[0]  = dx+w; v[1]  = dy+h;
3139	v[2]  = sx+w; v[3]  = sy+h;
3140	v[4]  = dx;   v[5]  = dy+h;
3141	v[6]  = sx;   v[7]  = sy+h;
3142	v[8]  = dx;   v[9]  = dy;
3143	v[10] = sx;   v[11] = sy;
3144}
3145
3146static void
3147gen7_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
3148{
3149	if (sna->render.vertex_offset)
3150		gen4_vertex_flush(sna);
3151}
3152
static bool
gen7_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *op)
{
	/* Set up a reusable copy operation (op->blt emits one rectangle,
	 * op->done finishes), choosing between the BLT engine and the 3D
	 * renderer.  Returns false only if no engine can perform the copy.
	 */
	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
	     __FUNCTION__, alu,
	     src->drawable.width, src->drawable.height,
	     dst->drawable.width, dst->drawable.height));

	/* Prefer the BLT when the heuristics favour it and it can cope
	 * with the depths involved.
	 */
	if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy(sna, alu,
			 src_bo, dst_bo,
			 dst->drawable.bitsPerPixel,
			 op))
		return true;

	/* The render path only implements GXcopy/GXclear, cannot read and
	 * write the same bo, and is bounded by the maximum 3D surface size.
	 */
	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
	    too_large(src->drawable.width, src->drawable.height) ||
	    too_large(dst->drawable.width, dst->drawable.height)) {
fallback:
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
			return false;

		return sna_blt_copy(sna, alu, src_bo, dst_bo,
				    dst->drawable.bitsPerPixel,
				    op);
	}

	/* When the depths match, reuse the same render format on both ends
	 * so the channels pass through unconverted.
	 */
	if (dst->drawable.depth == src->drawable.depth) {
		op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = op->base.dst.format;
	} else {
		op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
	}
	if (!gen7_check_format(op->base.src.pict_format))
		goto fallback;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.bo = dst_bo;

	op->base.src.bo = src_bo;
	op->base.src.card_format =
		gen7_get_card_format(op->base.src.pict_format);
	op->base.src.width  = src->drawable.width;
	op->base.src.height = src->drawable.height;

	op->base.mask.bo = NULL;

	/* Copy vertices carry 2 floats each, 3 vertices per rectangle. */
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen7.flags = COPY_FLAGS(alu);

	/* Ensure both bos fit in the current batch; submit and retry once,
	 * then fall back to the BLT if they still do not fit.
	 */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &op->base);
	gen7_emit_copy_state(sna, &op->base);

	op->blt  = gen7_render_copy_blt;
	op->done = gen7_render_copy_done;
	return true;
}
3227
static void
gen7_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
	/* Emit the surface bindings and pipeline state for a solid fill:
	 * the render target in slot 0 and a 1x1 solid-color source in
	 * slot 1.
	 */
	uint16_t dirty;
	uint32_t *binding_table;
	uint16_t offset;

	/* XXX Render Target Fast Clear
	 * Set RTFC Enable in PS and render a rectangle.
	 * Limited to a clearing the full MSC surface only with a
	 * specific kernel.
	 */

	/* Make sure there is room in the batch before writing any state. */
	gen7_get_batch(sna, op);

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen7_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen7_bind_bo(sna,
			     op->src.bo, 1, 1,
			     GEN7_SURFACEFORMAT_B8G8R8A8_UNORM,
			     false);

	/* If the freshly written binding table is identical to the one
	 * already in use (compared as a single 64-bit load of both slots),
	 * reclaim the surface-state space and reuse the previous table.
	 */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface +=
			sizeof(struct gen7_surface_state)/sizeof(uint32_t);
		offset = sna->render_state.gen7.surface_table;
	}

	/* No render-cache flush needed when re-targeting the same bo. */
	if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0])
		dirty = 0;

	gen7_emit_state(sna, op, offset | dirty);
}
3270
static bool
gen7_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       const DrawableRec *dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	/* Fill n boxes with a constant color under the Render operator op,
	 * choosing between the BLT engine and the 3D pipeline.  Returns
	 * false when neither path can handle the op/format/color.
	 */
	struct sna_composite_op tmp;
	uint32_t pixel;

	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
	     __FUNCTION__, op,
	     color->red, color->green, color->blue, color->alpha, (int)format));

	if (op >= ARRAY_SIZE(gen7_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* Try the BLT first when preferred, or when the 3D pipeline cannot
	 * render to this format at all.  Only PictOpClear and PictOpSrc map
	 * onto the blitter's GXclear/GXcopy.
	 */
	if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) ||
	    !gen7_check_dst_format(format)) {
		uint8_t alu = GXinvalid;

		if (op <= PictOpSrc) {
			pixel = 0;
			if (op == PictOpClear)
				alu = GXclear;
			else if (sna_get_pixel_from_rgba(&pixel,
							 color->red,
							 color->green,
							 color->blue,
							 color->alpha,
							 format))
				alu = GXcopy;
		}

		if (alu != GXinvalid &&
		    sna_blt_fill_boxes(sna, alu,
				       dst_bo, dst->bitsPerPixel,
				       pixel, box, n))
			return true;

		if (!gen7_check_dst_format(format))
			return false;
	}

	/* Convert the color to a8r8g8b8 for the solid source surface; a
	 * clear is treated as solid transparent-black PictOpSrc.
	 */
	if (op == PictOpClear) {
		pixel = 0;
		op = PictOpSrc;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    PICT_a8r8g8b8))
		return false;

	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
	     __FUNCTION__, pixel, n,
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width  = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	/* Oversized targets are redirected through a proxy surface covering
	 * just the box extents, or handed to the tiling fallback.
	 */
	sna_render_composite_redirect_init(&tmp);
	if (too_large(dst->width, dst->height)) {
		BoxRec extents;

		boxes_extents(box, n, &extents);
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1, extents.y1,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	tmp.src.bo = sna_render_get_solid(sna, pixel);
	tmp.mask.bo = NULL;

	/* Fill vertices carry 2 floats each, 3 vertices per rectangle. */
	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen7.flags = FILL_FLAGS(op, format);

	/* Reserve batch space; on failure submit and retry once before
	 * releasing everything acquired so far and giving up.
	 */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			if (tmp.redirect.real_bo)
				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);

			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_fill_state(sna, &tmp);

	/* Emit the boxes in chunks bounded by the batch/vertex space. */
	do {
		int n_this_time;
		int16_t *v;

		n_this_time = gen7_get_rectangles(sna, &tmp, n,
						  gen7_emit_fill_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {
			DBG(("	(%d, %d), (%d, %d)\n",
			     box->x1, box->y1, box->x2, box->y2));

			v[0] = box->x2;
			v[5] = v[1] = box->y2;
			v[8] = v[4] = box->x1;
			v[9] = box->y1;
			v[2] = v[3]  = v[7]  = 1;
			v[6] = v[10] = v[11] = 0;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}
3411
3412static void
3413gen7_render_fill_op_blt(struct sna *sna,
3414			const struct sna_fill_op *op,
3415			int16_t x, int16_t y, int16_t w, int16_t h)
3416{
3417	int16_t *v;
3418
3419	DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
3420
3421	gen7_get_rectangles(sna, &op->base, 1, gen7_emit_fill_state);
3422
3423	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3424	sna->render.vertex_used += 6;
3425	assert(sna->render.vertex_used <= sna->render.vertex_size);
3426
3427	v[0] = x+w;
3428	v[4] = v[8] = x;
3429	v[1] = v[5] = y+h;
3430	v[9] = y;
3431
3432	v[2] = v[3]  = v[7]  = 1;
3433	v[6] = v[10] = v[11] = 0;
3434}
3435
3436fastcall static void
3437gen7_render_fill_op_box(struct sna *sna,
3438			const struct sna_fill_op *op,
3439			const BoxRec *box)
3440{
3441	int16_t *v;
3442
3443	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
3444	     box->x1, box->y1, box->x2, box->y2));
3445
3446	gen7_get_rectangles(sna, &op->base, 1, gen7_emit_fill_state);
3447
3448	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3449	sna->render.vertex_used += 6;
3450	assert(sna->render.vertex_used <= sna->render.vertex_size);
3451
3452	v[0] = box->x2;
3453	v[8] = v[4] = box->x1;
3454	v[5] = v[1] = box->y2;
3455	v[9] = box->y1;
3456
3457	v[7] = v[2]  = v[3]  = 1;
3458	v[6] = v[10] = v[11] = 0;
3459}
3460
3461fastcall static void
3462gen7_render_fill_op_boxes(struct sna *sna,
3463			  const struct sna_fill_op *op,
3464			  const BoxRec *box,
3465			  int nbox)
3466{
3467	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
3468	     box->x1, box->y1, box->x2, box->y2, nbox));
3469
3470	do {
3471		int nbox_this_time;
3472		int16_t *v;
3473
3474		nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
3475						     gen7_emit_fill_state);
3476		nbox -= nbox_this_time;
3477
3478		v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3479		sna->render.vertex_used += 6 * nbox_this_time;
3480		assert(sna->render.vertex_used <= sna->render.vertex_size);
3481
3482		do {
3483			v[0] = box->x2;
3484			v[8] = v[4] = box->x1;
3485			v[5] = v[1] = box->y2;
3486			v[9] = box->y1;
3487			v[7] = v[2]  = v[3]  = 1;
3488			v[6] = v[10] = v[11] = 0;
3489			box++; v += 12;
3490		} while (--nbox_this_time);
3491	} while (nbox);
3492}
3493
3494static void
3495gen7_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
3496{
3497	if (sna->render.vertex_offset)
3498		gen4_vertex_flush(sna);
3499	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
3500}
3501
static bool
gen7_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *op)
{
	/* Set up a reusable solid-fill operation (op->blt/box/boxes emit
	 * rectangles, op->done finishes), preferring the BLT engine when
	 * the heuristics favour it.  Returns false only if no engine can
	 * perform the fill.
	 */
	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));

	if (prefer_blt_fill(sna, dst_bo, flags) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

	/* The render path only handles GXcopy/GXclear and targets within
	 * the maximum 3D surface size; otherwise the BLT must cope.
	 */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    op);

	if (alu == GXclear)
		color = 0;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	op->base.dst.bo = dst_bo;
	op->base.dst.x = op->base.dst.y = 0;

	/* The fill color is sourced from a cached 1x1 solid surface. */
	op->base.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	op->base.mask.bo = NULL;

	op->base.need_magic_ca_pass = false;
	/* Fill vertices carry 2 floats each, 3 vertices per rectangle. */
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen7.flags = FILL_FLAGS_NOBLEND;

	/* Reserve batch space; submit and retry once, releasing the solid
	 * bo if the destination still does not fit.
	 */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, op->base.src.bo);
			return false;
		}

		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &op->base);
	gen7_emit_fill_state(sna, &op->base);

	op->blt   = gen7_render_fill_op_blt;
	op->box   = gen7_render_fill_op_box;
	op->boxes = gen7_render_fill_op_boxes;
	op->points = NULL;
	op->done  = gen7_render_fill_op_done;
	return true;
}
3567
3568static bool
3569gen7_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3570			     uint32_t color,
3571			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
3572			     uint8_t alu)
3573{
3574	BoxRec box;
3575
3576	box.x1 = x1;
3577	box.y1 = y1;
3578	box.x2 = x2;
3579	box.y2 = y2;
3580
3581	return sna_blt_fill_boxes(sna, alu,
3582				  bo, dst->drawable.bitsPerPixel,
3583				  color, &box, 1);
3584}
3585
static bool
gen7_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	/* Fill a single rectangle immediately (no persistent fill op),
	 * via the BLT or the 3D pipeline.  Returns false only when no
	 * engine can perform the fill.
	 */
	struct sna_composite_op tmp;
	int16_t *v;

	/* Prefer to use the BLT if already engaged */
	if (prefer_blt_fill(sna, bo, FILL_BOXES) &&
	    gen7_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return gen7_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	if (alu == GXclear)
		color = 0;

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* The fill color is sourced from a cached 1x1 solid surface. */
	tmp.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen7.flags = FILL_FLAGS_NOBLEND;

	/* Reserve batch space; submit and retry once before giving up and
	 * releasing the solid-color bo.
	 */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_fill_state(sna, &tmp);

	gen7_get_rectangles(sna, &tmp, 1, gen7_emit_fill_state);

	DBG(("	(%d, %d), (%d, %d)\n", x1, y1, x2, y2));

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* Destination corners plus constant source coordinates into the
	 * 1x1 solid surface.
	 */
	v[0] = x2;
	v[8] = v[4] = x1;
	v[5] = v[1] = y2;
	v[9] = y1;
	v[7] = v[2]  = v[3]  = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
3663
3664static bool
3665gen7_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
3666{
3667	BoxRec box;
3668
3669	box.x1 = 0;
3670	box.y1 = 0;
3671	box.x2 = dst->drawable.width;
3672	box.y2 = dst->drawable.height;
3673
3674	return sna_blt_fill_boxes(sna, GXclear,
3675				  bo, dst->drawable.bitsPerPixel,
3676				  0, &box, 1);
3677}
3678
static bool
gen7_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	/* Clear the whole drawable to zero, via the BLT or by rendering a
	 * single full-surface rectangle from a solid-black source.
	 */
	struct sna_composite_op tmp;
	int16_t *v;

	DBG(("%s: %dx%d\n",
	     __FUNCTION__,
	     dst->drawable.width,
	     dst->drawable.height));

	/* Prefer to use the BLT if already engaged */
	if (sna->kgem.mode == KGEM_BLT &&
	    gen7_render_clear_try_blt(sna, dst, bo))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height))
		return gen7_render_clear_try_blt(sna, dst, bo);

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Source is the cached 1x1 solid surface for pixel value 0. */
	tmp.src.bo = sna_render_get_solid(sna, 0);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen7.flags = FILL_FLAGS_NOBLEND;

	/* Reserve batch space; submit and retry once before giving up. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_fill_state(sna, &tmp);

	gen7_get_rectangles(sna, &tmp, 1, gen7_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* One rectangle covering the full surface. */
	v[0] = dst->drawable.width;
	v[5] = v[1] = dst->drawable.height;
	v[8] = v[4] = 0;
	v[9] = 0;

	v[7] = v[2]  = v[3]  = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
3747static void gen7_render_reset(struct sna *sna)
3748{
3749	sna->render_state.gen7.emit_flush = false;
3750	sna->render_state.gen7.needs_invariant = true;
3751	sna->render_state.gen7.ve_id = 3 << 2;
3752	sna->render_state.gen7.last_primitive = -1;
3753
3754	sna->render_state.gen7.num_sf_outputs = 0;
3755	sna->render_state.gen7.samplers = -1;
3756	sna->render_state.gen7.blend = -1;
3757	sna->render_state.gen7.kernel = -1;
3758	sna->render_state.gen7.drawrect_offset = -1;
3759	sna->render_state.gen7.drawrect_limit = -1;
3760	sna->render_state.gen7.surface_table = 0;
3761
3762	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
3763		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
3764		discard_vbo(sna);
3765	}
3766
3767	sna->render.vertex_offset = 0;
3768	sna->render.nvertex_reloc = 0;
3769	sna->render.vb_id = 0;
3770}
3771
3772static void gen7_render_fini(struct sna *sna)
3773{
3774	kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
3775}
3776
3777static bool is_gt3(struct sna *sna, int devid)
3778{
3779	assert(sna->kgem.gen == 075);
3780	return devid & 0x20;
3781}
3782
static bool is_gt2(struct sna *sna, int devid)
{
	/* The GT level lives in the device id; Haswell uses a wider field. */
	int mask = is_hsw(sna) ? 0x30 : 0x20;

	return (devid & mask) != 0;
}
3787
static bool is_mobile(struct sna *sna, int devid)
{
	/* Mobile variants carry 0x6 in the low nibble of the device id. */
	int variant = devid & 0xf;

	return variant == 0x6;
}
3792
static bool gen7_render_setup(struct sna *sna, int devid)
{
	/* One-time construction of the immutable GPU state: WM kernels,
	 * the sampler table and the blend table, all packed into a single
	 * static-stream bo.  Returns false for unknown GPUs or if the bo
	 * cannot be created.
	 */
	struct gen7_render_state *state = &sna->render_state.gen7;
	struct sna_static_stream general;
	struct gen7_sampler_state *ss;
	int i, j, k, l, m;

	/* Identify the GPU family and GT level from the device id in order
	 * to select the matching thread/URB sizing info.
	 */
	if (is_ivb(sna)) {
		state->info = &ivb_gt_info;
		if (devid & 0xf) {
			state->info = &ivb_gt1_info;
			if (is_gt2(sna, devid))
				state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
		}
	} else if (is_byt(sna)) {
		state->info = &byt_gt_info;
	} else if (is_hsw(sna)) {
		state->info = &hsw_gt_info;
		if (devid & 0xf) {
			if (is_gt3(sna, devid))
				state->info = &hsw_gt3_info;
			else if (is_gt2(sna, devid))
				state->info = &hsw_gt2_info;
			else
				state->info = &hsw_gt1_info;
		}
	} else
		return false;

	state->gt = state->info->gt;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);

	/* Upload every WM kernel: prebuilt blobs are copied verbatim,
	 * otherwise the kernel is compiled for 8/16/32-pixel dispatch as
	 * configured at build time.
	 */
	for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			state->wm_kernel[m][1] =
				sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			if (USE_8_PIXEL_DISPATCH) {
				state->wm_kernel[m][0] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 8);
			}

			if (USE_16_PIXEL_DISPATCH) {
				state->wm_kernel[m][1] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 16);
			}

			if (USE_32_PIXEL_DISPATCH) {
				state->wm_kernel[m][2] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 32);
			}
		}
		/* At least one dispatch width must have been produced. */
		assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
	}

	/* Build the sampler table: two special pairs (copy, fill) followed
	 * by one (source, mask) pair for every combination of filter and
	 * extend mode.
	 */
	ss = sna_static_stream_map(&general,
				   2 * sizeof(*ss) *
				   (2 +
				    FILTER_COUNT * EXTEND_COUNT *
				    FILTER_COUNT * EXTEND_COUNT),
				   32);
	state->wm_state = sna_static_stream_offsetof(&general, ss);
	sampler_copy_init(ss); ss += 2;
	sampler_fill_init(ss); ss += 2;
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					sampler_state_init(ss++, i, j);
					sampler_state_init(ss++, k, l);
				}
			}
		}
	}

	state->cc_blend = gen7_composite_create_blend_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}
3885
const char *gen7_render_init(struct sna *sna, const char *backend)
{
	/* Entry point: install the gen7 render backend into sna.  If the
	 * static state cannot be constructed, returns the caller-supplied
	 * backend name unchanged; otherwise returns the GPU info name.
	 */
	int devid = intel_get_device_id(sna->scrn);

	if (!gen7_render_setup(sna, devid))
		return backend;

	/* Batch management callbacks are shared with earlier gens. */
	sna->kgem.context_switch = gen6_render_context_switch;
	sna->kgem.retire = gen6_render_retire;
	sna->kgem.expire = gen4_render_expire;

#if !NO_COMPOSITE
	sna->render.composite = gen7_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen7_check_composite_spans;
	sna->render.composite_spans = gen7_render_composite_spans;
	/* Spans on the GPU only pay off on the smaller/mobile parts. */
	if (is_mobile(sna, devid) || is_gt2(sna, devid) || is_byt(sna))
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
	sna->render.video = gen7_render_video;

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen7_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen7_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen7_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen7_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen7_render_fill_one;
#endif
#if !NO_FILL_CLEAR
	sna->render.clear = gen7_render_clear;
#endif

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen7_render_reset;
	sna->render.fini = gen7_render_fini;

	sna->render.max_3d_size = GEN7_MAX_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->render_state.gen7.info->name;
}
3937