gen7_render.c revision 13496ba1
1/*
2 * Copyright © 2006,2008,2011 Intel Corporation
3 * Copyright © 2007 Red Hat, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 *    Wang Zhenyu <zhenyu.z.wang@sna.com>
26 *    Eric Anholt <eric@anholt.net>
27 *    Carl Worth <cworth@redhat.com>
28 *    Keith Packard <keithp@keithp.com>
29 *    Chris Wilson <chris@chris-wilson.co.uk>
30 *
31 */
32
33#ifdef HAVE_CONFIG_H
34#include "config.h"
35#endif
36
37#include "sna.h"
38#include "sna_reg.h"
39#include "sna_render.h"
40#include "sna_render_inline.h"
41#include "sna_video.h"
42
43#include "brw/brw.h"
44#include "gen7_render.h"
45#include "gen4_common.h"
46#include "gen4_source.h"
47#include "gen4_vertex.h"
48#include "gen6_common.h"
49
/* Debug knobs: force a pipe invalidate/flush/stall before every state
 * emission instead of only when tracked state requires one.
 */
#define ALWAYS_INVALIDATE 0
#define ALWAYS_FLUSH 0
#define ALWAYS_STALL 0

/* Per-operation kill switches: set to 1 to fall back to the generic
 * (non-gen7) paths for that operation, for debugging.
 */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0

#define NO_RING_SWITCH 0

/* Which pixel-shader dispatch widths to enable in 3DSTATE_PS. */
#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0

#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif

/* Maximum 2D surface extent accepted by this backend (see too_large()). */
#define GEN7_MAX_SIZE 16384

/* XXX Todo
 *
 * STR (software tiled rendering) mode. No, really.
 * 64x32 pixel blocks align with the rendering cache. Worth considering.
 */

/* True when x is a multiple of y; y must be a power of two. */
#define is_aligned(x, y) (((x) & ((y) - 1)) == 0)
82
/* Per-SKU hardware limits used when programming the fixed-function
 * pipeline (see gen7_emit_urb() and gen7_emit_wm()).
 */
struct gt_info {
	const char *name;	/* human-readable chipset name, for logging */
	uint32_t max_vs_threads;
	uint32_t max_gs_threads;
	uint32_t max_wm_threads;	/* pre-shifted for 3DSTATE_PS (see initializers) */
	struct {
		int size;
		int max_vs_entries;
		int max_gs_entries;
		int push_ps_size; /* in 1KBs */
	} urb;
	int gt;		/* GT level; 0 when the exact SKU is unknown */
	uint32_t mocs;	/* memory object control bits, OR'd into STATE_BASE_ADDRESS */
};
97
/* Conservative fallback limits for an Ivybridge of unknown GT level
 * (.mocs left 0).
 */
static const struct gt_info ivb_gt_info = {
	.name = "Ivybridge (gen7)",
	.max_vs_threads = 16,
	.max_gs_threads = 16,
	.max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT,
	.urb = { 128, 64, 64, 8 },
	.gt = 0,
};

static const struct gt_info ivb_gt1_info = {
	.name = "Ivybridge (gen7, gt1)",
	.max_vs_threads = 36,
	.max_gs_threads = 36,
	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
	.urb = { 128, 512, 192, 8 },
	.gt = 1,
	.mocs = 3,
};

static const struct gt_info ivb_gt2_info = {
	.name = "Ivybridge (gen7, gt2)",
	.max_vs_threads = 128,
	.max_gs_threads = 128,
	.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT,
	.urb = { 256, 704, 320, 8 },
	.gt = 2,
	.mocs = 3,
};

/* Baytrail shares the Ivybridge gt1 limits but uses no special MOCS. */
static const struct gt_info byt_gt_info = {
	.name = "Baytrail (gen7)",
	.max_vs_threads = 36,
	.max_gs_threads = 36,
	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
	.urb = { 128, 512, 192, 8 },
	.gt = 1,
};

/* Haswell fallback for an unknown GT level; note the HSW-specific
 * thread-count/sample-mask encoding in max_wm_threads.
 */
static const struct gt_info hsw_gt_info = {
	.name = "Haswell (gen7.5)",
	.max_vs_threads = 8,
	.max_gs_threads = 8,
	.max_wm_threads =
		(8 - 1) << HSW_PS_MAX_THREADS_SHIFT |
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
	.urb = { 128, 64, 64, 8 },
	.gt = 0,
};

static const struct gt_info hsw_gt1_info = {
	.name = "Haswell (gen7.5, gt1)",
	.max_vs_threads = 70,
	.max_gs_threads = 70,
	.max_wm_threads =
		(102 - 1) << HSW_PS_MAX_THREADS_SHIFT |
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
	.urb = { 128, 640, 256, 8 },
	.gt = 1,
	.mocs = 5,
};

static const struct gt_info hsw_gt2_info = {
	.name = "Haswell (gen7.5, gt2)",
	.max_vs_threads = 140,
	.max_gs_threads = 140,
	.max_wm_threads =
		(140 - 1) << HSW_PS_MAX_THREADS_SHIFT |
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
	.urb = { 256, 1664, 640, 8 },
	.gt = 2,
	.mocs = 5,
};

static const struct gt_info hsw_gt3_info = {
	.name = "Haswell (gen7.5, gt3)",
	.max_vs_threads = 280,
	.max_gs_threads = 280,
	.max_wm_threads =
		(280 - 1) << HSW_PS_MAX_THREADS_SHIFT |
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
	.urb = { 512, 3328, 1280, 16 },
	.gt = 3,
	.mocs = 5,
};
182
183inline static bool is_ivb(struct sna *sna)
184{
185	return sna->kgem.gen == 070;
186}
187
188inline static bool is_byt(struct sna *sna)
189{
190	return sna->kgem.gen == 071;
191}
192
193inline static bool is_hsw(struct sna *sna)
194{
195	return sna->kgem.gen == 075;
196}
197
/* Pre-assembled gen7 pixel-shader binaries (one 4-dword instruction per
 * row) for textured video: sample packed YUV from a single surface, or
 * planar Y/U/V from separate surfaces, then convert YUV->RGB and write.
 */
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_yuv_rgb.g7b"
#include "exa_wm_write.g7b"
};

static const uint32_t ps_kernel_planar[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_planar.g7b"
#include "exa_wm_yuv_rgb.g7b"
#include "exa_wm_write.g7b"
};
211
212#define KERNEL(kernel_enum, kernel, num_surfaces) \
213    [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces}
214#define NOKERNEL(kernel_enum, func, num_surfaces) \
215    [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces}
216static const struct wm_kernel_info {
217	const char *name;
218	const void *data;
219	unsigned int size;
220	int num_surfaces;
221} wm_kernels[] = {
222	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
223	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),
224
225	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
226	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),
227
228	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
229	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),
230
231	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
232	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),
233
234	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
235	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),
236
237	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
238	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
239};
240#undef KERNEL
241
/* Hardware blend factors for each Render composite operator, indexed by
 * PictOp*.  src_alpha marks ops whose destination factor references the
 * source alpha and therefore needs the component-alpha fixup in
 * gen7_get_blend().
 */
static const struct blendinfo {
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen7_blend_op[] = {
	/* Clear */	{0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO},
	/* Src */	{0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO},
	/* Dst */	{0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ONE},
	/* Over */	{1, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ONE},
	/* In */	{0, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
	/* InReverse */	{1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_SRC_ALPHA},
	/* Out */	{0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
	/* OutReverse */ {1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	{1, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_SRC_ALPHA},
	/* Xor */	{1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	{0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ONE},
};
261
262/**
263 * Highest-valued BLENDFACTOR used in gen7_blend_op.
264 *
265 * This leaves out GEN7_BLENDFACTOR_INV_DST_COLOR,
266 * GEN7_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
267 * GEN7_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
268 */
269#define GEN7_BLENDFACTOR_COUNT (GEN7_BLENDFACTOR_INV_DST_ALPHA + 1)
270
271#define GEN7_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen7_blend_state), 64)
272
273#define BLEND_OFFSET(s, d) \
274	((d != GEN7_BLENDFACTOR_ZERO) << 15 | \
275	 (((s) * GEN7_BLENDFACTOR_COUNT + (d)) * GEN7_BLEND_STATE_PADDED_SIZE))
276
277#define NO_BLEND BLEND_OFFSET(GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO)
278#define CLEAR BLEND_OFFSET(GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO)
279
280#define SAMPLER_OFFSET(sf, se, mf, me) \
281	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) * 2 * sizeof(struct gen7_sampler_state))
282
283#define VERTEX_2s2s 0
284
285#define COPY_SAMPLER 0
286#define COPY_VERTEX VERTEX_2s2s
287#define COPY_FLAGS(a) GEN7_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN7_WM_KERNEL_NOMASK, COPY_VERTEX)
288
289#define FILL_SAMPLER (2 * sizeof(struct gen7_sampler_state))
290#define FILL_VERTEX VERTEX_2s2s
291#define FILL_FLAGS(op, format) GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
292#define FILL_FLAGS_NOBLEND GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
293
294#define GEN7_SAMPLER(f) (((f) >> 16) & 0xfff0)
295#define GEN7_BLEND(f) (((f) >> 0) & 0x7ff0)
296#define GEN7_READS_DST(f) (((f) >> 15) & 1)
297#define GEN7_KERNEL(f) (((f) >> 16) & 0xf)
298#define GEN7_VERTEX(f) (((f) >> 0) & 0xf)
299#define GEN7_SET_FLAGS(S, B, K, V)  (((S) | (K)) << 16 | ((B) | (V)))
300
301#define OUT_BATCH(v) batch_emit(sna, v)
302#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
303#define OUT_VERTEX_F(v) vertex_emit(sna, v)
304
305static inline bool too_large(int width, int height)
306{
307	return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE;
308}
309
310static uint32_t gen7_get_blend(int op,
311			       bool has_component_alpha,
312			       uint32_t dst_format)
313{
314	uint32_t src, dst;
315
316	src = gen7_blend_op[op].src_blend;
317	dst = gen7_blend_op[op].dst_blend;
318
319	/* If there's no dst alpha channel, adjust the blend op so that
320	 * we'll treat it always as 1.
321	 */
322	if (PICT_FORMAT_A(dst_format) == 0) {
323		if (src == GEN7_BLENDFACTOR_DST_ALPHA)
324			src = GEN7_BLENDFACTOR_ONE;
325		else if (src == GEN7_BLENDFACTOR_INV_DST_ALPHA)
326			src = GEN7_BLENDFACTOR_ZERO;
327	}
328
329	/* If the source alpha is being used, then we should only be in a
330	 * case where the source blend factor is 0, and the source blend
331	 * value is the mask channels multiplied by the source picture's alpha.
332	 */
333	if (has_component_alpha && gen7_blend_op[op].src_alpha) {
334		if (dst == GEN7_BLENDFACTOR_SRC_ALPHA)
335			dst = GEN7_BLENDFACTOR_SRC_COLOR;
336		else if (dst == GEN7_BLENDFACTOR_INV_SRC_ALPHA)
337			dst = GEN7_BLENDFACTOR_INV_SRC_COLOR;
338	}
339
340	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
341	     op, dst_format, PICT_FORMAT_A(dst_format),
342	     src, dst, (int)BLEND_OFFSET(src, dst)));
343	return BLEND_OFFSET(src, dst);
344}
345
346static uint32_t gen7_get_card_format(PictFormat format)
347{
348	switch (format) {
349	default:
350		return -1;
351	case PICT_a8r8g8b8:
352		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
353	case PICT_x8r8g8b8:
354		return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM;
355	case PICT_a8b8g8r8:
356		return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
357	case PICT_x8b8g8r8:
358		return GEN7_SURFACEFORMAT_R8G8B8X8_UNORM;
359#ifdef PICT_a2r10g10b10
360	case PICT_a2r10g10b10:
361		return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
362	case PICT_x2r10g10b10:
363		return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM;
364#endif
365	case PICT_r8g8b8:
366		return GEN7_SURFACEFORMAT_R8G8B8_UNORM;
367	case PICT_r5g6b5:
368		return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
369	case PICT_a1r5g5b5:
370		return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
371	case PICT_a8:
372		return GEN7_SURFACEFORMAT_A8_UNORM;
373	case PICT_a4r4g4b4:
374		return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
375	}
376}
377
378static uint32_t gen7_get_dest_format(PictFormat format)
379{
380	switch (format) {
381	default:
382		return -1;
383	case PICT_a8r8g8b8:
384	case PICT_x8r8g8b8:
385		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
386	case PICT_a8b8g8r8:
387	case PICT_x8b8g8r8:
388		return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
389#ifdef PICT_a2r10g10b10
390	case PICT_a2r10g10b10:
391	case PICT_x2r10g10b10:
392		return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
393#endif
394	case PICT_r5g6b5:
395		return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
396	case PICT_x1r5g5b5:
397	case PICT_a1r5g5b5:
398		return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
399	case PICT_a8:
400		return GEN7_SURFACEFORMAT_A8_UNORM;
401	case PICT_a4r4g4b4:
402	case PICT_x4r4g4b4:
403		return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
404	}
405}
406
407static bool gen7_check_dst_format(PictFormat format)
408{
409	if (gen7_get_dest_format(format) != -1)
410		return true;
411
412	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
413	return false;
414}
415
/* A format is a valid texture source iff it has a gen7 sampler format. */
static bool gen7_check_format(uint32_t format)
{
	if (gen7_get_card_format(format) == -1) {
		DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
		return false;
	}
	return true;
}
424
425static uint32_t gen7_filter(uint32_t filter)
426{
427	switch (filter) {
428	default:
429		assert(0);
430	case PictFilterNearest:
431		return SAMPLER_FILTER_NEAREST;
432	case PictFilterBilinear:
433		return SAMPLER_FILTER_BILINEAR;
434	}
435}
436
437static uint32_t gen7_check_filter(PicturePtr picture)
438{
439	switch (picture->filter) {
440	case PictFilterNearest:
441	case PictFilterBilinear:
442		return true;
443	default:
444		return false;
445	}
446}
447
448static uint32_t gen7_repeat(uint32_t repeat)
449{
450	switch (repeat) {
451	default:
452		assert(0);
453	case RepeatNone:
454		return SAMPLER_EXTEND_NONE;
455	case RepeatNormal:
456		return SAMPLER_EXTEND_REPEAT;
457	case RepeatPad:
458		return SAMPLER_EXTEND_PAD;
459	case RepeatReflect:
460		return SAMPLER_EXTEND_REFLECT;
461	}
462}
463
464static bool gen7_check_repeat(PicturePtr picture)
465{
466	if (!picture->repeat)
467		return true;
468
469	switch (picture->repeatType) {
470	case RepeatNone:
471	case RepeatNormal:
472	case RepeatPad:
473	case RepeatReflect:
474		return true;
475	default:
476		return false;
477	}
478}
479
480static int
481gen7_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
482{
483	int base;
484
485	if (has_mask) {
486		if (is_ca) {
487			if (gen7_blend_op[op].src_alpha)
488				base = GEN7_WM_KERNEL_MASKSA;
489			else
490				base = GEN7_WM_KERNEL_MASKCA;
491		} else
492			base = GEN7_WM_KERNEL_MASK;
493	} else
494		base = GEN7_WM_KERNEL_NOMASK;
495
496	return base + !is_affine;
497}
498
/* Partition the URB and push-constant space.  Only the VS section gets
 * real entries (used to feed the vertex data through the pass-through
 * pipeline); HS/DS/GS receive zero-sized allocations at non-overlapping
 * start addresses as the hardware still requires them to be programmed.
 */
static void
gen7_emit_urb(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
	OUT_BATCH(sna->render_state.gen7.info->urb.push_ps_size);

	/* num of VS entries must be divisible by 8 if size < 9 */
	OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
	OUT_BATCH((sna->render_state.gen7.info->urb.max_vs_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
		  (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));

	OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2));
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
		  (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));

	OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2));
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
		  (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));

	OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2));
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
}
523
/* Program STATE_BASE_ADDRESS.  Surface state is relative to the batch
 * itself (NULL reloc target), while dynamic and instruction state both
 * live in the pre-baked general_bo; the per-SKU mocs bits are OR'd into
 * those two base addresses.  The upper-bound fields are disabled.
 */
static void
gen7_emit_state_base_address(struct sna *sna)
{
	uint32_t mocs = sna->render_state.gen7.info->mocs << 8;

	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
	OUT_BATCH(0); /* general */
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic */
				 sna->kgem.nbatch,
				 sna->render_state.gen7.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 mocs | BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* indirect */
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
				 sna->kgem.nbatch,
				 sna->render_state.gen7.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 mocs | BASE_ADDRESS_MODIFY));

	/* upper bounds, disable */
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
}
554
/* Disable the vertex shader: no kernel, VS in pass-through mode.  The
 * constant/binding-table/sampler pointers are left untouched (the #if 0
 * block) as they are don't-cares while the unit is disabled.
 */
static void
gen7_disable_vs(struct sna *sna)
{
	/* For future reference:
	 * A PIPE_CONTROL with post-sync op set to 1 and a depth stall needs
	 * to be emitted just prior to change VS state, i.e. 3DSTATE_VS,
	 * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS,
	 * 3DSTATE_BINDING_TABLE_POINTER_VS, 3DSTATE_SAMPLER_STATE_POINTER_VS.
	 *
	 * Here we saved by the full-flush incurred when emitting
	 * the batchbuffer.
	 */
	OUT_BATCH(GEN7_3DSTATE_VS | (6 - 2));
	OUT_BATCH(0); /* no VS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */

#if 0
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_VS | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
	OUT_BATCH(0);
#endif
}
590
/* Disable the hull shader (tessellation is unused for 2D rendering). */
static void
gen7_disable_hs(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2));
	OUT_BATCH(0); /* no HS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */

#if 0
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
	OUT_BATCH(0);
#endif
}
618
/* Disable the tessellation engine (all-zero 3DSTATE_TE). */
static void
gen7_disable_te(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
627
/* Disable the domain shader (tessellation is unused for 2D rendering). */
static void
gen7_disable_ds(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if 0
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
	OUT_BATCH(0);
#endif
}
654
/* Disable the geometry shader: no kernel, pass-through mode. */
static void
gen7_disable_gs(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_GS | (7 - 2));
	OUT_BATCH(0); /* no GS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */

#if 0
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_GS | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
	OUT_BATCH(0);
#endif
}
682
/* Disable transform feedback (all-zero 3DSTATE_STREAMOUT). */
static void
gen7_disable_streamout(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
}
690
/* One-time strips-and-fans setup: no culling, and the trifan provoking
 * vertex set to the third vertex (matches the rectangle emission order).
 */
static void
gen7_emit_sf_invariant(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_SF | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(GEN7_3DSTATE_SF_CULL_NONE);
	OUT_BATCH(2 << GEN7_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
702
/* One-time colour-calculator setup: only clears the CC viewport state
 * pointer; the CC and depth/stencil pointers keep their defaults.
 */
static void
gen7_emit_cc_invariant(struct sna *sna)
{
#if 0 /* unused, no change */
	OUT_BATCH(GEN7_3DSTATE_CC_STATE_POINTERS | (2 - 2));
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
	OUT_BATCH(0);
#endif

	/* XXX clear to be safe */
	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
	OUT_BATCH(0);
}
718
/* Put the clipper in pass-through mode and clear the SF/CLIP viewport
 * pointer; clipping is handled in software via the drawing rectangle.
 */
static void
gen7_disable_clip(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_CLIP | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
	OUT_BATCH(0);
}
730
/* One-time windower setup: enable dispatch with perspective-pixel
 * barycentrics; per-operation PS state is programmed by gen7_emit_wm().
 */
static void
gen7_emit_wm_invariant(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_WM | (3 - 2));
	OUT_BATCH(GEN7_WM_DISPATCH_ENABLE |
		  GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
	OUT_BATCH(0);

#if 0
	/* XXX length bias of 7 in old spec? */
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_PS | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif
}
750
/* Bind a NULL depth buffer: depth, stencil and hiz are all disabled as
 * 2D composite rendering never uses them.
 */
static void
gen7_emit_null_depth_buffer(struct sna *sna)
{
	OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
	OUT_BATCH(GEN7_SURFACE_NULL << GEN7_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
		  GEN7_DEPTHFORMAT_D32_FLOAT << GEN7_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
	OUT_BATCH(0); /* disable depth, stencil and hiz */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

#if 0
	OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
#endif
}
769
/* Emit the full invariant pipeline setup: select the 3D pipeline,
 * single-sample multisampling, then configure/disable every stage that
 * never changes between composite operations.  Run once per batch,
 * gated by render_state.gen7.needs_invariant.
 */
static void
gen7_emit_invariant(struct sna *sna)
{
	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE | (4 - 2));
	OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
		  GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_SAMPLE_MASK | (2 - 2));
	OUT_BATCH(1);

	gen7_emit_urb(sna);

	gen7_emit_state_base_address(sna);

	gen7_disable_vs(sna);
	gen7_disable_hs(sna);
	gen7_disable_te(sna);
	gen7_disable_ds(sna);
	gen7_disable_gs(sna);
	gen7_disable_clip(sna);
	gen7_emit_sf_invariant(sna);
	gen7_emit_wm_invariant(sna);
	gen7_emit_cc_invariant(sna);
	gen7_disable_streamout(sna);
	gen7_emit_null_depth_buffer(sna);

	sna->render_state.gen7.needs_invariant = false;
}
802
/* Point the hardware at the pre-baked blend state for this operation
 * (offset produced by BLEND_OFFSET/gen7_get_blend), skipping the emit
 * when the cached value already matches.
 */
static void
gen7_emit_cc(struct sna *sna, uint32_t blend_offset)
{
	struct gen7_render_state *render = &sna->render_state.gen7;

	if (render->blend == blend_offset)
		return;

	DBG(("%s: blend = %x\n", __FUNCTION__, blend_offset));

	/* XXX can have upto 8 blend states preload, selectable via
	 * Render Target Index. What other side-effects of Render Target Index?
	 */

	assert (is_aligned(render->cc_blend + blend_offset, 64));
	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
	OUT_BATCH((render->cc_blend + blend_offset) | 1);

	render->blend = blend_offset;
}
823
/* Point the PS at the pre-baked sampler pair for this operation (offset
 * produced by SAMPLER_OFFSET), skipping the emit when unchanged.
 */
static void
gen7_emit_sampler(struct sna *sna, uint32_t state)
{
	if (sna->render_state.gen7.samplers == state)
		return;

	sna->render_state.gen7.samplers = state;

	DBG(("%s: sampler = %x\n", __FUNCTION__, state));

	assert (is_aligned(sna->render_state.gen7.wm_state + state, 32));
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
	OUT_BATCH(sna->render_state.gen7.wm_state + state);
}
838
/* Program 3DSTATE_SBE for one or two setup-engine outputs: one texture
 * coordinate set for source-only operations, two when a mask channel is
 * present.  Skips the emit when the cached count matches.
 */
static void
gen7_emit_sf(struct sna *sna, bool has_mask)
{
	int num_sf_outputs = has_mask ? 2 : 1;

	if (sna->render_state.gen7.num_sf_outputs == num_sf_outputs)
		return;

	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
	     __FUNCTION__, num_sf_outputs, 1, 0));

	sna->render_state.gen7.num_sf_outputs = num_sf_outputs;

	OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
	OUT_BATCH(num_sf_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT |
		  1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
		  1 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(0); /* dw4 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* dw8 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* dw12 */
	OUT_BATCH(0);
	OUT_BATCH(0);
}
869
/* Program 3DSTATE_PS for the selected kernel.  wm_kernel[kernel] holds
 * up to three kernel offsets (SIMD8/16/32 dispatch); a zero entry means
 * that dispatch width was not built.  Skips the emit when unchanged.
 */
static void
gen7_emit_wm(struct sna *sna, int kernel)
{
	const uint32_t *kernels;

	if (sna->render_state.gen7.kernel == kernel)
		return;

	sna->render_state.gen7.kernel = kernel;
	kernels = sna->render_state.gen7.wm_kernel[kernel];

	DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
	     __FUNCTION__,
	     wm_kernels[kernel].name,
	     wm_kernels[kernel].num_surfaces,
	     kernels[0], kernels[1], kernels[2]));

	OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2));
	/* first available kernel offset (SIMD8, else SIMD16, else SIMD32) */
	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
	OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT |
		  wm_kernels[kernel].num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
	OUT_BATCH(0); /* scratch address */
	OUT_BATCH(sna->render_state.gen7.info->max_wm_threads |
		  (kernels[0] ? GEN7_PS_8_DISPATCH_ENABLE : 0) |
		  (kernels[1] ? GEN7_PS_16_DISPATCH_ENABLE : 0) |
		  (kernels[2] ? GEN7_PS_32_DISPATCH_ENABLE : 0) |
		  GEN7_PS_ATTRIBUTE_ENABLE);
	/* start-GRF for each enabled dispatch width */
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
		  8 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
		  6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
	OUT_BATCH(kernels[2]);
	OUT_BATCH(kernels[1]);
}
903
/* Point the PS at a new binding table (offset in dwords within the
 * surface state).  Returns true when a new table was emitted, false
 * when the cached offset already matched.
 */
static bool
gen7_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen7.surface_table == offset)
		return false;

	/* Binding table pointers */
	assert(is_aligned(4*offset, 32));
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
	OUT_BATCH(offset*4);

	sna->render_state.gen7.surface_table = offset;
	return true;
}
918
/* Program the drawing rectangle (clip bounds plus the destination's
 * drawing origin).  Returns true when the cached rectangle was already
 * current and nothing was emitted, false when new state was written.
 */
static bool
gen7_emit_drawing_rectangle(struct sna *sna,
			    const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(abs(op->dst.x), abs(op->dst.y)));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen7.drawrect_limit == limit &&
	    sna->render_state.gen7.drawrect_offset == offset)
		return true;

	sna->render_state.gen7.drawrect_offset = offset;
	sna->render_state.gen7.drawrect_limit = limit;

	OUT_BATCH(GEN7_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}
942
/* Program 3DSTATE_VERTEX_ELEMENTS to match the vertex layout encoded in
 * the operation's vertex id (GEN7_VERTEX bits of op->u.gen7.flags):
 * low two bits select the source-coordinate width (0 = 16-bit short
 * pair, 1-3 = that many floats), high bits likewise for the optional
 * mask channel.  Skips the emit when the cached id matches.
 */
static void
gen7_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen7_render_state *render = &sna->render_state.gen7;
	uint32_t src_format, dw;
	int id = GEN7_VERTEX(op->u.gen7.flags);
	bool has_mask;

	DBG(("%s: setup id=%d\n", __FUNCTION__, id));

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
	 *    dword 4-7: position (x, y, 1.0, 1.0),
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
	 *
	 * dword 4-15 are fetched from vertex buffer
	 */
	has_mask = (id >> 2) != 0;
	OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS |
		((2 * (3 + has_mask)) + 1 - 2));

	/* pad element: all components forced to 0 */
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
		  GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT |
		  0 << GEN7_VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT |
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT |
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT);

	/* x,y */
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
		  GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
		  0 << GEN7_VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
		  GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
		  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
		/* fallthrough */
	case 0:
		src_format = GEN7_SURFACEFORMAT_R16G16_SSCALED;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
		  src_format << GEN7_VE0_FORMAT_SHIFT |
		  4 << GEN7_VE0_OFFSET_SHIFT);
	OUT_BATCH(dw);

	/* u1, v1, w1 */
	if (has_mask) {
		/* mask coords start after x,y plus the source channel */
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
		dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
			/* fallthrough */
		case 2:
			src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
			  src_format << GEN7_VE0_FORMAT_SHIFT |
			  offset << GEN7_VE0_OFFSET_SHIFT);
		OUT_BATCH(dw);
	}
}
1062
inline static void
gen7_emit_pipe_invalidate(struct sna *sna)
{
	/* Emit a heavyweight PIPE_CONTROL: flush the render (write) and
	 * texture caches and stall the command streamer until complete.
	 * Used when a source/mask bo is dirty so the sampler does not read
	 * stale data.  Resets the since-stall counter as this includes a
	 * CS stall.
	 */
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH |
		  GEN7_PIPE_CONTROL_TC_FLUSH |
		  GEN7_PIPE_CONTROL_CS_STALL);
	OUT_BATCH(0);	/* no write address */
	OUT_BATCH(0);	/* no immediate data */
	sna->render_state.gen7.pipe_controls_since_stall = 0;
}
1074
inline static void
gen7_emit_pipe_flush(struct sna *sna, bool need_stall)
{
	/* Emit a PIPE_CONTROL flushing the render write cache, optionally
	 * combined with a CS stall.  The pipe_controls_since_stall counter
	 * tracks how many PIPE_CONTROLs have been emitted without a stall
	 * (consumed by gen7_emit_state as a workaround trigger).
	 */
	unsigned stall;

	stall = 0;
	if (need_stall) {
		stall = GEN7_PIPE_CONTROL_CS_STALL;
		sna->render_state.gen7.pipe_controls_since_stall = 0;
	} else
		sna->render_state.gen7.pipe_controls_since_stall++;

	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | stall);
	OUT_BATCH(0);	/* no write address */
	OUT_BATCH(0);	/* no immediate data */
}
1092
inline static void
gen7_emit_pipe_stall(struct sna *sna)
{
	/* Emit a PIPE_CONTROL that stalls the command streamer at the
	 * scoreboard without flushing any caches — used to serialise
	 * state changes against in-flight rendering.
	 */
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL |
		  GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
	OUT_BATCH(0);	/* no write address */
	OUT_BATCH(0);	/* no immediate data */
	sna->render_state.gen7.pipe_controls_since_stall = 0;
}
1103
/* Emit all pipeline state for a composite operation: decide which
 * PIPE_CONTROL (invalidate/flush/stall) is required, then emit the
 * cc/sampler/sf/wm/vertex-element/binding-table state.  Bit 0 of
 * wm_binding_table carries the destination-dirty flag from the caller.
 */
static void
gen7_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool need_invalidate;
	bool need_flush;
	bool need_stall;

	assert(op->dst.bo->exec);

	/* Flush if the destination was dirty, or if the previous op read
	 * from its destination and this one does too. */
	need_flush = wm_binding_table & 1 ||
		(sna->render_state.gen7.emit_flush && GEN7_READS_DST(op->u.gen7.flags));
	if (ALWAYS_FLUSH)
		need_flush = true;

	/* Strip the dirty flag; the rest is the surface-table offset. */
	wm_binding_table &= ~1;

	need_stall = sna->render_state.gen7.surface_table != wm_binding_table;

	/* Source/mask written by an earlier op => texture caches stale. */
	need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo);
	if (ALWAYS_INVALIDATE)
		need_invalidate = true;

	need_stall &= gen7_emit_drawing_rectangle(sna, op);
	if (ALWAYS_STALL)
		need_stall = true;
	/* NOTE(review): pre-075 (i.e. pre-Haswell, gen in octal) appears to
	 * need a stall after a few unstall'ed PIPE_CONTROLs — hardware
	 * workaround, confirm against the IVB PRM. */
	if (sna->kgem.gen < 075 &&
	    sna->render_state.gen7.pipe_controls_since_stall >= 3)
		need_stall = true;

	if (need_invalidate) {
		/* The invalidate includes a full flush + stall, so the
		 * weaker barriers become redundant. */
		gen7_emit_pipe_invalidate(sna);
		kgem_clear_dirty(&sna->kgem);
		assert(op->dst.bo->exec);
		kgem_bo_mark_dirty(op->dst.bo);

		need_flush = false;
		need_stall = false;
	}
	if (need_flush) {
		gen7_emit_pipe_flush(sna, need_stall);
		need_stall = false;
	}
	if (need_stall)
		gen7_emit_pipe_stall(sna);

	gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
	gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
	gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
	gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
	gen7_emit_vertex_elements(sna, op);
	gen7_emit_binding_table(sna, wm_binding_table);

	/* Remember whether this op read its destination, to decide on a
	 * flush before the next one. */
	sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
}
1160
/* Second pass for component-alpha rendering: replay the vertices already
 * emitted (vertex_start..vertex_index) with an additive blend and the
 * CA-specific kernel.  Returns true if a pass was emitted, in which case
 * the caller must restore the original cc/wm state before continuing.
 */
static bool gen7_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen7_render_state *state = &sna->render_state.gen7;

	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
	     sna->render.vertex_start, sna->render.vertex_index));

	/* Serialise against the first pass before swapping blend/kernel. */
	gen7_emit_pipe_stall(sna);

	gen7_emit_cc(sna,
		     GEN7_BLEND(gen7_get_blend(PictOpAdd, true,
					       op->dst.format)));
	gen7_emit_wm(sna,
		     gen7_choose_composite_kernel(PictOpAdd,
						  true, true,
						  op->is_affine));

	/* Re-issue the same rectangles as a fresh 3DPRIMITIVE. */
	OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}
1193
/* Reserve a 64-byte block of zeros at the start of the static state
 * stream; the zeroed data serves as the legacy border color and
 * depth-stencil state.
 */
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}
1199
1200static void
1201sampler_state_init(struct gen7_sampler_state *sampler_state,
1202		   sampler_filter_t filter,
1203		   sampler_extend_t extend)
1204{
1205	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
1206
1207	/* We use the legacy mode to get the semantics specified by
1208	 * the Render extension. */
1209	sampler_state->ss0.default_color_mode = GEN7_BORDER_COLOR_MODE_LEGACY;
1210
1211	switch (filter) {
1212	default:
1213	case SAMPLER_FILTER_NEAREST:
1214		sampler_state->ss0.min_filter = GEN7_MAPFILTER_NEAREST;
1215		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_NEAREST;
1216		break;
1217	case SAMPLER_FILTER_BILINEAR:
1218		sampler_state->ss0.min_filter = GEN7_MAPFILTER_LINEAR;
1219		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_LINEAR;
1220		break;
1221	}
1222
1223	switch (extend) {
1224	default:
1225	case SAMPLER_EXTEND_NONE:
1226		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1227		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1228		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1229		break;
1230	case SAMPLER_EXTEND_REPEAT:
1231		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1232		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1233		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1234		break;
1235	case SAMPLER_EXTEND_PAD:
1236		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1237		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1238		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1239		break;
1240	case SAMPLER_EXTEND_REFLECT:
1241		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1242		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1243		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1244		break;
1245	}
1246}
1247
/* Initialise the sampler pair for copy operations: nearest filtering on
 * unnormalized (pixel) coordinates for the source, plus a default
 * second (mask) sampler.
 */
static void
sampler_copy_init(struct gen7_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
1256
/* Initialise the sampler pair for fill operations: like copy, but the
 * source repeats so a 1x1 texture can tile across the destination.
 */
static void
sampler_fill_init(struct gen7_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
1265
1266static uint32_t
1267gen7_tiling_bits(uint32_t tiling)
1268{
1269	switch (tiling) {
1270	default: assert(0);
1271	case I915_TILING_NONE: return 0;
1272	case I915_TILING_X: return GEN7_SURFACE_TILED;
1273	case I915_TILING_Y: return GEN7_SURFACE_TILED | GEN7_SURFACE_TILED_Y;
1274	}
1275}
1276
1277/**
1278 * Sets up the common fields for a surface state buffer for the given
1279 * picture in the given surface state buffer.
1280 */
1281static uint32_t
1282gen7_bind_bo(struct sna *sna,
1283	     struct kgem_bo *bo,
1284	     uint32_t width,
1285	     uint32_t height,
1286	     uint32_t format,
1287	     bool is_dst)
1288{
1289	uint32_t *ss;
1290	uint32_t domains;
1291	int offset;
1292	uint32_t is_scanout = is_dst && bo->scanout;
1293
1294	COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);
1295
1296	/* After the first bind, we manage the cache domains within the batch */
1297	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
1298	if (offset) {
1299		assert(offset >= sna->kgem.surface);
1300		if (is_dst)
1301			kgem_bo_mark_dirty(bo);
1302		return offset * sizeof(uint32_t);
1303	}
1304
1305	offset = sna->kgem.surface -=
1306		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1307	ss = sna->kgem.batch + offset;
1308	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
1309		 gen7_tiling_bits(bo->tiling) |
1310		 format << GEN7_SURFACE_FORMAT_SHIFT);
1311	if (bo->tiling == I915_TILING_Y)
1312		ss[0] |= GEN7_SURFACE_VALIGN_4;
1313	if (is_dst) {
1314		ss[0] |= GEN7_SURFACE_RC_READ_WRITE;
1315		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
1316	} else
1317		domains = I915_GEM_DOMAIN_SAMPLER << 16;
1318	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
1319	ss[2] = ((width - 1)  << GEN7_SURFACE_WIDTH_SHIFT |
1320		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
1321	ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
1322	ss[4] = 0;
1323	ss[5] = (is_scanout || bo->io) ? 0 : sna->render_state.gen7.info->mocs << 16;
1324	ss[6] = 0;
1325	ss[7] = 0;
1326	if (is_hsw(sna))
1327		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
1328
1329	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);
1330
1331	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
1332	     offset, bo->handle, ss[1],
1333	     format, width, height, bo->pitch, bo->tiling,
1334	     domains & 0xffff ? "render" : "sampler"));
1335
1336	return offset * sizeof(uint32_t);
1337}
1338
/* Emit 3DSTATE_VERTEX_BUFFERS for the vertex layout selected by the op.
 * The buffer address is left as 0 and recorded in vertex_reloc[] to be
 * patched once the vbo is finalised.
 */
static void gen7_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = GEN7_VERTEX(op->u.gen7.flags);

	OUT_BATCH(GEN7_3DSTATE_VERTEX_BUFFERS | (5 - 2));
	OUT_BATCH(id << GEN7_VB0_BUFFER_INDEX_SHIFT |
		  GEN7_VB0_VERTEXDATA |
		  GEN7_VB0_ADDRESS_MODIFY_ENABLE |
		  4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT);
	/* Remember where the address dword lives for later relocation. */
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(~0); /* max address: disabled */
	OUT_BATCH(0);

	/* Mark this vertex layout as emitted for the current batch. */
	sna->render.vb_id |= 1 << id;
}
1356
/* Start (or continue) a RECTLIST 3DPRIMITIVE.  If the previous batch
 * entry was also a primitive, simply rewind vertex_offset into it so
 * the new rectangles extend the existing draw call; otherwise emit a
 * fresh command with the vertex count left to be filled in at flush.
 */
static void gen7_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen7.last_primitive) {
		/* Continue the previous primitive: point at its count dword. */
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;

	sna->render_state.gen7.last_primitive = sna->kgem.nbatch;
}
1376
/* Prepare to emit rectangles: make sure a vertex buffer and an open
 * 3DPRIMITIVE exist in the batch.  Returns false if the batch lacks
 * space and must be submitted first.
 */
static bool gen7_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	int id = 1 << GEN7_VERTEX(op->u.gen7.flags);
	int ndwords;

	/* Another thread may have already opened the primitive. */
	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* Reserve room for the primitive (and the CA fixup pass, which
	 * re-emits state), plus the vertex buffer if not yet emitted. */
	ndwords = op->need_magic_ca_pass ? 60 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen7_emit_vertex_buffer(sna, op);

	gen7_emit_primitive(sna);
	return true;
}
1398
/* Slow path of gen7_get_rectangles: the vbo is full.  Flush the pending
 * vertices (running the CA pass if needed) and start a new vertex
 * buffer.  Returns the free vertex space, or 0 if the whole batch must
 * be submitted.
 */
static int gen7_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Preventing discarding new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen7_magic_ca_pass(sna, op)) {
			/* Restore the op's own blend/kernel after the CA pass. */
			gen7_emit_pipe_stall(sna);
			gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
			gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
		}
	}

	return gen4_vertex_finish(sna);
}
1425
/* Reserve vertex space for up to 'want' rectangles (3 vertices each).
 * Handles vbo exhaustion and batch overflow by flushing/submitting and
 * re-emitting state via emit_state, then retrying.  Returns the number
 * of rectangles actually reserved (>= 1).
 */
inline static int gen7_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen7_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		/* No open primitive: begin one, or submit if out of space. */
		if (!gen7_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	/* Clamp the request to what fits in the remaining vertex space. */
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	assert(want > 0);
	sna->render.vertex_index += 3*want;
	return want;

flush:
	/* Batch full: flush vertices, submit, re-emit state and retry. */
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen7_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}
1471
/* Allocate a binding table (sized as one surface-state block) from the
 * top of the batch, zero it, and return both the table pointer and its
 * dword offset via *offset.
 */
inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna,
							 uint16_t *offset)
{
	uint32_t *table;

	sna->kgem.surface -=
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
	/* Clear all surplus entries to zero in case of prefetch */
	table = memset(sna->kgem.batch + sna->kgem.surface,
		       0, sizeof(struct gen7_surface_state));

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}
1488
/* Switch the batch to the render ring and ensure there is room for a
 * composite op (150 dwords + 4 surface states); submit and restart the
 * batch otherwise.  Also emits the invariant pipeline setup on a fresh
 * batch.
 */
static void
gen7_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		_kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	assert(sna->kgem.mode == KGEM_RENDER);
	assert(sna->kgem.ring == KGEM_RENDER);

	if (sna->render_state.gen7.needs_invariant)
		gen7_emit_invariant(sna);
}
1508
/* Build the binding table (dst, src, optional mask) for a composite op
 * and emit all pipeline state.  If the freshly-built table is identical
 * to the current one, the allocation is undone and the existing table
 * reused to avoid a redundant stall.
 */
static void gen7_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen7_get_batch(sna, op);

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	/* Record dirtiness before binding marks the dst dirty. */
	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			    op->dst.bo, op->dst.width, op->dst.height,
			    gen7_get_dest_format(op->dst.format),
			    true);
	binding_table[1] =
		gen7_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		binding_table[2] =
			gen7_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	/* If the new table matches the active one (first two entries
	 * compared as a single uint64, plus the mask entry if present),
	 * release the allocation and keep the current table. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen7.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
		offset = sna->render_state.gen7.surface_table;
	}

	/* Same destination surface as before => no flush required. */
	if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0])
		dirty = 0;

	/* The dirty flag rides in bit 0 of the table offset. */
	gen7_emit_state(sna, op, offset | dirty);
}
1554
/* Realign the vertex buffer when the vertex stride changes between
 * operations, so new vertices start on a whole-vertex boundary.
 */
static void
gen7_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	if (op->floats_per_vertex != sna->render_state.gen7.floats_per_vertex) {
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
		     sna->render_state.gen7.floats_per_vertex, op->floats_per_vertex));
		gen4_vertex_align(sna, op);
		sna->render_state.gen7.floats_per_vertex = op->floats_per_vertex;
	}
}
1565
/* Emit a single composite rectangle: reserve vertex space then delegate
 * vertex emission to the op's prim_emit hook.
 */
fastcall static void
gen7_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);
	op->prim_emit(sna, op, r);
}
1574
1575fastcall static void
1576gen7_render_composite_box(struct sna *sna,
1577			  const struct sna_composite_op *op,
1578			  const BoxRec *box)
1579{
1580	struct sna_composite_rectangles r;
1581
1582	gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);
1583
1584	DBG(("  %s: (%d, %d), (%d, %d)\n",
1585	     __FUNCTION__,
1586	     box->x1, box->y1, box->x2, box->y2));
1587
1588	r.dst.x = box->x1;
1589	r.dst.y = box->y1;
1590	r.width  = box->x2 - box->x1;
1591	r.height = box->y2 - box->y1;
1592	r.src = r.mask = r.dst;
1593
1594	op->prim_emit(sna, op, &r);
1595}
1596
/* Composite an array of boxes one rectangle at a time via prim_emit;
 * used when the op has no batched emit_boxes implementation.
 */
static void
gen7_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("composite_boxes(%d)\n", nbox));

	do {
		int nbox_this_time;

		/* Reserve as many rectangles as fit in the vbo/batch. */
		nbox_this_time = gen7_get_rectangles(sna, op, nbox,
						     gen7_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}
1629
/* Composite an array of boxes using the op's batched emit_boxes hook,
 * writing vertices for several boxes per call.
 */
static void
gen7_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen7_get_rectangles(sna, op, nbox,
						     gen7_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim the vertex space up front, then fill it in. */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}
1653
/* Threaded variant of gen7_render_composite_boxes: vertex space is
 * claimed under the vertex lock, then the lock is dropped while the
 * vertices are written, allowing other threads to fill their own
 * reservations concurrently.
 */
static void
gen7_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen7_get_rectangles(sna, op, nbox,
						     gen7_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Reserve the vertex span while still holding the lock. */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		/* Pin the reservation, then write vertices unlocked. */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
1685
1686#ifndef MAX
1687#define MAX(a,b) ((a) > (b) ? (a) : (b))
1688#endif
1689
/* Pre-generate every (src, dst) blend-factor combination as a table of
 * gen7 blend states in the static stream; lookup at runtime is then a
 * simple offset computation.  Returns the stream offset of the table.
 */
static uint32_t
gen7_composite_create_blend_state(struct sna_static_stream *stream)
{
	char *base, *ptr;
	int src, dst;

	base = sna_static_stream_map(stream,
				     GEN7_BLENDFACTOR_COUNT * GEN7_BLENDFACTOR_COUNT * GEN7_BLEND_STATE_PADDED_SIZE,
				     64);

	ptr = base;
	for (src = 0; src < GEN7_BLENDFACTOR_COUNT; src++) {
		for (dst= 0; dst < GEN7_BLENDFACTOR_COUNT; dst++) {
			struct gen7_blend_state *blend =
				(struct gen7_blend_state *)ptr;

			blend->blend0.dest_blend_factor = dst;
			blend->blend0.source_blend_factor = src;
			blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
			/* ONE/ZERO is a plain copy; skip blending entirely. */
			blend->blend0.blend_enable =
				!(dst == GEN7_BLENDFACTOR_ZERO && src == GEN7_BLENDFACTOR_ONE);

			blend->blend1.post_blend_clamp_enable = 1;
			blend->blend1.pre_blend_clamp_enable = 1;

			ptr += GEN7_BLEND_STATE_PADDED_SIZE;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}
1721
/* Emit a surface state for one plane of a video frame (always linear,
 * sampled only).  Unlike gen7_bind_bo this takes an explicit byte
 * offset into the bo and does not cache the binding.  Returns the byte
 * offset of the surface state within the batch.
 */
static uint32_t gen7_bind_video_source(struct sna *sna,
				       struct kgem_bo *bo,
				       uint32_t offset,
				       int width,
				       int height,
				       int pitch,
				       uint32_t format)
{
	uint32_t *ss, bind;

	bind = sna->kgem.surface -=
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);

	assert(bo->tiling == I915_TILING_NONE);

	ss = sna->kgem.batch + bind;
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
		 format << GEN7_SURFACE_FORMAT_SHIFT);
	ss[1] = kgem_add_reloc(&sna->kgem, bind + 1, bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       offset);
	ss[2] = ((width - 1)  << GEN7_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
	ss[3] = (pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
	ss[4] = 0;
	ss[5] = 0;
	ss[6] = 0;
	ss[7] = 0;
	if (is_hsw(sna))
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, offset=%d\n",
	     bind, bo->handle, ss[1],
	     format, width, height, pitch, offset));

	return bind * sizeof(uint32_t);
}
1759
/* Build the binding table for a video blit: destination plus 1 (packed)
 * or 6 (planar: 2xY, 2xV, 2xU) source surfaces, then emit all state.
 */
static void gen7_emit_video_state(struct sna *sna,
				  const struct sna_composite_op *op)
{
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset, dirty;
	int n_src, n;

	gen7_get_batch(sna, op);

	/* Planar layout: Y plane first, then V and U at their offsets;
	 * each plane is bound twice. */
	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		src_surf_format = GEN7_SURFACEFORMAT_R8_UNORM;
		src_width[1]  = src_width[0]  = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
		/* Chroma planes are subsampled to half size. */
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	/* Record dirtiness before binding marks the dst dirty. */
	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen7_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen7_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}

	gen7_emit_state(sna, op, offset | dirty);
}
1828
/* Render a video frame into the given region of the pixmap using the
 * 3D pipeline: one textured rectangle per clip box, scaling the source
 * to the destination extents.  Returns false if the bos cannot fit in
 * a single batch.
 */
static bool
gen7_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	int nbox, pix_xoff, pix_yoff;
	unsigned filter;
	const BoxRec *box;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
	     __FUNCTION__,
	     src_width, src_height, dst_width, dst_height,
	     region_num_rects(dstRegion),
	     REGION_EXTENTS(NULL, dstRegion)->x1,
	     REGION_EXTENTS(NULL, dstRegion)->y1,
	     REGION_EXTENTS(NULL, dstRegion)->x2,
	     REGION_EXTENTS(NULL, dstRegion)->y2));

	assert(priv->gpu_bo);
	memset(&tmp, 0, sizeof(tmp));

	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;

	/* Each vertex is (x, y) plus a 2D texture coordinate. */
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;

	/* Only filter when actually scaling. */
	if (src_width == dst_width && src_height == dst_height)
		filter = SAMPLER_FILTER_NEAREST;
	else
		filter = SAMPLER_FILTER_BILINEAR;

	tmp.u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
			       NO_BLEND,
			       is_planar_fourcc(frame->id) ?
			       GEN7_WM_KERNEL_VIDEO_PLANAR :
			       GEN7_WM_KERNEL_VIDEO_PACKED,
			       2);
	tmp.priv = frame;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
			return false;

		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_video_state(sna, &tmp);

	/* Set up the offset for translating from the given region (in screen
	 * coordinates) to the backing pixmap.
	 */
#ifdef COMPOSITE
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
	pix_xoff = 0;
	pix_yoff = 0;
#endif

	DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
	     __FUNCTION__,
	     frame->src.x1, frame->src.y1,
	     src_width, src_height,
	     dst_width, dst_height,
	     frame->width, frame->height));

	/* Map destination pixels to normalised source texture coords. */
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
	     __FUNCTION__,
	     src_scale_x, src_scale_y,
	     src_offset_x, src_offset_y));

	box = region_rects(dstRegion);
	nbox = region_num_rects(dstRegion);
	while (nbox--) {
		BoxRec r;

		DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n",
		     __FUNCTION__,
		     box->x1, box->y1,
		     box->x2, box->y2,
		     pix_xoff, pix_yoff,
		     box->x1 * src_scale_x + src_offset_x,
		     box->y1 * src_scale_y + src_offset_y,
		     box->x2 * src_scale_x + src_offset_x,
		     box->y2 * src_scale_y + src_offset_y));

		r.x1 = box->x1 + pix_xoff;
		r.x2 = box->x2 + pix_xoff;
		r.y1 = box->y1 + pix_yoff;
		r.y2 = box->y2 + pix_yoff;

		gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state);

		/* RECTLIST: three corners, bottom-right, bottom-left,
		 * top-left; the fourth is derived by the hardware. */
		OUT_VERTEX(r.x2, r.y2);
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(r.x1, r.y2);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(r.x1, r.y1);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
			sna_damage_add_box(&priv->gpu_damage, &r);
			sna_damage_subtract_box(&priv->cpu_damage, &r);
		}
		box++;
	}

	gen4_vertex_flush(sna);
	return true;
}
1972
/* Prepare a Render picture for use as a composite channel.  Tries, in
 * order: solid colour, gradient (linear or fixed-up), alpha-map /
 * unsupported repeat/filter fallbacks, integer-translation
 * simplification, format conversion, extraction of oversized pixmaps,
 * and finally direct use of the pixmap's bo.  Returns the channel-init
 * result (<0/0 on failure paths, per the helpers' conventions).
 */
static int
gen7_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		/* Source-only picture (gradient). */
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen7_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen7_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	assert(picture->pDrawable);
	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		/* A pure integer translation can be folded into the
		 * sample offsets, dropping the transform entirely. */
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat ||
		    (x >= 0 &&
		     y >= 0 &&
		     x + w < pixmap->drawable.width &&
		     y + h < pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen4_channel_init_solid(sna, channel, priv->clear_color);
			}
		}
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen7_get_card_format(picture->format);
	if (channel->card_format == (unsigned)-1)
		/* No direct hardware format: convert via software. */
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		/* Exceeds the 3D pipeline's surface limits: extract the
		 * sampled subregion into a smaller bo. */
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	DBG(("%s: pixmap, repeat=%d, filter=%d, transform?=%d [affine? %d], format=%08x\n",
	     __FUNCTION__,
	     channel->repeat, channel->filter,
	     channel->transform != NULL, channel->is_affine,
	     channel->pict_format));
	if (channel->transform) {
		DBG(("%s: transform=[%f %f %f, %f %f %f, %f %f %f]\n",
		     __FUNCTION__,
		     channel->transform->matrix[0][0] / 65536.,
		     channel->transform->matrix[0][1] / 65536.,
		     channel->transform->matrix[0][2] / 65536.,
		     channel->transform->matrix[1][0] / 65536.,
		     channel->transform->matrix[1][1] / 65536.,
		     channel->transform->matrix[1][2] / 65536.,
		     channel->transform->matrix[2][0] / 65536.,
		     channel->transform->matrix[2][1] / 65536.,
		     channel->transform->matrix[2][2] / 65536.));
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}
2098
2099inline static void gen7_composite_channel_convert(struct sna_composite_channel *channel)
2100{
2101	channel->repeat = gen7_repeat(channel->repeat);
2102	channel->filter = gen7_filter(channel->filter);
2103	if (channel->card_format == (unsigned)-1)
2104		channel->card_format = gen7_get_card_format(channel->pict_format);
2105	assert(channel->card_format != (unsigned)-1);
2106}
2107
2108static void gen7_render_composite_done(struct sna *sna,
2109				       const struct sna_composite_op *op)
2110{
2111	if (sna->render.vertex_offset) {
2112		gen4_vertex_flush(sna);
2113		gen7_magic_ca_pass(sna, op);
2114	}
2115
2116	if (op->mask.bo)
2117		kgem_bo_destroy(&sna->kgem, op->mask.bo);
2118	if (op->src.bo)
2119		kgem_bo_destroy(&sna->kgem, op->src.bo);
2120
2121	sna_render_composite_redirect_done(sna, op);
2122}
2123
/* Prepare op->dst for rendering into the destination picture: choose a
 * GPU bo for the drawable, record drawable deltas and the damage pointer,
 * and redirect through a proxy surface if the pixmap exceeds the 3D
 * pipeline's surface limits.  Returns false if no usable GPU target
 * could be obtained (caller should fall back).
 */
inline static bool
gen7_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;
	unsigned int hint;

	DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial));

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width  = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	/* A zero extent means "whole picture"; otherwise use the given box. */
	if (w | h) {
		assert(w && h);
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	hint = PREFER_GPU | FORCE_GPU | RENDER_GPU;
	if (!partial) {
		/* A full overwrite lets us skip reading back prior damage. */
		hint |= IGNORE_DAMAGE;
		if (w == op->dst.width && h == op->dst.height)
			hint |= REPLACES;
	}

	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	if (hint & REPLACES) {
		/* The entire surface is rewritten; discard pending work. */
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
	}

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	/* Oversized targets render via an intermediate proxy surface. */
	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}
2185
2186static bool
2187try_blt(struct sna *sna,
2188	PicturePtr dst, PicturePtr src,
2189	int width, int height)
2190{
2191	struct kgem_bo *bo;
2192
2193	if (sna->kgem.mode == KGEM_BLT) {
2194		DBG(("%s: already performing BLT\n", __FUNCTION__));
2195		return true;
2196	}
2197
2198	if (too_large(width, height)) {
2199		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
2200		     __FUNCTION__, width, height));
2201		return true;
2202	}
2203
2204	bo = __sna_drawable_peek_bo(dst->pDrawable);
2205	if (bo == NULL)
2206		return true;
2207	if (bo->rq)
2208		return RQ_IS_BLT(bo->rq);
2209
2210	if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0))
2211		return true;
2212
2213	if (src->pDrawable) {
2214		bo = __sna_drawable_peek_bo(src->pDrawable);
2215		if (bo == NULL)
2216			return true;
2217
2218		if (prefer_blt_bo(sna, bo))
2219			return true;
2220	}
2221
2222	if (sna->kgem.ring == KGEM_BLT) {
2223		DBG(("%s: already performing BLT\n", __FUNCTION__));
2224		return true;
2225	}
2226
2227	return false;
2228}
2229
2230static bool
2231check_gradient(PicturePtr picture, bool precise)
2232{
2233	if (picture->pDrawable)
2234		return false;
2235
2236	switch (picture->pSourcePict->type) {
2237	case SourcePictTypeSolidFill:
2238	case SourcePictTypeLinear:
2239		return false;
2240	default:
2241		return precise;
2242	}
2243}
2244
2245static bool
2246has_alphamap(PicturePtr p)
2247{
2248	return p->alphaMap != NULL;
2249}
2250
2251static bool
2252need_upload(PicturePtr p)
2253{
2254	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
2255}
2256
2257static bool
2258source_is_busy(PixmapPtr pixmap)
2259{
2260	struct sna_pixmap *priv = sna_pixmap(pixmap);
2261	if (priv == NULL || priv->clear)
2262		return false;
2263
2264	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
2265		return true;
2266
2267	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
2268		return true;
2269
2270	return priv->gpu_damage && !priv->cpu_damage;
2271}
2272
2273static bool
2274source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
2275{
2276	if (sna_picture_is_solid(p, NULL))
2277		return false;
2278
2279	if (p->pSourcePict)
2280		return check_gradient(p, precise);
2281
2282	if (!gen7_check_repeat(p) || !gen7_check_format(p->format))
2283		return true;
2284
2285	if (pixmap && source_is_busy(pixmap))
2286		return false;
2287
2288	return has_alphamap(p) || !gen7_check_filter(p) || need_upload(p);
2289}
2290
/* Decide whether the whole composite (src [, mask] -> dst) should be
 * performed on the CPU instead of the GPU.  Returns true to fall back.
 * The decision balances where the pixel data currently lives against
 * which pictures the hardware can sample directly.
 */
static bool
gen7_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen7_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask && mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	/* An oversized, CPU-resident target is not worth migrating. */
	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	     __FUNCTION__));
	return dst_use_cpu(dst_pixmap);
}
2381
/* Try to reuse the already-prepared source channel for the mask channel.
 * Returns non-zero (true) if mc has been initialised — either by cloning
 * sc outright, by reducing the mask to a solid, or by cloning sc with
 * adjusted sampler parameters — and zero (false) if the caller must
 * prepare the mask channel from scratch.
 */
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	/* Differing offsets would require distinct texture coordinates. */
	if (src_x != msk_x || src_y != msk_y)
		return false;

	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		/* Each channel owns a reference, dropped in done(). */
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	if (sna_picture_is_solid(mask, &color))
		return gen4_channel_init_solid(sna, mc, color);

	if (sc->is_solid)
		return false;

	/* Only worthwhile when both sample the same drawable. */
	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen7_check_repeat(mask))
		return false;

	if (!gen7_check_filter(mask))
		return false;

	if (!gen7_check_format(mask->format))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	/* Clone the source channel but resample with the mask's params. */
	*mc = *sc;
	mc->repeat = gen7_repeat(mask->repeat ? mask->repeatType : RepeatNone);
	mc->filter = gen7_filter(mask->filter);
	mc->pict_format = mask->format;
	mc->card_format = gen7_get_card_format(mask->format);
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}
2436
/* Prepare a general composite operation (op: src [, mask] -> dst) on the
 * gen7 render pipeline.  On success, tmp is filled with the blt/box/done
 * emitters and the initial state has been emitted into the batch.
 * Returns false if the operation could not be set up at all; cheap cases
 * may instead be completed directly on the BLT ring.
 */
static bool
gen7_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      unsigned flags,
		      struct sna_composite_op *tmp)
{
	/* Reject blend modes we have no hardware blend state for. */
	if (op >= ARRAY_SIZE(gen7_blend_op))
		return false;

	DBG(("%s: %dx%d, current mode=%d/%d\n", __FUNCTION__,
	     width, height, sna->kgem.mode, sna->kgem.ring));

	/* Unmasked composites may be cheaper on the BLT engine. */
	if (mask == NULL &&
	    try_blt(sna, dst, src, width, height) &&
	    sna_blt_composite(sna, op,
			      src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      flags, tmp))
		return true;

	if (gen7_composite_fallback(sna, src, mask, dst))
		goto fallback;

	/* Split operations wider than the pipeline's surface limits. */
	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	/* Clearing with the all-clear source is just a solid copy. */
	if (op == PictOpClear && src == sna->clear)
		op = PictOpSrc;
	tmp->op = op;
	if (!gen7_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       flags & COMPOSITE_PARTIAL || op > PictOpSrc))
		goto fallback;

	/* Prepare the source channel: -1 = error, 0 = opaque/clear solid,
	 * 1 = bo ready.
	 */
	switch (gen7_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		/* Did we just switch rings to prepare the source? */
		if (mask == NULL &&
		    prefer_blt_composite(sna, tmp) &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen7_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.bo = NULL;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value.  We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen7_blend_op[op].src_alpha &&
			    (gen7_blend_op[op].src_blend != GEN7_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver)
					goto cleanup_src;

				/* OVER is decomposed into OutReverse plus a
				 * second Add pass (the "magic CA pass").
				 */
				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen7_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				gen7_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	/* Pack sampler, blend and kernel selections into one word. */
	tmp->u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen7_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
			       gen7_choose_composite_kernel(tmp->op,
							    tmp->mask.bo != NULL,
							    tmp->has_component_alpha,
							    tmp->is_affine),
			       gen4_choose_composite_emitter(sna, tmp));

	tmp->blt   = gen7_render_composite_blt;
	tmp->box   = gen7_render_composite_box;
	tmp->boxes = gen7_render_composite_boxes__blt;
	if (tmp->emit_boxes){
		tmp->boxes = gen7_render_composite_boxes;
		tmp->thread_boxes = gen7_render_composite_boxes__thread;
	}
	tmp->done  = gen7_render_composite_done;

	/* Ensure batch space for all three bos, flushing once if needed. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, tmp);
	gen7_emit_composite_state(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
		tmp->mask.bo = NULL;
	}
cleanup_src:
	if (tmp->src.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
		tmp->src.bo = NULL;
	}
cleanup_dst:
	if (tmp->redirect.real_bo) {
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
		tmp->redirect.real_bo = NULL;
	}
fallback:
	/* Last chance: let the blitter try (and mark it as a fallback). */
	return (mask == NULL &&
		sna_blt_composite(sna, op,
				  src, dst,
				  src_x, src_y,
				  dst_x, dst_y,
				  width, height,
				  flags | COMPOSITE_FALLBACK, tmp));
}
2621
2622#if !NO_COMPOSITE_SPANS
2623fastcall static void
2624gen7_render_composite_spans_box(struct sna *sna,
2625				const struct sna_composite_spans_op *op,
2626				const BoxRec *box, float opacity)
2627{
2628	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
2629	     __FUNCTION__,
2630	     op->base.src.offset[0], op->base.src.offset[1],
2631	     opacity,
2632	     op->base.dst.x, op->base.dst.y,
2633	     box->x1, box->y1,
2634	     box->x2 - box->x1,
2635	     box->y2 - box->y1));
2636
2637	gen7_get_rectangles(sna, &op->base, 1, gen7_emit_composite_state);
2638	op->prim_emit(sna, op, box, opacity);
2639}
2640
2641static void
2642gen7_render_composite_spans_boxes(struct sna *sna,
2643				  const struct sna_composite_spans_op *op,
2644				  const BoxRec *box, int nbox,
2645				  float opacity)
2646{
2647	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
2648	     __FUNCTION__, nbox,
2649	     op->base.src.offset[0], op->base.src.offset[1],
2650	     opacity,
2651	     op->base.dst.x, op->base.dst.y));
2652
2653	do {
2654		int nbox_this_time;
2655
2656		nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
2657						     gen7_emit_composite_state);
2658		nbox -= nbox_this_time;
2659
2660		do {
2661			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
2662			     box->x1, box->y1,
2663			     box->x2 - box->x1,
2664			     box->y2 - box->y1));
2665
2666			op->prim_emit(sna, op, box++, opacity);
2667		} while (--nbox_this_time);
2668	} while (nbox);
2669}
2670
/* Threaded span emission: each worker reserves a slice of the shared
 * vertex buffer while holding the vertex lock, then fills that slice
 * outside the lock so multiple threads can generate vertices in
 * parallel.  The acquire/release pair keeps the buffer alive across the
 * unlocked window.
 */
fastcall static void
gen7_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
						     gen7_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim our slice of the vertex buffer while locked... */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* ...and write the vertices outside the lock. */
		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
2706
2707fastcall static void
2708gen7_render_composite_spans_done(struct sna *sna,
2709				 const struct sna_composite_spans_op *op)
2710{
2711	if (sna->render.vertex_offset)
2712		gen4_vertex_flush(sna);
2713
2714	DBG(("%s()\n", __FUNCTION__));
2715
2716	if (op->base.src.bo)
2717		kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2718
2719	sna_render_composite_redirect_done(sna, &op->base);
2720}
2721
2722static bool
2723gen7_check_composite_spans(struct sna *sna,
2724			   uint8_t op, PicturePtr src, PicturePtr dst,
2725			   int16_t width, int16_t height, unsigned flags)
2726{
2727	if (op >= ARRAY_SIZE(gen7_blend_op))
2728		return false;
2729
2730	if (gen7_composite_fallback(sna, src, NULL, dst))
2731		return false;
2732
2733	if (need_tiling(sna, width, height) &&
2734	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
2735		DBG(("%s: fallback, tiled operation not on GPU\n",
2736		     __FUNCTION__));
2737		return false;
2738	}
2739
2740	return true;
2741}
2742
/* Prepare an opacity-span composite (op: src -> dst with per-span alpha,
 * used for trapezoid rasterisation).  Fills tmp with the span emitters on
 * success; returns false to let the caller fall back.
 */
static bool
gen7_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d/%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.mode, sna->kgem.ring));

	assert(gen7_check_composite_spans(sna, op, src, dst, width, height, flags));

	/* Split operations wider than the pipeline's surface limits. */
	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	if (!gen7_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height, true))
		return false;

	/* Prepare the source channel: -1 = error, 0 = solid, 1 = bo ready. */
	switch (gen7_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		gen7_composite_channel_convert(&tmp->base.src);
		break;
	}
	tmp->base.mask.bo = NULL;

	tmp->base.is_affine = tmp->base.src.is_affine;
	tmp->base.need_magic_ca_pass = false;

	/* The OPACITY kernel takes the per-span alpha from a vertex
	 * attribute, so the mask sampler slot is just set to a safe
	 * nearest/pad configuration.
	 */
	tmp->base.u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
					      tmp->base.src.repeat,
					      SAMPLER_FILTER_NEAREST,
					      SAMPLER_EXTEND_PAD),
			       gen7_get_blend(tmp->base.op, false, tmp->base.dst.format),
			       GEN7_WM_KERNEL_OPACITY | !tmp->base.is_affine,
			       gen4_choose_spans_emitter(sna, tmp));

	tmp->box   = gen7_render_composite_spans_box;
	tmp->boxes = gen7_render_composite_spans_boxes;
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen7_render_composite_spans_boxes__thread;
	tmp->done  = gen7_render_composite_spans_done;

	/* Ensure batch space for both bos, flushing once if needed. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp->base);
	gen7_emit_composite_state(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
2831#endif
2832
/* Emit the render state for a copy: bind the destination and source
 * surfaces into a fresh binding table and (re)emit pipeline state,
 * reusing the previously emitted binding table when it is identical to
 * avoid redundant state in the batch.
 */
static void
gen7_emit_copy_state(struct sna *sna,
		     const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen7_get_batch(sna, op);

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen7_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen7_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	/* If this two-entry table matches the last one emitted, discard
	 * the new copy and point back at the previous table instead.
	 */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
		offset = sna->render_state.gen7.surface_table;
	}

	/* Re-targeting the same destination needs no flush for coherency. */
	if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0])
		dirty = 0;

	assert(!GEN7_READS_DST(op->u.gen7.flags));
	gen7_emit_state(sna, op, offset | dirty);
}
2869
2870static inline bool
2871prefer_blt_copy(struct sna *sna,
2872		struct kgem_bo *src_bo,
2873		struct kgem_bo *dst_bo,
2874		unsigned flags)
2875{
2876	if (sna->kgem.mode == KGEM_BLT)
2877		return true;
2878
2879	assert((flags & COPY_SYNC) == 0);
2880
2881	if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags))
2882		return true;
2883
2884	if (untiled_tlb_miss(src_bo) ||
2885	    untiled_tlb_miss(dst_bo))
2886		return true;
2887
2888	if (force_blt_ring(sna))
2889		return true;
2890
2891	if (kgem_bo_is_render(dst_bo) ||
2892	    kgem_bo_is_render(src_bo))
2893		return false;
2894
2895	if (prefer_render_ring(sna, dst_bo))
2896		return false;
2897
2898	if (!prefer_blt_ring(sna, dst_bo, flags))
2899		return false;
2900
2901	return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo);
2902}
2903
2904static bool
2905gen7_render_copy_boxes(struct sna *sna, uint8_t alu,
2906		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
2907		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
2908		       const BoxRec *box, int n, unsigned flags)
2909{
2910	struct sna_composite_op tmp;
2911	BoxRec extents;
2912
2913	DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, flags=%x, self-copy=%d, overlaps? %d\n",
2914	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, flags,
2915	     src_bo == dst_bo,
2916	     overlaps(sna,
2917		      src_bo, src_dx, src_dy,
2918		      dst_bo, dst_dx, dst_dy,
2919		      box, n, flags, &extents)));
2920
2921	if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
2922	    sna_blt_compare_depth(src, dst) &&
2923	    sna_blt_copy_boxes(sna, alu,
2924			       src_bo, src_dx, src_dy,
2925			       dst_bo, dst_dx, dst_dy,
2926			       dst->bitsPerPixel,
2927			       box, n))
2928		return true;
2929
2930	if (!(alu == GXcopy || alu == GXclear)) {
2931fallback_blt:
2932		DBG(("%s: fallback blt\n", __FUNCTION__));
2933		if (!sna_blt_compare_depth(src, dst))
2934			return false;
2935
2936		return sna_blt_copy_boxes_fallback(sna, alu,
2937						   src, src_bo, src_dx, src_dy,
2938						   dst, dst_bo, dst_dx, dst_dy,
2939						   box, n);
2940	}
2941
2942	if (overlaps(sna,
2943		     src_bo, src_dx, src_dy,
2944		     dst_bo, dst_dx, dst_dy,
2945		     box, n, flags,
2946		     &extents)) {
2947		bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1);
2948
2949		if ((big || can_switch_to_blt(sna, dst_bo, flags)) &&
2950		    sna_blt_copy_boxes(sna, alu,
2951				       src_bo, src_dx, src_dy,
2952				       dst_bo, dst_dx, dst_dy,
2953				       dst->bitsPerPixel,
2954				       box, n))
2955			return true;
2956
2957		if (big)
2958			goto fallback_blt;
2959
2960		assert(src_bo == dst_bo);
2961		assert(src->depth == dst->depth);
2962		assert(src->width == dst->width);
2963		assert(src->height == dst->height);
2964		return sna_render_copy_boxes__overlap(sna, alu,
2965						      src, src_bo,
2966						      src_dx, src_dy,
2967						      dst_dx, dst_dy,
2968						      box, n, &extents);
2969	}
2970
2971	if (dst->depth == src->depth) {
2972		tmp.dst.format = sna_render_format_for_depth(dst->depth);
2973		tmp.src.pict_format = tmp.dst.format;
2974	} else {
2975		tmp.dst.format = sna_format_for_depth(dst->depth);
2976		tmp.src.pict_format = sna_format_for_depth(src->depth);
2977	}
2978	if (!gen7_check_format(tmp.src.pict_format))
2979		goto fallback_blt;
2980
2981	tmp.dst.pixmap = (PixmapPtr)dst;
2982	tmp.dst.width  = dst->width;
2983	tmp.dst.height = dst->height;
2984	tmp.dst.bo = dst_bo;
2985	tmp.dst.x = tmp.dst.y = 0;
2986	tmp.damage = NULL;
2987
2988	sna_render_composite_redirect_init(&tmp);
2989	if (too_large(tmp.dst.width, tmp.dst.height)) {
2990		int i;
2991
2992		extents = box[0];
2993		for (i = 1; i < n; i++) {
2994			if (box[i].x1 < extents.x1)
2995				extents.x1 = box[i].x1;
2996			if (box[i].y1 < extents.y1)
2997				extents.y1 = box[i].y1;
2998
2999			if (box[i].x2 > extents.x2)
3000				extents.x2 = box[i].x2;
3001			if (box[i].y2 > extents.y2)
3002				extents.y2 = box[i].y2;
3003		}
3004
3005		if (!sna_render_composite_redirect(sna, &tmp,
3006						   extents.x1 + dst_dx,
3007						   extents.y1 + dst_dy,
3008						   extents.x2 - extents.x1,
3009						   extents.y2 - extents.y1,
3010						   n > 1))
3011			goto fallback_tiled;
3012	}
3013
3014	tmp.src.card_format = gen7_get_card_format(tmp.src.pict_format);
3015	if (too_large(src->width, src->height)) {
3016		int i;
3017
3018		extents = box[0];
3019		for (i = 1; i < n; i++) {
3020			if (box[i].x1 < extents.x1)
3021				extents.x1 = box[i].x1;
3022			if (box[i].y1 < extents.y1)
3023				extents.y1 = box[i].y1;
3024
3025			if (box[i].x2 > extents.x2)
3026				extents.x2 = box[i].x2;
3027			if (box[i].y2 > extents.y2)
3028				extents.y2 = box[i].y2;
3029		}
3030
3031		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
3032					       extents.x1 + src_dx,
3033					       extents.y1 + src_dy,
3034					       extents.x2 - extents.x1,
3035					       extents.y2 - extents.y1))
3036			goto fallback_tiled_dst;
3037	} else {
3038		tmp.src.bo = src_bo;
3039		tmp.src.width  = src->width;
3040		tmp.src.height = src->height;
3041		tmp.src.offset[0] = tmp.src.offset[1] = 0;
3042	}
3043
3044	tmp.mask.bo = NULL;
3045
3046	tmp.floats_per_vertex = 2;
3047	tmp.floats_per_rect = 6;
3048	tmp.need_magic_ca_pass = 0;
3049
3050	tmp.u.gen7.flags = COPY_FLAGS(alu);
3051
3052	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
3053	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
3054		kgem_submit(&sna->kgem);
3055		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
3056			if (tmp.src.bo != src_bo)
3057				kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3058			if (tmp.redirect.real_bo)
3059				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
3060			goto fallback_blt;
3061		}
3062		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
3063	}
3064
3065	src_dx += tmp.src.offset[0];
3066	src_dy += tmp.src.offset[1];
3067
3068	dst_dx += tmp.dst.x;
3069	dst_dy += tmp.dst.y;
3070
3071	tmp.dst.x = tmp.dst.y = 0;
3072
3073	gen7_align_vertex(sna, &tmp);
3074	gen7_emit_copy_state(sna, &tmp);
3075
3076	do {
3077		int16_t *v;
3078		int n_this_time;
3079
3080		n_this_time = gen7_get_rectangles(sna, &tmp, n,
3081						  gen7_emit_copy_state);
3082		n -= n_this_time;
3083
3084		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
3085		sna->render.vertex_used += 6 * n_this_time;
3086		assert(sna->render.vertex_used <= sna->render.vertex_size);
3087		do {
3088
3089			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
3090			     box->x1 + src_dx, box->y1 + src_dy,
3091			     box->x1 + dst_dx, box->y1 + dst_dy,
3092			     box->x2 - box->x1, box->y2 - box->y1));
3093			v[0] = box->x2 + dst_dx;
3094			v[2] = box->x2 + src_dx;
3095			v[1]  = v[5] = box->y2 + dst_dy;
3096			v[3]  = v[7] = box->y2 + src_dy;
3097			v[8]  = v[4] = box->x1 + dst_dx;
3098			v[10] = v[6] = box->x1 + src_dx;
3099			v[9]  = box->y1 + dst_dy;
3100			v[11] = box->y1 + src_dy;
3101			v += 12; box++;
3102		} while (--n_this_time);
3103	} while (n);
3104
3105	gen4_vertex_flush(sna);
3106	sna_render_composite_redirect_done(sna, &tmp);
3107	if (tmp.src.bo != src_bo)
3108		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3109	return true;
3110
3111fallback_tiled_dst:
3112	if (tmp.redirect.real_bo)
3113		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
3114fallback_tiled:
3115	DBG(("%s: fallback tiled\n", __FUNCTION__));
3116	if (sna_blt_compare_depth(src, dst) &&
3117	    sna_blt_copy_boxes(sna, alu,
3118			       src_bo, src_dx, src_dy,
3119			       dst_bo, dst_dx, dst_dy,
3120			       dst->bitsPerPixel,
3121			       box, n))
3122		return true;
3123
3124	return sna_tiling_copy_boxes(sna, alu,
3125				     src, src_bo, src_dx, src_dy,
3126				     dst, dst_bo, dst_dx, dst_dy,
3127				     box, n);
3128}
3129
3130static void
3131gen7_render_copy_blt(struct sna *sna,
3132		     const struct sna_copy_op *op,
3133		     int16_t sx, int16_t sy,
3134		     int16_t w,  int16_t h,
3135		     int16_t dx, int16_t dy)
3136{
3137	int16_t *v;
3138
3139	gen7_get_rectangles(sna, &op->base, 1, gen7_emit_copy_state);
3140
3141	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3142	sna->render.vertex_used += 6;
3143	assert(sna->render.vertex_used <= sna->render.vertex_size);
3144
3145	v[0]  = dx+w; v[1]  = dy+h;
3146	v[2]  = sx+w; v[3]  = sy+h;
3147	v[4]  = dx;   v[5]  = dy+h;
3148	v[6]  = sx;   v[7]  = sy+h;
3149	v[8]  = dx;   v[9]  = dy;
3150	v[10] = sx;   v[11] = sy;
3151}
3152
3153static void
3154gen7_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
3155{
3156	if (sna->render.vertex_offset)
3157		gen4_vertex_flush(sna);
3158}
3159
/* Prepare a reusable copy operation (filling in op->blt / op->done) from
 * src to dst.  Prefers the BLT engine; uses the RENDER pipeline only for
 * GXcopy/GXclear between distinct, suitably sized and formatted surfaces.
 * Returns false if neither engine can take the copy.
 */
static bool
gen7_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *op)
{
	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
	     __FUNCTION__, alu,
	     src->drawable.width, src->drawable.height,
	     dst->drawable.width, dst->drawable.height));

	/* Fast path: if the BLT ring is preferred and the depths match,
	 * hand the whole operation to the blitter.
	 */
	if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy(sna, alu,
			 src_bo, dst_bo,
			 dst->drawable.bitsPerPixel,
			 op))
		return true;

	/* The RENDER path cannot handle other alu modes, overlapping
	 * src == dst, or surfaces larger than the 3D pipeline limits.
	 */
	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
	    too_large(src->drawable.width, src->drawable.height) ||
	    too_large(dst->drawable.width, dst->drawable.height)) {
fallback:
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
			return false;

		return sna_blt_copy(sna, alu, src_bo, dst_bo,
				    dst->drawable.bitsPerPixel,
				    op);
	}

	/* Choose render formats: equal depths share a single format,
	 * otherwise derive a format per drawable depth.
	 */
	if (dst->drawable.depth == src->drawable.depth) {
		op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = op->base.dst.format;
	} else {
		op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
	}
	if (!gen7_check_format(op->base.src.pict_format))
		goto fallback;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.bo = dst_bo;

	op->base.src.bo = src_bo;
	op->base.src.card_format =
		gen7_get_card_format(op->base.src.pict_format);
	op->base.src.width  = src->drawable.width;
	op->base.src.height = src->drawable.height;

	op->base.mask.bo = NULL;

	/* Copy vertices carry only position + texcoord (see copy_blt). */
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen7.flags = COPY_FLAGS(alu);

	/* Ensure both bo fit in the current batch; submit and retry once
	 * before giving up and falling back to the blitter.
	 */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &op->base);
	gen7_emit_copy_state(sna, &op->base);

	op->blt  = gen7_render_copy_blt;
	op->done = gen7_render_copy_done;
	return true;
}
3234
/* Emit the render state for a solid fill: destination surface plus a 1x1
 * solid-colour source bound into the surface binding table.
 */
static void
gen7_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
	uint16_t dirty;
	uint32_t *binding_table;
	uint16_t offset;

	/* XXX Render Target Fast Clear
	 * Set RTFC Enable in PS and render a rectangle.
	 * Limited to a clearing the full MSC surface only with a
	 * specific kernel.
	 */

	gen7_get_batch(sna, op);

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	/* Record dirtiness before binding marks the target written. */
	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table[0] =
		gen7_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen7_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen7_bind_bo(sna,
			     op->src.bo, 1, 1,
			     GEN7_SURFACEFORMAT_B8G8R8A8_UNORM,
			     false);

	/* If this table matches the previous one, reuse it and reclaim
	 * the batch space just consumed.
	 */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface +=
			sizeof(struct gen7_surface_state)/sizeof(uint32_t);
		offset = sna->render_state.gen7.surface_table;
	}

	/* Same render target as last time: no flush needed for it. */
	if (sna->kgem.batch[sna->render_state.gen7.surface_table] == binding_table[0])
		dirty = 0;

	/* The dirty flag rides in the low bits of the offset argument. */
	gen7_emit_state(sna, op, offset | dirty);
}
3277
/* Fill n boxes on dst with the given colour using the Render composite
 * operator op.  Tries the BLT engine first, then the 3D pipeline with
 * redirection/tiling for oversized targets.  Returns false if nothing
 * could handle the request.
 */
static bool
gen7_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       const DrawableRec *dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
	     __FUNCTION__, op,
	     color->red, color->green, color->blue, color->alpha, (int)format));

	if (op >= ARRAY_SIZE(gen7_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* BLT path: only Clear and Src can be expressed as a blitter alu,
	 * and only when the colour reduces to a single pixel value.
	 */
	if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) ||
	    !gen7_check_dst_format(format)) {
		uint8_t alu = GXinvalid;

		if (op <= PictOpSrc) {
			pixel = 0;
			if (op == PictOpClear)
				alu = GXclear;
			else if (sna_get_pixel_from_rgba(&pixel,
							 color->red,
							 color->green,
							 color->blue,
							 color->alpha,
							 format))
				alu = GXcopy;
		}

		if (alu != GXinvalid &&
		    sna_blt_fill_boxes(sna, alu,
				       dst_bo, dst->bitsPerPixel,
				       pixel, box, n))
			return true;

		/* Unsupported render-target format and BLT failed: done. */
		if (!gen7_check_dst_format(format))
			return false;
	}

	/* Normalise Clear to Src with a zero pixel; otherwise convert the
	 * colour to a8r8g8b8 for the solid-colour cache.
	 */
	if (op == PictOpClear) {
		pixel = 0;
		op = PictOpSrc;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    PICT_a8r8g8b8))
		return false;

	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
	     __FUNCTION__, pixel, n,
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width  = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	/* Oversized target: redirect rendering to a proxy covering the
	 * boxes' extents, or fall back to tiled filling.
	 */
	sna_render_composite_redirect_init(&tmp);
	if (too_large(dst->width, dst->height)) {
		BoxRec extents;

		boxes_extents(box, n, &extents);
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1, extents.y1,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	/* Source is a cached 1x1 solid-colour bo. */
	tmp.src.bo = sna_render_get_solid(sna, pixel);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen7.flags = FILL_FLAGS(op, format);

	/* Make room in the batch; on second failure release everything
	 * acquired above before reporting failure.
	 */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			if (tmp.redirect.real_bo)
				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);

			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_fill_state(sna, &tmp);

	/* Emit the boxes in batches as large as the vertex buffer allows. */
	do {
		int n_this_time;
		int16_t *v;

		n_this_time = gen7_get_rectangles(sna, &tmp, n,
						  gen7_emit_fill_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {
			DBG(("	(%d, %d), (%d, %d)\n",
			     box->x1, box->y1, box->x2, box->y2));

			/* Three corner vertices; texcoords sample the
			 * 1x1 solid source at (1,1)/(0,1)/(0,0). */
			v[0] = box->x2;
			v[5] = v[1] = box->y2;
			v[8] = v[4] = box->x1;
			v[9] = box->y1;
			v[2] = v[3]  = v[7]  = 1;
			v[6] = v[10] = v[11] = 0;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}
3418
3419static void
3420gen7_render_fill_op_blt(struct sna *sna,
3421			const struct sna_fill_op *op,
3422			int16_t x, int16_t y, int16_t w, int16_t h)
3423{
3424	int16_t *v;
3425
3426	DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
3427
3428	gen7_get_rectangles(sna, &op->base, 1, gen7_emit_fill_state);
3429
3430	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3431	sna->render.vertex_used += 6;
3432	assert(sna->render.vertex_used <= sna->render.vertex_size);
3433
3434	v[0] = x+w;
3435	v[4] = v[8] = x;
3436	v[1] = v[5] = y+h;
3437	v[9] = y;
3438
3439	v[2] = v[3]  = v[7]  = 1;
3440	v[6] = v[10] = v[11] = 0;
3441}
3442
3443fastcall static void
3444gen7_render_fill_op_box(struct sna *sna,
3445			const struct sna_fill_op *op,
3446			const BoxRec *box)
3447{
3448	int16_t *v;
3449
3450	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
3451	     box->x1, box->y1, box->x2, box->y2));
3452
3453	gen7_get_rectangles(sna, &op->base, 1, gen7_emit_fill_state);
3454
3455	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3456	sna->render.vertex_used += 6;
3457	assert(sna->render.vertex_used <= sna->render.vertex_size);
3458
3459	v[0] = box->x2;
3460	v[8] = v[4] = box->x1;
3461	v[5] = v[1] = box->y2;
3462	v[9] = box->y1;
3463
3464	v[7] = v[2]  = v[3]  = 1;
3465	v[6] = v[10] = v[11] = 0;
3466}
3467
3468fastcall static void
3469gen7_render_fill_op_boxes(struct sna *sna,
3470			  const struct sna_fill_op *op,
3471			  const BoxRec *box,
3472			  int nbox)
3473{
3474	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
3475	     box->x1, box->y1, box->x2, box->y2, nbox));
3476
3477	do {
3478		int nbox_this_time;
3479		int16_t *v;
3480
3481		nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
3482						     gen7_emit_fill_state);
3483		nbox -= nbox_this_time;
3484
3485		v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3486		sna->render.vertex_used += 6 * nbox_this_time;
3487		assert(sna->render.vertex_used <= sna->render.vertex_size);
3488
3489		do {
3490			v[0] = box->x2;
3491			v[8] = v[4] = box->x1;
3492			v[5] = v[1] = box->y2;
3493			v[9] = box->y1;
3494			v[7] = v[2]  = v[3]  = 1;
3495			v[6] = v[10] = v[11] = 0;
3496			box++; v += 12;
3497		} while (--nbox_this_time);
3498	} while (nbox);
3499}
3500
/* Finish a fill operation: flush pending vertices and release the
 * cached solid-colour source bo acquired in gen7_render_fill().
 */
static void
gen7_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
{
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
}
3508
/* Prepare a reusable solid-fill operation (op->blt/box/boxes/done) on dst
 * with the given colour and alu.  Prefers the BLT engine; falls back to
 * it when the RENDER pipeline cannot handle the alu or surface size.
 */
static bool
gen7_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *op)
{
	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));

	/* Fast path: hand the fill to the blitter when preferred. */
	if (prefer_blt_fill(sna, dst_bo, flags) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

	/* RENDER fills only handle GXcopy/GXclear within size limits. */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    op);

	if (alu == GXclear)
		color = 0;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	op->base.dst.bo = dst_bo;
	op->base.dst.x = op->base.dst.y = 0;

	/* Source is a cached 1x1 solid-colour bo; released in op->done. */
	op->base.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	op->base.mask.bo = NULL;

	op->base.need_magic_ca_pass = false;
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen7.flags = FILL_FLAGS_NOBLEND;

	/* Make room in the batch; release the solid bo on failure. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, op->base.src.bo);
			return false;
		}

		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &op->base);
	gen7_emit_fill_state(sna, &op->base);

	op->blt   = gen7_render_fill_op_blt;
	op->box   = gen7_render_fill_op_box;
	op->boxes = gen7_render_fill_op_boxes;
	op->points = NULL;
	op->done  = gen7_render_fill_op_done;
	return true;
}
3574
3575static bool
3576gen7_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3577			     uint32_t color,
3578			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
3579			     uint8_t alu)
3580{
3581	BoxRec box;
3582
3583	box.x1 = x1;
3584	box.y1 = y1;
3585	box.x2 = x2;
3586	box.y2 = y2;
3587
3588	return sna_blt_fill_boxes(sna, alu,
3589				  bo, dst->drawable.bitsPerPixel,
3590				  color, &box, 1);
3591}
3592
/* Fill a single rectangle [x1,x2)x[y1,y2) on dst with the given colour.
 * Prefers the BLT engine, otherwise emits one rectangle through the
 * RENDER pipeline.  Returns false only if no engine could take it.
 */
static bool
gen7_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;
	int16_t *v;

	/* Prefer to use the BLT if already engaged */
	if (prefer_blt_fill(sna, bo, FILL_BOXES) &&
	    gen7_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return gen7_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	if (alu == GXclear)
		color = 0;

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Source is a cached 1x1 solid-colour bo; destroyed below. */
	tmp.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen7.flags = FILL_FLAGS_NOBLEND;

	/* Make room in the batch; release the solid bo on failure. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_fill_state(sna, &tmp);

	gen7_get_rectangles(sna, &tmp, 1, gen7_emit_fill_state);

	DBG(("	(%d, %d), (%d, %d)\n", x1, y1, x2, y2));

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* Corner positions + texcoords for the 1x1 solid source. */
	v[0] = x2;
	v[8] = v[4] = x1;
	v[5] = v[1] = y2;
	v[9] = y1;
	v[7] = v[2]  = v[3]  = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
3670
3671static bool
3672gen7_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
3673{
3674	BoxRec box;
3675
3676	box.x1 = 0;
3677	box.y1 = 0;
3678	box.x2 = dst->drawable.width;
3679	box.y2 = dst->drawable.height;
3680
3681	return sna_blt_fill_boxes(sna, GXclear,
3682				  bo, dst->drawable.bitsPerPixel,
3683				  0, &box, 1);
3684}
3685
/* Clear the whole of dst to zero.  Prefers the BLT engine when already
 * active; otherwise renders one full-surface rectangle from the cached
 * solid-black source.
 */
static bool
gen7_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	struct sna_composite_op tmp;
	int16_t *v;

	DBG(("%s: %dx%d\n",
	     __FUNCTION__,
	     dst->drawable.width,
	     dst->drawable.height));

	/* Prefer to use the BLT if already engaged */
	if (sna->kgem.mode == KGEM_BLT &&
	    gen7_render_clear_try_blt(sna, dst, bo))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height))
		return gen7_render_clear_try_blt(sna, dst, bo);

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Cached 1x1 solid bo for pixel value 0; destroyed below. */
	tmp.src.bo = sna_render_get_solid(sna, 0);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen7.flags = FILL_FLAGS_NOBLEND;

	/* Make room in the batch; release the solid bo on failure. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_align_vertex(sna, &tmp);
	gen7_emit_fill_state(sna, &tmp);

	gen7_get_rectangles(sna, &tmp, 1, gen7_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* One rectangle covering the full surface. */
	v[0] = dst->drawable.width;
	v[5] = v[1] = dst->drawable.height;
	v[8] = v[4] = 0;
	v[9] = 0;

	v[7] = v[2]  = v[3]  = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
/* Reset all cached gen7 render state (called on batch boundaries) so the
 * next operation re-emits invariants and per-unit state from scratch.
 * The -1 sentinels mark "unknown", forcing re-emission on first use.
 */
static void gen7_render_reset(struct sna *sna)
{
	sna->render_state.gen7.pipe_controls_since_stall = 0;
	sna->render_state.gen7.emit_flush = false;
	sna->render_state.gen7.needs_invariant = true;
	sna->render_state.gen7.ve_id = 3 << 2;
	sna->render_state.gen7.last_primitive = -1;

	sna->render_state.gen7.num_sf_outputs = 0;
	sna->render_state.gen7.samplers = -1;
	sna->render_state.gen7.blend = -1;
	sna->render_state.gen7.kernel = -1;
	sna->render_state.gen7.drawrect_offset = -1;
	sna->render_state.gen7.drawrect_limit = -1;
	sna->render_state.gen7.surface_table = 0;

	/* A vbo we can no longer map is useless; drop it now. */
	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}
3779
/* Tear down the gen7 render backend: release the static-state bo that
 * holds the kernels, samplers and blend states built in setup.
 */
static void gen7_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
}
3784
3785static bool is_gt3(struct sna *sna, int devid)
3786{
3787	assert(sna->kgem.gen == 075);
3788	return devid & 0x20;
3789}
3790
/* GT2 detection: Haswell uses a different devid bitmask than Ivybridge. */
static bool is_gt2(struct sna *sna, int devid)
{
	int mask = is_hsw(sna) ? 0x30 : 0x20;
	return (devid & mask) != 0;
}
3795
/* Mobile variants carry 0x6 in the low nibble of the device id. */
static bool is_mobile(struct sna *sna, int devid)
{
	int variant = devid & 0xf;
	return variant == 0x6;
}
3800
/* One-time setup of the gen7 render backend: identify the GT variant,
 * then build a static stream containing the WM kernels, sampler states
 * and blend states, finishing it into a single long-lived bo.
 * Returns false if the chipset is unrecognised or the bo allocation
 * failed.
 */
static bool gen7_render_setup(struct sna *sna, int devid)
{
	struct gen7_render_state *state = &sna->render_state.gen7;
	struct sna_static_stream general;
	struct gen7_sampler_state *ss;
	int i, j, k, l, m;

	/* Select per-GT limits/info from the device id. */
	if (is_ivb(sna)) {
		state->info = &ivb_gt_info;
		if (devid & 0xf) {
			state->info = &ivb_gt1_info;
			if (is_gt2(sna, devid))
				state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
		}
	} else if (is_byt(sna)) {
		state->info = &byt_gt_info;
	} else if (is_hsw(sna)) {
		state->info = &hsw_gt_info;
		if (devid & 0xf) {
			if (is_gt3(sna, devid))
				state->info = &hsw_gt3_info;
			else if (is_gt2(sna, devid))
				state->info = &hsw_gt2_info;
			else
				state->info = &hsw_gt1_info;
		}
	} else
		return false;

	state->gt = state->info->gt;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);

	/* Add each WM kernel: pre-built blobs go in directly; the rest are
	 * compiled for the enabled dispatch widths (8/16/32 pixels).
	 */
	for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			state->wm_kernel[m][1] =
				sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			if (USE_8_PIXEL_DISPATCH) {
				state->wm_kernel[m][0] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 8);
			}

			if (USE_16_PIXEL_DISPATCH) {
				state->wm_kernel[m][1] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 16);
			}

			if (USE_32_PIXEL_DISPATCH) {
				state->wm_kernel[m][2] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 32);
			}
		}
		/* At least one dispatch width must have been produced. */
		assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
	}

	/* Sampler table: a copy pair, a fill pair, then one pair for every
	 * (filter, extend) combination of source and mask.
	 */
	ss = sna_static_stream_map(&general,
				   2 * sizeof(*ss) *
				   (2 +
				    FILTER_COUNT * EXTEND_COUNT *
				    FILTER_COUNT * EXTEND_COUNT),
				   32);
	state->wm_state = sna_static_stream_offsetof(&general, ss);
	sampler_copy_init(ss); ss += 2;
	sampler_fill_init(ss); ss += 2;
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					sampler_state_init(ss++, i, j);
					sampler_state_init(ss++, k, l);
				}
			}
		}
	}

	state->cc_blend = gen7_composite_create_blend_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}
3893
/* Entry point: initialise the gen7 render backend, wiring the render
 * vtable into sna.  Returns the backend name on success, or the caller's
 * fallback name unchanged if setup failed.  The NO_* guards allow
 * compile-time disabling of individual paths for debugging.
 */
const char *gen7_render_init(struct sna *sna, const char *backend)
{
	int devid = intel_get_device_id(sna->dev);

	if (!gen7_render_setup(sna, devid))
		return backend;

	/* Shared with gen6/gen4: batch lifecycle callbacks. */
	sna->kgem.context_switch = gen6_render_context_switch;
	sna->kgem.retire = gen6_render_retire;
	sna->kgem.expire = gen4_render_expire;

#if !NO_COMPOSITE
	sna->render.composite = gen7_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen7_check_composite_spans;
	sna->render.composite_spans = gen7_render_composite_spans;
	/* Spans on the GPU only pay off on these variants. */
	if (is_mobile(sna, devid) || is_gt2(sna, devid) || is_byt(sna))
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
	sna->render.video = gen7_render_video;

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen7_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen7_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen7_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen7_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen7_render_fill_one;
#endif
#if !NO_FILL_CLEAR
	sna->render.clear = gen7_render_clear;
#endif

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen7_render_reset;
	sna->render.fini = gen7_render_fini;

	sna->render.max_3d_size = GEN7_MAX_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->render_state.gen7.info->name;
}
3945