gen6_render.c revision 03b705cf
1/*
2 * Copyright © 2006,2008,2011 Intel Corporation
3 * Copyright © 2007 Red Hat, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 *    Wang Zhenyu <zhenyu.z.wang@sna.com>
26 *    Eric Anholt <eric@anholt.net>
27 *    Carl Worth <cworth@redhat.com>
28 *    Keith Packard <keithp@keithp.com>
29 *    Chris Wilson <chris@chris-wilson.co.uk>
30 *
31 */
32
33#ifdef HAVE_CONFIG_H
34#include "config.h"
35#endif
36
37#include "sna.h"
38#include "sna_reg.h"
39#include "sna_render.h"
40#include "sna_render_inline.h"
41#include "sna_video.h"
42
43#include "brw/brw.h"
44#include "gen6_render.h"
45#include "gen4_source.h"
46#include "gen4_vertex.h"
47
48#define NO_COMPOSITE 0
49#define NO_COMPOSITE_SPANS 0
50#define NO_COPY 0
51#define NO_COPY_BOXES 0
52#define NO_FILL 0
53#define NO_FILL_BOXES 0
54#define NO_FILL_ONE 0
55#define NO_FILL_CLEAR 0
56
57#define NO_RING_SWITCH 0
58#define PREFER_RENDER 0
59
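/* Select which pixel-shader dispatch widths may be enabled.  gen6_emit_wm()
 * sets the corresponding GEN6_3DSTATE_WM_{8,16,32}_DISPATCH_ENABLE bit for
 * each kernel variant that is actually present.
 */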
60#define USE_8_PIXEL_DISPATCH 1
61#define USE_16_PIXEL_DISPATCH 1
62#define USE_32_PIXEL_DISPATCH 0
63
64#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
65#error "Must select at least 8, 16 or 32 pixel dispatch"
66#endif
67
68#define GEN6_MAX_SIZE 8192
69
70struct gt_info {
71	const char *name;
72	int max_vs_threads;
73	int max_gs_threads;
74	int max_wm_threads;
75	struct {
76		int size;
77		int max_vs_entries;
78		int max_gs_entries;
79	} urb;
80};
81
82static const struct gt_info gt1_info = {
83	.name = "Sandybridge (gen6, gt1)",
84	.max_vs_threads = 24,
85	.max_gs_threads = 21,
86	.max_wm_threads = 40,
87	.urb = { 32, 256, 256 },
88};
89
90static const struct gt_info gt2_info = {
91	.name = "Sandybridge (gen6, gt2)",
92	.max_vs_threads = 60,
93	.max_gs_threads = 60,
94	.max_wm_threads = 80,
95	.urb = { 64, 256, 256 },
96};
97
98static const uint32_t ps_kernel_packed[][4] = {
99#include "exa_wm_src_affine.g6b"
100#include "exa_wm_src_sample_argb.g6b"
101#include "exa_wm_yuv_rgb.g6b"
102#include "exa_wm_write.g6b"
103};
104
105static const uint32_t ps_kernel_planar[][4] = {
106#include "exa_wm_src_affine.g6b"
107#include "exa_wm_src_sample_planar.g6b"
108#include "exa_wm_yuv_rgb.g6b"
109#include "exa_wm_write.g6b"
110};
111
112#define NOKERNEL(kernel_enum, func, ns) \
113    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns}
114#define KERNEL(kernel_enum, kernel, ns) \
115    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns}
116
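/* Table of pixel-shader kernels.  NOKERNEL entries (size == 0) reference a
 * brw_wm_kernel__* generator from brw/brw.h and are presumably assembled
 * when the render state is first set up; KERNEL entries embed the pre-built
 * .g6b binaries included above for the video kernels.
 */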
117static const struct wm_kernel_info {
118	const char *name;
119	const void *data;
120	unsigned int size;
121	unsigned int num_surfaces;
122} wm_kernels[] = {
123	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
124	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),
125
126	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
127	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),
128
129	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
130	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),
131
132	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
133	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),
134
135	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
136	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),
137
138	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
139	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
140};
141#undef KERNEL
142
143static const struct blendinfo {
144	bool src_alpha;
145	uint32_t src_blend;
146	uint32_t dst_blend;
147} gen6_blend_op[] = {
148	/* Clear */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO},
149	/* Src */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO},
150	/* Dst */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE},
151	/* Over */	{1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
152	/* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE},
153	/* In */	{0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
154	/* InReverse */	{1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA},
155	/* Out */	{0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
156	/* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
157	/* Atop */	{1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
158	/* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA},
159	/* Xor */	{1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
160	/* Add */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE},
161};
162
163/**
164 * Highest-valued BLENDFACTOR used in gen6_blend_op.
165 *
166 * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR,
167 * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
168 * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
169 */
170#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1)
171
172#define GEN6_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen6_blend_state), 64)
173
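/* The blend state is a precomputed GEN6_BLENDFACTOR_COUNT x
 * GEN6_BLENDFACTOR_COUNT table of 64-byte padded entries (see
 * gen6_composite_create_blend_state()); BLEND_OFFSET() yields the byte
 * offset of the entry for a given (src, dst) factor pair and doubles as
 * the blend handle packed into the per-op flags below.
 */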
174#define BLEND_OFFSET(s, d) \
175	(((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE)
176
177#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO)
178#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO)
179
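/* Samplers are stored as (source, mask) pairs.  SAMPLER_OFFSET() indexes
 * the pair for a source/mask filter+extend combination; the "+ 2" skips
 * the two fixed pairs at the head of the table reserved for copy
 * (COPY_SAMPLER) and fill (FILL_SAMPLER) below.
 */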
180#define SAMPLER_OFFSET(sf, se, mf, me) \
181	(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state))
182
183#define VERTEX_2s2s 0
184
185#define COPY_SAMPLER 0
186#define COPY_VERTEX VERTEX_2s2s
187#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX)
188
189#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state))
190#define FILL_VERTEX VERTEX_2s2s
191#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
192#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
193
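/* An operation's u.gen6.flags packs four fields into one dword:
 *   bits 16..31: sampler-pair offset | WM kernel id
 *   bits  0..15: blend-state offset  | vertex-element id
 * The strides (2 * sizeof(struct gen6_sampler_state) for samplers, 64 bytes
 * for blend states) leave the low four bits of each offset clear to carry
 * the 4-bit ids, e.g. GEN6_KERNEL(GEN6_SET_FLAGS(S, B, K, V)) == K.
 */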
194#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0)
195#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0)
196#define GEN6_KERNEL(f) (((f) >> 16) & 0xf)
197#define GEN6_VERTEX(f) (((f) >> 0) & 0xf)
198#define GEN6_SET_FLAGS(S, B, K, V)  (((S) | (K)) << 16 | ((B) | (V)))
199
200#define OUT_BATCH(v) batch_emit(sna, v)
201#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
202#define OUT_VERTEX_F(v) vertex_emit(sna, v)
203
204static inline bool too_large(int width, int height)
205{
206	return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE;
207}
208
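/* Map a Render operator onto a blend-state offset.  For example,
 * gen6_get_blend(PictOpOver, false, PICT_x8r8g8b8) leaves the factors as
 * ONE/INV_SRC_ALPHA (no destination-alpha factor needs patching) and
 * returns BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA).
 */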
209static uint32_t gen6_get_blend(int op,
210			       bool has_component_alpha,
211			       uint32_t dst_format)
212{
213	uint32_t src, dst;
214
215	src = gen6_blend_op[op].src_blend;
216	dst = gen6_blend_op[op].dst_blend;
217
218	/* If there's no dst alpha channel, adjust the blend op so that
219	 * we treat it as always being 1.
220	 */
221	if (PICT_FORMAT_A(dst_format) == 0) {
222		if (src == GEN6_BLENDFACTOR_DST_ALPHA)
223			src = GEN6_BLENDFACTOR_ONE;
224		else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA)
225			src = GEN6_BLENDFACTOR_ZERO;
226	}
227
228	/* With component alpha, the shader outputs the mask channels
229	 * multiplied by the source picture's alpha, so destination factors
230	 * referencing source alpha must use their per-channel (COLOR) forms.
231	 */
232	if (has_component_alpha && gen6_blend_op[op].src_alpha) {
233		if (dst == GEN6_BLENDFACTOR_SRC_ALPHA)
234			dst = GEN6_BLENDFACTOR_SRC_COLOR;
235		else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA)
236			dst = GEN6_BLENDFACTOR_INV_SRC_COLOR;
237	}
238
239	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
240	     op, dst_format, PICT_FORMAT_A(dst_format),
241	     src, dst, (int)BLEND_OFFSET(src, dst)));
242	return BLEND_OFFSET(src, dst);
243}
244
245static uint32_t gen6_get_card_format(PictFormat format)
246{
247	switch (format) {
248	default:
249		return -1;
250	case PICT_a8r8g8b8:
251		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
252	case PICT_x8r8g8b8:
253		return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
254	case PICT_a8b8g8r8:
255		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
256	case PICT_x8b8g8r8:
257		return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM;
258	case PICT_a2r10g10b10:
259		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
260	case PICT_x2r10g10b10:
261		return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM;
262	case PICT_r8g8b8:
263		return GEN6_SURFACEFORMAT_R8G8B8_UNORM;
264	case PICT_r5g6b5:
265		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
266	case PICT_a1r5g5b5:
267		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
268	case PICT_a8:
269		return GEN6_SURFACEFORMAT_A8_UNORM;
270	case PICT_a4r4g4b4:
271		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
272	}
273}
274
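/* Render-target formats deliberately use the alpha-carrying variant even
 * for x-channel pictures (e.g. x8r8g8b8 is bound as B8G8R8A8); writes to
 * the unused channel are harmless and destination-alpha blend factors are
 * corrected in gen6_get_blend() instead.
 */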
275static uint32_t gen6_get_dest_format(PictFormat format)
276{
277	switch (format) {
278	default:
279		return -1;
280	case PICT_a8r8g8b8:
281	case PICT_x8r8g8b8:
282		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
283	case PICT_a8b8g8r8:
284	case PICT_x8b8g8r8:
285		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
286	case PICT_a2r10g10b10:
287	case PICT_x2r10g10b10:
288		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
289	case PICT_r5g6b5:
290		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
291	case PICT_x1r5g5b5:
292	case PICT_a1r5g5b5:
293		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
294	case PICT_a8:
295		return GEN6_SURFACEFORMAT_A8_UNORM;
296	case PICT_a4r4g4b4:
297	case PICT_x4r4g4b4:
298		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
299	}
300}
301
302static bool gen6_check_dst_format(PictFormat format)
303{
304	if (gen6_get_dest_format(format) != -1)
305		return true;
306
307	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
308	return false;
309}
310
311static bool gen6_check_format(uint32_t format)
312{
313	if (gen6_get_card_format(format) != -1)
314		return true;
315
316	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
317	return false;
318}
319
320static uint32_t gen6_filter(uint32_t filter)
321{
322	switch (filter) {
323	default:
324		assert(0);
325	case PictFilterNearest:
326		return SAMPLER_FILTER_NEAREST;
327	case PictFilterBilinear:
328		return SAMPLER_FILTER_BILINEAR;
329	}
330}
331
332static uint32_t gen6_check_filter(PicturePtr picture)
333{
334	switch (picture->filter) {
335	case PictFilterNearest:
336	case PictFilterBilinear:
337		return true;
338	default:
339		return false;
340	}
341}
342
343static uint32_t gen6_repeat(uint32_t repeat)
344{
345	switch (repeat) {
346	default:
347		assert(0);
348	case RepeatNone:
349		return SAMPLER_EXTEND_NONE;
350	case RepeatNormal:
351		return SAMPLER_EXTEND_REPEAT;
352	case RepeatPad:
353		return SAMPLER_EXTEND_PAD;
354	case RepeatReflect:
355		return SAMPLER_EXTEND_REFLECT;
356	}
357}
358
359static bool gen6_check_repeat(PicturePtr picture)
360{
361	if (!picture->repeat)
362		return true;
363
364	switch (picture->repeatType) {
365	case RepeatNone:
366	case RepeatNormal:
367	case RepeatPad:
368	case RepeatReflect:
369		return true;
370	default:
371		return false;
372	}
373}
374
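/* The WM kernel enum is laid out in affine/projective pairs
 * (NOMASK/NOMASK_P, MASK/MASK_P, ...), so "base + !is_affine" below picks
 * the projective variant when required.
 */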
375static int
376gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
377{
378	int base;
379
380	if (has_mask) {
381		if (is_ca) {
382			if (gen6_blend_op[op].src_alpha)
383				base = GEN6_WM_KERNEL_MASKSA;
384			else
385				base = GEN6_WM_KERNEL_MASKCA;
386		} else
387			base = GEN6_WM_KERNEL_MASK;
388	} else
389		base = GEN6_WM_KERNEL_NOMASK;
390
391	return base + !is_affine;
392}
393
394static void
395gen6_emit_urb(struct sna *sna)
396{
397	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
398	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
399		  (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
400	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
401		  (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
402}
403
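/* Note: the surface-state base is relocated against a NULL target, which
 * kgem treats as the batch buffer itself, so binding tables and surface
 * states written at the top of the batch (sna->kgem.surface) are addressed
 * by their batch offsets.  The remaining relocated bases point at
 * general_bo, the static stream of precomputed render state.
 */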
404static void
405gen6_emit_state_base_address(struct sna *sna)
406{
407	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
408	OUT_BATCH(0); /* general */
409	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
410				 sna->kgem.nbatch,
411				 NULL,
412				 I915_GEM_DOMAIN_INSTRUCTION << 16,
413				 BASE_ADDRESS_MODIFY));
414	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
415				 sna->kgem.nbatch,
416				 sna->render_state.gen6.general_bo,
417				 I915_GEM_DOMAIN_INSTRUCTION << 16,
418				 BASE_ADDRESS_MODIFY));
419	OUT_BATCH(0); /* indirect */
420	OUT_BATCH(kgem_add_reloc(&sna->kgem,
421				 sna->kgem.nbatch,
422				 sna->render_state.gen6.general_bo,
423				 I915_GEM_DOMAIN_INSTRUCTION << 16,
424				 BASE_ADDRESS_MODIFY));
425
426	/* upper bounds, disable */
427	OUT_BATCH(0);
428	OUT_BATCH(BASE_ADDRESS_MODIFY);
429	OUT_BATCH(0);
430	OUT_BATCH(BASE_ADDRESS_MODIFY);
431}
432
433static void
434gen6_emit_viewports(struct sna *sna)
435{
436	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
437		  GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
438		  (4 - 2));
439	OUT_BATCH(0);
440	OUT_BATCH(0);
441	OUT_BATCH(0);
442}
443
444static void
445gen6_emit_vs(struct sna *sna)
446{
447	/* disable VS constant buffer */
448	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
449	OUT_BATCH(0);
450	OUT_BATCH(0);
451	OUT_BATCH(0);
452	OUT_BATCH(0);
453
454	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
455	OUT_BATCH(0); /* no VS kernel */
456	OUT_BATCH(0);
457	OUT_BATCH(0);
458	OUT_BATCH(0);
459	OUT_BATCH(0); /* pass-through */
460}
461
462static void
463gen6_emit_gs(struct sna *sna)
464{
465	/* disable GS constant buffer */
466	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
467	OUT_BATCH(0);
468	OUT_BATCH(0);
469	OUT_BATCH(0);
470	OUT_BATCH(0);
471
472	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
473	OUT_BATCH(0); /* no GS kernel */
474	OUT_BATCH(0);
475	OUT_BATCH(0);
476	OUT_BATCH(0);
477	OUT_BATCH(0);
478	OUT_BATCH(0); /* pass-through */
479}
480
481static void
482gen6_emit_clip(struct sna *sna)
483{
484	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
485	OUT_BATCH(0);
486	OUT_BATCH(0); /* pass-through */
487	OUT_BATCH(0);
488}
489
490static void
491gen6_emit_wm_constants(struct sna *sna)
492{
493	/* disable WM constant buffer */
494	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
495	OUT_BATCH(0);
496	OUT_BATCH(0);
497	OUT_BATCH(0);
498	OUT_BATCH(0);
499}
500
501static void
502gen6_emit_null_depth_buffer(struct sna *sna)
503{
504	OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2));
505	OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
506		  GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
507	OUT_BATCH(0);
508	OUT_BATCH(0);
509	OUT_BATCH(0);
510	OUT_BATCH(0);
511	OUT_BATCH(0);
512
513	OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2));
514	OUT_BATCH(0);
515}
516
517static void
518gen6_emit_invariant(struct sna *sna)
519{
520	OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D);
521
522	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
523	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
524		  GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
525	OUT_BATCH(0);
526
527	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
528	OUT_BATCH(1);
529
530	gen6_emit_urb(sna);
531
532	gen6_emit_state_base_address(sna);
533
534	gen6_emit_viewports(sna);
535	gen6_emit_vs(sna);
536	gen6_emit_gs(sna);
537	gen6_emit_clip(sna);
538	gen6_emit_wm_constants(sna);
539	gen6_emit_null_depth_buffer(sna);
540
541	sna->render_state.gen6.needs_invariant = false;
542}
543
544static bool
545gen6_emit_cc(struct sna *sna, int blend)
546{
547	struct gen6_render_state *render = &sna->render_state.gen6;
548
549	if (render->blend == blend)
550		return blend != NO_BLEND;
551
552	DBG(("%s: blend = %x\n", __FUNCTION__, blend));
553
554	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
555	OUT_BATCH((render->cc_blend + blend) | 1);
556	if (render->blend == (unsigned)-1) {
557		OUT_BATCH(1);
558		OUT_BATCH(1);
559	} else {
560		OUT_BATCH(0);
561		OUT_BATCH(0);
562	}
563
564	render->blend = blend;
565	return blend != NO_BLEND;
566}
567
568static void
569gen6_emit_sampler(struct sna *sna, uint32_t state)
570{
571	if (sna->render_state.gen6.samplers == state)
572		return;
573
574	sna->render_state.gen6.samplers = state;
575
576	DBG(("%s: sampler = %x\n", __FUNCTION__, state));
577
578	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
579		  GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
580		  (4 - 2));
581	OUT_BATCH(0); /* VS */
582	OUT_BATCH(0); /* GS */
583	OUT_BATCH(sna->render_state.gen6.wm_state + state);
584}
585
586static void
587gen6_emit_sf(struct sna *sna, bool has_mask)
588{
589	int num_sf_outputs = has_mask ? 2 : 1;
590
591	if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs)
592		return;
593
594	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
595	     __FUNCTION__, num_sf_outputs, 1, 0));
596
597	sna->render_state.gen6.num_sf_outputs = num_sf_outputs;
598
599	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
600	OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT |
601		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT |
602		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT);
603	OUT_BATCH(0);
604	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
605	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
606	OUT_BATCH(0);
607	OUT_BATCH(0);
608	OUT_BATCH(0);
609	OUT_BATCH(0);
610	OUT_BATCH(0); /* DW9 */
611	OUT_BATCH(0);
612	OUT_BATCH(0);
613	OUT_BATCH(0);
614	OUT_BATCH(0);
615	OUT_BATCH(0); /* DW14 */
616	OUT_BATCH(0);
617	OUT_BATCH(0);
618	OUT_BATCH(0);
619	OUT_BATCH(0);
620	OUT_BATCH(0); /* DW19 */
621}
622
623static void
624gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask)
625{
626	const uint32_t *kernels;
627
628	if (sna->render_state.gen6.kernel == kernel)
629		return;
630
631	sna->render_state.gen6.kernel = kernel;
632	kernels = sna->render_state.gen6.wm_kernel[kernel];
633
634	DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d, 32-pixel? %d)\n",
635	     __FUNCTION__,
636	     wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces,
637	    kernels[0], kernels[1], kernels[2]));
638
639	OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
640	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
641	OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
642		  wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
643	OUT_BATCH(0); /* scratch space */
644	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT |
645		  8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT |
646		  6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT);
647	OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
648		  (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) |
649		  (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) |
650		  (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) |
651		  GEN6_3DSTATE_WM_DISPATCH_ENABLE);
652	OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
653		  GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
654	OUT_BATCH(kernels[2]);
655	OUT_BATCH(kernels[1]);
656}
657
658static bool
659gen6_emit_binding_table(struct sna *sna, uint16_t offset)
660{
661	if (sna->render_state.gen6.surface_table == offset)
662		return false;
663
664	/* Binding table pointers */
665	OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS |
666		  GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
667		  (4 - 2));
668	OUT_BATCH(0);		/* vs */
669	OUT_BATCH(0);		/* gs */
670	/* Only the PS uses the binding table */
671	OUT_BATCH(offset*4);
672
673	sna->render_state.gen6.surface_table = offset;
674	return true;
675}
676
677static bool
678gen6_emit_drawing_rectangle(struct sna *sna,
679			    const struct sna_composite_op *op)
680{
681	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
682	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
683
684	assert(!too_large(op->dst.x, op->dst.y));
685	assert(!too_large(op->dst.width, op->dst.height));
686
687	if (sna->render_state.gen6.drawrect_limit  == limit &&
688	    sna->render_state.gen6.drawrect_offset == offset)
689		return false;
690
691	/* [DevSNB-C+{W/A}] Before any depth stall flush (including those
692	 * produced by non-pipelined state commands), software needs to first
693	 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
694	 * 0.
695	 *
696	 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
697	 * BEFORE the pipe-control with a post-sync op and no write-cache
698	 * flushes.
699	 */
700	if (!sna->render_state.gen6.first_state_packet) {
701		OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
702		OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
703			  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
704		OUT_BATCH(0);
705		OUT_BATCH(0);
706	}
707
708	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
709	OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME);
710	OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
711				 sna->render_state.gen6.general_bo,
712				 I915_GEM_DOMAIN_INSTRUCTION << 16 |
713				 I915_GEM_DOMAIN_INSTRUCTION,
714				 64));
715	OUT_BATCH(0);
716
717	OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
718	OUT_BATCH(0);
719	OUT_BATCH(limit);
720	OUT_BATCH(offset);
721
722	sna->render_state.gen6.drawrect_offset = offset;
723	sna->render_state.gen6.drawrect_limit = limit;
724	return true;
725}
726
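/* The vertex-element id (GEN6_VERTEX(flags)) encodes the layout: bits 1:0
 * select the source texture-coordinate format (0 = a second pair of 16-bit
 * ints, the VERTEX_2s2s layout used by copy/fill; 1..3 = that many floats)
 * and the bits above do the same for the optional mask coordinate, so a
 * non-zero id >> 2 implies a mask channel.
 */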
727static void
728gen6_emit_vertex_elements(struct sna *sna,
729			  const struct sna_composite_op *op)
730{
731	/*
732	 * vertex data in vertex buffer
733	 *    position: (x, y)
734	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
735	 *    texture coordinate 1 if (has_mask is true): same as above
736	 */
737	struct gen6_render_state *render = &sna->render_state.gen6;
738	uint32_t src_format, dw;
739	int id = GEN6_VERTEX(op->u.gen6.flags);
740	bool has_mask;
741
742	DBG(("%s: setup id=%d\n", __FUNCTION__, id));
743
744	if (render->ve_id == id)
745		return;
746	render->ve_id = id;
747
748	/* The VUE layout
749	 *    dword 0-3: pad (0.0, 0.0, 0.0, 0.0)
750	 *    dword 4-7: position (x, y, 1.0, 1.0),
751	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
752	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
753	 *
754	 * dword 4-15 are fetched from vertex buffer
755	 */
756	has_mask = (id >> 2) != 0;
757	OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
758		((2 * (3 + has_mask)) + 1 - 2));
759
760	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
761		  GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
762		  0 << VE0_OFFSET_SHIFT);
763	OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
764		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
765		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
766		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);
767
768	/* x,y */
769	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
770		  GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
771		  0 << VE0_OFFSET_SHIFT);
772	OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
773		  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
774		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
775		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
776
777	/* u0, v0, w0 */
778	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
779	dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
780	switch (id & 3) {
781	default:
782		assert(0);
783	case 0:
784		src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED;
785		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
786		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
787		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
788		break;
789	case 1:
790		src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
791		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
792		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
793		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
794		break;
795	case 2:
796		src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
797		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
798		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
799		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
800		break;
801	case 3:
802		src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
803		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
804		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
805		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
806		break;
807	}
808	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
809		  src_format << VE0_FORMAT_SHIFT |
810		  4 << VE0_OFFSET_SHIFT);
811	OUT_BATCH(dw);
812
813	/* u1, v1, w1 */
814	if (has_mask) {
815		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
816		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
817		dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
818		switch (id >> 2) {
819		case 1:
820			src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
821			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
822			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
823			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
824			break;
825		default:
826			assert(0);
827		case 2:
828			src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
829			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
830			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
831			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
832			break;
833		case 3:
834			src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
835			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
836			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
837			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
838			break;
839		}
840		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
841			  src_format << VE0_FORMAT_SHIFT |
842			  offset << VE0_OFFSET_SHIFT);
843		OUT_BATCH(dw);
844	}
845}
846
847static void
848gen6_emit_flush(struct sna *sna)
849{
850	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
851	OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH |
852		  GEN6_PIPE_CONTROL_TC_FLUSH |
853		  GEN6_PIPE_CONTROL_CS_STALL);
854	OUT_BATCH(0);
855	OUT_BATCH(0);
856}
857
858static void
859gen6_emit_state(struct sna *sna,
860		const struct sna_composite_op *op,
861		uint16_t wm_binding_table)
862{
863	bool need_stall = wm_binding_table & 1;
864
865	assert(op->dst.bo->exec);
866
867	if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)))
868		need_stall = false;
869	gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
870	gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
871	gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
872	gen6_emit_vertex_elements(sna, op);
873
874	need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1);
875	if (gen6_emit_drawing_rectangle(sna, op))
876		need_stall = false;
877	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
878		gen6_emit_flush(sna);
879		kgem_clear_dirty(&sna->kgem);
880		assert(op->dst.bo->exec);
881		kgem_bo_mark_dirty(op->dst.bo);
882		need_stall = false;
883	}
884	if (need_stall) {
885		OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
886		OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
887			  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
888		OUT_BATCH(0);
889		OUT_BATCH(0);
890	}
891	sna->render_state.gen6.first_state_packet = false;
892}
893
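/* Second half of the two-pass component-alpha fixup: the rectangles just
 * queued are replayed with an additive blend and the per-channel (CA)
 * kernel, completing operators that need both component alpha and source
 * alpha, which a single fixed-function blend cannot express.
 */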
894static bool gen6_magic_ca_pass(struct sna *sna,
895			       const struct sna_composite_op *op)
896{
897	struct gen6_render_state *state = &sna->render_state.gen6;
898
899	if (!op->need_magic_ca_pass)
900		return false;
901
902	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
903	     sna->render.vertex_start, sna->render.vertex_index));
904
905	gen6_emit_flush(sna);
906
907	gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format));
908	gen6_emit_wm(sna,
909		     gen6_choose_composite_kernel(PictOpAdd,
910						  true, true,
911						  op->is_affine),
912		     true);
913
914	OUT_BATCH(GEN6_3DPRIMITIVE |
915		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
916		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
917		  0 << 9 |
918		  4);
919	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
920	OUT_BATCH(sna->render.vertex_start);
921	OUT_BATCH(1);	/* single instance */
922	OUT_BATCH(0);	/* start instance location */
923	OUT_BATCH(0);	/* index buffer offset, ignored */
924
925	state->last_primitive = sna->kgem.nbatch;
926	return true;
927}
928
929typedef struct gen6_surface_state_padded {
930	struct gen6_surface_state state;
931	char pad[32 - sizeof(struct gen6_surface_state)];
932} gen6_surface_state_padded;
933
934static void null_create(struct sna_static_stream *stream)
935{
936	/* A bunch of zeros useful for legacy border color and depth-stencil */
937	sna_static_stream_map(stream, 64, 64);
938}
939
940static void scratch_create(struct sna_static_stream *stream)
941{
942	/* 64 bytes of scratch space for random writes, such as
943	 * the pipe-control w/a.
944	 */
945	sna_static_stream_map(stream, 64, 64);
946}
947
948static void
949sampler_state_init(struct gen6_sampler_state *sampler_state,
950		   sampler_filter_t filter,
951		   sampler_extend_t extend)
952{
953	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
954
955	/* We use the legacy mode to get the semantics specified by
956	 * the Render extension. */
957	sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;
958
959	switch (filter) {
960	default:
961	case SAMPLER_FILTER_NEAREST:
962		sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST;
963		sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
964		break;
965	case SAMPLER_FILTER_BILINEAR:
966		sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR;
967		sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR;
968		break;
969	}
970
971	switch (extend) {
972	default:
973	case SAMPLER_EXTEND_NONE:
974		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
975		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
976		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
977		break;
978	case SAMPLER_EXTEND_REPEAT:
979		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
980		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
981		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
982		break;
983	case SAMPLER_EXTEND_PAD:
984		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
985		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
986		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
987		break;
988	case SAMPLER_EXTEND_REFLECT:
989		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
990		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
991		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
992		break;
993	}
994}
995
996static void
997sampler_copy_init(struct gen6_sampler_state *ss)
998{
999	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1000	ss->ss3.non_normalized_coord = 1;
1001
1002	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1003}
1004
1005static void
1006sampler_fill_init(struct gen6_sampler_state *ss)
1007{
1008	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
1009	ss->ss3.non_normalized_coord = 1;
1010
1011	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1012}
1013
1014static uint32_t
1015gen6_tiling_bits(uint32_t tiling)
1016{
1017	switch (tiling) {
1018	default: assert(0);
1019	case I915_TILING_NONE: return 0;
1020	case I915_TILING_X: return GEN6_SURFACE_TILED;
1021	case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y;
1022	}
1023}
1024
1025/**
1026 * Sets up the common fields for a surface state buffer for the given
1027 * picture in the given surface state buffer.
1028 */
1029static int
1030gen6_bind_bo(struct sna *sna,
1031	     struct kgem_bo *bo,
1032	     uint32_t width,
1033	     uint32_t height,
1034	     uint32_t format,
1035	     bool is_dst)
1036{
1037	uint32_t *ss;
1038	uint32_t domains;
1039	uint16_t offset;
1040	uint32_t is_scanout = is_dst && bo->scanout;
1041
1042	/* After the first bind, we manage the cache domains within the batch */
1043	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
1044	if (offset) {
1045		DBG(("[%x]  bo(handle=%d), format=%d, reuse %s binding\n",
1046		     offset, bo->handle, format,
1047		     is_dst ? "render" : "sampler"));
1048		if (is_dst)
1049			kgem_bo_mark_dirty(bo);
1050		return offset * sizeof(uint32_t);
1051	}
1052
1053	offset = sna->kgem.surface -=
1054		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
1055	ss = sna->kgem.batch + offset;
1056	ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
1057		 GEN6_SURFACE_BLEND_ENABLED |
1058		 format << GEN6_SURFACE_FORMAT_SHIFT);
1059	if (is_dst) {
1060		ss[0] |= GEN6_SURFACE_RC_READ_WRITE;
1061		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
1062	} else
1063		domains = I915_GEM_DOMAIN_SAMPLER << 16;
1064	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
1065	ss[2] = ((width - 1)  << GEN6_SURFACE_WIDTH_SHIFT |
1066		 (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT);
1067	assert(bo->pitch <= (1 << 18));
1068	ss[3] = (gen6_tiling_bits(bo->tiling) |
1069		 (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
1070	ss[4] = 0;
1071	ss[5] = is_scanout ? 0 : 3 << 16;
1072
1073	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);
1074
1075	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
1076	     offset, bo->handle, ss[1],
1077	     format, width, height, bo->pitch, bo->tiling,
1078	     domains & 0xffff ? "render" : "sampler"));
1079
1080	return offset * sizeof(uint32_t);
1081}
1082
1083static void gen6_emit_vertex_buffer(struct sna *sna,
1084				    const struct sna_composite_op *op)
1085{
1086	int id = GEN6_VERTEX(op->u.gen6.flags);
1087
1088	OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3);
1089	OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
1090		  4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT);
1091	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
1092	OUT_BATCH(0);
1093	OUT_BATCH(~0); /* max address: disabled */
1094	OUT_BATCH(0);
1095
1096	sna->render.vb_id |= 1 << id;
1097}
1098
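/* Start a RECTLIST primitive, or, if the previous batch command was our
 * own 3DPRIMITIVE (last_primitive), reopen it by pointing vertex_offset
 * back at its vertex-count dword so further rectangles are appended; the
 * count is filled in when the vertices are flushed.
 */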
1099static void gen6_emit_primitive(struct sna *sna)
1100{
1101	if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) {
1102		DBG(("%s: continuing previous primitive, start=%d, index=%d\n",
1103		     __FUNCTION__,
1104		     sna->render.vertex_start,
1105		     sna->render.vertex_index));
1106		sna->render.vertex_offset = sna->kgem.nbatch - 5;
1107		return;
1108	}
1109
1110	OUT_BATCH(GEN6_3DPRIMITIVE |
1111		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
1112		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
1113		  0 << 9 |
1114		  4);
1115	sna->render.vertex_offset = sna->kgem.nbatch;
1116	OUT_BATCH(0);	/* vertex count, to be filled in later */
1117	OUT_BATCH(sna->render.vertex_index);
1118	OUT_BATCH(1);	/* single instance */
1119	OUT_BATCH(0);	/* start instance location */
1120	OUT_BATCH(0);	/* index buffer offset, ignored */
1121	sna->render.vertex_start = sna->render.vertex_index;
1122	DBG(("%s: started new primitive: index=%d\n",
1123	     __FUNCTION__, sna->render.vertex_start));
1124
1125	sna->render_state.gen6.last_primitive = sna->kgem.nbatch;
1126}
1127
1128static bool gen6_rectangle_begin(struct sna *sna,
1129				 const struct sna_composite_op *op)
1130{
1131	int id = 1 << GEN6_VERTEX(op->u.gen6.flags);
1132	int ndwords;
1133
1134	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
1135		return true;
1136
1137	ndwords = op->need_magic_ca_pass ? 60 : 6;
1138	if ((sna->render.vb_id & id) == 0)
1139		ndwords += 5;
1140	if (!kgem_check_batch(&sna->kgem, ndwords))
1141		return false;
1142
1143	if ((sna->render.vb_id & id) == 0)
1144		gen6_emit_vertex_buffer(sna, op);
1145
1146	gen6_emit_primitive(sna);
1147	return true;
1148}
1149
1150static int gen6_get_rectangles__flush(struct sna *sna,
1151				      const struct sna_composite_op *op)
1152{
1153	/* Prevent discarding the new vbo after lock contention */
1154	if (sna_vertex_wait__locked(&sna->render)) {
1155		int rem = vertex_space(sna);
1156		if (rem > op->floats_per_rect)
1157			return rem;
1158	}
1159
1160	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5))
1161		return 0;
1162	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
1163		return 0;
1164
1165	if (sna->render.vertex_offset) {
1166		gen4_vertex_flush(sna);
1167		if (gen6_magic_ca_pass(sna, op)) {
1168			gen6_emit_flush(sna);
1169			gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
1170			gen6_emit_wm(sna,
1171				     GEN6_KERNEL(op->u.gen6.flags),
1172				     GEN6_VERTEX(op->u.gen6.flags) >> 2);
1173		}
1174	}
1175
1176	return gen4_vertex_finish(sna);
1177}
1178
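/* Reserve space for up to 'want' rectangles (3 vertices each), flushing
 * the vertex buffer and/or submitting the batch as required; returns how
 * many rectangles the caller may emit.  After a forced submission the
 * state is re-emitted via the emit_state() callback.
 */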
1179inline static int gen6_get_rectangles(struct sna *sna,
1180				      const struct sna_composite_op *op,
1181				      int want,
1182				      void (*emit_state)(struct sna *, const struct sna_composite_op *op))
1183{
1184	int rem;
1185
1186	assert(want);
1187
1188start:
1189	rem = vertex_space(sna);
1190	if (unlikely(rem < op->floats_per_rect)) {
1191		DBG(("flushing vbo for %s: %d < %d\n",
1192		     __FUNCTION__, rem, op->floats_per_rect));
1193		rem = gen6_get_rectangles__flush(sna, op);
1194		if (unlikely(rem == 0))
1195			goto flush;
1196	}
1197
1198	if (unlikely(sna->render.vertex_offset == 0)) {
1199		if (!gen6_rectangle_begin(sna, op))
1200			goto flush;
1201		else
1202			goto start;
1203	}
1204
1205	assert(rem <= vertex_space(sna));
1206	assert(op->floats_per_rect <= rem);
1207	if (want > 1 && want * op->floats_per_rect > rem)
1208		want = rem / op->floats_per_rect;
1209
1210	assert(want > 0);
1211	sna->render.vertex_index += 3*want;
1212	return want;
1213
1214flush:
1215	if (sna->render.vertex_offset) {
1216		gen4_vertex_flush(sna);
1217		gen6_magic_ca_pass(sna, op);
1218	}
1219	sna_vertex_wait__locked(&sna->render);
1220	_kgem_submit(&sna->kgem);
1221	emit_state(sna, op);
1222	goto start;
1223}
1224
1225inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
1226							 uint16_t *offset)
1227{
1228	uint32_t *table;
1229
1230	sna->kgem.surface -=
1231		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
1232	/* Clear all surplus entries to zero in case of prefetch */
1233	table = memset(sna->kgem.batch + sna->kgem.surface,
1234		       0, sizeof(struct gen6_surface_state_padded));
1235
1236	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
1237
1238	*offset = sna->kgem.surface;
1239	return table;
1240}
1241
1242static bool
1243gen6_get_batch(struct sna *sna, const struct sna_composite_op *op)
1244{
1245	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
1246
1247	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
1248		DBG(("%s: flushing batch: %d < %d+%d\n",
1249		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
1250		     150, 4*8));
1251		kgem_submit(&sna->kgem);
1252		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1253	}
1254
1255	if (sna->render_state.gen6.needs_invariant)
1256		gen6_emit_invariant(sna);
1257
1258	return kgem_bo_is_dirty(op->dst.bo);
1259}
1260
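/* Emit the binding table and per-op state for a composite.  If the newly
 * written binding table matches the previous one, the surface slot is
 * released and the old offset reused; the low bit of the offset passed to
 * gen6_emit_state() carries the destination-dirty flag, which may force a
 * pipeline stall before the surfaces are reused.
 */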
1261static void gen6_emit_composite_state(struct sna *sna,
1262				      const struct sna_composite_op *op)
1263{
1264	uint32_t *binding_table;
1265	uint16_t offset;
1266	bool dirty;
1267
1268	dirty = gen6_get_batch(sna, op);
1269
1270	binding_table = gen6_composite_get_binding_table(sna, &offset);
1271
1272	binding_table[0] =
1273		gen6_bind_bo(sna,
1274			    op->dst.bo, op->dst.width, op->dst.height,
1275			    gen6_get_dest_format(op->dst.format),
1276			    true);
1277	binding_table[1] =
1278		gen6_bind_bo(sna,
1279			     op->src.bo, op->src.width, op->src.height,
1280			     op->src.card_format,
1281			     false);
1282	if (op->mask.bo) {
1283		binding_table[2] =
1284			gen6_bind_bo(sna,
1285				     op->mask.bo,
1286				     op->mask.width,
1287				     op->mask.height,
1288				     op->mask.card_format,
1289				     false);
1290	}
1291
1292	if (sna->kgem.surface == offset &&
1293	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table &&
1294	    (op->mask.bo == NULL ||
1295	     sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) {
1296		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
1297		offset = sna->render_state.gen6.surface_table;
1298	}
1299
1300	gen6_emit_state(sna, op, offset | dirty);
1301}
1302
1303static void
1304gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
1305{
1306	assert (sna->render.vertex_offset == 0);
1307	if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) {
1308		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
1309			gen4_vertex_finish(sna);
1310
1311		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
1312		     sna->render_state.gen6.floats_per_vertex,
1313		     op->floats_per_vertex,
1314		     sna->render.vertex_index,
1315		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
1316		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
1317		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
1318		sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;
1319	}
1320	assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
1321}
1322
1323fastcall static void
1324gen6_render_composite_blt(struct sna *sna,
1325			  const struct sna_composite_op *op,
1326			  const struct sna_composite_rectangles *r)
1327{
1328	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
1329	op->prim_emit(sna, op, r);
1330}
1331
1332fastcall static void
1333gen6_render_composite_box(struct sna *sna,
1334			  const struct sna_composite_op *op,
1335			  const BoxRec *box)
1336{
1337	struct sna_composite_rectangles r;
1338
1339	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
1340
1341	DBG(("  %s: (%d, %d), (%d, %d)\n",
1342	     __FUNCTION__,
1343	     box->x1, box->y1, box->x2, box->y2));
1344
1345	r.dst.x = box->x1;
1346	r.dst.y = box->y1;
1347	r.width  = box->x2 - box->x1;
1348	r.height = box->y2 - box->y1;
1349	r.src = r.mask = r.dst;
1350
1351	op->prim_emit(sna, op, &r);
1352}
1353
1354static void
1355gen6_render_composite_boxes__blt(struct sna *sna,
1356				 const struct sna_composite_op *op,
1357				 const BoxRec *box, int nbox)
1358{
1359	DBG(("composite_boxes(%d)\n", nbox));
1360
1361	do {
1362		int nbox_this_time;
1363
1364		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
1365						     gen6_emit_composite_state);
1366		nbox -= nbox_this_time;
1367
1368		do {
1369			struct sna_composite_rectangles r;
1370
1371			DBG(("  %s: (%d, %d), (%d, %d)\n",
1372			     __FUNCTION__,
1373			     box->x1, box->y1, box->x2, box->y2));
1374
1375			r.dst.x = box->x1;
1376			r.dst.y = box->y1;
1377			r.width  = box->x2 - box->x1;
1378			r.height = box->y2 - box->y1;
1379			r.src = r.mask = r.dst;
1380
1381			op->prim_emit(sna, op, &r);
1382			box++;
1383		} while (--nbox_this_time);
1384	} while (nbox);
1385}
1386
1387static void
1388gen6_render_composite_boxes(struct sna *sna,
1389			    const struct sna_composite_op *op,
1390			    const BoxRec *box, int nbox)
1391{
1392	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1393
1394	do {
1395		int nbox_this_time;
1396		float *v;
1397
1398		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
1399						     gen6_emit_composite_state);
1400		assert(nbox_this_time);
1401		nbox -= nbox_this_time;
1402
1403		v = sna->render.vertices + sna->render.vertex_used;
1404		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
1405
1406		op->emit_boxes(op, box, nbox_this_time, v);
1407		box += nbox_this_time;
1408	} while (nbox);
1409}
1410
1411static void
1412gen6_render_composite_boxes__thread(struct sna *sna,
1413				    const struct sna_composite_op *op,
1414				    const BoxRec *box, int nbox)
1415{
1416	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1417
1418	sna_vertex_lock(&sna->render);
1419	do {
1420		int nbox_this_time;
1421		float *v;
1422
1423		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
1424						     gen6_emit_composite_state);
1425		assert(nbox_this_time);
1426		nbox -= nbox_this_time;
1427
1428		v = sna->render.vertices + sna->render.vertex_used;
1429		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
1430
1431		sna_vertex_acquire__locked(&sna->render);
1432		sna_vertex_unlock(&sna->render);
1433
1434		op->emit_boxes(op, box, nbox_this_time, v);
1435		box += nbox_this_time;
1436
1437		sna_vertex_lock(&sna->render);
1438		sna_vertex_release__locked(&sna->render);
1439	} while (nbox);
1440	sna_vertex_unlock(&sna->render);
1441}
1442
1443#ifndef MAX
1444#define MAX(a,b) ((a) > (b) ? (a) : (b))
1445#endif
1446
1447static uint32_t
1448gen6_composite_create_blend_state(struct sna_static_stream *stream)
1449{
1450	char *base, *ptr;
1451	int src, dst;
1452
1453	base = sna_static_stream_map(stream,
1454				     GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
1455				     64);
1456
1457	ptr = base;
1458	for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) {
1459		for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) {
1460			struct gen6_blend_state *blend =
1461				(struct gen6_blend_state *)ptr;
1462
1463			blend->blend0.dest_blend_factor = dst;
1464			blend->blend0.source_blend_factor = src;
1465			blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
1466			blend->blend0.blend_enable =
1467				!(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE);
1468
1469			blend->blend1.post_blend_clamp_enable = 1;
1470			blend->blend1.pre_blend_clamp_enable = 1;
1471
1472			ptr += GEN6_BLEND_STATE_PADDED_SIZE;
1473		}
1474	}
1475
1476	return sna_static_stream_offsetof(stream, base);
1477}
1478
1479static uint32_t gen6_bind_video_source(struct sna *sna,
1480				       struct kgem_bo *src_bo,
1481				       uint32_t src_offset,
1482				       int src_width,
1483				       int src_height,
1484				       int src_pitch,
1485				       uint32_t src_surf_format)
1486{
1487	struct gen6_surface_state *ss;
1488
1489	sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
1490
1491	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
1492	ss->ss0.surface_type = GEN6_SURFACE_2D;
1493	ss->ss0.surface_format = src_surf_format;
1494
1495	ss->ss1.base_addr =
1496		kgem_add_reloc(&sna->kgem,
1497			       sna->kgem.surface + 1,
1498			       src_bo,
1499			       I915_GEM_DOMAIN_SAMPLER << 16,
1500			       src_offset);
1501
1502	ss->ss2.width  = src_width - 1;
1503	ss->ss2.height = src_height - 1;
1504	ss->ss3.pitch  = src_pitch - 1;
1505
1506	return sna->kgem.surface * sizeof(uint32_t);
1507}
1508
1509static void gen6_emit_video_state(struct sna *sna,
1510				  const struct sna_composite_op *op)
1511{
1512	struct sna_video_frame *frame = op->priv;
1513	uint32_t src_surf_format;
1514	uint32_t src_surf_base[6];
1515	int src_width[6];
1516	int src_height[6];
1517	int src_pitch[6];
1518	uint32_t *binding_table;
1519	uint16_t offset;
1520	bool dirty;
1521	int n_src, n;
1522
1523	dirty = gen6_get_batch(sna, op);
1524
1525	src_surf_base[0] = 0;
1526	src_surf_base[1] = 0;
1527	src_surf_base[2] = frame->VBufOffset;
1528	src_surf_base[3] = frame->VBufOffset;
1529	src_surf_base[4] = frame->UBufOffset;
1530	src_surf_base[5] = frame->UBufOffset;
1531
1532	if (is_planar_fourcc(frame->id)) {
1533		src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM;
1534		src_width[1]  = src_width[0]  = frame->width;
1535		src_height[1] = src_height[0] = frame->height;
1536		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
1537		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
1538			frame->width / 2;
1539		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
1540			frame->height / 2;
1541		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
1542			frame->pitch[0];
1543		n_src = 6;
1544	} else {
1545		if (frame->id == FOURCC_UYVY)
1546			src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY;
1547		else
1548			src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL;
1549
1550		src_width[0]  = frame->width;
1551		src_height[0] = frame->height;
1552		src_pitch[0]  = frame->pitch[0];
1553		n_src = 1;
1554	}
1555
1556	binding_table = gen6_composite_get_binding_table(sna, &offset);
1557
1558	binding_table[0] =
1559		gen6_bind_bo(sna,
1560			     op->dst.bo, op->dst.width, op->dst.height,
1561			     gen6_get_dest_format(op->dst.format),
1562			     true);
1563	for (n = 0; n < n_src; n++) {
1564		binding_table[1+n] =
1565			gen6_bind_video_source(sna,
1566					       frame->bo,
1567					       src_surf_base[n],
1568					       src_width[n],
1569					       src_height[n],
1570					       src_pitch[n],
1571					       src_surf_format);
1572	}
1573
1574	gen6_emit_state(sna, op, offset | dirty);
1575}
1576
1577static bool
1578gen6_render_video(struct sna *sna,
1579		  struct sna_video *video,
1580		  struct sna_video_frame *frame,
1581		  RegionPtr dstRegion,
1582		  PixmapPtr pixmap)
1583{
1584	struct sna_composite_op tmp;
1585	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
1586	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
1587	int src_width = frame->src.x2 - frame->src.x1;
1588	int src_height = frame->src.y2 - frame->src.y1;
1589	float src_offset_x, src_offset_y;
1590	float src_scale_x, src_scale_y;
1591	int nbox, pix_xoff, pix_yoff;
1592	struct sna_pixmap *priv;
1593	unsigned filter;
1594	BoxPtr box;
1595
1596	DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n",
1597	     __FUNCTION__,
1598	     src_width, src_height, dst_width, dst_height,
1599	     (long)REGION_NUM_RECTS(dstRegion),
1600	     REGION_EXTENTS(NULL, dstRegion)->x1,
1601	     REGION_EXTENTS(NULL, dstRegion)->y1,
1602	     REGION_EXTENTS(NULL, dstRegion)->x2,
1603	     REGION_EXTENTS(NULL, dstRegion)->y2));
1604
1605	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
1606	if (priv == NULL)
1607		return false;
1608
1609	memset(&tmp, 0, sizeof(tmp));
1610
1611	tmp.dst.pixmap = pixmap;
1612	tmp.dst.width  = pixmap->drawable.width;
1613	tmp.dst.height = pixmap->drawable.height;
1614	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
1615	tmp.dst.bo = priv->gpu_bo;
1616
1617	tmp.src.bo = frame->bo;
1618	tmp.mask.bo = NULL;
1619
1620	tmp.floats_per_vertex = 3;
1621	tmp.floats_per_rect = 9;
1622
1623	if (src_width == dst_width && src_height == dst_height)
1624		filter = SAMPLER_FILTER_NEAREST;
1625	else
1626		filter = SAMPLER_FILTER_BILINEAR;
1627
1628	tmp.u.gen6.flags =
1629		GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
1630					       SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
1631			       NO_BLEND,
1632			       is_planar_fourcc(frame->id) ?
1633			       GEN6_WM_KERNEL_VIDEO_PLANAR :
1634			       GEN6_WM_KERNEL_VIDEO_PACKED,
1635			       2);
1636	tmp.priv = frame;
1637
1638	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
1639	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
1640		kgem_submit(&sna->kgem);
1641		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
1642		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1643	}
1644
1645	gen6_emit_video_state(sna, &tmp);
1646	gen6_align_vertex(sna, &tmp);
1647
1648	/* Set up the offset for translating from the given region (in screen
1649	 * coordinates) to the backing pixmap.
1650	 */
1651#ifdef COMPOSITE
1652	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
1653	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
1654#else
1655	pix_xoff = 0;
1656	pix_yoff = 0;
1657#endif
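	/* Map destination pixels to normalized source texture coordinates:
	 * u = x * src_scale_x + src_offset_x (and likewise for v).  The
	 * division by frame->width/height normalizes, as the composite
	 * samplers used here leave non_normalized_coord unset.
	 */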
1658
1659	src_scale_x = (float)src_width / dst_width / frame->width;
1660	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
1661
1662	src_scale_y = (float)src_height / dst_height / frame->height;
1663	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
1664
1665	box = REGION_RECTS(dstRegion);
1666	nbox = REGION_NUM_RECTS(dstRegion);
1667	while (nbox--) {
1668		BoxRec r;
1669
1670		r.x1 = box->x1 + pix_xoff;
1671		r.x2 = box->x2 + pix_xoff;
1672		r.y1 = box->y1 + pix_yoff;
1673		r.y2 = box->y2 + pix_yoff;
1674
1675		gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state);
1676
1677		OUT_VERTEX(r.x2, r.y2);
1678		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
1679		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1680
1681		OUT_VERTEX(r.x1, r.y2);
1682		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1683		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1684
1685		OUT_VERTEX(r.x1, r.y1);
1686		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1687		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
1688
1689		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
1690			sna_damage_add_box(&priv->gpu_damage, &r);
1691			sna_damage_subtract_box(&priv->cpu_damage, &r);
1692		}
1693		box++;
1694	}
1695
1696	gen4_vertex_flush(sna);
1697	return true;
1698}
1699
1700static int
1701gen6_composite_picture(struct sna *sna,
1702		       PicturePtr picture,
1703		       struct sna_composite_channel *channel,
1704		       int x, int y,
1705		       int w, int h,
1706		       int dst_x, int dst_y,
1707		       bool precise)
1708{
1709	PixmapPtr pixmap;
1710	uint32_t color;
1711	int16_t dx, dy;
1712
1713	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
1714	     __FUNCTION__, x, y, w, h, dst_x, dst_y));
1715
1716	channel->is_solid = false;
1717	channel->card_format = -1;
1718
1719	if (sna_picture_is_solid(picture, &color))
1720		return gen4_channel_init_solid(sna, channel, color);
1721
1722	if (picture->pDrawable == NULL) {
1723		int ret;
1724
1725		if (picture->pSourcePict->type == SourcePictTypeLinear)
1726			return gen4_channel_init_linear(sna, picture, channel,
1727							x, y,
1728							w, h,
1729							dst_x, dst_y);
1730
1731		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
1732		ret = -1;
1733		if (!precise)
1734			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
1735								      x, y, w, h, dst_x, dst_y);
1736		if (ret == -1)
1737			ret = sna_render_picture_fixup(sna, picture, channel,
1738						       x, y, w, h, dst_x, dst_y);
1739		return ret;
1740	}
1741
1742	if (picture->alphaMap) {
1743		DBG(("%s -- fixup, alphamap\n", __FUNCTION__));
1744		return sna_render_picture_fixup(sna, picture, channel,
1745						x, y, w, h, dst_x, dst_y);
1746	}
1747
1748	if (!gen6_check_repeat(picture))
1749		return sna_render_picture_fixup(sna, picture, channel,
1750						x, y, w, h, dst_x, dst_y);
1751
1752	if (!gen6_check_filter(picture))
1753		return sna_render_picture_fixup(sna, picture, channel,
1754						x, y, w, h, dst_x, dst_y);
1755
1756	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
1757	channel->filter = picture->filter;
1758
1759	pixmap = get_drawable_pixmap(picture->pDrawable);
1760	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
1761
1762	x += dx + picture->pDrawable->x;
1763	y += dy + picture->pDrawable->y;
1764
1765	channel->is_affine = sna_transform_is_affine(picture->transform);
1766	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
1767		DBG(("%s: integer translation (%d, %d), removing\n",
1768		     __FUNCTION__, dx, dy));
1769		x += dx;
1770		y += dy;
1771		channel->transform = NULL;
1772		channel->filter = PictFilterNearest;
1773	} else
1774		channel->transform = picture->transform;
1775
1776	channel->pict_format = picture->format;
1777	channel->card_format = gen6_get_card_format(picture->format);
1778	if (channel->card_format == (unsigned)-1)
1779		return sna_render_picture_convert(sna, picture, channel, pixmap,
1780						  x, y, w, h, dst_x, dst_y,
1781						  false);
1782
1783	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
1784		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
1785		     pixmap->drawable.width, pixmap->drawable.height));
1786		return sna_render_picture_extract(sna, picture, channel,
1787						  x, y, w, h, dst_x, dst_y);
1788	}
1789
1790	return sna_render_pixmap_bo(sna, channel, pixmap,
1791				    x, y, w, h, dst_x, dst_y);
1792}
1793
1794inline static void gen6_composite_channel_convert(struct sna_composite_channel *channel)
1795{
1796	channel->repeat = gen6_repeat(channel->repeat);
1797	channel->filter = gen6_filter(channel->filter);
1798	if (channel->card_format == (unsigned)-1)
1799		channel->card_format = gen6_get_card_format(channel->pict_format);
1800	assert(channel->card_format != (unsigned)-1);
1801}
1802
1803static void gen6_render_composite_done(struct sna *sna,
1804				       const struct sna_composite_op *op)
1805{
1806	DBG(("%s\n", __FUNCTION__));
1807
1808	assert(!sna->render.active);
1809	if (sna->render.vertex_offset) {
1810		gen4_vertex_flush(sna);
1811		gen6_magic_ca_pass(sna, op);
1812	}
1813
1814	if (op->mask.bo)
1815		kgem_bo_destroy(&sna->kgem, op->mask.bo);
1816	if (op->src.bo)
1817		kgem_bo_destroy(&sna->kgem, op->src.bo);
1818
1819	sna_render_composite_redirect_done(sna, op);
1820}
1821
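/* Bind the destination Picture for rendering: pick a GPU bo covering the
 * target box (forcing the GPU if necessary), record the drawable deltas,
 * and redirect through a temporary proxy if the pixmap exceeds the
 * maximum render target size.
 */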
1822static bool
1823gen6_composite_set_target(struct sna *sna,
1824			  struct sna_composite_op *op,
1825			  PicturePtr dst,
1826			  int x, int y, int w, int h,
1827			  bool partial)
1828{
1829	BoxRec box;
1830
1831	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
1832	op->dst.format = dst->format;
1833	op->dst.width = op->dst.pixmap->drawable.width;
1834	op->dst.height = op->dst.pixmap->drawable.height;
1835
1836	if (w && h) {
1837		box.x1 = x;
1838		box.y1 = y;
1839		box.x2 = x + w;
1840		box.y2 = y + h;
1841	} else
1842		sna_render_picture_extents(dst, &box);
1843
1844	op->dst.bo = sna_drawable_use_bo(dst->pDrawable,
1845					 PREFER_GPU | FORCE_GPU | RENDER_GPU,
1846					 &box, &op->damage);
1847	if (op->dst.bo == NULL)
1848		return false;
1849
1850	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
1851			    &op->dst.x, &op->dst.y);
1852
1853	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
1854	     __FUNCTION__,
1855	     op->dst.pixmap, (int)op->dst.format,
1856	     op->dst.width, op->dst.height,
1857	     op->dst.bo->pitch,
1858	     op->dst.x, op->dst.y,
1859	     op->damage ? *op->damage : (void *)-1));
1860
1861	assert(op->dst.bo->proxy == NULL);
1862
1863	if (too_large(op->dst.width, op->dst.height) &&
1864	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
1865		return false;
1866
1867	return true;
1868}
1869
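/* Heuristic for moving work onto the BLT ring: always allowed if we are
 * not currently on the render ring; otherwise only when semaphores are
 * available and either this is the last copy, the bo is already busy on
 * the BLT, or the BLT ring is idle.
 */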
1870inline static bool can_switch_to_blt(struct sna *sna,
1871				     struct kgem_bo *bo,
1872				     unsigned flags)
1873{
1874	if (sna->kgem.ring != KGEM_RENDER)
1875		return true;
1876
1877	if (NO_RING_SWITCH)
1878		return false;
1879
1880	if (!sna->kgem.has_semaphores)
1881		return false;
1882
1883	if (flags & COPY_LAST)
1884		return true;
1885
1886	if (bo && RQ_IS_BLT(bo->rq))
1887		return true;
1888
1889	return kgem_ring_is_idle(&sna->kgem, KGEM_BLT);
1890}
1891
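/* A linear bo with a pitch of 4096 bytes or more touches a fresh page on
 * every scanline, so sampling it tends to miss in the TLB; such surfaces
 * are better served by the blitter.
 */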
1892static inline bool untiled_tlb_miss(struct kgem_bo *bo)
1893{
1894	return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096;
1895}
1896
1897static bool prefer_blt_bo(struct sna *sna, struct kgem_bo *bo)
1898{
1899	if (RQ_IS_BLT(bo->rq))
1900		return true;
1901
1902	return bo->tiling == I915_TILING_NONE || bo->scanout;
1903}
1904
1905inline static bool prefer_blt_ring(struct sna *sna,
1906				   struct kgem_bo *bo,
1907				   unsigned flags)
1908{
1909	return can_switch_to_blt(sna, bo, flags);
1910}
1911
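/* Quick check whether an unmasked composite is better sent to the BLT:
 * if the destination has no bo yet, is already busy on the BLT, is too
 * large for the 3D pipe, or the source is solid and a ring switch is
 * cheap.
 */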
1912static bool
1913try_blt(struct sna *sna,
1914	PicturePtr dst, PicturePtr src,
1915	int width, int height)
1916{
1917	struct kgem_bo *bo;
1918
1919	bo = __sna_drawable_peek_bo(dst->pDrawable);
1920	if (bo == NULL)
1921		return true;
1922
1923	if (bo->rq)
1924		return RQ_IS_BLT(bo->rq);
1925
1926	if (sna->kgem.ring == KGEM_BLT) {
1927		DBG(("%s: already performing BLT\n", __FUNCTION__));
1928		return true;
1929	}
1930
1931	if (too_large(width, height)) {
1932		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
1933		     __FUNCTION__, width, height));
1934		return true;
1935	}
1936
1937	if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, NULL, 0))
1938		return true;
1939
1940	return false;
1941}
1942
1943static bool
1944check_gradient(PicturePtr picture, bool precise)
1945{
1946	if (picture->pDrawable)
1947		return false;
1948
1949	switch (picture->pSourcePict->type) {
1950	case SourcePictTypeSolidFill:
1951	case SourcePictTypeLinear:
1952		return false;
1953	default:
1954		return precise;
1955	}
1956}
1957
1958static bool
1959has_alphamap(PicturePtr p)
1960{
1961	return p->alphaMap != NULL;
1962}
1963
1964static bool
1965need_upload(PicturePtr p)
1966{
1967	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
1968}
1969
1970static bool
1971source_is_busy(PixmapPtr pixmap)
1972{
1973	struct sna_pixmap *priv = sna_pixmap(pixmap);
1974	if (priv == NULL || priv->clear)
1975		return false;
1976
1977	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
1978		return true;
1979
1980	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
1981		return true;
1982
1983	return priv->gpu_damage && !priv->cpu_damage;
1984}
1985
1986static bool
1987source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
1988{
1989	if (sna_picture_is_solid(p, NULL))
1990		return false;
1991
1992	if (p->pSourcePict)
1993		return check_gradient(p, precise);
1994
1995	if (!gen6_check_repeat(p) || !gen6_check_format(p->format))
1996		return true;
1997
1998	if (pixmap && source_is_busy(pixmap))
1999		return false;
2000
2001	return has_alphamap(p) || !gen6_check_filter(p) || need_upload(p);
2002}
2003
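/* Decide whether the whole composite should be punted to the CPU: fall
 * back if a source needs CPU preparation while the destination is not
 * already resident on the GPU, or if the destination is CPU-bound and
 * too large for the render pipeline.
 */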
2004static bool
2005gen6_composite_fallback(struct sna *sna,
2006			PicturePtr src,
2007			PicturePtr mask,
2008			PicturePtr dst)
2009{
2010	PixmapPtr src_pixmap;
2011	PixmapPtr mask_pixmap;
2012	PixmapPtr dst_pixmap;
2013	bool src_fallback, mask_fallback;
2014
2015	if (!gen6_check_dst_format(dst->format)) {
2016		DBG(("%s: unknown destination format: %d\n",
2017		     __FUNCTION__, dst->format));
2018		return true;
2019	}
2020
2021	dst_pixmap = get_drawable_pixmap(dst->pDrawable);
2022
2023	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
2024	src_fallback = source_fallback(src, src_pixmap,
2025				       dst->polyMode == PolyModePrecise);
2026
2027	if (mask) {
2028		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
2029		mask_fallback = source_fallback(mask, mask_pixmap,
2030						dst->polyMode == PolyModePrecise);
2031	} else {
2032		mask_pixmap = NULL;
2033		mask_fallback = false;
2034	}
2035
2036	/* If we are using the destination as a source and need to
2037	 * read back in order to upload the source, do it all
2038	 * on the CPU.
2039	 */
2040	if (src_pixmap == dst_pixmap && src_fallback) {
2041		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
2042		return true;
2043	}
2044	if (mask_pixmap == dst_pixmap && mask_fallback) {
2045		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
2046		return true;
2047	}
2048
2049	/* If anything is on the GPU, push everything out to the GPU */
2050	if (dst_use_gpu(dst_pixmap)) {
2051		DBG(("%s: dst is already on the GPU, try to use GPU\n",
2052		     __FUNCTION__));
2053		return false;
2054	}
2055
2056	if (src_pixmap && !src_fallback) {
2057		DBG(("%s: src is already on the GPU, try to use GPU\n",
2058		     __FUNCTION__));
2059		return false;
2060	}
2061	if (mask_pixmap && !mask_fallback) {
2062		DBG(("%s: mask is already on the GPU, try to use GPU\n",
2063		     __FUNCTION__));
2064		return false;
2065	}
2066
2067	/* However, if the dst is not on the GPU and we need to
2068	 * render one of the sources using the CPU, we may
2069	 * as well do the entire operation in place on the CPU.
2070	 */
2071	if (src_fallback) {
2072		DBG(("%s: dst is on the CPU and src will fallback\n",
2073		     __FUNCTION__));
2074		return true;
2075	}
2076
2077	if (mask && mask_fallback) {
2078		DBG(("%s: dst is on the CPU and mask will fallback\n",
2079		     __FUNCTION__));
2080		return true;
2081	}
2082
2083	if (too_large(dst_pixmap->drawable.width,
2084		      dst_pixmap->drawable.height) &&
2085	    dst_is_cpu(dst_pixmap)) {
2086		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
2087		return true;
2088	}
2089
2090	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
2091	     __FUNCTION__));
2092	return dst_use_cpu(dst_pixmap);
2093}
2094
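/* If the mask samples the same drawable as the source with an identical
 * transform, alpha map, repeat and filter, reuse the already-prepared
 * source channel (taking an extra reference on its bo) rather than
 * preparing the mask from scratch.
 */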
2095static int
2096reuse_source(struct sna *sna,
2097	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
2098	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
2099{
2100	uint32_t color;
2101
2102	if (src_x != msk_x || src_y != msk_y)
2103		return false;
2104
2105	if (src == mask) {
2106		DBG(("%s: mask is source\n", __FUNCTION__));
2107		*mc = *sc;
2108		mc->bo = kgem_bo_reference(mc->bo);
2109		return true;
2110	}
2111
2112	if (sna_picture_is_solid(mask, &color))
2113		return gen4_channel_init_solid(sna, mc, color);
2114
2115	if (sc->is_solid)
2116		return false;
2117
2118	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
2119		return false;
2120
2121	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
2122
2123	if (!sna_transform_equal(src->transform, mask->transform))
2124		return false;
2125
2126	if (!sna_picture_alphamap_equal(src, mask))
2127		return false;
2128
2129	if (!gen6_check_repeat(mask))
2130		return false;
2131
2132	if (!gen6_check_filter(mask))
2133		return false;
2134
2135	if (!gen6_check_format(mask->format))
2136		return false;
2137
2138	DBG(("%s: reusing source channel for mask with a twist\n",
2139	     __FUNCTION__));
2140
2141	*mc = *sc;
2142	mc->repeat = gen6_repeat(mask->repeat ? mask->repeatType : RepeatNone);
2143	mc->filter = gen6_filter(mask->filter);
2144	mc->pict_format = mask->format;
2145	mc->card_format = gen6_get_card_format(mask->format);
2146	mc->bo = kgem_bo_reference(mc->bo);
2147	return true;
2148}
2149
2150static bool
2151prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp)
2152{
2153	if (untiled_tlb_miss(tmp->dst.bo) ||
2154	    untiled_tlb_miss(tmp->src.bo))
2155		return true;
2156
2157	if (kgem_bo_is_render(tmp->dst.bo) ||
2158	    kgem_bo_is_render(tmp->src.bo))
2159		return false;
2160
2161	if (!prefer_blt_ring(sna, tmp->dst.bo, 0))
2162		return false;
2163
2164	return prefer_blt_bo(sna, tmp->dst.bo) || prefer_blt_bo(sna, tmp->src.bo);
2165}
2166
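/* Main composite entry point: try the BLT for simple unmasked operations,
 * check for CPU fallbacks, tile oversized operations, then prepare the
 * destination, source and mask channels, pack the sampler/blend/kernel/
 * vertex selection into u.gen6.flags and emit the initial render state.
 */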
2167static bool
2168gen6_render_composite(struct sna *sna,
2169		      uint8_t op,
2170		      PicturePtr src,
2171		      PicturePtr mask,
2172		      PicturePtr dst,
2173		      int16_t src_x, int16_t src_y,
2174		      int16_t msk_x, int16_t msk_y,
2175		      int16_t dst_x, int16_t dst_y,
2176		      int16_t width, int16_t height,
2177		      struct sna_composite_op *tmp)
2178{
2179	if (op >= ARRAY_SIZE(gen6_blend_op))
2180		return false;
2181
2182	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
2183	     width, height, sna->kgem.ring));
2184
2185	if (mask == NULL &&
2186	    try_blt(sna, dst, src, width, height) &&
2187	    sna_blt_composite(sna, op,
2188			      src, dst,
2189			      src_x, src_y,
2190			      dst_x, dst_y,
2191			      width, height,
2192			      tmp, false))
2193		return true;
2194
2195	if (gen6_composite_fallback(sna, src, mask, dst))
2196		return false;
2197
2198	if (need_tiling(sna, width, height))
2199		return sna_tiling_composite(op, src, mask, dst,
2200					    src_x, src_y,
2201					    msk_x, msk_y,
2202					    dst_x, dst_y,
2203					    width, height,
2204					    tmp);
2205
2206	if (op == PictOpClear)
2207		op = PictOpSrc;
2208	tmp->op = op;
2209	if (!gen6_composite_set_target(sna, tmp, dst,
2210				       dst_x, dst_y, width, height,
2211				       op > PictOpSrc || dst->pCompositeClip->data))
2212		return false;
2213
2214	switch (gen6_composite_picture(sna, src, &tmp->src,
2215				       src_x, src_y,
2216				       width, height,
2217				       dst_x, dst_y,
2218				       dst->polyMode == PolyModePrecise)) {
2219	case -1:
2220		goto cleanup_dst;
2221	case 0:
2222		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
2223			goto cleanup_dst;
2224		/* fall through to fixup */
2225	case 1:
2226		/* Did we just switch rings to prepare the source? */
2227		if (mask == NULL &&
2228		    prefer_blt_composite(sna, tmp) &&
2229		    sna_blt_composite__convert(sna,
2230					       dst_x, dst_y, width, height,
2231					       tmp))
2232			return true;
2233
2234		gen6_composite_channel_convert(&tmp->src);
2235		break;
2236	}
2237
2238	tmp->is_affine = tmp->src.is_affine;
2239	tmp->has_component_alpha = false;
2240	tmp->need_magic_ca_pass = false;
2241
2242	tmp->mask.bo = NULL;
2243	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
2244	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
2245
2246	if (mask) {
2247		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
2248			tmp->has_component_alpha = true;
2249
2250			/* Check if it's component alpha that relies on a source alpha and on
2251			 * the source value.  We can only get one of those into the single
2252			 * source value that we get to blend with.
2253			 */
2254			if (gen6_blend_op[op].src_alpha &&
2255			    (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) {
2256				if (op != PictOpOver)
2257					goto cleanup_src;
2258
2259				tmp->need_magic_ca_pass = true;
2260				tmp->op = PictOpOutReverse;
2261			}
2262		}
2263
2264		if (!reuse_source(sna,
2265				  src, &tmp->src, src_x, src_y,
2266				  mask, &tmp->mask, msk_x, msk_y)) {
2267			switch (gen6_composite_picture(sna, mask, &tmp->mask,
2268						       msk_x, msk_y,
2269						       width, height,
2270						       dst_x, dst_y,
2271						       dst->polyMode == PolyModePrecise)) {
2272			case -1:
2273				goto cleanup_src;
2274			case 0:
2275				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
2276					goto cleanup_src;
2277				/* fall through to fixup */
2278			case 1:
2279				gen6_composite_channel_convert(&tmp->mask);
2280				break;
2281			}
2282		}
2283
2284		tmp->is_affine &= tmp->mask.is_affine;
2285	}
2286
2287	tmp->u.gen6.flags =
2288		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
2289					      tmp->src.repeat,
2290					      tmp->mask.filter,
2291					      tmp->mask.repeat),
2292			       gen6_get_blend(tmp->op,
2293					      tmp->has_component_alpha,
2294					      tmp->dst.format),
2295			       gen6_choose_composite_kernel(tmp->op,
2296							    tmp->mask.bo != NULL,
2297							    tmp->has_component_alpha,
2298							    tmp->is_affine),
2299			       gen4_choose_composite_emitter(sna, tmp));
2300
2301	tmp->blt   = gen6_render_composite_blt;
2302	tmp->box   = gen6_render_composite_box;
2303	tmp->boxes = gen6_render_composite_boxes__blt;
2304	if (tmp->emit_boxes) {
2305		tmp->boxes = gen6_render_composite_boxes;
2306		tmp->thread_boxes = gen6_render_composite_boxes__thread;
2307	}
2308	tmp->done  = gen6_render_composite_done;
2309
2310	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
2311	if (!kgem_check_bo(&sna->kgem,
2312			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2313			   NULL)) {
2314		kgem_submit(&sna->kgem);
2315		if (!kgem_check_bo(&sna->kgem,
2316				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2317				   NULL))
2318			goto cleanup_mask;
2319		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
2320	}
2321
2322	gen6_emit_composite_state(sna, tmp);
2323	gen6_align_vertex(sna, tmp);
2324	return true;
2325
2326cleanup_mask:
2327	if (tmp->mask.bo)
2328		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
2329cleanup_src:
2330	if (tmp->src.bo)
2331		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
2332cleanup_dst:
2333	if (tmp->redirect.real_bo)
2334		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
2335	return false;
2336}
2337
2338#if !NO_COMPOSITE_SPANS
2339fastcall static void
2340gen6_render_composite_spans_box(struct sna *sna,
2341				const struct sna_composite_spans_op *op,
2342				const BoxRec *box, float opacity)
2343{
2344	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
2345	     __FUNCTION__,
2346	     op->base.src.offset[0], op->base.src.offset[1],
2347	     opacity,
2348	     op->base.dst.x, op->base.dst.y,
2349	     box->x1, box->y1,
2350	     box->x2 - box->x1,
2351	     box->y2 - box->y1));
2352
2353	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state);
2354	op->prim_emit(sna, op, box, opacity);
2355}
2356
2357static void
2358gen6_render_composite_spans_boxes(struct sna *sna,
2359				  const struct sna_composite_spans_op *op,
2360				  const BoxRec *box, int nbox,
2361				  float opacity)
2362{
2363	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
2364	     __FUNCTION__, nbox,
2365	     op->base.src.offset[0], op->base.src.offset[1],
2366	     opacity,
2367	     op->base.dst.x, op->base.dst.y));
2368
2369	do {
2370		int nbox_this_time;
2371
2372		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
2373						     gen6_emit_composite_state);
2374		nbox -= nbox_this_time;
2375
2376		do {
2377			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
2378			     box->x1, box->y1,
2379			     box->x2 - box->x1,
2380			     box->y2 - box->y1));
2381
2382			op->prim_emit(sna, op, box++, opacity);
2383		} while (--nbox_this_time);
2384	} while (nbox);
2385}
2386
2387fastcall static void
2388gen6_render_composite_spans_boxes__thread(struct sna *sna,
2389					  const struct sna_composite_spans_op *op,
2390					  const struct sna_opacity_box *box,
2391					  int nbox)
2392{
2393	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
2394	     __FUNCTION__, nbox,
2395	     op->base.src.offset[0], op->base.src.offset[1],
2396	     op->base.dst.x, op->base.dst.y));
2397
2398	sna_vertex_lock(&sna->render);
2399	do {
2400		int nbox_this_time;
2401		float *v;
2402
2403		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
2404						     gen6_emit_composite_state);
2405		assert(nbox_this_time);
2406		nbox -= nbox_this_time;
2407
2408		v = sna->render.vertices + sna->render.vertex_used;
2409		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
2410
2411		sna_vertex_acquire__locked(&sna->render);
2412		sna_vertex_unlock(&sna->render);
2413
2414		op->emit_boxes(op, box, nbox_this_time, v);
2415		box += nbox_this_time;
2416
2417		sna_vertex_lock(&sna->render);
2418		sna_vertex_release__locked(&sna->render);
2419	} while (nbox);
2420	sna_vertex_unlock(&sna->render);
2421}
2422
2423fastcall static void
2424gen6_render_composite_spans_done(struct sna *sna,
2425				 const struct sna_composite_spans_op *op)
2426{
2427	DBG(("%s()\n", __FUNCTION__));
2428	assert(!sna->render.active);
2429
2430	if (sna->render.vertex_offset)
2431		gen4_vertex_flush(sna);
2432
2433	if (op->base.src.bo)
2434		kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2435
2436	sna_render_composite_redirect_done(sna, &op->base);
2437}
2438
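/* Filter which span operations the render path accepts: reject unknown
 * blend ops and operations that would fall back; non-rectilinear spans
 * are only taken when the destination (or its CPU bo) is already busy on
 * the GPU.
 */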
2439static bool
2440gen6_check_composite_spans(struct sna *sna,
2441			   uint8_t op, PicturePtr src, PicturePtr dst,
2442			   int16_t width, int16_t height,
2443			   unsigned flags)
2444{
2445	DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
2446	     __FUNCTION__, op, width, height, flags));
2447
2448	if (op >= ARRAY_SIZE(gen6_blend_op))
2449		return false;
2450
2451	if (gen6_composite_fallback(sna, src, NULL, dst)) {
2452		DBG(("%s: operation would fallback\n", __FUNCTION__));
2453		return false;
2454	}
2455
2456	if (need_tiling(sna, width, height) &&
2457	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
2458		DBG(("%s: fallback, tiled operation not on GPU\n",
2459		     __FUNCTION__));
2460		return false;
2461	}
2462
2463	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
2464		struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
2465		assert(priv);
2466
2467		if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
2468			return true;
2469
2470		if (flags & COMPOSITE_SPANS_INPLACE_HINT)
2471			return false;
2472
2473		return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
2474	}
2475
2476	return true;
2477}
2478
2479static bool
2480gen6_render_composite_spans(struct sna *sna,
2481			    uint8_t op,
2482			    PicturePtr src,
2483			    PicturePtr dst,
2484			    int16_t src_x,  int16_t src_y,
2485			    int16_t dst_x,  int16_t dst_y,
2486			    int16_t width,  int16_t height,
2487			    unsigned flags,
2488			    struct sna_composite_spans_op *tmp)
2489{
2490	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
2491	     width, height, flags, sna->kgem.ring));
2492
2493	assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags));
2494
2495	if (need_tiling(sna, width, height)) {
2496		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
2497		     __FUNCTION__, width, height));
2498		return sna_tiling_composite_spans(op, src, dst,
2499						  src_x, src_y, dst_x, dst_y,
2500						  width, height, flags, tmp);
2501	}
2502
2503	tmp->base.op = op;
2504	if (!gen6_composite_set_target(sna, &tmp->base, dst,
2505				       dst_x, dst_y, width, height, true))
2506		return false;
2507
2508	switch (gen6_composite_picture(sna, src, &tmp->base.src,
2509				       src_x, src_y,
2510				       width, height,
2511				       dst_x, dst_y,
2512				       dst->polyMode == PolyModePrecise)) {
2513	case -1:
2514		goto cleanup_dst;
2515	case 0:
2516		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
2517			goto cleanup_dst;
2518		/* fall through to fixup */
2519	case 1:
2520		gen6_composite_channel_convert(&tmp->base.src);
2521		break;
2522	}
2523	tmp->base.mask.bo = NULL;
2524
2525	tmp->base.is_affine = tmp->base.src.is_affine;
2526	tmp->base.need_magic_ca_pass = false;
2527
2528	tmp->base.u.gen6.flags =
2529		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
2530					      tmp->base.src.repeat,
2531					      SAMPLER_FILTER_NEAREST,
2532					      SAMPLER_EXTEND_PAD),
2533			       gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
2534			       GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
2535			       gen4_choose_spans_emitter(sna, tmp));
2536
2537	tmp->box   = gen6_render_composite_spans_box;
2538	tmp->boxes = gen6_render_composite_spans_boxes;
2539	if (tmp->emit_boxes)
2540		tmp->thread_boxes = gen6_render_composite_spans_boxes__thread;
2541	tmp->done  = gen6_render_composite_spans_done;
2542
2543	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
2544	if (!kgem_check_bo(&sna->kgem,
2545			   tmp->base.dst.bo, tmp->base.src.bo,
2546			   NULL)) {
2547		kgem_submit(&sna->kgem);
2548		if (!kgem_check_bo(&sna->kgem,
2549				   tmp->base.dst.bo, tmp->base.src.bo,
2550				   NULL))
2551			goto cleanup_src;
2552		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
2553	}
2554
2555	gen6_emit_composite_state(sna, &tmp->base);
2556	gen6_align_vertex(sna, &tmp->base);
2557	return true;
2558
2559cleanup_src:
2560	if (tmp->base.src.bo)
2561		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
2562cleanup_dst:
2563	if (tmp->base.redirect.real_bo)
2564		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
2565	return false;
2566}
2567#endif
2568
2569static void
2570gen6_emit_copy_state(struct sna *sna,
2571		     const struct sna_composite_op *op)
2572{
2573	uint32_t *binding_table;
2574	uint16_t offset;
2575	bool dirty;
2576
2577	dirty = gen6_get_batch(sna, op);
2578
2579	binding_table = gen6_composite_get_binding_table(sna, &offset);
2580
2581	binding_table[0] =
2582		gen6_bind_bo(sna,
2583			     op->dst.bo, op->dst.width, op->dst.height,
2584			     gen6_get_dest_format(op->dst.format),
2585			     true);
2586	binding_table[1] =
2587		gen6_bind_bo(sna,
2588			     op->src.bo, op->src.width, op->src.height,
2589			     op->src.card_format,
2590			     false);
2591
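	/* If the binding table we just wrote is identical to the previous
	 * one, rewind the surface allocation and reuse the earlier table.
	 */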
2592	if (sna->kgem.surface == offset &&
2593	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
2594		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
2595		offset = sna->render_state.gen6.surface_table;
2596	}
2597
2598	gen6_emit_state(sna, op, offset | dirty);
2599}
2600
2601static inline bool prefer_blt_copy(struct sna *sna,
2602				   struct kgem_bo *src_bo,
2603				   struct kgem_bo *dst_bo,
2604				   unsigned flags)
2605{
2606	if (flags & COPY_SYNC)
2607		return false;
2608
2609	if (PREFER_RENDER)
2610		return PREFER_RENDER > 0;
2611
2612	if (sna->kgem.ring == KGEM_BLT)
2613		return true;
2614
2615	if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags))
2616		return true;
2617
2618	if (untiled_tlb_miss(src_bo) ||
2619	    untiled_tlb_miss(dst_bo))
2620		return true;
2621
2622	if (kgem_bo_is_render(dst_bo) ||
2623	    kgem_bo_is_render(src_bo))
2624		return false;
2625
2626	if (!prefer_blt_ring(sna, dst_bo, flags))
2627		return false;
2628
2629	return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo);
2630}
2631
2632inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents)
2633{
2634	*extents = box[0];
2635	while (--n) {
2636		box++;
2637
2638		if (box->x1 < extents->x1)
2639			extents->x1 = box->x1;
2640		if (box->x2 > extents->x2)
2641			extents->x2 = box->x2;
2642
2643		if (box->y1 < extents->y1)
2644			extents->y1 = box->y1;
2645		if (box->y2 > extents->y2)
2646			extents->y2 = box->y2;
2647	}
2648}
2649
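/* For self-copies, compute the bounding box of all boxes and report
 * whether the source and destination rectangles intersect once their
 * respective offsets are applied.
 */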
2650static inline bool
2651overlaps(struct sna *sna,
2652	 struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
2653	 struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
2654	 const BoxRec *box, int n, BoxRec *extents)
2655{
2656	if (src_bo != dst_bo)
2657		return false;
2658
2659	boxes_extents(box, n, extents);
2660	return (extents->x2 + src_dx > extents->x1 + dst_dx &&
2661		extents->x1 + src_dx < extents->x2 + dst_dx &&
2662		extents->y2 + src_dy > extents->y1 + dst_dy &&
2663		extents->y1 + src_dy < extents->y2 + dst_dy);
2664}
2665
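/* Copy a set of boxes with the render pipeline: prefer the BLT when the
 * heuristics favour it, fall back for non-GXcopy/GXclear ALUs, handle
 * overlapping self-copies specially, and redirect/extract when either
 * surface exceeds the render limits before emitting one rectangle per box.
 */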
2666static bool
2667gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
2668		       PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
2669		       PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
2670		       const BoxRec *box, int n, unsigned flags)
2671{
2672	struct sna_composite_op tmp;
2673	BoxRec extents;
2674
2675	DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n",
2676	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu,
2677	     src_bo == dst_bo,
2678	     overlaps(sna,
2679		      src_bo, src_dx, src_dy,
2680		      dst_bo, dst_dx, dst_dy,
2681		      box, n, &extents)));
2682
2683	if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
2684	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2685	    sna_blt_copy_boxes(sna, alu,
2686			       src_bo, src_dx, src_dy,
2687			       dst_bo, dst_dx, dst_dy,
2688			       dst->drawable.bitsPerPixel,
2689			       box, n))
2690		return true;
2691
2692	if (!(alu == GXcopy || alu == GXclear)) {
2693fallback_blt:
2694		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
2695			return false;
2696
2697		return sna_blt_copy_boxes_fallback(sna, alu,
2698						   src, src_bo, src_dx, src_dy,
2699						   dst, dst_bo, dst_dx, dst_dy,
2700						   box, n);
2701	}
2702
2703	if (overlaps(sna,
2704		     src_bo, src_dx, src_dy,
2705		     dst_bo, dst_dx, dst_dy,
2706		     box, n, &extents)) {
2707		if (too_large(extents.x2-extents.x1, extents.y2-extents.y1))
2708			goto fallback_blt;
2709
2710		if (can_switch_to_blt(sna, dst_bo, flags) &&
2711		    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2712		    sna_blt_copy_boxes(sna, alu,
2713				       src_bo, src_dx, src_dy,
2714				       dst_bo, dst_dx, dst_dy,
2715				       dst->drawable.bitsPerPixel,
2716				       box, n))
2717			return true;
2718
2719		return sna_render_copy_boxes__overlap(sna, alu,
2720						      src, src_bo, src_dx, src_dy,
2721						      dst, dst_bo, dst_dx, dst_dy,
2722						      box, n, &extents);
2723	}
2724
2725	if (dst->drawable.depth == src->drawable.depth) {
2726		tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth);
2727		tmp.src.pict_format = tmp.dst.format;
2728	} else {
2729		tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
2730		tmp.src.pict_format = sna_format_for_depth(src->drawable.depth);
2731	}
2732	if (!gen6_check_format(tmp.src.pict_format))
2733		goto fallback_blt;
2734
2735	tmp.dst.pixmap = dst;
2736	tmp.dst.width  = dst->drawable.width;
2737	tmp.dst.height = dst->drawable.height;
2738	tmp.dst.bo = dst_bo;
2739	tmp.dst.x = tmp.dst.y = 0;
2740	tmp.damage = NULL;
2741
2742	sna_render_composite_redirect_init(&tmp);
2743	if (too_large(tmp.dst.width, tmp.dst.height)) {
2744		int i;
2745
2746		extents = box[0];
2747		for (i = 1; i < n; i++) {
2748			if (box[i].x1 < extents.x1)
2749				extents.x1 = box[i].x1;
2750			if (box[i].y1 < extents.y1)
2751				extents.y1 = box[i].y1;
2752
2753			if (box[i].x2 > extents.x2)
2754				extents.x2 = box[i].x2;
2755			if (box[i].y2 > extents.y2)
2756				extents.y2 = box[i].y2;
2757		}
2758
2759		if (!sna_render_composite_redirect(sna, &tmp,
2760						   extents.x1 + dst_dx,
2761						   extents.y1 + dst_dy,
2762						   extents.x2 - extents.x1,
2763						   extents.y2 - extents.y1,
2764						   n > 1))
2765			goto fallback_tiled;
2766
2767		dst_dx += tmp.dst.x;
2768		dst_dy += tmp.dst.y;
2769
2770		tmp.dst.x = tmp.dst.y = 0;
2771	}
2772
2773	tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format);
2774	if (too_large(src->drawable.width, src->drawable.height)) {
2775		int i;
2776
2777		extents = box[0];
2778		for (i = 1; i < n; i++) {
2779			if (box[i].x1 < extents.x1)
2780				extents.x1 = box[i].x1;
2781			if (box[i].y1 < extents.y1)
2782				extents.y1 = box[i].y1;
2783
2784			if (box[i].x2 > extents.x2)
2785				extents.x2 = box[i].x2;
2786			if (box[i].y2 > extents.y2)
2787				extents.y2 = box[i].y2;
2788		}
2789
2790		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
2791					       extents.x1 + src_dx,
2792					       extents.y1 + src_dy,
2793					       extents.x2 - extents.x1,
2794					       extents.y2 - extents.y1)) {
2795			DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__));
2796			goto fallback_tiled_dst;
2797		}
2798
2799		src_dx += tmp.src.offset[0];
2800		src_dy += tmp.src.offset[1];
2801	} else {
2802		tmp.src.bo = src_bo;
2803		tmp.src.width  = src->drawable.width;
2804		tmp.src.height = src->drawable.height;
2805	}
2806
2807	tmp.mask.bo = NULL;
2808
2809	tmp.floats_per_vertex = 2;
2810	tmp.floats_per_rect = 6;
2811	tmp.need_magic_ca_pass = 0;
2812
2813	tmp.u.gen6.flags = COPY_FLAGS(alu);
2814	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
2815	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER);
2816	assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX);
2817
2818	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
2819	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
2820		kgem_submit(&sna->kgem);
2821		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
2822			DBG(("%s: too large for a single operation\n",
2823			     __FUNCTION__));
2824			goto fallback_tiled_src;
2825		}
2826		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
2827	}
2828
2829	gen6_emit_copy_state(sna, &tmp);
2830	gen6_align_vertex(sna, &tmp);
2831
2832	do {
2833		int16_t *v;
2834		int n_this_time;
2835
2836		n_this_time = gen6_get_rectangles(sna, &tmp, n,
2837						  gen6_emit_copy_state);
2838		n -= n_this_time;
2839
2840		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
2841		sna->render.vertex_used += 6 * n_this_time;
2842		assert(sna->render.vertex_used <= sna->render.vertex_size);
2843		do {
2844
2845			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
2846			     box->x1 + src_dx, box->y1 + src_dy,
2847			     box->x1 + dst_dx, box->y1 + dst_dy,
2848			     box->x2 - box->x1, box->y2 - box->y1));
2849			v[0] = box->x2 + dst_dx;
2850			v[2] = box->x2 + src_dx;
2851			v[1]  = v[5] = box->y2 + dst_dy;
2852			v[3]  = v[7] = box->y2 + src_dy;
2853			v[8]  = v[4] = box->x1 + dst_dx;
2854			v[10] = v[6] = box->x1 + src_dx;
2855			v[9]  = box->y1 + dst_dy;
2856			v[11] = box->y1 + src_dy;
2857			v += 12; box++;
2858		} while (--n_this_time);
2859	} while (n);
2860
2861	gen4_vertex_flush(sna);
2862	sna_render_composite_redirect_done(sna, &tmp);
2863	if (tmp.src.bo != src_bo)
2864		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2865	return true;
2866
2867fallback_tiled_src:
2868	if (tmp.src.bo != src_bo)
2869		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2870fallback_tiled_dst:
2871	if (tmp.redirect.real_bo)
2872		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2873fallback_tiled:
2874	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2875	    sna_blt_copy_boxes(sna, alu,
2876			       src_bo, src_dx, src_dy,
2877			       dst_bo, dst_dx, dst_dy,
2878			       dst->drawable.bitsPerPixel,
2879			       box, n))
2880		return true;
2881
2882	return sna_tiling_copy_boxes(sna, alu,
2883				     src, src_bo, src_dx, src_dy,
2884				     dst, dst_bo, dst_dx, dst_dy,
2885				     box, n);
2886}
2887
2888static void
2889gen6_render_copy_blt(struct sna *sna,
2890		     const struct sna_copy_op *op,
2891		     int16_t sx, int16_t sy,
2892		     int16_t w,  int16_t h,
2893		     int16_t dx, int16_t dy)
2894{
2895	int16_t *v;
2896
2897	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state);
2898
2899	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
2900	sna->render.vertex_used += 6;
2901	assert(sna->render.vertex_used <= sna->render.vertex_size);
2902
2903	v[0]  = dx+w; v[1]  = dy+h;
2904	v[2]  = sx+w; v[3]  = sy+h;
2905	v[4]  = dx;   v[5]  = dy+h;
2906	v[6]  = sx;   v[7]  = sy+h;
2907	v[8]  = dx;   v[9]  = dy;
2908	v[10] = sx;   v[11] = sy;
2909}
2910
2911static void
2912gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
2913{
2914	DBG(("%s()\n", __FUNCTION__));
2915
2916	assert(!sna->render.active);
2917	if (sna->render.vertex_offset)
2918		gen4_vertex_flush(sna);
2919}
2920
2921static bool
2922gen6_render_copy(struct sna *sna, uint8_t alu,
2923		 PixmapPtr src, struct kgem_bo *src_bo,
2924		 PixmapPtr dst, struct kgem_bo *dst_bo,
2925		 struct sna_copy_op *op)
2926{
2927	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
2928	     __FUNCTION__, alu,
2929	     src->drawable.width, src->drawable.height,
2930	     dst->drawable.width, dst->drawable.height));
2931
2932	if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
2933	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2934	    sna_blt_copy(sna, alu,
2935			 src_bo, dst_bo,
2936			 dst->drawable.bitsPerPixel,
2937			 op))
2938		return true;
2939
2940	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
2941	    too_large(src->drawable.width, src->drawable.height) ||
2942	    too_large(dst->drawable.width, dst->drawable.height)) {
2943fallback:
2944		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
2945			return false;
2946
2947		return sna_blt_copy(sna, alu, src_bo, dst_bo,
2948				    dst->drawable.bitsPerPixel,
2949				    op);
2950	}
2951
2952	if (dst->drawable.depth == src->drawable.depth) {
2953		op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
2954		op->base.src.pict_format = op->base.dst.format;
2955	} else {
2956		op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
2957		op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
2958	}
2959	if (!gen6_check_format(op->base.src.pict_format))
2960		goto fallback;
2961
2962	op->base.dst.pixmap = dst;
2963	op->base.dst.width  = dst->drawable.width;
2964	op->base.dst.height = dst->drawable.height;
2965	op->base.dst.bo = dst_bo;
2966
2967	op->base.src.bo = src_bo;
2968	op->base.src.card_format =
2969		gen6_get_card_format(op->base.src.pict_format);
2970	op->base.src.width  = src->drawable.width;
2971	op->base.src.height = src->drawable.height;
2972
2973	op->base.mask.bo = NULL;
2974
2975	op->base.floats_per_vertex = 2;
2976	op->base.floats_per_rect = 6;
2977
2978	op->base.u.gen6.flags = COPY_FLAGS(alu);
2979	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
2980	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER);
2981	assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX);
2982
2983	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
2984	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
2985		kgem_submit(&sna->kgem);
2986		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
2987			goto fallback;
2988		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
2989	}
2990
2991	gen6_emit_copy_state(sna, &op->base);
2992	gen6_align_vertex(sna, &op->base);
2993
2994	op->blt  = gen6_render_copy_blt;
2995	op->done = gen6_render_copy_done;
2996	return true;
2997}
2998
2999static void
3000gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
3001{
3002	uint32_t *binding_table;
3003	uint16_t offset;
3004	bool dirty;
3005
3006	dirty = gen6_get_batch(sna, op);
3007
3008	binding_table = gen6_composite_get_binding_table(sna, &offset);
3009
3010	binding_table[0] =
3011		gen6_bind_bo(sna,
3012			     op->dst.bo, op->dst.width, op->dst.height,
3013			     gen6_get_dest_format(op->dst.format),
3014			     true);
3015	binding_table[1] =
3016		gen6_bind_bo(sna,
3017			     op->src.bo, 1, 1,
3018			     GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
3019			     false);
3020
3021	if (sna->kgem.surface == offset &&
3022	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
3023		sna->kgem.surface +=
3024			sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t);
3025		offset = sna->render_state.gen6.surface_table;
3026	}
3027
3028	gen6_emit_state(sna, op, offset | dirty);
3029}
3030
3031static inline bool prefer_blt_fill(struct sna *sna,
3032				   struct kgem_bo *bo)
3033{
3034	if (PREFER_RENDER)
3035		return PREFER_RENDER < 0;
3036
3037	if (kgem_bo_is_render(bo))
3038		return false;
3039
3040	if (untiled_tlb_miss(bo))
3041		return true;
3042
3043	if (!prefer_blt_ring(sna, bo, 0))
3044		return false;
3045
3046	return prefer_blt_bo(sna, bo);
3047}
3048
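/* Fill a set of boxes with a solid colour: try the BLT first for simple
 * ops, convert the colour to a pixel value, redirect oversized targets
 * through a proxy, then emit one rectangle per box sampling a cached
 * solid-colour bo.
 */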
3049static bool
3050gen6_render_fill_boxes(struct sna *sna,
3051		       CARD8 op,
3052		       PictFormat format,
3053		       const xRenderColor *color,
3054		       PixmapPtr dst, struct kgem_bo *dst_bo,
3055		       const BoxRec *box, int n)
3056{
3057	struct sna_composite_op tmp;
3058	uint32_t pixel;
3059
3060	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
3061	     __FUNCTION__, op,
3062	     color->red, color->green, color->blue, color->alpha, (int)format));
3063
3064	if (op >= ARRAY_SIZE(gen6_blend_op)) {
3065		DBG(("%s: fallback due to unhandled blend op: %d\n",
3066		     __FUNCTION__, op));
3067		return false;
3068	}
3069
3070	if (prefer_blt_fill(sna, dst_bo) || !gen6_check_dst_format(format)) {
3071		uint8_t alu = GXinvalid;
3072
3073		if (op <= PictOpSrc) {
3074			pixel = 0;
3075			if (op == PictOpClear)
3076				alu = GXclear;
3077			else if (sna_get_pixel_from_rgba(&pixel,
3078							 color->red,
3079							 color->green,
3080							 color->blue,
3081							 color->alpha,
3082							 format))
3083				alu = GXcopy;
3084		}
3085
3086		if (alu != GXinvalid &&
3087		    sna_blt_fill_boxes(sna, alu,
3088				       dst_bo, dst->drawable.bitsPerPixel,
3089				       pixel, box, n))
3090			return true;
3091
3092		if (!gen6_check_dst_format(format))
3093			return false;
3094	}
3095
3096	if (op == PictOpClear) {
3097		pixel = 0;
3098		op = PictOpSrc;
3099	} else if (!sna_get_pixel_from_rgba(&pixel,
3100					    color->red,
3101					    color->green,
3102					    color->blue,
3103					    color->alpha,
3104					    PICT_a8r8g8b8))
3105		return false;
3106
3107	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
3108	     __FUNCTION__, pixel, n,
3109	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));
3110
3111	tmp.dst.pixmap = dst;
3112	tmp.dst.width  = dst->drawable.width;
3113	tmp.dst.height = dst->drawable.height;
3114	tmp.dst.format = format;
3115	tmp.dst.bo = dst_bo;
3116	tmp.dst.x = tmp.dst.y = 0;
3117	tmp.damage = NULL;
3118
3119	sna_render_composite_redirect_init(&tmp);
3120	if (too_large(dst->drawable.width, dst->drawable.height)) {
3121		BoxRec extents;
3122
3123		boxes_extents(box, n, &extents);
3124		if (!sna_render_composite_redirect(sna, &tmp,
3125						   extents.x1, extents.y1,
3126						   extents.x2 - extents.x1,
3127						   extents.y2 - extents.y1,
3128						   n > 1))
3129			return sna_tiling_fill_boxes(sna, op, format, color,
3130						     dst, dst_bo, box, n);
3131	}
3132
3133	tmp.src.bo = sna_render_get_solid(sna, pixel);
3134	tmp.mask.bo = NULL;
3135
3136	tmp.floats_per_vertex = 2;
3137	tmp.floats_per_rect = 6;
3138	tmp.need_magic_ca_pass = false;
3139
3140	tmp.u.gen6.flags = FILL_FLAGS(op, format);
3141	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
3142	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
3143	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
3144
3145	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
3146		kgem_submit(&sna->kgem);
3147		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
3148	}
3149
3150	gen6_emit_fill_state(sna, &tmp);
3151	gen6_align_vertex(sna, &tmp);
3152
3153	do {
3154		int n_this_time;
3155		int16_t *v;
3156
3157		n_this_time = gen6_get_rectangles(sna, &tmp, n,
3158						  gen6_emit_fill_state);
3159		n -= n_this_time;
3160
3161		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
3162		sna->render.vertex_used += 6 * n_this_time;
3163		assert(sna->render.vertex_used <= sna->render.vertex_size);
3164		do {
3165			DBG(("	(%d, %d), (%d, %d)\n",
3166			     box->x1, box->y1, box->x2, box->y2));
3167
3168			v[0] = box->x2;
3169			v[5] = v[1] = box->y2;
3170			v[8] = v[4] = box->x1;
3171			v[9] = box->y1;
3172			v[2] = v[3]  = v[7]  = 1;
3173			v[6] = v[10] = v[11] = 0;
3174			v += 12; box++;
3175		} while (--n_this_time);
3176	} while (n);
3177
3178	gen4_vertex_flush(sna);
3179	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3180	sna_render_composite_redirect_done(sna, &tmp);
3181	return true;
3182}
3183
3184static void
3185gen6_render_op_fill_blt(struct sna *sna,
3186			const struct sna_fill_op *op,
3187			int16_t x, int16_t y, int16_t w, int16_t h)
3188{
3189	int16_t *v;
3190
3191	DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
3192
3193	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);
3194
3195	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3196	sna->render.vertex_used += 6;
3197	assert(sna->render.vertex_used <= sna->render.vertex_size);
3198
3199	v[0] = x+w;
3200	v[4] = v[8] = x;
3201	v[1] = v[5] = y+h;
3202	v[9] = y;
3203
3204	v[2] = v[3]  = v[7]  = 1;
3205	v[6] = v[10] = v[11] = 0;
3206}
3207
3208fastcall static void
3209gen6_render_op_fill_box(struct sna *sna,
3210			const struct sna_fill_op *op,
3211			const BoxRec *box)
3212{
3213	int16_t *v;
3214
3215	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
3216	     box->x1, box->y1, box->x2, box->y2));
3217
3218	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);
3219
3220	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3221	sna->render.vertex_used += 6;
3222	assert(sna->render.vertex_used <= sna->render.vertex_size);
3223
3224	v[0] = box->x2;
3225	v[8] = v[4] = box->x1;
3226	v[5] = v[1] = box->y2;
3227	v[9] = box->y1;
3228
3229	v[7] = v[2]  = v[3]  = 1;
3230	v[6] = v[10] = v[11] = 0;
3231}
3232
3233fastcall static void
3234gen6_render_op_fill_boxes(struct sna *sna,
3235			  const struct sna_fill_op *op,
3236			  const BoxRec *box,
3237			  int nbox)
3238{
3239	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
3240	     box->x1, box->y1, box->x2, box->y2, nbox));
3241
3242	do {
3243		int nbox_this_time;
3244		int16_t *v;
3245
3246		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
3247						     gen6_emit_fill_state);
3248		nbox -= nbox_this_time;
3249
3250		v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3251		sna->render.vertex_used += 6 * nbox_this_time;
3252		assert(sna->render.vertex_used <= sna->render.vertex_size);
3253
3254		do {
3255			v[0] = box->x2;
3256			v[8] = v[4] = box->x1;
3257			v[5] = v[1] = box->y2;
3258			v[9] = box->y1;
3259			v[7] = v[2]  = v[3]  = 1;
3260			v[6] = v[10] = v[11] = 0;
3261			box++; v += 12;
3262		} while (--nbox_this_time);
3263	} while (nbox);
3264}
3265
3266static void
3267gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op)
3268{
3269	DBG(("%s()\n", __FUNCTION__));
3270
3271	assert(!sna->render.active);
3272	if (sna->render.vertex_offset)
3273		gen4_vertex_flush(sna);
3274	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
3275}
3276
3277static bool
3278gen6_render_fill(struct sna *sna, uint8_t alu,
3279		 PixmapPtr dst, struct kgem_bo *dst_bo,
3280		 uint32_t color,
3281		 struct sna_fill_op *op)
3282{
3283	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));
3284
3285	if (prefer_blt_fill(sna, dst_bo) &&
3286	    sna_blt_fill(sna, alu,
3287			 dst_bo, dst->drawable.bitsPerPixel,
3288			 color,
3289			 op))
3290		return true;
3291
3292	if (!(alu == GXcopy || alu == GXclear) ||
3293	    too_large(dst->drawable.width, dst->drawable.height))
3294		return sna_blt_fill(sna, alu,
3295				    dst_bo, dst->drawable.bitsPerPixel,
3296				    color,
3297				    op);
3298
3299	if (alu == GXclear)
3300		color = 0;
3301
3302	op->base.dst.pixmap = dst;
3303	op->base.dst.width  = dst->drawable.width;
3304	op->base.dst.height = dst->drawable.height;
3305	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
3306	op->base.dst.bo = dst_bo;
3307	op->base.dst.x = op->base.dst.y = 0;
3308
3309	op->base.src.bo =
3310		sna_render_get_solid(sna,
3311				     sna_rgba_for_color(color,
3312							dst->drawable.depth));
3313	op->base.mask.bo = NULL;
3314
3315	op->base.need_magic_ca_pass = false;
3316	op->base.floats_per_vertex = 2;
3317	op->base.floats_per_rect = 6;
3318
3319	op->base.u.gen6.flags = FILL_FLAGS_NOBLEND;
3320	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
3321	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER);
3322	assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX);
3323
3324	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
3325		kgem_submit(&sna->kgem);
3326		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
3327	}
3328
3329	gen6_emit_fill_state(sna, &op->base);
3330	gen6_align_vertex(sna, &op->base);
3331
3332	op->blt  = gen6_render_op_fill_blt;
3333	op->box  = gen6_render_op_fill_box;
3334	op->boxes = gen6_render_op_fill_boxes;
3335	op->done = gen6_render_op_fill_done;
3336	return true;
3337}
3338
3339static bool
3340gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3341			     uint32_t color,
3342			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
3343			     uint8_t alu)
3344{
3345	BoxRec box;
3346
3347	box.x1 = x1;
3348	box.y1 = y1;
3349	box.x2 = x2;
3350	box.y2 = y2;
3351
3352	return sna_blt_fill_boxes(sna, alu,
3353				  bo, dst->drawable.bitsPerPixel,
3354				  color, &box, 1);
3355}
3356
3357static bool
3358gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3359		     uint32_t color,
3360		     int16_t x1, int16_t y1,
3361		     int16_t x2, int16_t y2,
3362		     uint8_t alu)
3363{
3364	struct sna_composite_op tmp;
3365	int16_t *v;
3366
3367	/* Prefer to use the BLT if already engaged */
3368	if (prefer_blt_fill(sna, bo) &&
3369	    gen6_render_fill_one_try_blt(sna, dst, bo, color,
3370					 x1, y1, x2, y2, alu))
3371		return true;
3372
3373	/* Must use the BLT if we can't RENDER... */
3374	if (!(alu == GXcopy || alu == GXclear) ||
3375	    too_large(dst->drawable.width, dst->drawable.height))
3376		return gen6_render_fill_one_try_blt(sna, dst, bo, color,
3377						    x1, y1, x2, y2, alu);
3378
3379	if (alu == GXclear)
3380		color = 0;
3381
3382	tmp.dst.pixmap = dst;
3383	tmp.dst.width  = dst->drawable.width;
3384	tmp.dst.height = dst->drawable.height;
3385	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
3386	tmp.dst.bo = bo;
3387	tmp.dst.x = tmp.dst.y = 0;
3388
3389	tmp.src.bo =
3390		sna_render_get_solid(sna,
3391				     sna_rgba_for_color(color,
3392							dst->drawable.depth));
3393	tmp.mask.bo = NULL;
3394
3395	tmp.floats_per_vertex = 2;
3396	tmp.floats_per_rect = 6;
3397	tmp.need_magic_ca_pass = false;
3398
3399	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
3400	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
3401	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
3402	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
3403
3404	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3405		kgem_submit(&sna->kgem);
3406		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3407			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3408			return false;
3409		}
3410	}
3411
3412	gen6_emit_fill_state(sna, &tmp);
3413	gen6_align_vertex(sna, &tmp);
3414
3415	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);
3416
3417	DBG(("	(%d, %d), (%d, %d)\n", x1, y1, x2, y2));
3418
3419	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3420	sna->render.vertex_used += 6;
3421	assert(sna->render.vertex_used <= sna->render.vertex_size);
3422
3423	v[0] = x2;
3424	v[8] = v[4] = x1;
3425	v[5] = v[1] = y2;
3426	v[9] = y1;
3427	v[7] = v[2]  = v[3]  = 1;
3428	v[6] = v[10] = v[11] = 0;
3429
3430	gen4_vertex_flush(sna);
3431	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3432
3433	return true;
3434}
3435
3436static bool
3437gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
3438{
3439	BoxRec box;
3440
3441	box.x1 = 0;
3442	box.y1 = 0;
3443	box.x2 = dst->drawable.width;
3444	box.y2 = dst->drawable.height;
3445
3446	return sna_blt_fill_boxes(sna, GXclear,
3447				  bo, dst->drawable.bitsPerPixel,
3448				  0, &box, 1);
3449}
3450
3451static bool
3452gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
3453{
3454	struct sna_composite_op tmp;
3455	int16_t *v;
3456
3457	DBG(("%s: %dx%d\n",
3458	     __FUNCTION__,
3459	     dst->drawable.width,
3460	     dst->drawable.height));
3461
3462	/* Prefer to use the BLT if, and only if, already engaged */
3463	if (sna->kgem.ring == KGEM_BLT &&
3464	    gen6_render_clear_try_blt(sna, dst, bo))
3465		return true;
3466
3467	/* Must use the BLT if we can't RENDER... */
3468	if (too_large(dst->drawable.width, dst->drawable.height))
3469		return gen6_render_clear_try_blt(sna, dst, bo);
3470
3471	tmp.dst.pixmap = dst;
3472	tmp.dst.width  = dst->drawable.width;
3473	tmp.dst.height = dst->drawable.height;
3474	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
3475	tmp.dst.bo = bo;
3476	tmp.dst.x = tmp.dst.y = 0;
3477
3478	tmp.src.bo = sna_render_get_solid(sna, 0);
3479	tmp.mask.bo = NULL;
3480
3481	tmp.floats_per_vertex = 2;
3482	tmp.floats_per_rect = 6;
3483	tmp.need_magic_ca_pass = false;
3484
3485	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
3486	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
3487	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
3488	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
3489
3490	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3491		kgem_submit(&sna->kgem);
3492		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3493			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3494			return false;
3495		}
3496	}
3497
3498	gen6_emit_fill_state(sna, &tmp);
3499	gen6_align_vertex(sna, &tmp);
3500
3501	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);
3502
3503	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3504	sna->render.vertex_used += 6;
3505	assert(sna->render.vertex_used <= sna->render.vertex_size);
3506
3507	v[0] = dst->drawable.width;
3508	v[5] = v[1] = dst->drawable.height;
3509	v[8] = v[4] = 0;
3510	v[9] = 0;
3511
3512	v[7] = v[2]  = v[3]  = 1;
3513	v[6] = v[10] = v[11] = 0;
3514
3515	gen4_vertex_flush(sna);
3516	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3517
3518	return true;
3519}
3520
3521static void gen6_render_flush(struct sna *sna)
3522{
3523	gen4_vertex_close(sna);
3524
3525	assert(sna->render.vb_id == 0);
3526	assert(sna->render.vertex_offset == 0);
3527}
3528
3529static void
3530gen6_render_context_switch(struct kgem *kgem,
3531			   int new_mode)
3532{
3533	if (kgem->nbatch) {
3534		DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode));
3535		_kgem_submit(kgem);
3536	}
3537
3538	kgem->ring = new_mode;
3539}
3540
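/* After request retirement: reset the active ring to the current mode
 * when possible, and rewind the vertex buffer once it is idle so its
 * space can be reused.
 */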
3541static void
3542gen6_render_retire(struct kgem *kgem)
3543{
3544	struct sna *sna;
3545
3546	if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
3547		kgem->ring = kgem->mode;
3548
3549	sna = container_of(kgem, struct sna, kgem);
3550	if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
3551		DBG(("%s: resetting idle vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
3552		sna->render.vertex_used = 0;
3553		sna->render.vertex_index = 0;
3554	}
3555}
3556
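/* When kgem expires its caches, discard an unused vertex bo and fall
 * back to the embedded vertex_data array.
 */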
3557static void
3558gen6_render_expire(struct kgem *kgem)
3559{
3560	struct sna *sna;
3561
3562	sna = container_of(kgem, struct sna, kgem);
3563	if (sna->render.vbo && !sna->render.vertex_used) {
3564		DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
3565		kgem_bo_destroy(kgem, sna->render.vbo);
3566		assert(!sna->render.active);
3567		sna->render.vbo = NULL;
3568		sna->render.vertices = sna->render.vertex_data;
3569		sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
3570		sna->render.vertex_used = 0;
3571		sna->render.vertex_index = 0;
3572	}
3573}
3574
3575static void gen6_render_reset(struct sna *sna)
3576{
3577	sna->render_state.gen6.needs_invariant = true;
3578	sna->render_state.gen6.first_state_packet = true;
3579	sna->render_state.gen6.ve_id = 3 << 2;
3580	sna->render_state.gen6.last_primitive = -1;
3581
3582	sna->render_state.gen6.num_sf_outputs = 0;
3583	sna->render_state.gen6.samplers = -1;
3584	sna->render_state.gen6.blend = -1;
3585	sna->render_state.gen6.kernel = -1;
3586	sna->render_state.gen6.drawrect_offset = -1;
3587	sna->render_state.gen6.drawrect_limit = -1;
3588	sna->render_state.gen6.surface_table = -1;
3589
3590	sna->render.vertex_offset = 0;
3591	sna->render.nvertex_reloc = 0;
3592	sna->render.vb_id = 0;
3593}
3594
3595static void gen6_render_fini(struct sna *sna)
3596{
3597	kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo);
3598}
3599
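/* Crude GT1/GT2 detection: Sandybridge GT2 parts set bit 4 or 5 of the
 * PCI device id, GT1 parts leave both clear.
 */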
3600static bool is_gt2(struct sna *sna)
3601{
3602	return sna->PciInfo->device_id & 0x30;
3603}
3604
3605static bool is_mobile(struct sna *sna)
3606{
3607	return (sna->PciInfo->device_id & 0xf) == 0x6;
3608}
3609
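/* One-time setup: pick the GT1/GT2 limits, upload (or compile) the WM
 * kernels at the enabled dispatch widths, build the sampler state table
 * for every filter/extend combination plus the blend state, all packed
 * into a single static bo.
 */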
3610static bool gen6_render_setup(struct sna *sna)
3611{
3612	struct gen6_render_state *state = &sna->render_state.gen6;
3613	struct sna_static_stream general;
3614	struct gen6_sampler_state *ss;
3615	int i, j, k, l, m;
3616
3617	state->info = &gt1_info;
3618	if (is_gt2(sna))
3619		state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */
3620
3621	sna_static_stream_init(&general);
3622
3623	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
3624	 * dumps, you know it points to zero.
3625	 */
3626	null_create(&general);
3627	scratch_create(&general);
3628
3629	for (m = 0; m < GEN6_KERNEL_COUNT; m++) {
3630		if (wm_kernels[m].size) {
3631			state->wm_kernel[m][1] =
3632				sna_static_stream_add(&general,
3633						      wm_kernels[m].data,
3634						      wm_kernels[m].size,
3635						      64);
3636		} else {
3637			if (USE_8_PIXEL_DISPATCH) {
3638				state->wm_kernel[m][0] =
3639					sna_static_stream_compile_wm(sna, &general,
3640								     wm_kernels[m].data, 8);
3641			}
3642
3643			if (USE_16_PIXEL_DISPATCH) {
3644				state->wm_kernel[m][1] =
3645					sna_static_stream_compile_wm(sna, &general,
3646								     wm_kernels[m].data, 16);
3647			}
3648
3649			if (USE_32_PIXEL_DISPATCH) {
3650				state->wm_kernel[m][2] =
3651					sna_static_stream_compile_wm(sna, &general,
3652								     wm_kernels[m].data, 32);
3653			}
3654		}
3655		if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) {
3656			state->wm_kernel[m][1] =
3657				sna_static_stream_compile_wm(sna, &general,
3658							     wm_kernels[m].data, 16);
3659		}
3660	}
3661
3662	ss = sna_static_stream_map(&general,
3663				   2 * sizeof(*ss) *
3664				   (2 +
3665				    FILTER_COUNT * EXTEND_COUNT *
3666				    FILTER_COUNT * EXTEND_COUNT),
3667				   32);
3668	state->wm_state = sna_static_stream_offsetof(&general, ss);
3669	sampler_copy_init(ss); ss += 2;
3670	sampler_fill_init(ss); ss += 2;
3671	for (i = 0; i < FILTER_COUNT; i++) {
3672		for (j = 0; j < EXTEND_COUNT; j++) {
3673			for (k = 0; k < FILTER_COUNT; k++) {
3674				for (l = 0; l < EXTEND_COUNT; l++) {
3675					sampler_state_init(ss++, i, j);
3676					sampler_state_init(ss++, k, l);
3677				}
3678			}
3679		}
3680	}
3681
3682	state->cc_blend = gen6_composite_create_blend_state(&general);
3683
3684	state->general_bo = sna_static_stream_fini(sna, &general);
3685	return state->general_bo != NULL;
3686}
3687
3688const char *gen6_render_init(struct sna *sna, const char *backend)
3689{
3690	if (!gen6_render_setup(sna))
3691		return backend;
3692
3693	sna->kgem.context_switch = gen6_render_context_switch;
3694	sna->kgem.retire = gen6_render_retire;
3695	sna->kgem.expire = gen6_render_expire;
3696
3697#if !NO_COMPOSITE
3698	sna->render.composite = gen6_render_composite;
3699	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
3700#endif
3701
3702#if !NO_COMPOSITE_SPANS
3703	sna->render.check_composite_spans = gen6_check_composite_spans;
3704	sna->render.composite_spans = gen6_render_composite_spans;
3705	if (is_mobile(sna))
3706		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
3707#endif
3708	sna->render.video = gen6_render_video;
3709
3710#if !NO_COPY_BOXES
3711	sna->render.copy_boxes = gen6_render_copy_boxes;
3712#endif
3713#if !NO_COPY
3714	sna->render.copy = gen6_render_copy;
3715#endif
3716
3717#if !NO_FILL_BOXES
3718	sna->render.fill_boxes = gen6_render_fill_boxes;
3719#endif
3720#if !NO_FILL
3721	sna->render.fill = gen6_render_fill;
3722#endif
3723#if !NO_FILL_ONE
3724	sna->render.fill_one = gen6_render_fill_one;
3725#endif
3726#if !NO_FILL_CLEAR
3727	sna->render.clear = gen6_render_clear;
3728#endif
3729
3730	sna->render.flush = gen6_render_flush;
3731	sna->render.reset = gen6_render_reset;
3732	sna->render.fini = gen6_render_fini;
3733
3734	sna->render.max_3d_size = GEN6_MAX_SIZE;
3735	sna->render.max_3d_pitch = 1 << 18;
3736	return sna->render_state.gen6.info->name;
3737}
3738