/* gen6_render.c revision 42542f5f */
1/*
2 * Copyright © 2006,2008,2011 Intel Corporation
3 * Copyright © 2007 Red Hat, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 *    Wang Zhenyu <zhenyu.z.wang@sna.com>
26 *    Eric Anholt <eric@anholt.net>
27 *    Carl Worth <cworth@redhat.com>
28 *    Keith Packard <keithp@keithp.com>
29 *    Chris Wilson <chris@chris-wilson.co.uk>
30 *
31 */
32
33#ifdef HAVE_CONFIG_H
34#include "config.h"
35#endif
36
37#include "sna.h"
38#include "sna_reg.h"
39#include "sna_render.h"
40#include "sna_render_inline.h"
41#include "sna_video.h"
42
43#include "brw/brw.h"
44#include "gen6_render.h"
45#include "gen6_common.h"
46#include "gen4_common.h"
47#include "gen4_source.h"
48#include "gen4_vertex.h"
49
50#define NO_COMPOSITE 0
51#define NO_COMPOSITE_SPANS 0
52#define NO_COPY 0
53#define NO_COPY_BOXES 0
54#define NO_FILL 0
55#define NO_FILL_BOXES 0
56#define NO_FILL_ONE 0
57#define NO_FILL_CLEAR 0
58
59#define USE_8_PIXEL_DISPATCH 1
60#define USE_16_PIXEL_DISPATCH 1
61#define USE_32_PIXEL_DISPATCH 0
62
63#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
64#error "Must select at least 8, 16 or 32 pixel dispatch"
65#endif
66
67#define GEN6_MAX_SIZE 8192
68
/* Per-GT capability limits, used when programming the URB allocation and
 * maximum thread counts for each fixed-function stage. */
struct gt_info {
	const char *name;	/* human-readable chipset name for logging */
	int max_vs_threads;
	int max_gs_threads;
	int max_wm_threads;
	struct {
		int size;		/* URB entry allocation size */
		int max_vs_entries;
		int max_gs_entries;
	} urb;
	int gt;			/* GT level: 1 or 2 */
};
81
/* Thread/URB limits for the smaller Sandybridge GT1 configuration. */
static const struct gt_info gt1_info = {
	.name = "Sandybridge (gen6, gt1)",
	.max_vs_threads = 24,
	.max_gs_threads = 21,
	.max_wm_threads = 40,
	.urb = { 32, 256, 256 },
	.gt = 1,
};
90
/* Thread/URB limits for the larger Sandybridge GT2 configuration. */
static const struct gt_info gt2_info = {
	.name = "Sandybridge (gen6, gt2)",
	.max_vs_threads = 60,
	.max_gs_threads = 60,
	.max_wm_threads = 80,
	.urb = { 64, 256, 256 },
	.gt = 2,
};
99
/* Pre-assembled pixel-shader kernel for packed-format video: sample the
 * source, then convert YUV to RGB before the framebuffer write.  The .g6b
 * files are assembled gen6 EU instructions, 4 dwords each. */
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_yuv_rgb.g6b"
#include "exa_wm_write.g6b"
};
106
/* Pre-assembled pixel-shader kernel for planar video: sample the separate
 * planes, then convert YUV to RGB before the framebuffer write. */
static const uint32_t ps_kernel_planar[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_planar.g6b"
#include "exa_wm_yuv_rgb.g6b"
#include "exa_wm_write.g6b"
};
113
/* NOKERNEL entries store a brw_wm_kernel__* builder with size 0 (kernel is
 * assembled at runtime); KERNEL entries point at a pre-assembled .g6b blob. */
#define NOKERNEL(kernel_enum, func, ns) \
    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns}
#define KERNEL(kernel_enum, kernel, ns) \
    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns}

/* Pixel-shader kernel table, indexed by the GEN6_WM_KERNEL_* enum. */
static const struct wm_kernel_info {
	const char *name;	/* for debug output only */
	const void *data;	/* blob, or builder function when size == 0 */
	unsigned int size;	/* 0 => assemble at runtime */
	unsigned int num_surfaces;	/* binding-table entries consumed */
} wm_kernels[] = {
	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),

	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),

	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),

	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),

	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),

	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
};
#undef KERNEL
144
/* Hardware blend factors for each Render PictOp, indexed by op.
 * src_alpha records whether the op consumes source alpha; it drives the
 * component-alpha fixups in gen6_get_blend() and kernel selection in
 * gen6_choose_composite_kernel(). */
static const struct blendinfo {
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen6_blend_op[] = {
	/* Clear */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO},
	/* Src */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO},
	/* Dst */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE},
	/* Over */	{1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE},
	/* In */	{0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
	/* InReverse */	{1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA},
	/* Out */	{0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
	/* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	{1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA},
	/* Xor */	{1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE},
};
164
165/**
166 * Highest-valued BLENDFACTOR used in gen6_blend_op.
167 *
168 * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR,
169 * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
170 * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
171 */
172#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1)
173
174#define GEN6_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen6_blend_state), 64)
175
176#define BLEND_OFFSET(s, d) \
177	(((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE)
178
179#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO)
180#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO)
181
182#define SAMPLER_OFFSET(sf, se, mf, me) \
183	(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state))
184
185#define VERTEX_2s2s 0
186
187#define COPY_SAMPLER 0
188#define COPY_VERTEX VERTEX_2s2s
189#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX)
190
191#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state))
192#define FILL_VERTEX VERTEX_2s2s
193#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
194#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
195
196#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0)
197#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0)
198#define GEN6_KERNEL(f) (((f) >> 16) & 0xf)
199#define GEN6_VERTEX(f) (((f) >> 0) & 0xf)
200#define GEN6_SET_FLAGS(S, B, K, V)  (((S) | (K)) << 16 | ((B) | (V)))
201
202#define OUT_BATCH(v) batch_emit(sna, v)
203#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
204#define OUT_VERTEX_F(v) vertex_emit(sna, v)
205
206static inline bool too_large(int width, int height)
207{
208	return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE;
209}
210
/* Return the byte offset (BLEND_OFFSET) of the pre-baked blend state for
 * the given Render op against dst_format, after adjusting the factors for
 * missing destination alpha and for component-alpha compositing. */
static uint32_t gen6_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src, dst;

	src = gen6_blend_op[op].src_blend;
	dst = gen6_blend_op[op].dst_blend;

	/* If there's no dst alpha channel, adjust the blend op so that
	 * we'll treat it always as 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == GEN6_BLENDFACTOR_DST_ALPHA)
			src = GEN6_BLENDFACTOR_ONE;
		else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA)
			src = GEN6_BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen6_blend_op[op].src_alpha) {
		if (dst == GEN6_BLENDFACTOR_SRC_ALPHA)
			dst = GEN6_BLENDFACTOR_SRC_COLOR;
		else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA)
			dst = GEN6_BLENDFACTOR_INV_SRC_COLOR;
	}

	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, (int)BLEND_OFFSET(src, dst)));
	return BLEND_OFFSET(src, dst);
}
246
247static uint32_t gen6_get_card_format(PictFormat format)
248{
249	switch (format) {
250	default:
251		return -1;
252	case PICT_a8r8g8b8:
253		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
254	case PICT_x8r8g8b8:
255		return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
256	case PICT_a8b8g8r8:
257		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
258	case PICT_x8b8g8r8:
259		return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM;
260#ifdef PICT_a2r10g10b10
261	case PICT_a2r10g10b10:
262		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
263	case PICT_x2r10g10b10:
264		return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM;
265#endif
266	case PICT_r8g8b8:
267		return GEN6_SURFACEFORMAT_R8G8B8_UNORM;
268	case PICT_r5g6b5:
269		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
270	case PICT_a1r5g5b5:
271		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
272	case PICT_a8:
273		return GEN6_SURFACEFORMAT_A8_UNORM;
274	case PICT_a4r4g4b4:
275		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
276	}
277}
278
279static uint32_t gen6_get_dest_format(PictFormat format)
280{
281	switch (format) {
282	default:
283		return -1;
284	case PICT_a8r8g8b8:
285	case PICT_x8r8g8b8:
286		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
287	case PICT_a8b8g8r8:
288	case PICT_x8b8g8r8:
289		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
290#ifdef PICT_a2r10g10b10
291	case PICT_a2r10g10b10:
292	case PICT_x2r10g10b10:
293		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
294#endif
295	case PICT_r5g6b5:
296		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
297	case PICT_x1r5g5b5:
298	case PICT_a1r5g5b5:
299		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
300	case PICT_a8:
301		return GEN6_SURFACEFORMAT_A8_UNORM;
302	case PICT_a4r4g4b4:
303	case PICT_x4r4g4b4:
304		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
305	}
306}
307
308static bool gen6_check_dst_format(PictFormat format)
309{
310	if (gen6_get_dest_format(format) != -1)
311		return true;
312
313	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
314	return false;
315}
316
/* True when the format can be sampled as a source on gen6. */
static bool gen6_check_format(uint32_t format)
{
	bool supported = gen6_get_card_format(format) != -1;

	if (!supported)
		DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));

	return supported;
}
325
326static uint32_t gen6_filter(uint32_t filter)
327{
328	switch (filter) {
329	default:
330		assert(0);
331	case PictFilterNearest:
332		return SAMPLER_FILTER_NEAREST;
333	case PictFilterBilinear:
334		return SAMPLER_FILTER_BILINEAR;
335	}
336}
337
338static uint32_t gen6_check_filter(PicturePtr picture)
339{
340	switch (picture->filter) {
341	case PictFilterNearest:
342	case PictFilterBilinear:
343		return true;
344	default:
345		return false;
346	}
347}
348
349static uint32_t gen6_repeat(uint32_t repeat)
350{
351	switch (repeat) {
352	default:
353		assert(0);
354	case RepeatNone:
355		return SAMPLER_EXTEND_NONE;
356	case RepeatNormal:
357		return SAMPLER_EXTEND_REPEAT;
358	case RepeatPad:
359		return SAMPLER_EXTEND_PAD;
360	case RepeatReflect:
361		return SAMPLER_EXTEND_REFLECT;
362	}
363}
364
365static bool gen6_check_repeat(PicturePtr picture)
366{
367	if (!picture->repeat)
368		return true;
369
370	switch (picture->repeatType) {
371	case RepeatNone:
372	case RepeatNormal:
373	case RepeatPad:
374	case RepeatReflect:
375		return true;
376	default:
377		return false;
378	}
379}
380
381static int
382gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
383{
384	int base;
385
386	if (has_mask) {
387		if (is_ca) {
388			if (gen6_blend_op[op].src_alpha)
389				base = GEN6_WM_KERNEL_MASKSA;
390			else
391				base = GEN6_WM_KERNEL_MASKCA;
392		} else
393			base = GEN6_WM_KERNEL_MASK;
394	} else
395		base = GEN6_WM_KERNEL_NOMASK;
396
397	return base + !is_affine;
398}
399
/* Program 3DSTATE_URB: give all URB entries to the VS (size 1) and none
 * to the GS, since we only ever run a pass-through vertex pipeline. */
static void
gen6_emit_urb(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
		  (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
		  (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
409
/* Emit STATE_BASE_ADDRESS: surface state lives in the batch itself (NULL
 * reloc target), dynamic and instruction state live in the static
 * general_bo written at initialisation. */
static void
gen6_emit_state_base_address(struct sna *sna)
{
	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
	OUT_BATCH(0); /* general */
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
				 sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* indirect */
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic state */
				 sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));

	/* upper bounds, disable */
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
}
438
/* Point all viewport state at offset 0 (only the CC viewport modify bit is
 * set); we never use clip/SF viewport transforms. */
static void
gen6_emit_viewports(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
		  GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
		  (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
449
/* Disable the VS: no constant buffer and no kernel, so vertices pass
 * through unmodified. */
static void
gen6_emit_vs(struct sna *sna)
{
	/* disable VS constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
	OUT_BATCH(0); /* no VS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
}
467
/* Disable the GS: no constant buffer and no kernel. */
static void
gen6_emit_gs(struct sna *sna)
{
	/* disable GS constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
	OUT_BATCH(0); /* no GS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
}
486
/* Disable the clipper; 2D render operations are already clipped to the
 * drawing rectangle. */
static void
gen6_emit_clip(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);
}
495
/* Disable the pixel-shader push-constant buffer; the kernels take all
 * their inputs from the payload and sampled surfaces. */
static void
gen6_emit_wm_constants(struct sna *sna)
{
	/* disable WM constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
506
/* Bind a null depth buffer and clear the depth clear-params; depth/stencil
 * testing is never used for 2D rendering. */
static void
gen6_emit_null_depth_buffer(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2));
	OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
		  GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2));
	OUT_BATCH(0);
}
522
/* Emit the once-per-batch invariant pipeline setup: select the 3D pipe,
 * single-sample multisampling, and disable every stage we do not use.
 * Clears the needs_invariant flag when done. */
static void
gen6_emit_invariant(struct sna *sna)
{
	OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
		  GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
	OUT_BATCH(1);

	gen6_emit_urb(sna);

	gen6_emit_state_base_address(sna);

	gen6_emit_viewports(sna);
	gen6_emit_vs(sna);
	gen6_emit_gs(sna);
	gen6_emit_clip(sna);
	gen6_emit_wm_constants(sna);
	gen6_emit_null_depth_buffer(sna);

	sna->render_state.gen6.needs_invariant = false;
}
549
/* Point the CC unit at the pre-baked blend state for the given offset.
 * Returns true iff blending is enabled (offset differs from NO_BLEND).
 * Skips the emit entirely when the state is unchanged. */
static bool
gen6_emit_cc(struct sna *sna, int blend)
{
	struct gen6_render_state *render = &sna->render_state.gen6;

	if (render->blend == blend)
		return blend != NO_BLEND;

	DBG(("%s: blend = %x\n", __FUNCTION__, blend));

	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
	OUT_BATCH((render->cc_blend + blend) | 1);
	if (render->blend == (unsigned)-1) {
		/* First emit since invalidation: also set the other two
		 * state pointers (modify bit with zero offset). */
		OUT_BATCH(1);
		OUT_BATCH(1);
	} else {
		OUT_BATCH(0);
		OUT_BATCH(0);
	}

	render->blend = blend;
	return blend != NO_BLEND;
}
573
/* Point the PS sampler state at the pre-baked pair for this operation;
 * no-op when the sampler state is unchanged. */
static void
gen6_emit_sampler(struct sna *sna, uint32_t state)
{
	if (sna->render_state.gen6.samplers == state)
		return;

	sna->render_state.gen6.samplers = state;

	DBG(("%s: sampler = %x\n", __FUNCTION__, state));

	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
		  GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
		  (4 - 2));
	OUT_BATCH(0); /* VS */
	OUT_BATCH(0); /* GS */
	OUT_BATCH(sna->render_state.gen6.wm_state + state);
}
591
/* Program 3DSTATE_SF for one (source only) or two (source + mask) sets of
 * texture coordinates passed from the VUE to the WM; no-op when the output
 * count is unchanged. */
static void
gen6_emit_sf(struct sna *sna, bool has_mask)
{
	int num_sf_outputs = has_mask ? 2 : 1;

	if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs)
		return;

	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
	     __FUNCTION__, num_sf_outputs, 1, 0));

	sna->render_state.gen6.num_sf_outputs = num_sf_outputs;

	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
	OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT |
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT |
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW9 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW14 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW19 */
}
628
/* Program 3DSTATE_WM for the chosen kernel.  wm_kernel[kernel] holds up to
 * three kernel-offset entries (8-, 16- and 32-pixel dispatch); a zero entry
 * means that dispatch width is not available.  No-op if unchanged. */
static void
gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask)
{
	const uint32_t *kernels;

	if (sna->render_state.gen6.kernel == kernel)
		return;

	sna->render_state.gen6.kernel = kernel;
	kernels = sna->render_state.gen6.wm_kernel[kernel];

	DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n",
	     __FUNCTION__,
	     wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces,
	    kernels[0], kernels[1], kernels[2]));

	OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
	/* Kernel start pointer 0: first available dispatch variant */
	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
	OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
		  wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
	OUT_BATCH(0); /* scratch space */
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT |
		  8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT |
		  6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT);
	OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
		  (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) |
		  (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) |
		  (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) |
		  GEN6_3DSTATE_WM_DISPATCH_ENABLE);
	OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
		  GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
	OUT_BATCH(kernels[2]);
	OUT_BATCH(kernels[1]);
}
663
/* Point the PS binding table at the surface-state entries written into the
 * batch.  Returns true if a new pointer was emitted (caller may need to
 * stall), false when it was already current. */
static bool
gen6_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen6.surface_table == offset)
		return false;

	/* Binding table pointers */
	OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS |
		  GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
		  (4 - 2));
	OUT_BATCH(0);		/* vs */
	OUT_BATCH(0);		/* gs */
	/* Only the PS uses the binding table */
	OUT_BATCH(offset*4);

	sna->render_state.gen6.surface_table = offset;
	return true;
}
682
683static bool
684gen6_emit_drawing_rectangle(struct sna *sna,
685			    const struct sna_composite_op *op)
686{
687	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
688	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
689
690	assert(!too_large(op->dst.x, op->dst.y));
691	assert(!too_large(op->dst.width, op->dst.height));
692
693	if (sna->render_state.gen6.drawrect_limit  == limit &&
694	    sna->render_state.gen6.drawrect_offset == offset)
695		return false;
696
697	/* [DevSNB-C+{W/A}] Before any depth stall flush (including those
698	 * produced by non-pipelined state commands), software needs to first
699	 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
700	 * 0.
701	 *
702	 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
703	 * BEFORE the pipe-control with a post-sync op and no write-cache
704	 * flushes.
705	 */
706	if (!sna->render_state.gen6.first_state_packet) {
707		OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
708		OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
709			  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
710		OUT_BATCH(0);
711		OUT_BATCH(0);
712	}
713
714	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
715	OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME);
716	OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
717				 sna->render_state.gen6.general_bo,
718				 I915_GEM_DOMAIN_INSTRUCTION << 16 |
719				 I915_GEM_DOMAIN_INSTRUCTION,
720				 64));
721	OUT_BATCH(0);
722
723	DBG(("%s: offset=(%d, %d), limit=(%d, %d)\n",
724	     __FUNCTION__, op->dst.x, op->dst.y, op->dst.width, op->dst.width));
725	OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
726	OUT_BATCH(0);
727	OUT_BATCH(limit);
728	OUT_BATCH(offset);
729
730	sna->render_state.gen6.drawrect_offset = offset;
731	sna->render_state.gen6.drawrect_limit = limit;
732	return true;
733}
734
/* Program 3DSTATE_VERTEX_ELEMENTS for the vertex layout encoded in the
 * operation's vertex id: bits 0-1 give the float count of the source
 * coordinate channel (0 => 2 x s16), bits 2+ the mask channel (0 => none).
 * No-op when the cached ve_id already matches. */
static void
gen6_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen6_render_state *render = &sna->render_state.gen6;
	uint32_t src_format, dw;
	int id = GEN6_VERTEX(op->u.gen6.flags);
	bool has_mask;

	DBG(("%s: setup id=%d\n", __FUNCTION__, id));

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
	 *    dword 4-7: position (x, y, 1.0, 1.0),
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
	 *
	 * dword 4-15 are fetched from vertex buffer
	 */
	has_mask = (id >> 2) != 0;
	OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
		((2 * (3 + has_mask)) + 1 - 2));

	/* pad element: constant zeros for VUE dwords 0-3 */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);

	/* x,y */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
		  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		/* 0 => coordinates are 16-bit ints, as for the position */
		src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		/* 3 floats: projective coordinates (u, v, w) */
		src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  src_format << VE0_FORMAT_SHIFT |
		  4 << VE0_OFFSET_SHIFT);
	OUT_BATCH(dw);

	/* u1, v1, w1 */
	if (has_mask) {
		/* mask channel starts after the source channel's floats
		 * (a 2 x s16 source channel occupies one float's worth) */
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
		dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  src_format << VE0_FORMAT_SHIFT |
			  offset << VE0_OFFSET_SHIFT);
		OUT_BATCH(dw);
	}
}
854
/* Emit a PIPE_CONTROL flushing the render and texture caches with a CS
 * stall, used between dependent rendering passes. */
static void
gen6_emit_flush(struct sna *sna)
{
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH |
		  GEN6_PIPE_CONTROL_TC_FLUSH |
		  GEN6_PIPE_CONTROL_CS_STALL);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
865
/* Emit all pipeline state needed for the composite op: blend, samplers,
 * SF/WM setup, vertex elements, binding table and drawing rectangle; then
 * flush if any source is dirty, or stall if required (bit 0 of
 * wm_binding_table requests a stall). */
static void
gen6_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool need_flush, need_stall;

	assert(op->dst.bo->exec);

	need_stall = wm_binding_table & 1;
	need_flush = false;
	if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)))
		need_flush = need_stall;
	gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
	gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
	gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
	gen6_emit_vertex_elements(sna, op);

	need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1);
	if (gen6_emit_drawing_rectangle(sna, op))
		need_stall = false;	/* its own w/a pipe-controls suffice */
	if (need_flush || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		gen6_emit_flush(sna);
		kgem_clear_dirty(&sna->kgem);
		assert(op->dst.bo->exec);
		kgem_bo_mark_dirty(op->dst.bo);
		need_stall = false;
	}
	if (need_stall) {
		OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
		OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
			  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
		OUT_BATCH(0);
		OUT_BATCH(0);
	}
	sna->render_state.gen6.first_state_packet = false;
}
903
/* Second pass for component-alpha with source alpha: replay the vertices
 * already emitted for this op with a PictOpAdd blend and the CA kernel.
 * Returns true if the extra pass was emitted. */
static bool gen6_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen6_render_state *state = &sna->render_state.gen6;

	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
	     sna->render.vertex_start, sna->render.vertex_index));

	/* flush the results of the first pass before blending on top */
	gen6_emit_flush(sna);

	gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format));
	gen6_emit_wm(sna,
		     gen6_choose_composite_kernel(PictOpAdd,
						  true, true,
						  op->is_affine),
		     true);

	OUT_BATCH(GEN6_3DPRIMITIVE |
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
		  0 << 9 |
		  4);
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}
938
/* Surface state padded out to a 32-byte slot so binding-table offsets can
 * be computed by simple index arithmetic. */
typedef struct gen6_surface_state_padded {
	struct gen6_surface_state state;
	char pad[32 - sizeof(struct gen6_surface_state)];
} gen6_surface_state_padded;
943
/* Reserve 64 zeroed bytes at the start of the static stream. */
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}
949
/* Reserve a 64-byte scratch slot in the static stream. */
static void scratch_create(struct sna_static_stream *stream)
{
	/* 64 bytes of scratch space for random writes, such as
	 * the pipe-control w/a.
	 */
	sna_static_stream_map(stream, 64, 64);
}
957
/* Fill in one sampler state for the given filter/extend combination,
 * using legacy border-color mode to match Render extension semantics. */
static void
sampler_state_init(struct gen6_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		/* RepeatNone: sample the (transparent) border outside */
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
		break;
	}
}
1005
/* Initialise the special sampler pair used for copies: nearest filtering
 * with unnormalized (pixel) coordinates; the second (mask) slot is a
 * plain nearest/none sampler. */
static void
sampler_copy_init(struct gen6_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
1014
1015static void
1016sampler_fill_init(struct gen6_sampler_state *ss)
1017{
1018	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
1019	ss->ss3.non_normalized_coord = 1;
1020
1021	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1022}
1023
1024static uint32_t
1025gen6_tiling_bits(uint32_t tiling)
1026{
1027	switch (tiling) {
1028	default: assert(0);
1029	case I915_TILING_NONE: return 0;
1030	case I915_TILING_X: return GEN6_SURFACE_TILED;
1031	case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y;
1032	}
1033}
1034
/**
 * Emit (or reuse) a SURFACE_STATE describing @bo and return its byte
 * offset within the current batch.
 *
 * Bindings are cached on the bo, keyed on (format, is_dst, is_scanout),
 * so rebinding the same bo within a batch is just a lookup.  When bound
 * as a render target (@is_dst) the bo is marked dirty and relocated
 * with read/write render-cache domains.
 */
static int
gen6_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t *ss;
	uint32_t domains;
	uint16_t offset;
	uint32_t is_scanout = is_dst && bo->scanout;

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
	if (offset) {
		DBG(("[%x]  bo(handle=%d), format=%d, reuse %s binding\n",
		     offset, bo->handle, format,
		     is_dst ? "render" : "sampler"));
		assert(offset >= sna->kgem.surface);
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	/* Surface state is carved downwards from the top of the batch. */
	offset = sna->kgem.surface -=
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;
	/* ss0: 2D surface of the requested format, with blending allowed */
	ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
		 GEN6_SURFACE_BLEND_ENABLED |
		 format << GEN6_SURFACE_FORMAT_SHIFT);
	if (is_dst) {
		ss[0] |= GEN6_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	/* ss1: base address, fixed up at execbuffer time via the reloc */
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
	ss[2] = ((width - 1)  << GEN6_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT);
	assert(bo->pitch <= (1 << 18));
	ss[3] = (gen6_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	/* ss5: cache control — scanout/io buffers must remain uncached;
	 * the 3 << 16 encoding presumably selects cacheable LLC/MLC access,
	 * per the gen6 PRM (confirm against SURFACE_STATE docs). */
	ss[5] = (is_scanout || bo->io) ? 0 : 3 << 16;

	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
1093
/* Emit 3DSTATE_VERTEX_BUFFERS for the vertex layout selected by @op.
 * The buffer address dword is left as 0 and its batch position recorded
 * in vertex_reloc[] so it can be fixed up once the vbo is finalised.
 */
static void gen6_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = GEN6_VERTEX(op->u.gen6.flags);

	OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
		  4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT);
	/* Address patched later; remember where it lives in the batch. */
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(~0); /* max address: disabled */
	OUT_BATCH(0);

	/* Mark this vertex layout as bound for the rest of the batch. */
	sna->render.vb_id |= 1 << id;
}
1109
/* Start (or continue) a RECTLIST 3DPRIMITIVE.  If the previous batch
 * command was also our primitive, we simply rewind vertex_offset to its
 * vertex-count dword and keep appending; otherwise a fresh 6-dword
 * 3DPRIMITIVE is emitted with the vertex count back-patched later.
 */
static void gen6_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) {
		DBG(("%s: continuing previous primitive, start=%d, index=%d\n",
		     __FUNCTION__,
		     sna->render.vertex_start,
		     sna->render.vertex_index));
		/* Point back at the vertex-count dword of the open primitive. */
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN6_3DPRIMITIVE |
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
		  0 << 9 |
		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;
	DBG(("%s: started new primitive: index=%d\n",
	     __FUNCTION__, sna->render.vertex_start));

	/* Remember the batch tail so the next call can detect continuation. */
	sna->render_state.gen6.last_primitive = sna->kgem.nbatch;
}
1138
/* Ensure an open primitive exists for @op, emitting the vertex buffer
 * binding and 3DPRIMITIVE if required.  Returns false when the batch
 * lacks space for the worst case (including the magic CA double-pass).
 */
static bool gen6_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	int id = 1 << GEN6_VERTEX(op->u.gen6.flags);
	int ndwords;

	/* If another thread completed the primitive while we waited,
	 * there is nothing left to do. */
	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* 60 dwords leaves room for the component-alpha fixup pass. */
	ndwords = op->need_magic_ca_pass ? 60 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen6_emit_vertex_buffer(sna, op);

	gen6_emit_primitive(sna);
	return true;
}
1160
/* Slow path of gen6_get_rectangles(): make room in the vertex buffer.
 * Returns the number of floats now available, or 0 if the whole batch
 * must be submitted first.
 */
static int gen6_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Preventing discarding new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	/* Need batch space for closing the primitive (and the CA pass)
	 * plus relocations before we can recycle the vbo. */
	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		/* The CA pass re-emits state; restore our blend/wm setup. */
		if (gen6_magic_ca_pass(sna, op)) {
			gen6_emit_flush(sna);
			gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
			gen6_emit_wm(sna,
				     GEN6_KERNEL(op->u.gen6.flags),
				     GEN6_VERTEX(op->u.gen6.flags) >> 2);
		}
	}

	return gen4_vertex_finish(sna);
}
1189
/* Reserve vertex space for up to @want rectangles of @op, flushing or
 * submitting the batch as necessary (re-emitting state via @emit_state
 * after a submit).  Returns the number of rectangles actually granted
 * (>= 1) and advances vertex_index accordingly (3 vertices per rect).
 */
inline static int gen6_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *, const struct sna_composite_op *op))
{
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen6_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	/* No primitive open yet — begin one, then recheck space. */
	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen6_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	/* Trim the request to whatever fits in the remaining space. */
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	assert(want > 0);
	sna->render.vertex_index += 3*want;
	return want;

flush:
	/* Out of space: close the primitive, submit, re-emit state, retry. */
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen6_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}
1235
/* Carve a zeroed binding table from the top of the batch (the same
 * downward-growing region as surface state) and return a pointer to it,
 * with its dword offset stored in *offset.
 */
inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
							 uint16_t *offset)
{
	uint32_t *table;

	sna->kgem.surface -=
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
	/* Clear all surplus entries to zero in case of prefetch */
	table = memset(sna->kgem.batch + sna->kgem.surface,
		       0, sizeof(struct gen6_surface_state_padded));

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}
1252
/* Prepare the batch for a render operation targeting op->dst.bo:
 * switch to the render ring, guarantee space for the state + 4 surfaces
 * (submitting first if necessary), and emit the once-per-batch
 * invariant state.  Returns whether the destination is already dirty.
 */
static bool
gen6_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	/* 150 dwords of state plus 4 surface-state entries (8 dwords each). */
	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen6.needs_invariant)
		gen6_emit_invariant(sna);

	return kgem_bo_is_dirty(op->dst.bo);
}
1271
/* Bind destination, source and optional mask surfaces for @op and emit
 * the pipeline state.  If the freshly-built binding table is identical
 * to the one already in use, it is discarded and the previous table
 * reused to avoid redundant state emission.
 */
static void gen6_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			    op->dst.bo, op->dst.width, op->dst.height,
			    gen6_get_dest_format(op->dst.format),
			    true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		binding_table[2] =
			gen6_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	/* Compare the first two entries as one 64-bit load; if the whole
	 * table matches the active one, rewind the allocation and reuse it. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	/* The dirty flag travels in the low bit of the offset argument;
	 * presumably decoded by gen6_emit_state — confirm there. */
	gen6_emit_state(sna, op, offset | dirty);
}
1313
1314static void
1315gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
1316{
1317	assert (sna->render.vertex_offset == 0);
1318	if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) {
1319		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
1320		     sna->render_state.gen6.floats_per_vertex,
1321		     op->floats_per_vertex));
1322		gen4_vertex_align(sna, op);
1323		sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;
1324	}
1325	assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
1326}
1327
/* Composite a single rectangle: reserve vertex space for one rect and
 * delegate vertex emission to the op's prim_emit vfunc.
 */
fastcall static void
gen6_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
	op->prim_emit(sna, op, r);
}
1336
1337fastcall static void
1338gen6_render_composite_box(struct sna *sna,
1339			  const struct sna_composite_op *op,
1340			  const BoxRec *box)
1341{
1342	struct sna_composite_rectangles r;
1343
1344	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
1345
1346	DBG(("  %s: (%d, %d), (%d, %d)\n",
1347	     __FUNCTION__,
1348	     box->x1, box->y1, box->x2, box->y2));
1349
1350	r.dst.x = box->x1;
1351	r.dst.y = box->y1;
1352	r.width  = box->x2 - box->x1;
1353	r.height = box->y2 - box->y1;
1354	r.src = r.mask = r.dst;
1355
1356	op->prim_emit(sna, op, &r);
1357}
1358
1359static void
1360gen6_render_composite_boxes__blt(struct sna *sna,
1361				 const struct sna_composite_op *op,
1362				 const BoxRec *box, int nbox)
1363{
1364	DBG(("composite_boxes(%d)\n", nbox));
1365
1366	do {
1367		int nbox_this_time;
1368
1369		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
1370						     gen6_emit_composite_state);
1371		nbox -= nbox_this_time;
1372
1373		do {
1374			struct sna_composite_rectangles r;
1375
1376			DBG(("  %s: (%d, %d), (%d, %d)\n",
1377			     __FUNCTION__,
1378			     box->x1, box->y1, box->x2, box->y2));
1379
1380			r.dst.x = box->x1;
1381			r.dst.y = box->y1;
1382			r.width  = box->x2 - box->x1;
1383			r.height = box->y2 - box->y1;
1384			r.src = r.mask = r.dst;
1385
1386			op->prim_emit(sna, op, &r);
1387			box++;
1388		} while (--nbox_this_time);
1389	} while (nbox);
1390}
1391
/* Composite a run of boxes using the op's bulk emit_boxes vfunc, which
 * writes vertices for a whole chunk directly into the vbo.
 */
static void
gen6_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim the vertex space up front; emit_boxes fills it in. */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}
1415
/* Threaded variant of gen6_render_composite_boxes: vertex space is
 * claimed under the vertex lock, then the lock is dropped (holding an
 * acquire reference) while the vertices are written, allowing other
 * threads to fill their own chunks concurrently.
 */
static void
gen6_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		/* Pin the vbo, then release the lock while writing vertices. */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
1447
#ifndef MAX
/* NOTE: classic max macro — evaluates both arguments more than once,
 * so avoid passing expressions with side effects. */
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
1451
1452static uint32_t
1453gen6_composite_create_blend_state(struct sna_static_stream *stream)
1454{
1455	char *base, *ptr;
1456	int src, dst;
1457
1458	base = sna_static_stream_map(stream,
1459				     GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
1460				     64);
1461
1462	ptr = base;
1463	for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) {
1464		for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) {
1465			struct gen6_blend_state *blend =
1466				(struct gen6_blend_state *)ptr;
1467
1468			blend->blend0.dest_blend_factor = dst;
1469			blend->blend0.source_blend_factor = src;
1470			blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
1471			blend->blend0.blend_enable =
1472				!(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE);
1473
1474			blend->blend1.post_blend_clamp_enable = 1;
1475			blend->blend1.pre_blend_clamp_enable = 1;
1476
1477			ptr += GEN6_BLEND_STATE_PADDED_SIZE;
1478		}
1479	}
1480
1481	return sna_static_stream_offsetof(stream, base);
1482}
1483
/* Emit a sampler-only SURFACE_STATE for one plane of a video frame,
 * addressed at @src_offset within @src_bo with an explicit pitch.
 * Unlike gen6_bind_bo there is no binding cache — video surfaces are
 * rebuilt each time.  Returns the byte offset of the state in the batch.
 */
static uint32_t gen6_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen6_surface_state *ss;

	sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN6_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;

	/* Base address = bo + plane offset, fixed up at execbuffer time. */
	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	ss->ss2.width  = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch  = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}
1513
/* Bind the destination plus all source planes of a video frame and emit
 * the pipeline state.  Planar formats bind 6 surfaces (Y, V, U — each
 * listed twice, presumably one per sampler used by the planar kernel;
 * confirm against the gen6 video WM kernels); packed formats bind one.
 */
static void gen6_emit_video_state(struct sna *sna,
				  const struct sna_composite_op *op)
{
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;
	int n_src, n;

	dirty = gen6_get_batch(sna, op);

	/* Plane base offsets within the frame bo: Y at 0, then V, then U. */
	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM;
		/* Chroma planes are subsampled to half width/height; note
		 * the luma plane uses pitch[1] and chroma pitch[0], matching
		 * the sna_video frame layout — confirm in sna_video.c. */
		src_width[1]  = src_width[0]  = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen6_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}

	gen6_emit_state(sna, op, offset | dirty);
}
1581
/* Render a video frame onto @pixmap, one textured rectangle per clip
 * box in @dstRegion, scaling from the frame's source rectangle to the
 * region extents.  Returns true on success (the only path).
 */
static bool
gen6_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	int nbox, pix_xoff, pix_yoff;
	unsigned filter;
	const BoxRec *box;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
	     __FUNCTION__,
	     src_width, src_height, dst_width, dst_height,
	     region_num_rects(dstRegion),
	     REGION_EXTENTS(NULL, dstRegion)->x1,
	     REGION_EXTENTS(NULL, dstRegion)->y1,
	     REGION_EXTENTS(NULL, dstRegion)->x2,
	     REGION_EXTENTS(NULL, dstRegion)->y2));

	assert(priv->gpu_bo);
	memset(&tmp, 0, sizeof(tmp));

	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;

	/* 3 floats per vertex: x, y plus one texcoord pair packed by
	 * OUT_VERTEX/OUT_VERTEX_F below; 3 vertices per rectangle. */
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;

	/* 1:1 scaling needs no filtering. */
	if (src_width == dst_width && src_height == dst_height)
		filter = SAMPLER_FILTER_NEAREST;
	else
		filter = SAMPLER_FILTER_BILINEAR;

	tmp.u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
					       SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
			       NO_BLEND,
			       is_planar_fourcc(frame->id) ?
			       GEN6_WM_KERNEL_VIDEO_PLANAR :
			       GEN6_WM_KERNEL_VIDEO_PACKED,
			       2);
	tmp.priv = frame;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_align_vertex(sna, &tmp);
	gen6_emit_video_state(sna, &tmp);

	/* Set up the offset for translating from the given region (in screen
	 * coordinates) to the backing pixmap.
	 */
#ifdef COMPOSITE
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
	pix_xoff = 0;
	pix_yoff = 0;
#endif

	/* Map destination pixels to normalized texture coordinates. */
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	box = region_rects(dstRegion);
	nbox = region_num_rects(dstRegion);
	while (nbox--) {
		BoxRec r;

		r.x1 = box->x1 + pix_xoff;
		r.x2 = box->x2 + pix_xoff;
		r.y1 = box->y1 + pix_yoff;
		r.y2 = box->y2 + pix_yoff;

		gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state);

		/* RECTLIST: bottom-right, bottom-left, top-left. */
		OUT_VERTEX(r.x2, r.y2);
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(r.x1, r.y2);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(r.x1, r.y1);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
			sna_damage_add_box(&priv->gpu_damage, &r);
			sna_damage_subtract_box(&priv->cpu_damage, &r);
		}
		box++;
	}

	gen4_vertex_flush(sna);
	return true;
}
1701
/* Initialise @channel to sample @picture for the x,y,w,h area destined
 * for dst_x,dst_y, falling back to software fixup/extraction paths when
 * the picture cannot be sampled directly by the gen6 hardware.
 * Return value follows the sna channel-init convention of the helpers it
 * delegates to (nonzero on success — confirm in sna_render.c).
 */
static int
gen6_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d), precise=%d\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y, precise));

	channel->is_solid = false;
	channel->card_format = -1;

	/* Solid colours bypass sampling entirely. */
	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		int ret;

		/* Source-only pictures: linear gradients have a native path;
		 * everything else is rendered to a temporary. */
		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	/* Alpha maps, and repeat/filter modes the hardware lacks, all
	 * require a software fixup pass. */
	if (picture->alphaMap) {
		DBG(("%s -- fixup, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen6_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen6_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	/* A pure integer translation can be folded into (x, y) and the
	 * transform dropped, enabling nearest sampling. */
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat &&
		    (x >= 0 &&
		     y >= 0 &&
		     x + w < pixmap->drawable.width &&
		     y + h < pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen4_channel_init_solid(sna, channel, priv->clear_color);
			}
		}
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen6_get_card_format(picture->format);
	if (channel->card_format == (unsigned)-1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	/* Sources beyond the 3D pipe's size limit get cropped to the
	 * sampled sub-rectangle first. */
	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	DBG(("%s: pixmap, repeat=%d, filter=%d, transform?=%d [affine? %d], format=%08x\n",
	     __FUNCTION__,
	     channel->repeat, channel->filter,
	     channel->transform != NULL, channel->is_affine,
	     channel->pict_format));
	if (channel->transform) {
#define f2d(x) (((double)(x))/65536.)
		DBG(("%s: transform=[%f %f %f, %f %f %f, %f %f %f] (raw [%x %x %x, %x %x %x, %x %x %x])\n",
		     __FUNCTION__,
		     f2d(channel->transform->matrix[0][0]),
		     f2d(channel->transform->matrix[0][1]),
		     f2d(channel->transform->matrix[0][2]),
		     f2d(channel->transform->matrix[1][0]),
		     f2d(channel->transform->matrix[1][1]),
		     f2d(channel->transform->matrix[1][2]),
		     f2d(channel->transform->matrix[2][0]),
		     f2d(channel->transform->matrix[2][1]),
		     f2d(channel->transform->matrix[2][2]),
		     channel->transform->matrix[0][0],
		     channel->transform->matrix[0][1],
		     channel->transform->matrix[0][2],
		     channel->transform->matrix[1][0],
		     channel->transform->matrix[1][1],
		     channel->transform->matrix[1][2],
		     channel->transform->matrix[2][0],
		     channel->transform->matrix[2][1],
		     channel->transform->matrix[2][2]));
#undef f2d
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}
1837
1838inline static void gen6_composite_channel_convert(struct sna_composite_channel *channel)
1839{
1840	channel->repeat = gen6_repeat(channel->repeat);
1841	channel->filter = gen6_filter(channel->filter);
1842	if (channel->card_format == (unsigned)-1)
1843		channel->card_format = gen6_get_card_format(channel->pict_format);
1844	assert(channel->card_format != (unsigned)-1);
1845}
1846
/* Finish a composite operation: flush any pending vertices (and run the
 * component-alpha second pass) before releasing the channel bos and any
 * redirection state.  Ordering matters — the GPU commands referencing
 * the bos must be emitted before the bos are destroyed.
 */
static void gen6_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	DBG(("%s\n", __FUNCTION__));

	assert(!sna->render.active);
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen6_magic_ca_pass(sna, op);
	}

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	sna_render_composite_redirect_done(sna, op);
}
1865
/* Choose and record the destination bo for @op, converting drawable
 * coordinates to pixmap coordinates and redirecting through a proxy if
 * the pixmap exceeds the 3D pipe's size limit.  Returns false if no
 * usable GPU target exists.
 */
inline static bool
gen6_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;
	unsigned int hint;

	DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial));

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	/* Zero extent means "whole picture". */
	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	hint = PREFER_GPU | FORCE_GPU | RENDER_GPU;
	if (!partial) {
		hint |= IGNORE_DAMAGE;
		if (w == op->dst.width && h == op->dst.height)
			hint |= REPLACES;
	}

	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	/* A full replacement lets us discard any pending writes. */
	if (hint & REPLACES) {
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
	}

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}
1926
/* Heuristic: should this composite be attempted on the BLT engine
 * instead of the 3D pipe?  The checks are ordered from cheapest to most
 * speculative; true means "try the blitter first".
 */
static bool
try_blt(struct sna *sna,
	PicturePtr dst, PicturePtr src,
	int width, int height)
{
	struct kgem_bo *bo;

	/* Already mid-BLT batch: avoid a ring switch. */
	if (sna->kgem.mode == KGEM_BLT) {
		DBG(("%s: already performing BLT\n", __FUNCTION__));
		return true;
	}

	if (too_large(width, height)) {
		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
		     __FUNCTION__, width, height));
		return true;
	}

	bo = __sna_drawable_peek_bo(dst->pDrawable);
	if (bo == NULL)
		return true;
	/* Follow whichever engine last touched the destination. */
	if (bo->rq)
		return RQ_IS_BLT(bo->rq);

	if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0))
		return true;

	if (src->pDrawable) {
		bo = __sna_drawable_peek_bo(src->pDrawable);
		if (bo == NULL)
			return true;

		if (prefer_blt_bo(sna, bo))
			return true;
	}

	/* Note: distinct from the mode check above — ring is the engine
	 * of the last submission, mode the one currently being built. */
	if (sna->kgem.ring == KGEM_BLT) {
		DBG(("%s: already performing BLT\n", __FUNCTION__));
		return true;
	}

	return false;
}
1970
1971static bool
1972check_gradient(PicturePtr picture, bool precise)
1973{
1974	if (picture->pDrawable)
1975		return false;
1976
1977	switch (picture->pSourcePict->type) {
1978	case SourcePictTypeSolidFill:
1979	case SourcePictTypeLinear:
1980		return false;
1981	default:
1982		return precise;
1983	}
1984}
1985
/* Does this picture carry a separate alpha map (unsupported in HW)? */
static bool
has_alphamap(PicturePtr p)
{
	return p->alphaMap != NULL;
}
1991
/* Would sampling this picture require first uploading an untransformed,
 * CPU-only drawable to the GPU? */
static bool
need_upload(PicturePtr p)
{
	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
}
1997
1998static bool
1999source_is_busy(PixmapPtr pixmap)
2000{
2001	struct sna_pixmap *priv = sna_pixmap(pixmap);
2002	if (priv == NULL || priv->clear)
2003		return false;
2004
2005	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
2006		return true;
2007
2008	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
2009		return true;
2010
2011	return priv->gpu_damage && !priv->cpu_damage;
2012}
2013
/* Should this source picture be composited in software?  Solid colours
 * never fall back; gradients defer to check_gradient; unsupported
 * repeat modes or formats always fall back; a GPU-busy pixmap is kept
 * on the GPU; otherwise fall back for alpha maps, unsupported filters,
 * or sources that would need a fresh upload.
 */
static bool
source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
{
	if (sna_picture_is_solid(p, NULL))
		return false;

	if (p->pSourcePict)
		return check_gradient(p, precise);

	if (!gen6_check_repeat(p) || !gen6_check_format(p->format))
		return true;

	if (pixmap && source_is_busy(pixmap))
		return false;

	return has_alphamap(p) || !gen6_check_filter(p) || need_upload(p);
}
2031
/* Decide whether the whole composite (src [+mask] -> dst) should be
 * performed on the CPU instead of the GPU.  Returns true to fallback.
 *
 * The ordering of the checks implements a heuristic: prefer the GPU
 * whenever the destination or any operand is already resident/busy on
 * the GPU, and prefer the CPU when the destination is on the CPU and an
 * operand would have to fall back anyway.
 */
static bool
gen6_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	/* Destination format must be renderable by the hardware. */
	if (!gen6_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask && mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	/* An oversized CPU destination would need tiling through a proxy;
	 * not worth it if it is already resident on the CPU. */
	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	     __FUNCTION__));
	return dst_use_cpu(dst_pixmap);
}
2122
/* Try to initialise the mask channel by reusing the already-prepared
 * source channel (same picture, or same drawable with compatible
 * transform/alpha-map and supported repeat/filter/format) instead of
 * setting it up from scratch.
 *
 * Returns non-zero on success with *mc initialised and holding its own
 * bo reference; zero when the mask must be prepared independently.
 */
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	/* Reuse requires identical sampling offsets. */
	if (src_x != msk_x || src_y != msk_y)
		return false;

	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	/* A solid mask is cheap to initialise directly. */
	if (sna_picture_is_solid(mask, &color))
		return gen4_channel_init_solid(sna, mc, color);

	if (sc->is_solid)
		return false;

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen6_check_repeat(mask))
		return false;

	if (!gen6_check_filter(mask))
		return false;

	if (!gen6_check_format(mask->format))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	/* Same bo and transform, but the mask keeps its own repeat,
	 * filter and format. */
	*mc = *sc;
	mc->repeat = gen6_repeat(mask->repeat ? mask->repeatType : RepeatNone);
	mc->filter = gen6_filter(mask->filter);
	mc->pict_format = mask->format;
	mc->card_format = gen6_get_card_format(mask->format);
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}
2177
/* Prepare a Render composite operation (op: src [+mask] -> dst) on the
 * gen6 render ring, filling in tmp with the blt/box/boxes/done vfuncs.
 * Returns true when tmp is ready for emission (or the BLT handled it),
 * false if the caller must fall back to software.
 */
static bool
gen6_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      unsigned flags,
		      struct sna_composite_op *tmp)
{
	if (op >= ARRAY_SIZE(gen6_blend_op))
		return false;

	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	/* Without a mask, first see if the BLT engine can do the job. */
	if (mask == NULL &&
	    try_blt(sna, dst, src, width, height) &&
	    sna_blt_composite(sna, op,
			      src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      flags, tmp))
		return true;

	if (gen6_composite_fallback(sna, src, mask, dst))
		goto fallback;

	/* Too large for the 3D pipeline: split into tiles. */
	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	/* Clearing with the clear picture is just a Src of transparent. */
	if (op == PictOpClear && src == sna->clear)
		op = PictOpSrc;
	tmp->op = op;
	if (!gen6_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       flags & COMPOSITE_PARTIAL || op > PictOpSrc))
		goto fallback;

	switch (gen6_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		/* Did we just switch rings to prepare the source? */
		if (mask == NULL &&
		    prefer_blt_composite(sna, tmp) &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen6_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.bo = NULL;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value.  We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen6_blend_op[op].src_alpha &&
			    (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver)
					goto cleanup_src;

				/* Over is handled as OutReverse plus a second
				 * "magic" CA pass adding the source. */
				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen6_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				gen6_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	/* Pack sampler, blend, WM kernel and vertex-emitter selection
	 * into the per-op flags word used by state emission. */
	tmp->u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen6_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
			       gen6_choose_composite_kernel(tmp->op,
							    tmp->mask.bo != NULL,
							    tmp->has_component_alpha,
							    tmp->is_affine),
			       gen4_choose_composite_emitter(sna, tmp));

	tmp->blt   = gen6_render_composite_blt;
	tmp->box   = gen6_render_composite_box;
	tmp->boxes = gen6_render_composite_boxes__blt;
	if (tmp->emit_boxes) {
		tmp->boxes = gen6_render_composite_boxes;
		tmp->thread_boxes = gen6_render_composite_boxes__thread;
	}
	tmp->done  = gen6_render_composite_done;

	/* Make sure all three bos fit in this batch, flushing once. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_align_vertex(sna, tmp);
	gen6_emit_composite_state(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
		tmp->mask.bo = NULL;
	}
cleanup_src:
	if (tmp->src.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
		tmp->src.bo = NULL;
	}
cleanup_dst:
	if (tmp->redirect.real_bo) {
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
		tmp->redirect.real_bo = NULL;
	}
fallback:
	/* Last resort: let the BLT try, flagged as a fallback. */
	return (mask == NULL &&
		sna_blt_composite(sna, op,
				  src, dst,
				  src_x, src_y,
				  dst_x, dst_y,
				  width, height,
				  flags | COMPOSITE_FALLBACK, tmp));
}
2362
2363#if !NO_COMPOSITE_SPANS
/* Emit a single opacity-weighted box: reserve room for one rectangle
 * and let the op-specific primitive emitter write the vertices. */
fastcall static void
gen6_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state);
	op->prim_emit(sna, op, box, opacity);
}
2381
/* Emit many opacity-weighted boxes, in batches as large as the vertex
 * buffer allows (gen6_get_rectangles may grant fewer than requested). */
static void
gen6_render_composite_spans_boxes(struct sna *sna,
				  const struct sna_composite_spans_op *op,
				  const BoxRec *box, int nbox,
				  float opacity)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y));

	do {
		int nbox_this_time;

		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
						     gen6_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			op->prim_emit(sna, op, box++, opacity);
		} while (--nbox_this_time);
	} while (nbox);
}
2411
/* Threaded variant: reserve vertex space under the vertex lock, then
 * drop the lock (holding an acquire reference) while emit_boxes fills
 * the reserved slot, so other threads can reserve concurrently. */
fastcall static void
gen6_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim the vertex range while still holding the lock. */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* Fill the claimed range without the lock held. */
		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
2447
/* Finish a spans operation: flush pending vertices, release the source
 * bo reference and undo any destination redirection. */
fastcall static void
gen6_render_composite_spans_done(struct sna *sna,
				 const struct sna_composite_spans_op *op)
{
	DBG(("%s()\n", __FUNCTION__));
	assert(!sna->render.active);

	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);

	if (op->base.src.bo)
		kgem_bo_destroy(&sna->kgem, op->base.src.bo);

	sna_render_composite_redirect_done(sna, &op->base);
}
2463
/* Can (and should) this spans composite be handled by the gen6 render
 * path?  Checks hardware limits first, then applies a busyness
 * heuristic for non-rectilinear spans. */
static bool
gen6_check_composite_spans(struct sna *sna,
			   uint8_t op, PicturePtr src, PicturePtr dst,
			   int16_t width, int16_t height,
			   unsigned flags)
{
	DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
	     __FUNCTION__, op, width, height, flags));

	if (op >= ARRAY_SIZE(gen6_blend_op))
		return false;

	if (gen6_composite_fallback(sna, src, NULL, dst)) {
		DBG(("%s: operation would fallback\n", __FUNCTION__));
		return false;
	}

	if (need_tiling(sna, width, height) &&
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
		DBG(("%s: fallback, tiled operation not on GPU\n",
		     __FUNCTION__));
		return false;
	}

	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
		struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
		assert(priv);

		/* Only worth using the GPU if the target is busy there;
		 * otherwise CPU span rasterisation is preferred. */
		if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
			return true;

		if (flags & COMPOSITE_SPANS_INPLACE_HINT)
			return false;

		return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
	}

	return true;
}
2503
/* Prepare a spans composite (op with per-box opacity) for the gen6
 * render ring.  The caller must already have validated the request via
 * gen6_check_composite_spans.  Returns false on failure to set up. */
static bool
gen6_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.ring));

	assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags));

	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	if (!gen6_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height, true))
		return false;

	switch (gen6_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		gen6_composite_channel_convert(&tmp->base.src);
		break;
	}
	tmp->base.mask.bo = NULL;

	tmp->base.is_affine = tmp->base.src.is_affine;
	tmp->base.need_magic_ca_pass = false;

	/* Opacity is fed through the "mask" sampler slot, hence the
	 * fixed nearest/pad sampler and the OPACITY WM kernel. */
	tmp->base.u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
					      tmp->base.src.repeat,
					      SAMPLER_FILTER_NEAREST,
					      SAMPLER_EXTEND_PAD),
			       gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
			       GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
			       gen4_choose_spans_emitter(sna, tmp));

	tmp->box   = gen6_render_composite_spans_box;
	tmp->boxes = gen6_render_composite_spans_boxes;
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen6_render_composite_spans_boxes__thread;
	tmp->done  = gen6_render_composite_spans_done;

	/* Ensure both bos fit in this batch, flushing at most once. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_align_vertex(sna, &tmp->base);
	gen6_emit_composite_state(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
2592#endif
2593
/* Emit the render state (binding table plus pipeline state) needed by
 * the copy shader before rectangles are issued. */
static void
gen6_emit_copy_state(struct sna *sna,
		     const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	/* Surface 0: render target; surface 1: source texture. */
	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	/* If this binding table is identical to the last one emitted,
	 * reuse it and return the batch space we just reserved. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, offset | dirty);
}
2625
/* Heuristic: should this copy run on the BLT ring rather than the
 * render ring?  The checks are ordered from hard requirements to soft
 * preferences. */
static inline bool prefer_blt_copy(struct sna *sna,
				   struct kgem_bo *src_bo,
				   struct kgem_bo *dst_bo,
				   unsigned flags)
{
	if (flags & COPY_SYNC)
		return false;

	/* Compile-time override (see PREFER_RENDER). */
	if (PREFER_RENDER)
		return PREFER_RENDER > 0;

	/* Avoid a ring switch if we are already on the BLT. */
	if (sna->kgem.ring == KGEM_BLT)
		return true;

	if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags))
		return true;

	/* Untiled surfaces sample poorly on the render ring. */
	if (untiled_tlb_miss(src_bo) ||
	    untiled_tlb_miss(dst_bo))
		return true;

	if (force_blt_ring(sna))
		return true;

	/* Keep bos that are active on the render ring there. */
	if (kgem_bo_is_render(dst_bo) ||
	    kgem_bo_is_render(src_bo))
		return false;

	if (prefer_render_ring(sna, dst_bo))
		return false;

	if (!prefer_blt_ring(sna, dst_bo, flags))
		return false;

	return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo);
}
2662
2663static bool
2664gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
2665		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
2666		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
2667		       const BoxRec *box, int n, unsigned flags)
2668{
2669	struct sna_composite_op tmp;
2670	BoxRec extents;
2671
2672	DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n",
2673	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu,
2674	     src_bo == dst_bo,
2675	     overlaps(sna,
2676		      src_bo, src_dx, src_dy,
2677		      dst_bo, dst_dx, dst_dy,
2678		      box, n, flags, &extents)));
2679
2680	if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
2681	    sna_blt_compare_depth(src, dst) &&
2682	    sna_blt_copy_boxes(sna, alu,
2683			       src_bo, src_dx, src_dy,
2684			       dst_bo, dst_dx, dst_dy,
2685			       dst->bitsPerPixel,
2686			       box, n))
2687		return true;
2688
2689	if (!(alu == GXcopy || alu == GXclear)) {
2690fallback_blt:
2691		if (!sna_blt_compare_depth(src, dst))
2692			return false;
2693
2694		return sna_blt_copy_boxes_fallback(sna, alu,
2695						   src, src_bo, src_dx, src_dy,
2696						   dst, dst_bo, dst_dx, dst_dy,
2697						   box, n);
2698	}
2699
2700	if (overlaps(sna,
2701		     src_bo, src_dx, src_dy,
2702		     dst_bo, dst_dx, dst_dy,
2703		     box, n, flags,
2704		     &extents)) {
2705		bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1);
2706
2707		if ((big || can_switch_to_blt(sna, dst_bo, flags)) &&
2708		    sna_blt_copy_boxes(sna, alu,
2709				       src_bo, src_dx, src_dy,
2710				       dst_bo, dst_dx, dst_dy,
2711				       dst->bitsPerPixel,
2712				       box, n))
2713			return true;
2714
2715		if (big)
2716			goto fallback_blt;
2717
2718		assert(src_bo == dst_bo);
2719		assert(src->depth == dst->depth);
2720		assert(src->width == dst->width);
2721		assert(src->height == dst->height);
2722		return sna_render_copy_boxes__overlap(sna, alu,
2723						      src, src_bo,
2724						      src_dx, src_dy,
2725						      dst_dx, dst_dy,
2726						      box, n, &extents);
2727	}
2728
2729	if (dst->depth == src->depth) {
2730		tmp.dst.format = sna_render_format_for_depth(dst->depth);
2731		tmp.src.pict_format = tmp.dst.format;
2732	} else {
2733		tmp.dst.format = sna_format_for_depth(dst->depth);
2734		tmp.src.pict_format = sna_format_for_depth(src->depth);
2735	}
2736	if (!gen6_check_format(tmp.src.pict_format))
2737		goto fallback_blt;
2738
2739	tmp.dst.pixmap = (PixmapPtr)dst;
2740	tmp.dst.width  = dst->width;
2741	tmp.dst.height = dst->height;
2742	tmp.dst.bo = dst_bo;
2743	tmp.dst.x = tmp.dst.y = 0;
2744	tmp.damage = NULL;
2745
2746	sna_render_composite_redirect_init(&tmp);
2747	if (too_large(tmp.dst.width, tmp.dst.height)) {
2748		int i;
2749
2750		extents = box[0];
2751		for (i = 1; i < n; i++) {
2752			if (box[i].x1 < extents.x1)
2753				extents.x1 = box[i].x1;
2754			if (box[i].y1 < extents.y1)
2755				extents.y1 = box[i].y1;
2756
2757			if (box[i].x2 > extents.x2)
2758				extents.x2 = box[i].x2;
2759			if (box[i].y2 > extents.y2)
2760				extents.y2 = box[i].y2;
2761		}
2762
2763		if (!sna_render_composite_redirect(sna, &tmp,
2764						   extents.x1 + dst_dx,
2765						   extents.y1 + dst_dy,
2766						   extents.x2 - extents.x1,
2767						   extents.y2 - extents.y1,
2768						   n > 1))
2769			goto fallback_tiled;
2770	}
2771
2772	tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format);
2773	if (too_large(src->width, src->height)) {
2774		int i;
2775
2776		extents = box[0];
2777		for (i = 1; i < n; i++) {
2778			if (box[i].x1 < extents.x1)
2779				extents.x1 = box[i].x1;
2780			if (box[i].y1 < extents.y1)
2781				extents.y1 = box[i].y1;
2782
2783			if (box[i].x2 > extents.x2)
2784				extents.x2 = box[i].x2;
2785			if (box[i].y2 > extents.y2)
2786				extents.y2 = box[i].y2;
2787		}
2788
2789		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
2790					       extents.x1 + src_dx,
2791					       extents.y1 + src_dy,
2792					       extents.x2 - extents.x1,
2793					       extents.y2 - extents.y1)) {
2794			DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__));
2795			goto fallback_tiled_dst;
2796		}
2797	} else {
2798		tmp.src.bo = src_bo;
2799		tmp.src.width  = src->width;
2800		tmp.src.height = src->height;
2801		tmp.src.offset[0] = tmp.src.offset[1] = 0;
2802	}
2803
2804	tmp.mask.bo = NULL;
2805
2806	tmp.floats_per_vertex = 2;
2807	tmp.floats_per_rect = 6;
2808	tmp.need_magic_ca_pass = 0;
2809
2810	tmp.u.gen6.flags = COPY_FLAGS(alu);
2811	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
2812	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER);
2813	assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX);
2814
2815	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
2816	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
2817		kgem_submit(&sna->kgem);
2818		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
2819			DBG(("%s: too large for a single operation\n",
2820			     __FUNCTION__));
2821			if (tmp.src.bo != src_bo)
2822				kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2823			if (tmp.redirect.real_bo)
2824				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2825			goto fallback_blt;
2826		}
2827		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
2828	}
2829
2830	src_dx += tmp.src.offset[0];
2831	src_dy += tmp.src.offset[1];
2832
2833	dst_dx += tmp.dst.x;
2834	dst_dy += tmp.dst.y;
2835
2836	tmp.dst.x = tmp.dst.y = 0;
2837
2838	gen6_align_vertex(sna, &tmp);
2839	gen6_emit_copy_state(sna, &tmp);
2840
2841	do {
2842		int16_t *v;
2843		int n_this_time;
2844
2845		n_this_time = gen6_get_rectangles(sna, &tmp, n,
2846						  gen6_emit_copy_state);
2847		n -= n_this_time;
2848
2849		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
2850		sna->render.vertex_used += 6 * n_this_time;
2851		assert(sna->render.vertex_used <= sna->render.vertex_size);
2852		do {
2853
2854			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
2855			     box->x1 + src_dx, box->y1 + src_dy,
2856			     box->x1 + dst_dx, box->y1 + dst_dy,
2857			     box->x2 - box->x1, box->y2 - box->y1));
2858			v[0] = box->x2 + dst_dx;
2859			v[2] = box->x2 + src_dx;
2860			v[1]  = v[5] = box->y2 + dst_dy;
2861			v[3]  = v[7] = box->y2 + src_dy;
2862			v[8]  = v[4] = box->x1 + dst_dx;
2863			v[10] = v[6] = box->x1 + src_dx;
2864			v[9]  = box->y1 + dst_dy;
2865			v[11] = box->y1 + src_dy;
2866			v += 12; box++;
2867		} while (--n_this_time);
2868	} while (n);
2869
2870	gen4_vertex_flush(sna);
2871	sna_render_composite_redirect_done(sna, &tmp);
2872	if (tmp.src.bo != src_bo)
2873		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2874	return true;
2875
2876fallback_tiled_dst:
2877	if (tmp.redirect.real_bo)
2878		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2879fallback_tiled:
2880	if (sna_blt_compare_depth(src, dst) &&
2881	    sna_blt_copy_boxes(sna, alu,
2882			       src_bo, src_dx, src_dy,
2883			       dst_bo, dst_dx, dst_dy,
2884			       dst->bitsPerPixel,
2885			       box, n))
2886		return true;
2887
2888	return sna_tiling_copy_boxes(sna, alu,
2889				     src, src_bo, src_dx, src_dy,
2890				     dst, dst_bo, dst_dx, dst_dy,
2891				     box, n);
2892}
2893
2894static void
2895gen6_render_copy_blt(struct sna *sna,
2896		     const struct sna_copy_op *op,
2897		     int16_t sx, int16_t sy,
2898		     int16_t w,  int16_t h,
2899		     int16_t dx, int16_t dy)
2900{
2901	int16_t *v;
2902
2903	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state);
2904
2905	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
2906	sna->render.vertex_used += 6;
2907	assert(sna->render.vertex_used <= sna->render.vertex_size);
2908
2909	v[0]  = dx+w; v[1]  = dy+h;
2910	v[2]  = sx+w; v[3]  = sy+h;
2911	v[4]  = dx;   v[5]  = dy+h;
2912	v[6]  = sx;   v[7]  = sy+h;
2913	v[8]  = dx;   v[9]  = dy;
2914	v[10] = sx;   v[11] = sy;
2915}
2916
2917static void
2918gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
2919{
2920	DBG(("%s()\n", __FUNCTION__));
2921
2922	assert(!sna->render.active);
2923	if (sna->render.vertex_offset)
2924		gen4_vertex_flush(sna);
2925}
2926
/* Prepare a streaming copy operation between two pixmaps, filling in
 * op->blt/op->done.  Falls back to the BLT for unsupported alus,
 * self-copies, oversized surfaces or unsupported formats. */
static bool
gen6_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *op)
{
	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
	     __FUNCTION__, alu,
	     src->drawable.width, src->drawable.height,
	     dst->drawable.width, dst->drawable.height));

	/* Prefer the BLT when the heuristics favour it. */
	if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy(sna, alu,
			 src_bo, dst_bo,
			 dst->drawable.bitsPerPixel,
			 op))
		return true;

	/* The render path handles only GXcopy/GXclear, distinct bos and
	 * surfaces within the hardware limits. */
	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
	    too_large(src->drawable.width, src->drawable.height) ||
	    too_large(dst->drawable.width, dst->drawable.height)) {
fallback:
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
			return false;

		return sna_blt_copy(sna, alu, src_bo, dst_bo,
				    dst->drawable.bitsPerPixel,
				    op);
	}

	if (dst->drawable.depth == src->drawable.depth) {
		op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = op->base.dst.format;
	} else {
		op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
	}
	if (!gen6_check_format(op->base.src.pict_format))
		goto fallback;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.bo = dst_bo;

	op->base.src.bo = src_bo;
	op->base.src.card_format =
		gen6_get_card_format(op->base.src.pict_format);
	op->base.src.width  = src->drawable.width;
	op->base.src.height = src->drawable.height;

	op->base.mask.bo = NULL;

	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen6.flags = COPY_FLAGS(alu);
	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER);
	assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX);

	/* Ensure both bos fit in the batch, flushing at most once. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_align_vertex(sna, &op->base);
	gen6_emit_copy_state(sna, &op->base);

	op->blt  = gen6_render_copy_blt;
	op->done = gen6_render_copy_done;
	return true;
}
3004
/* Emit the render state for a solid fill: the "source" is a 1x1
 * solid-colour bo sampled as B8G8R8A8. */
static void
gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	/* Surface 0: render target; surface 1: 1x1 solid colour. */
	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, 1, 1,
			     GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
			     false);

	/* If this binding table is identical to the last one emitted,
	 * reuse it and return the batch space we just reserved. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface +=
			sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, offset | dirty);
}
3036
static bool
gen6_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       const DrawableRec *dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	/* Fill the n boxes of dst with the given colour composited via
	 * the RENDER operator op.  Falls back to the BLT engine (or
	 * tiling) when that is preferable or required; returns false
	 * only when neither engine can handle the request. */
	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
	     __FUNCTION__, op,
	     color->red, color->green, color->blue, color->alpha, (int)format));

	/* No blend state exists for operators beyond our table. */
	if (op >= ARRAY_SIZE(gen6_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* Try the BLT first if it is preferred for this target, or if the
	 * destination format cannot be rendered to at all. */
	if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) ||
	    !gen6_check_dst_format(format)) {
		uint8_t alu = GXinvalid;

		/* Only PictOpClear/PictOpSrc map onto BLT raster ops. */
		if (op <= PictOpSrc) {
			pixel = 0;
			if (op == PictOpClear)
				alu = GXclear;
			else if (sna_get_pixel_from_rgba(&pixel,
							 color->red,
							 color->green,
							 color->blue,
							 color->alpha,
							 format))
				alu = GXcopy;
		}

		if (alu != GXinvalid &&
		    sna_blt_fill_boxes(sna, alu,
				       dst_bo, dst->bitsPerPixel,
				       pixel, box, n))
			return true;

		/* BLT failed and RENDER cannot write this format: give up. */
		if (!gen6_check_dst_format(format))
			return false;
	}

	/* Reduce the operation to a PictOpSrc fill of an a8r8g8b8 pixel. */
	if (op == PictOpClear) {
		pixel = 0;
		op = PictOpSrc;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    PICT_a8r8g8b8))
		return false;

	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
	     __FUNCTION__, pixel, n,
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width  = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	/* Destinations larger than the 3D pipeline limit are redirected
	 * through a proxy covering the extents, or tiled as a last resort. */
	sna_render_composite_redirect_init(&tmp);
	if (too_large(dst->width, dst->height)) {
		BoxRec extents;

		boxes_extents(box, n, &extents);
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1, extents.y1,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	tmp.src.bo = sna_render_get_solid(sna, pixel);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS(op, format);
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen6_align_vertex(sna, &tmp);
	gen6_emit_fill_state(sna, &tmp);

	/* Emit the rectangles in batches limited by the vertex buffer;
	 * each rectangle is 3 vertices of packed int16 (x, y, u, v). */
	do {
		int n_this_time;
		int16_t *v;

		n_this_time = gen6_get_rectangles(sna, &tmp, n,
						  gen6_emit_fill_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {
			DBG(("	(%d, %d), (%d, %d)\n",
			     box->x1, box->y1, box->x2, box->y2));

			v[0] = box->x2;
			v[5] = v[1] = box->y2;
			v[8] = v[4] = box->x1;
			v[9] = box->y1;
			v[2] = v[3]  = v[7]  = 1;
			v[6] = v[10] = v[11] = 0;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}
3173
3174static void
3175gen6_render_op_fill_blt(struct sna *sna,
3176			const struct sna_fill_op *op,
3177			int16_t x, int16_t y, int16_t w, int16_t h)
3178{
3179	int16_t *v;
3180
3181	DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
3182
3183	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);
3184
3185	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3186	sna->render.vertex_used += 6;
3187	assert(sna->render.vertex_used <= sna->render.vertex_size);
3188
3189	v[0] = x+w;
3190	v[4] = v[8] = x;
3191	v[1] = v[5] = y+h;
3192	v[9] = y;
3193
3194	v[2] = v[3]  = v[7]  = 1;
3195	v[6] = v[10] = v[11] = 0;
3196}
3197
3198fastcall static void
3199gen6_render_op_fill_box(struct sna *sna,
3200			const struct sna_fill_op *op,
3201			const BoxRec *box)
3202{
3203	int16_t *v;
3204
3205	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
3206	     box->x1, box->y1, box->x2, box->y2));
3207
3208	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);
3209
3210	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3211	sna->render.vertex_used += 6;
3212	assert(sna->render.vertex_used <= sna->render.vertex_size);
3213
3214	v[0] = box->x2;
3215	v[8] = v[4] = box->x1;
3216	v[5] = v[1] = box->y2;
3217	v[9] = box->y1;
3218
3219	v[7] = v[2]  = v[3]  = 1;
3220	v[6] = v[10] = v[11] = 0;
3221}
3222
3223fastcall static void
3224gen6_render_op_fill_boxes(struct sna *sna,
3225			  const struct sna_fill_op *op,
3226			  const BoxRec *box,
3227			  int nbox)
3228{
3229	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
3230	     box->x1, box->y1, box->x2, box->y2, nbox));
3231
3232	do {
3233		int nbox_this_time;
3234		int16_t *v;
3235
3236		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
3237						     gen6_emit_fill_state);
3238		nbox -= nbox_this_time;
3239
3240		v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3241		sna->render.vertex_used += 6 * nbox_this_time;
3242		assert(sna->render.vertex_used <= sna->render.vertex_size);
3243
3244		do {
3245			v[0] = box->x2;
3246			v[8] = v[4] = box->x1;
3247			v[5] = v[1] = box->y2;
3248			v[9] = box->y1;
3249			v[7] = v[2]  = v[3]  = 1;
3250			v[6] = v[10] = v[11] = 0;
3251			box++; v += 12;
3252		} while (--nbox_this_time);
3253	} while (nbox);
3254}
3255
static void
gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op)
{
	/* Finish a fill operation: flush any vertices still pending in
	 * the current primitive and release the solid-colour source bo. */
	DBG(("%s()\n", __FUNCTION__));

	assert(!sna->render.active);
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
}
3266
static bool
gen6_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *op)
{
	/* Prepare a reusable fill operation (blt/box/boxes/done vtable in
	 * op) for drawing many rectangles of a single colour.  Prefers
	 * the BLT engine when it is a better fit; returns false only when
	 * neither engine can take the fill. */
	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));

	if (prefer_blt_fill(sna, dst_bo, flags) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

	/* RENDER only handles GXcopy/GXclear and size-limited targets;
	 * everything else must go through the BLT. */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    op);

	if (alu == GXclear)
		color = 0;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	op->base.dst.bo = dst_bo;
	op->base.dst.x = op->base.dst.y = 0;

	/* The colour is sampled from a cached 1x1 solid bo. */
	op->base.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	op->base.mask.bo = NULL;

	op->base.need_magic_ca_pass = false;
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX);

	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen6_align_vertex(sna, &op->base);
	gen6_emit_fill_state(sna, &op->base);

	op->blt  = gen6_render_op_fill_blt;
	op->box  = gen6_render_op_fill_box;
	op->boxes = gen6_render_op_fill_boxes;
	op->points = NULL;
	op->done = gen6_render_op_fill_done;
	return true;
}
3330
3331static bool
3332gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3333			     uint32_t color,
3334			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
3335			     uint8_t alu)
3336{
3337	BoxRec box;
3338
3339	box.x1 = x1;
3340	box.y1 = y1;
3341	box.x2 = x2;
3342	box.y2 = y2;
3343
3344	return sna_blt_fill_boxes(sna, alu,
3345				  bo, dst->drawable.bitsPerPixel,
3346				  color, &box, 1);
3347}
3348
static bool
gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;
	int16_t *v;

	/* Fill a single rectangle of dst with color using alu, preferring
	 * the BLT when already engaged and requiring it when the RENDER
	 * pipeline cannot handle the alu or the target size. */

	/* Prefer to use the BLT if already engaged */
	if (prefer_blt_fill(sna, bo, FILL_BOXES) &&
	    gen6_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return gen6_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	if (alu == GXclear)
		color = 0;

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* The colour is sampled from a cached 1x1 solid bo. */
	tmp.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	/* Make room in the batch; if the target still does not fit we
	 * must release the solid bo and report failure to the caller. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen6_align_vertex(sna, &tmp);
	gen6_emit_fill_state(sna, &tmp);

	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);

	DBG(("	(%d, %d), (%d, %d)\n", x1, y1, x2, y2));

	/* Three vertices for the rectangle, packed as int16 (x, y, u, v). */
	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = x2;
	v[8] = v[4] = x1;
	v[5] = v[1] = y2;
	v[9] = y1;
	v[7] = v[2]  = v[3]  = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
3428
3429static bool
3430gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
3431{
3432	BoxRec box;
3433
3434	box.x1 = 0;
3435	box.y1 = 0;
3436	box.x2 = dst->drawable.width;
3437	box.y2 = dst->drawable.height;
3438
3439	return sna_blt_fill_boxes(sna, GXclear,
3440				  bo, dst->drawable.bitsPerPixel,
3441				  0, &box, 1);
3442}
3443
static bool
gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	struct sna_composite_op tmp;
	int16_t *v;

	/* Clear the whole pixmap to zero, using the RENDER pipeline with
	 * a solid black source unless the BLT is already engaged or the
	 * target exceeds the 3D pipeline size limits. */
	DBG(("%s: %dx%d\n",
	     __FUNCTION__,
	     dst->drawable.width,
	     dst->drawable.height));

	/* Prefer to use the BLT if, and only if, already engaged */
	if (sna->kgem.ring == KGEM_BLT &&
	    gen6_render_clear_try_blt(sna, dst, bo))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height))
		return gen6_render_clear_try_blt(sna, dst, bo);

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Sample zero from the cached 1x1 solid bo. */
	tmp.src.bo = sna_render_get_solid(sna, 0);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	/* Make room in the batch; on failure release the solid bo. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen6_align_vertex(sna, &tmp);
	gen6_emit_fill_state(sna, &tmp);

	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);

	/* One full-pixmap rectangle: three vertices packed as int16
	 * (x, y, u, v). */
	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = dst->drawable.width;
	v[5] = v[1] = dst->drawable.height;
	v[8] = v[4] = 0;
	v[9] = 0;

	v[7] = v[2]  = v[3]  = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
3514
static void gen6_render_reset(struct sna *sna)
{
	/* Invalidate all cached hardware state so that the next emission
	 * re-sends the invariant state and every state packet. */
	sna->render_state.gen6.needs_invariant = true;
	sna->render_state.gen6.first_state_packet = true;
	sna->render_state.gen6.ve_id = 3 << 2;
	sna->render_state.gen6.last_primitive = -1;

	/* -1 marks each cached offset/id as "unknown". */
	sna->render_state.gen6.num_sf_outputs = 0;
	sna->render_state.gen6.samplers = -1;
	sna->render_state.gen6.blend = -1;
	sna->render_state.gen6.kernel = -1;
	sna->render_state.gen6.drawrect_offset = -1;
	sna->render_state.gen6.drawrect_limit = -1;
	sna->render_state.gen6.surface_table = -1;

	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}
3539
static void gen6_render_fini(struct sna *sna)
{
	/* Release the static-stream bo holding kernels/samplers/blend state. */
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo);
}
3544
static bool is_gt2(struct sna *sna, int devid)
{
	/* Bits 4:5 of the PCI device id distinguish the GT levels;
	 * any non-zero value here means more than GT1. */
	return (devid & 0x30) != 0;
}
3549
static bool is_mobile(struct sna *sna, int devid)
{
	/* Mobile parts carry 0x6 in the low nibble of the PCI device id. */
	const int nibble = devid & 0xf;

	return nibble == 0x6;
}
3554
static bool gen6_render_setup(struct sna *sna, int devid)
{
	struct gen6_render_state *state = &sna->render_state.gen6;
	struct sna_static_stream general;
	struct gen6_sampler_state *ss;
	int i, j, k, l, m;

	/* Build the static (immutable) state bo: WM kernels for every
	 * dispatch width, the sampler-state table covering all
	 * filter/extend combinations, and the blend-state table. */
	state->info = &gt1_info;
	if (is_gt2(sna, devid))
		state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */
	state->gt = state->info->gt;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);
	scratch_create(&general);

	for (m = 0; m < GEN6_KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			/* Pre-built kernel binary: store it as the
			 * 16-pixel dispatch variant. */
			state->wm_kernel[m][1] =
				sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			/* Compile the kernel for each enabled dispatch
			 * width (8/16/32 pixels). */
			if (USE_8_PIXEL_DISPATCH) {
				state->wm_kernel[m][0] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 8);
			}

			if (USE_16_PIXEL_DISPATCH) {
				state->wm_kernel[m][1] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 16);
			}

			if (USE_32_PIXEL_DISPATCH) {
				state->wm_kernel[m][2] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 32);
			}
		}
		/* Guarantee at least one dispatch variant exists. */
		if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) {
			state->wm_kernel[m][1] =
				sna_static_stream_compile_wm(sna, &general,
							     wm_kernels[m].data, 16);
		}
	}

	/* Sampler table: two fixed pairs (copy, fill) followed by one
	 * source/mask pair for every filter x extend combination. */
	ss = sna_static_stream_map(&general,
				   2 * sizeof(*ss) *
				   (2 +
				    FILTER_COUNT * EXTEND_COUNT *
				    FILTER_COUNT * EXTEND_COUNT),
				   32);
	state->wm_state = sna_static_stream_offsetof(&general, ss);
	sampler_copy_init(ss); ss += 2;
	sampler_fill_init(ss); ss += 2;
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					sampler_state_init(ss++, i, j);
					sampler_state_init(ss++, k, l);
				}
			}
		}
	}

	state->cc_blend = gen6_composite_create_blend_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}
3633
const char *gen6_render_init(struct sna *sna, const char *backend)
{
	int devid = intel_get_device_id(sna->scrn);

	/* Populate the render vtable with the gen6 (Sandybridge)
	 * implementations.  On setup failure the previous backend name
	 * is returned unchanged and no hooks are installed. */
	if (!gen6_render_setup(sna, devid))
		return backend;

	sna->kgem.context_switch = gen6_render_context_switch;
	sna->kgem.retire = gen6_render_retire;
	sna->kgem.expire = gen4_render_expire;

#if !NO_COMPOSITE
	sna->render.composite = gen6_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif

#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen6_check_composite_spans;
	sna->render.composite_spans = gen6_render_composite_spans;
	/* Spans on the GPU only pay off on the mobile parts. */
	if (is_mobile(sna, devid))
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
	sna->render.video = gen6_render_video;

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen6_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen6_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen6_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen6_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen6_render_fill_one;
#endif
#if !NO_FILL_CLEAR
	sna->render.clear = gen6_render_clear;
#endif

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen6_render_reset;
	sna->render.fini = gen6_render_fini;

	sna->render.max_3d_size = GEN6_MAX_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->render_state.gen6.info->name;
}
3686