1/*
2 * Copyright © 2006,2008,2011 Intel Corporation
3 * Copyright © 2007 Red Hat, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 *    Wang Zhenyu <zhenyu.z.wang@sna.com>
26 *    Eric Anholt <eric@anholt.net>
27 *    Carl Worth <cworth@redhat.com>
28 *    Keith Packard <keithp@keithp.com>
29 *    Chris Wilson <chris@chris-wilson.co.uk>
30 *
31 */
32
33#ifdef HAVE_CONFIG_H
34#include "config.h"
35#endif
36
37#include "sna.h"
38#include "sna_reg.h"
39#include "sna_render.h"
40#include "sna_render_inline.h"
41#include "sna_video.h"
42
43#include "brw/brw.h"
44#include "gen6_render.h"
45#include "gen6_common.h"
46#include "gen4_common.h"
47#include "gen4_source.h"
48#include "gen4_vertex.h"
49
/* Debug knobs: force a pipe-control invalidate/flush/stall before every
 * state emission instead of computing the need on demand (see
 * gen6_emit_state()). */
#define ALWAYS_INVALIDATE 0
#define ALWAYS_FLUSH 0
#define ALWAYS_STALL 0

/* Debug knobs to disable individual accelerated paths at compile time. */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0

/* Which pixel-shader dispatch widths may be enabled in 3DSTATE_WM. */
#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0

#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif

/* Maximum 2D surface dimension handled by this backend (see too_large()). */
#define GEN6_MAX_SIZE 8192
72
/* Per-GT hardware limits used when programming the fixed-function
 * pipeline (thread counts, URB partitioning). */
struct gt_info {
	const char *name;	/* human-readable chipset description */
	int max_vs_threads;	/* maximum vertex-shader threads */
	int max_gs_threads;	/* maximum geometry-shader threads */
	int max_wm_threads;	/* maximum pixel-shader (WM) threads */
	struct {
		int size;		/* URB size (presumably in KB -- confirm) */
		int max_vs_entries;	/* VS URB entry limit (see gen6_emit_urb) */
		int max_gs_entries;
	} urb;
	int gt;			/* GT level: 1 or 2 */
};
85
/* Limits for the GT1 variant of Sandybridge. */
static const struct gt_info gt1_info = {
	.name = "Sandybridge (gen6, gt1)",
	.max_vs_threads = 24,
	.max_gs_threads = 21,
	.max_wm_threads = 40,
	.urb = { 32, 256, 256 },
	.gt = 1,
};
94
/* Limits for the GT2 variant of Sandybridge. */
static const struct gt_info gt2_info = {
	.name = "Sandybridge (gen6, gt2)",
	.max_vs_threads = 60,
	.max_gs_threads = 60,
	.max_wm_threads = 80,
	.urb = { 64, 256, 256 },
	.gt = 2,
};
103
/* Precompiled gen6 pixel-shader programs for video colour conversion.
 * Each .g6b file supplies raw EU instructions as rows of 4 dwords.
 * The BT.601 and BT.709 variants differ only in the YUV->RGB stage. */
static const uint32_t ps_kernel_packed_bt601[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_yuv_rgb_bt601.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_planar_bt601[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_planar.g6b"
#include "exa_wm_yuv_rgb_bt601.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_nv12_bt601[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_nv12.g6b"
#include "exa_wm_yuv_rgb_bt601.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_packed_bt709[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_yuv_rgb_bt709.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_planar_bt709[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_planar.g6b"
#include "exa_wm_yuv_rgb_bt709.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_nv12_bt709[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_nv12.g6b"
#include "exa_wm_yuv_rgb_bt709.g6b"
#include "exa_wm_write.g6b"
};
145
/* Table constructors: NOKERNEL entries carry a size of 0 and reference a
 * brw_wm_kernel__* symbol (presumably assembled at runtime -- confirm in
 * the state-creation code); KERNEL entries reference the precompiled
 * .g6b blobs above with their compile-time size. */
#define NOKERNEL(kernel_enum, func, ns) \
    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns}
#define KERNEL(kernel_enum, kernel, ns) \
    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns}

static const struct wm_kernel_info {
	const char *name;		/* enum name, for debug output */
	const void *data;		/* kernel blob, or generator symbol */
	unsigned int size;		/* byte size; 0 for NOKERNEL entries */
	unsigned int num_surfaces;	/* binding-table entries the kernel reads */
} wm_kernels[] = {
	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),

	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),

	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),

	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),

	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),

	KERNEL(VIDEO_PLANAR_BT601, ps_kernel_planar_bt601, 7),
	KERNEL(VIDEO_NV12_BT601, ps_kernel_nv12_bt601, 7),
	KERNEL(VIDEO_PACKED_BT601, ps_kernel_packed_bt601, 2),

	KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7),
	KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7),
	KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2),
};
#undef KERNEL
182
/* Map of Render PictOp -> gen6 blend factors.  src_alpha marks the ops
 * whose destination factor references source alpha and therefore needs
 * the component-alpha fixup in gen6_get_blend(). */
static const struct blendinfo {
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen6_blend_op[] = {
	/* Clear */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO},
	/* Src */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO},
	/* Dst */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE},
	/* Over */	{1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE},
	/* In */	{0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
	/* InReverse */	{1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA},
	/* Out */	{0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
	/* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	{1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA},
	/* Xor */	{1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE},
};
202
/**
 * Highest-valued BLENDFACTOR used in gen6_blend_op.
 *
 * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR,
 * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1)

/* Each blend state is padded out to a 64-byte boundary. */
#define GEN6_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen6_blend_state), 64)

/* Byte offset of the pre-baked blend state for a (src, dst) factor pair. */
#define BLEND_OFFSET(s, d) \
	(((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE)

#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO)

/* Byte offset of the sampler-state pair for the given src/mask
 * filter+extend combination. */
#define SAMPLER_OFFSET(sf, se, mf, me) \
	(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state))

#define VERTEX_2s2s 0

/* Pre-combined state flags for the copy and fill fast paths. */
#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX)

#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state))
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)

/* op->u.gen6.flags packing: sampler offset + kernel id in the high
 * 16 bits, blend offset + vertex id in the low 16 bits; the low 4 bits
 * of each half carry the kernel/vertex id. */
#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0)
#define GEN6_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN6_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN6_SET_FLAGS(S, B, K, V)  (((S) | (K)) << 16 | ((B) | (V)))

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
243
244static inline bool too_large(int width, int height)
245{
246	return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE;
247}
248
249static uint32_t gen6_get_blend(int op,
250			       bool has_component_alpha,
251			       uint32_t dst_format)
252{
253	uint32_t src, dst;
254
255	src = gen6_blend_op[op].src_blend;
256	dst = gen6_blend_op[op].dst_blend;
257
258	/* If there's no dst alpha channel, adjust the blend op so that
259	 * we'll treat it always as 1.
260	 */
261	if (PICT_FORMAT_A(dst_format) == 0) {
262		if (src == GEN6_BLENDFACTOR_DST_ALPHA)
263			src = GEN6_BLENDFACTOR_ONE;
264		else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA)
265			src = GEN6_BLENDFACTOR_ZERO;
266	}
267
268	/* If the source alpha is being used, then we should only be in a
269	 * case where the source blend factor is 0, and the source blend
270	 * value is the mask channels multiplied by the source picture's alpha.
271	 */
272	if (has_component_alpha && gen6_blend_op[op].src_alpha) {
273		if (dst == GEN6_BLENDFACTOR_SRC_ALPHA)
274			dst = GEN6_BLENDFACTOR_SRC_COLOR;
275		else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA)
276			dst = GEN6_BLENDFACTOR_INV_SRC_COLOR;
277	}
278
279	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
280	     op, dst_format, PICT_FORMAT_A(dst_format),
281	     src, dst, (int)BLEND_OFFSET(src, dst)));
282	return BLEND_OFFSET(src, dst);
283}
284
285static uint32_t gen6_get_card_format(PictFormat format)
286{
287	switch (format) {
288	default:
289		return -1;
290	case PICT_a8r8g8b8:
291		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
292	case PICT_x8r8g8b8:
293		return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
294	case PICT_a8b8g8r8:
295		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
296	case PICT_x8b8g8r8:
297		return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM;
298#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
299	case PICT_a2r10g10b10:
300		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
301	case PICT_x2r10g10b10:
302		return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM;
303#endif
304	case PICT_r8g8b8:
305		return GEN6_SURFACEFORMAT_R8G8B8_UNORM;
306	case PICT_r5g6b5:
307		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
308	case PICT_a1r5g5b5:
309		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
310	case PICT_a8:
311		return GEN6_SURFACEFORMAT_A8_UNORM;
312	case PICT_a4r4g4b4:
313		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
314	}
315}
316
317static uint32_t gen6_get_dest_format(PictFormat format)
318{
319	switch (format) {
320	default:
321		return -1;
322	case PICT_a8r8g8b8:
323	case PICT_x8r8g8b8:
324		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
325	case PICT_a8b8g8r8:
326	case PICT_x8b8g8r8:
327		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
328#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
329	case PICT_a2r10g10b10:
330	case PICT_x2r10g10b10:
331		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
332#endif
333	case PICT_r5g6b5:
334		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
335	case PICT_x1r5g5b5:
336	case PICT_a1r5g5b5:
337		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
338	case PICT_a8:
339		return GEN6_SURFACEFORMAT_A8_UNORM;
340	case PICT_a4r4g4b4:
341	case PICT_x4r4g4b4:
342		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
343	}
344}
345
346static bool gen6_check_dst_format(PictFormat format)
347{
348	if (gen6_get_dest_format(format) != -1)
349		return true;
350
351	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
352	return false;
353}
354
/* Predicate: can the sampler read this PictFormat? */
static bool gen6_check_format(uint32_t format)
{
	bool supported = gen6_get_card_format(format) != -1;

	if (!supported)
		DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));

	return supported;
}
363
364static uint32_t gen6_filter(uint32_t filter)
365{
366	switch (filter) {
367	default:
368		assert(0);
369	case PictFilterNearest:
370		return SAMPLER_FILTER_NEAREST;
371	case PictFilterBilinear:
372		return SAMPLER_FILTER_BILINEAR;
373	}
374}
375
376static uint32_t gen6_check_filter(PicturePtr picture)
377{
378	switch (picture->filter) {
379	case PictFilterNearest:
380	case PictFilterBilinear:
381		return true;
382	default:
383		return false;
384	}
385}
386
387static uint32_t gen6_repeat(uint32_t repeat)
388{
389	switch (repeat) {
390	default:
391		assert(0);
392	case RepeatNone:
393		return SAMPLER_EXTEND_NONE;
394	case RepeatNormal:
395		return SAMPLER_EXTEND_REPEAT;
396	case RepeatPad:
397		return SAMPLER_EXTEND_PAD;
398	case RepeatReflect:
399		return SAMPLER_EXTEND_REFLECT;
400	}
401}
402
403static bool gen6_check_repeat(PicturePtr picture)
404{
405	if (!picture->repeat)
406		return true;
407
408	switch (picture->repeatType) {
409	case RepeatNone:
410	case RepeatNormal:
411	case RepeatPad:
412	case RepeatReflect:
413		return true;
414	default:
415		return false;
416	}
417}
418
419static int
420gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
421{
422	int base;
423
424	if (has_mask) {
425		if (is_ca) {
426			if (gen6_blend_op[op].src_alpha)
427				base = GEN6_WM_KERNEL_MASKSA;
428			else
429				base = GEN6_WM_KERNEL_MASKCA;
430		} else
431			base = GEN6_WM_KERNEL_MASK;
432	} else
433		base = GEN6_WM_KERNEL_NOMASK;
434
435	return base + !is_affine;
436}
437
/* Emit a full write-cache and texture-cache flush with a CS stall; used
 * when a source/mask bo is dirty from earlier rendering in this batch
 * (see gen6_emit_state()). */
inline static void
gen6_emit_pipe_invalidate(struct sna *sna)
{
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH |
		  GEN6_PIPE_CONTROL_TC_FLUSH |
		  GEN6_PIPE_CONTROL_CS_STALL);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
448
449inline static void
450gen6_emit_pipe_flush(struct sna *sna, bool need_stall)
451{
452	unsigned stall;
453
454	stall = 0;
455	if (need_stall)
456		stall = GEN6_PIPE_CONTROL_CS_STALL;
457
458	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
459	OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH | stall);
460	OUT_BATCH(0);
461	OUT_BATCH(0);
462}
463
/* Emit a CS stall (held at the scoreboard) without any cache flush. */
inline static void
gen6_emit_pipe_stall(struct sna *sna)
{
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
		  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
473
/* Partition the URB: all entries go to the VS (run in pass-through
 * mode), none to the GS which is disabled. */
static void
gen6_emit_urb(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
		  (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
		  (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
483
/* Program STATE_BASE_ADDRESS: surface state is relative to the batch
 * buffer itself, while dynamic and instruction state live in the static
 * general_bo that holds our pre-baked states and kernels. */
static void
gen6_emit_state_base_address(struct sna *sna)
{
	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
	OUT_BATCH(0); /* general */
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,	/* NULL target => relocate to the batch itself */
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
				 sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* indirect */
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic state */
				 sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));

	/* upper bounds, disable */
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
512
/* Point the CC viewport state at offset 0 -- no viewport transform is
 * used for 2D rendering. */
static void
gen6_emit_viewports(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
		  GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
		  (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
523
/* Disable the vertex shader: no constant buffer, no kernel; vertices
 * pass straight through to the next stage. */
static void
gen6_emit_vs(struct sna *sna)
{
	/* disable VS constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
	OUT_BATCH(0); /* no VS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
}
541
/* Disable the geometry shader: no constant buffer, no kernel. */
static void
gen6_emit_gs(struct sna *sna)
{
	/* disable GS constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
	OUT_BATCH(0); /* no GS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
}
560
/* Disable the clip stage; 2D rendering is bounded by the drawing
 * rectangle instead. */
static void
gen6_emit_clip(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);
}
569
/* The pixel shaders take no push constants: disable the PS constant
 * buffer. */
static void
gen6_emit_wm_constants(struct sna *sna)
{
	/* disable WM constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
580
/* Bind a NULL depth buffer and zero the clear params -- depth/stencil
 * are unused for 2D rendering. */
static void
gen6_emit_null_depth_buffer(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2));
	OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
		  GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2));
	OUT_BATCH(0);
}
596
/* One-time pipeline setup: select the 3D pipeline, disable
 * multisampling, set state base addresses, and park all the unused
 * fixed-function stages.  Re-emitted whenever needs_invariant is set. */
static void
gen6_emit_invariant(struct sna *sna)
{
	OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
		  GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
	OUT_BATCH(1);

	gen6_emit_urb(sna);

	gen6_emit_state_base_address(sna);

	gen6_emit_viewports(sna);
	gen6_emit_vs(sna);
	gen6_emit_gs(sna);
	gen6_emit_clip(sna);
	gen6_emit_wm_constants(sna);
	gen6_emit_null_depth_buffer(sna);

	sna->render_state.gen6.needs_invariant = false;
}
623
/* Point the hardware at the blend state for this op; blend is a byte
 * offset from cc_blend (presumably pre-baked via BLEND_OFFSET() --
 * confirm against the state-creation code). */
static void
gen6_emit_cc(struct sna *sna, int blend)
{
	struct gen6_render_state *render = &sna->render_state.gen6;

	if (render->blend == blend)
		return;

	DBG(("%s: blend = %x\n", __FUNCTION__, blend));

	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
	OUT_BATCH((render->cc_blend + blend) | 1);	/* bit0 = modify blend state */
	if (render->blend == (unsigned)-1) {
		/* First emission in this batch: also initialise the
		 * depth-stencil and colour-calc pointers (modify bits set,
		 * offset 0). */
		OUT_BATCH(1);
		OUT_BATCH(1);
	} else {
		OUT_BATCH(0);
		OUT_BATCH(0);
	}

	render->blend = blend;
}
646
647static void
648gen6_emit_sampler(struct sna *sna, uint32_t state)
649{
650	if (sna->render_state.gen6.samplers == state)
651		return;
652
653	sna->render_state.gen6.samplers = state;
654
655	DBG(("%s: sampler = %x\n", __FUNCTION__, state));
656
657	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
658		  GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
659		  (4 - 2));
660	OUT_BATCH(0); /* VS */
661	OUT_BATCH(0); /* GS */
662	OUT_BATCH(sna->render_state.gen6.wm_state + state);
663}
664
/* Program the SF stage to forward one (source only) or two (source +
 * mask) texture-coordinate attributes from the VUE to the WM stage. */
static void
gen6_emit_sf(struct sna *sna, bool has_mask)
{
	int num_sf_outputs = has_mask ? 2 : 1;

	if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs)
		return;

	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
	     __FUNCTION__, num_sf_outputs, 1, 0));

	sna->render_state.gen6.num_sf_outputs = num_sf_outputs;

	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
	OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT |
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT |
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW9 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW14 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW19 */
}
701
/* Program 3DSTATE_WM for the chosen kernel.  wm_kernel[kernel] holds up
 * to three instruction-base offsets, one per dispatch width (8/16/32
 * pixels); a zero entry means that width is not available. */
static void
gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask)
{
	const uint32_t *kernels;

	if (sna->render_state.gen6.kernel == kernel)
		return;

	sna->render_state.gen6.kernel = kernel;
	kernels = sna->render_state.gen6.wm_kernel[kernel];

	DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n",
	     __FUNCTION__,
	     wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces,
	    kernels[0], kernels[1], kernels[2]));

	OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
	/* DW1: first enabled kernel pointer (GNU ?: picks the first
	 * non-zero offset). */
	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
	OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
		  wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
	OUT_BATCH(0); /* scratch space */
	/* Start-GRF registers for each enabled dispatch mode. */
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT |
		  8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT |
		  6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT);
	OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
		  (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) |
		  (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) |
		  (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) |
		  GEN6_3DSTATE_WM_DISPATCH_ENABLE);
	OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
		  GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
	/* DW7/DW8: kernel pointers for the remaining dispatch modes. */
	OUT_BATCH(kernels[2]);
	OUT_BATCH(kernels[1]);
}
736
737static bool
738gen6_emit_binding_table(struct sna *sna, uint16_t offset)
739{
740	if (sna->render_state.gen6.surface_table == offset)
741		return false;
742
743	/* Binding table pointers */
744	OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS |
745		  GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
746		  (4 - 2));
747	OUT_BATCH(0);		/* vs */
748	OUT_BATCH(0);		/* gs */
749	/* Only the PS uses the binding table */
750	OUT_BATCH(offset*4);
751
752	sna->render_state.gen6.surface_table = offset;
753	return true;
754}
755
/* Set the drawing rectangle for the destination.  Returns true when the
 * rectangle was already current; false when it was (re-)emitted, in
 * which case the workaround pipe-controls below have already provided
 * the stall the caller would otherwise need (see gen6_emit_state()). */
static bool
gen6_emit_drawing_rectangle(struct sna *sna,
			    const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(abs(op->dst.x), abs(op->dst.y)));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen6.drawrect_limit  == limit &&
	    sna->render_state.gen6.drawrect_offset == offset)
		return true;

	/* [DevSNB-C+{W/A}] Before any depth stall flush (including those
	 * produced by non-pipelined state commands), software needs to first
	 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
	 * 0.
	 *
	 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
	 * BEFORE the pipe-control with a post-sync op and no write-cache
	 * flushes.
	 */
	if (!sna->render_state.gen6.first_state_packet)
		gen6_emit_pipe_stall(sna);

	/* Post-sync write of the timestamp into the scratch area of
	 * general_bo, solely to satisfy the workaround above. */
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME);
	OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16 |
				 I915_GEM_DOMAIN_INSTRUCTION,
				 64));
	OUT_BATCH(0);

	DBG(("%s: offset=(%d, %d), limit=(%d, %d)\n",
	     __FUNCTION__, op->dst.x, op->dst.y, op->dst.width, op->dst.height));
	OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);

	sna->render_state.gen6.drawrect_offset = offset;
	sna->render_state.gen6.drawrect_limit = limit;
	return false;
}
802
static void
gen6_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 *
	 * The vertex id encodes the layout: bits 0-1 are the number of
	 * floats in the first texcoord channel, bits 2+ the number in the
	 * optional second (mask) channel.
	 */
	struct gen6_render_state *render = &sna->render_state.gen6;
	uint32_t src_format, dw;
	int id = GEN6_VERTEX(op->u.gen6.flags);
	bool has_mask;

	DBG(("%s: setup id=%d\n", __FUNCTION__, id));

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
	 *    dword 4-7: position (x, y, 1.0, 1.0),
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
	 *
	 * dword 4-15 are fetched from vertex buffer
	 */
	has_mask = (id >> 2) != 0;
	OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
		((2 * (3 + has_mask)) + 1 - 2));

	/* Element 0: the constant-zero pad dwords of the VUE header. */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);

	/* x,y */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
		  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		/* 0 floats: the coordinates are the (x,y) shorts themselves. */
		src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  src_format << VE0_FORMAT_SHIFT |
		  4 << VE0_OFFSET_SHIFT);
	OUT_BATCH(dw);

	/* u1, v1, w1 */
	if (has_mask) {
		/* Mask coords follow the source coords in the vertex;
		 * a 0-float source channel still occupies one float slot
		 * (GNU ?: defaulting to 1). */
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
		dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  src_format << VE0_FORMAT_SHIFT |
			  offset << VE0_OFFSET_SHIFT);
		OUT_BATCH(dw);
	}
}
922
/* Emit all state needed before a primitive for the given composite op,
 * preceded by whatever pipe-control (invalidate/flush/stall) the cache
 * state requires. */
static void
gen6_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool need_invalidate;
	bool need_flush;
	bool need_stall;

	assert(op->dst.bo->exec);

	/* The caller borrows bit 0 of wm_binding_table as a flush-request
	 * flag; strip it before using the offset. */
	need_flush = wm_binding_table & 1;
	if (ALWAYS_FLUSH)
		need_flush = true;

	wm_binding_table &= ~1;
	need_stall = sna->render_state.gen6.surface_table != wm_binding_table;
	if (ALWAYS_STALL)
		need_stall = true;

	/* Sampling a bo written earlier in this batch requires a
	 * texture-cache invalidation. */
	need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo);
	if (ALWAYS_INVALIDATE)
		need_invalidate = true;

	if (need_invalidate) {
		/* The invalidate includes WC flush + CS stall, so it
		 * subsumes the weaker operations below. */
		gen6_emit_pipe_invalidate(sna);
		kgem_clear_dirty(&sna->kgem);
		assert(op->dst.bo->exec);
		kgem_bo_mark_dirty(op->dst.bo);

		need_flush = false;
		need_stall = false;
		sna->render_state.gen6.first_state_packet = true;
	}
	if (need_flush) {
		gen6_emit_pipe_flush(sna, need_stall);
		need_stall = false;
		sna->render_state.gen6.first_state_packet = true;
	}

	/* Re-emitting the drawing rectangle already performs the stall
	 * workaround (returns false in that case). */
	need_stall &= gen6_emit_drawing_rectangle(sna, op);
	if (need_stall)
		gen6_emit_pipe_stall(sna);

	gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
	gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
	gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
	gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
	gen6_emit_vertex_elements(sna, op);
	gen6_emit_binding_table(sna, wm_binding_table);

	sna->render_state.gen6.first_state_packet = false;
}
976
/* Second pass of the component-alpha workaround: replay the vertices
 * emitted since vertex_start with a PictOpAdd blend and the
 * component-alpha kernel.  Returns true if a pass was emitted. */
static bool gen6_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen6_render_state *state = &sna->render_state.gen6;

	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
	     sna->render.vertex_start, sna->render.vertex_index));

	/* Ensure the first pass has completed before changing blend state. */
	gen6_emit_pipe_stall(sna);

	gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format));
	gen6_emit_wm(sna,
		     gen6_choose_composite_kernel(PictOpAdd,
						  true, true,
						  op->is_affine),
		     true);

	OUT_BATCH(GEN6_3DPRIMITIVE |
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
		  0 << 9 |
		  4);
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);	/* count */
	OUT_BATCH(sna->render.vertex_start);	/* start vertex */
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}
1011
/* Surface state padded to 32 bytes, presumably so that surface entries
 * are uniformly spaced for simple offset arithmetic -- confirm against
 * the surface-state allocation code. */
typedef struct gen6_surface_state_padded {
	struct gen6_surface_state state;
	char pad[32 - sizeof(struct gen6_surface_state)];
} gen6_surface_state_padded;
1016
/* Reserve a 64-byte block of zeros in the static stream. */
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}
1022
/* Reserve a 64-byte aligned scratch region in the static state stream. */
static void scratch_create(struct sna_static_stream *stream)
{
	/* 64 bytes of scratch space for random writes, such as
	 * the pipe-control w/a.
	 */
	sna_static_stream_map(stream, 64, 64);
}
1030
1031static void
1032sampler_state_init(struct gen6_sampler_state *sampler_state,
1033		   sampler_filter_t filter,
1034		   sampler_extend_t extend)
1035{
1036	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
1037
1038	/* We use the legacy mode to get the semantics specified by
1039	 * the Render extension. */
1040	sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;
1041
1042	switch (filter) {
1043	default:
1044	case SAMPLER_FILTER_NEAREST:
1045		sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST;
1046		sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
1047		break;
1048	case SAMPLER_FILTER_BILINEAR:
1049		sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR;
1050		sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR;
1051		break;
1052	}
1053
1054	switch (extend) {
1055	default:
1056	case SAMPLER_EXTEND_NONE:
1057		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
1058		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
1059		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
1060		break;
1061	case SAMPLER_EXTEND_REPEAT:
1062		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
1063		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
1064		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
1065		break;
1066	case SAMPLER_EXTEND_PAD:
1067		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
1068		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
1069		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
1070		break;
1071	case SAMPLER_EXTEND_REFLECT:
1072		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
1073		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
1074		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
1075		break;
1076	}
1077}
1078
1079static void
1080sampler_copy_init(struct gen6_sampler_state *ss)
1081{
1082	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1083	ss->ss3.non_normalized_coord = 1;
1084
1085	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1086}
1087
1088static void
1089sampler_fill_init(struct gen6_sampler_state *ss)
1090{
1091	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
1092	ss->ss3.non_normalized_coord = 1;
1093
1094	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1095}
1096
1097static uint32_t
1098gen6_tiling_bits(uint32_t tiling)
1099{
1100	switch (tiling) {
1101	default: assert(0);
1102	case I915_TILING_NONE: return 0;
1103	case I915_TILING_X: return GEN6_SURFACE_TILED;
1104	case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y;
1105	}
1106}
1107
1108/**
1109 * Sets up the common fields for a surface state buffer for the given
1110 * picture in the given surface state buffer.
1111 */
1112static int
1113gen6_bind_bo(struct sna *sna,
1114	     struct kgem_bo *bo,
1115	     uint32_t width,
1116	     uint32_t height,
1117	     uint32_t format,
1118	     bool is_dst)
1119{
1120	uint32_t *ss;
1121	uint32_t domains;
1122	uint16_t offset;
1123	uint32_t is_scanout = is_dst && bo->scanout;
1124
1125	/* After the first bind, we manage the cache domains within the batch */
1126	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
1127	if (offset) {
1128		DBG(("[%x]  bo(handle=%d), format=%d, reuse %s binding\n",
1129		     offset, bo->handle, format,
1130		     is_dst ? "render" : "sampler"));
1131		assert(offset >= sna->kgem.surface);
1132		if (is_dst)
1133			kgem_bo_mark_dirty(bo);
1134		return offset * sizeof(uint32_t);
1135	}
1136
1137	offset = sna->kgem.surface -=
1138		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
1139	ss = sna->kgem.batch + offset;
1140	ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
1141		 GEN6_SURFACE_BLEND_ENABLED |
1142		 format << GEN6_SURFACE_FORMAT_SHIFT);
1143	if (is_dst) {
1144		ss[0] |= GEN6_SURFACE_RC_READ_WRITE;
1145		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
1146	} else
1147		domains = I915_GEM_DOMAIN_SAMPLER << 16;
1148	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
1149	ss[2] = ((width - 1)  << GEN6_SURFACE_WIDTH_SHIFT |
1150		 (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT);
1151	assert(bo->pitch <= (1 << 18));
1152	ss[3] = (gen6_tiling_bits(bo->tiling) |
1153		 (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
1154	ss[4] = 0;
1155	ss[5] = (is_scanout || bo->io) ? 0 : 3 << 16;
1156
1157	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);
1158
1159	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
1160	     offset, bo->handle, ss[1],
1161	     format, width, height, bo->pitch, bo->tiling,
1162	     domains & 0xffff ? "render" : "sampler"));
1163
1164	return offset * sizeof(uint32_t);
1165}
1166
/* Emit 3DSTATE_VERTEX_BUFFERS for the vertex layout of op.  The buffer
 * address is left as 0 and its batch position recorded in vertex_reloc
 * so it can be fixed up when the vbo is finally bound. */
static void gen6_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = GEN6_VERTEX(op->u.gen6.flags);

	OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
		  4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT);
	/* Remember where the buffer address dword lives for later fixup */
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(~0); /* max address: disabled */
	OUT_BATCH(0);

	/* Mark this vertex layout as bound for the current batch */
	sna->render.vb_id |= 1 << id;
}
1182
/* Start (or continue) a 3DPRIMITIVE RECTLIST.  The vertex count dword is
 * left as 0 and pointed to by vertex_offset so that it can be filled in
 * once the rectangles have actually been emitted. */
static void gen6_emit_primitive(struct sna *sna)
{
	/* If nothing has been added to the batch since the last primitive,
	 * we can simply extend it instead of emitting a new one. */
	if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) {
		DBG(("%s: continuing previous primitive, start=%d, index=%d\n",
		     __FUNCTION__,
		     sna->render.vertex_start,
		     sna->render.vertex_index));
		/* Point back at the vertex-count dword of that primitive */
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN6_3DPRIMITIVE |
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
		  0 << 9 |
		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;
	DBG(("%s: started new primitive: index=%d\n",
	     __FUNCTION__, sna->render.vertex_start));

	sna->render_state.gen6.last_primitive = sna->kgem.nbatch;
}
1211
/* Ensure a primitive is open so rectangles can be emitted for op.
 * Returns false if there is not enough batch space, in which case the
 * caller must submit and retry. */
static bool gen6_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	int id = 1 << GEN6_VERTEX(op->u.gen6.flags);
	int ndwords;

	/* If a primitive became active while we waited for the vbo lock,
	 * there is nothing left to do. */
	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* Budget: 6 dwords for the primitive, plus room for the CA
	 * second pass and, if needed, 5 for binding the vertex buffer. */
	ndwords = op->need_magic_ca_pass ? 60 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen6_emit_vertex_buffer(sna, op);

	gen6_emit_primitive(sna);
	return true;
}
1233
/* Slow path for gen6_get_rectangles: flush the current vertices and try
 * to make room for more.  Returns the available vertex space in floats,
 * or 0 if the batch must be submitted first. */
static int gen6_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Preventing discarding new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	/* Require room for closing the primitive (and the CA pass) plus
	 * two relocations before we dare flush mid-operation. */
	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		/* If a CA pass was emitted, it clobbered the blend and WM
		 * state; restore the operation's own state. */
		if (gen6_magic_ca_pass(sna, op)) {
			gen6_emit_pipe_stall(sna);
			gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
			gen6_emit_wm(sna,
				     GEN6_KERNEL(op->u.gen6.flags),
				     GEN6_VERTEX(op->u.gen6.flags) >> 2);
		}
	}

	return gen4_vertex_finish(sna);
}
1262
/* Reserve vertex space for up to 'want' rectangles of op, flushing
 * and/or submitting the batch as required (emit_state re-emits the full
 * state after a submit).  Returns the number of rectangles granted
 * (>= 1); the caller then writes 3 vertices per rectangle. */
inline static int gen6_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *, const struct sna_composite_op *op))
{
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen6_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	/* Make sure a primitive is open before handing out vertices */
	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen6_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	/* Trim the request to what actually fits in the vbo */
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	assert(want > 0);
	sna->render.vertex_index += 3*want;
	return want;

flush:
	/* Out of space: close the primitive, submit, re-emit state, retry */
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen6_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}
1308
/* Allocate a zeroed binding table from the surface area of the batch.
 * Returns a pointer to the table and stores its dword offset in *offset. */
inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
							 uint16_t *offset)
{
	uint32_t *table;

	sna->kgem.surface -=
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
	/* Clear all surplus entries to zero in case of prefetch */
	table = memset(sna->kgem.batch + sna->kgem.surface,
		       0, sizeof(struct gen6_surface_state_padded));

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}
1325
/* Prepare the batch for a render operation targeting op->dst.bo:
 * switch to the render ring, ensure enough space for state plus
 * surfaces (submitting first if not), and emit the invariant state if
 * this is a fresh batch.  Returns whether the destination is dirty. */
static bool
gen6_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen6.needs_invariant)
		gen6_emit_invariant(sna);

	return kgem_bo_is_dirty(op->dst.bo);
}
1344
/* Bind the destination, source and (optional) mask surfaces into a new
 * binding table and emit the state for a composite operation. */
static void gen6_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			    op->dst.bo, op->dst.width, op->dst.height,
			    gen6_get_dest_format(op->dst.format),
			    true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		binding_table[2] =
			gen6_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	/* If the new binding table is identical to the previous one (all
	 * surfaces were reused), discard it and reuse the old table. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	/* NOTE(review): the dirty flag is carried in the low bit of the
	 * offset for gen6_emit_state — assumes offset has bit0 clear. */
	gen6_emit_state(sna, op, offset | dirty);
}
1386
/* Realign the vertex buffer whenever the vertex size changes between
 * operations, so that vertices remain contiguous per operation. */
static void
gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert (sna->render.vertex_offset == 0);
	if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) {
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
		     sna->render_state.gen6.floats_per_vertex,
		     op->floats_per_vertex));
		gen4_vertex_align(sna, op);
		sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;
	}
	assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
}
1400
/* Emit a single composite rectangle described by r. */
fastcall static void
gen6_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
	op->prim_emit(sna, op, r);
}
1409
1410fastcall static void
1411gen6_render_composite_box(struct sna *sna,
1412			  const struct sna_composite_op *op,
1413			  const BoxRec *box)
1414{
1415	struct sna_composite_rectangles r;
1416
1417	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
1418
1419	DBG(("  %s: (%d, %d), (%d, %d)\n",
1420	     __FUNCTION__,
1421	     box->x1, box->y1, box->x2, box->y2));
1422
1423	r.dst.x = box->x1;
1424	r.dst.y = box->y1;
1425	r.width  = box->x2 - box->x1;
1426	r.height = box->y2 - box->y1;
1427	r.src = r.mask = r.dst;
1428
1429	op->prim_emit(sna, op, &r);
1430}
1431
1432static void
1433gen6_render_composite_boxes__blt(struct sna *sna,
1434				 const struct sna_composite_op *op,
1435				 const BoxRec *box, int nbox)
1436{
1437	DBG(("composite_boxes(%d)\n", nbox));
1438
1439	do {
1440		int nbox_this_time;
1441
1442		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
1443						     gen6_emit_composite_state);
1444		nbox -= nbox_this_time;
1445
1446		do {
1447			struct sna_composite_rectangles r;
1448
1449			DBG(("  %s: (%d, %d), (%d, %d)\n",
1450			     __FUNCTION__,
1451			     box->x1, box->y1, box->x2, box->y2));
1452
1453			r.dst.x = box->x1;
1454			r.dst.y = box->y1;
1455			r.width  = box->x2 - box->x1;
1456			r.height = box->y2 - box->y1;
1457			r.src = r.mask = r.dst;
1458
1459			op->prim_emit(sna, op, &r);
1460			box++;
1461		} while (--nbox_this_time);
1462	} while (nbox);
1463}
1464
/* Emit a batch of composite boxes using the operation's bulk
 * emit_boxes vertex generator. */
static void
gen6_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim the vertex space up front, then let emit_boxes
		 * fill it in directly. */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}
1488
/* Threaded variant of gen6_render_composite_boxes: vertex space is
 * claimed under the vertex lock, but the vertices themselves are
 * written with the lock dropped (holding an acquire reference) so
 * other threads may fill their own reservations concurrently. */
static void
gen6_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		/* Pin the vbo while we write outside the lock */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
1520
1521#ifndef MAX
1522#define MAX(a,b) ((a) > (b) ? (a) : (b))
1523#endif
1524
1525static uint32_t
1526gen6_composite_create_blend_state(struct sna_static_stream *stream)
1527{
1528	char *base, *ptr;
1529	int src, dst;
1530
1531	base = sna_static_stream_map(stream,
1532				     GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
1533				     64);
1534
1535	ptr = base;
1536	for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) {
1537		for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) {
1538			struct gen6_blend_state *blend =
1539				(struct gen6_blend_state *)ptr;
1540
1541			blend->blend0.dest_blend_factor = dst;
1542			blend->blend0.source_blend_factor = src;
1543			blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
1544			blend->blend0.blend_enable =
1545				!(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE);
1546
1547			blend->blend1.post_blend_clamp_enable = 1;
1548			blend->blend1.pre_blend_clamp_enable = 1;
1549
1550			ptr += GEN6_BLEND_STATE_PADDED_SIZE;
1551		}
1552	}
1553
1554	return sna_static_stream_offsetof(stream, base);
1555}
1556
/* Bind one plane of a video frame as a sampler surface at the given
 * byte offset within src_bo.  Returns the byte offset of the surface
 * state within the batch. */
static uint32_t gen6_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen6_surface_state *ss;

	sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN6_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;

	/* Address is patched in at execbuffer time via the relocation */
	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	ss->ss2.width  = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch  = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}
1586
/* Bind the destination and all source planes of a video frame and emit
 * the state for a video blit.  Planar formats bind six source surfaces
 * (Y twice, then V, then U for each field); packed formats bind one. */
static void gen6_emit_video_state(struct sna *sna,
				  const struct sna_composite_op *op)
{
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format[6];
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;
	int n_src, n;

	dirty = gen6_get_batch(sna, op);

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		/* Full-resolution luma planes... */
		for (n = 0; n < 2; n++) {
			src_surf_format[n] = GEN6_SURFACEFORMAT_R8_UNORM;
			src_width[n]  = frame->width;
			src_height[n] = frame->height;
			src_pitch[n]  = frame->pitch[1];
		}
		/* ...followed by half-resolution chroma planes (NV12
		 * interleaves CbCr so uses a two-channel format). */
		for (; n < 6; n++) {
			if (is_nv12_fourcc(frame->id))
				src_surf_format[n] = GEN6_SURFACEFORMAT_R8G8_UNORM;
			else
				src_surf_format[n] = GEN6_SURFACEFORMAT_R8_UNORM;
			src_width[n]  = frame->width / 2;
			src_height[n] = frame->height / 2;
			src_pitch[n]  = frame->pitch[0];
		}
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format[0] = GEN6_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format[0] = GEN6_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen6_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format[n]);
	}

	gen6_emit_state(sna, op, offset | dirty);
}
1659
1660static unsigned select_video_kernel(const struct sna_video *video,
1661				    const struct sna_video_frame *frame)
1662{
1663	switch (frame->id) {
1664	case FOURCC_YV12:
1665	case FOURCC_I420:
1666	case FOURCC_XVMC:
1667		return video->colorspace ?
1668			GEN6_WM_KERNEL_VIDEO_PLANAR_BT709 :
1669			GEN6_WM_KERNEL_VIDEO_PLANAR_BT601;
1670
1671	case FOURCC_NV12:
1672		return video->colorspace ?
1673			GEN6_WM_KERNEL_VIDEO_NV12_BT709 :
1674			GEN6_WM_KERNEL_VIDEO_NV12_BT601;
1675
1676	default:
1677		return video->colorspace ?
1678			GEN6_WM_KERNEL_VIDEO_PACKED_BT709 :
1679			GEN6_WM_KERNEL_VIDEO_PACKED_BT601;
1680	}
1681}
1682
/* Render a video frame onto pixmap, one textured rectangle per clip box
 * in dstRegion, scaling from the frame's source rectangle to the region
 * extents.  Returns true on success. */
static bool
gen6_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	unsigned filter;
	const BoxRec *box;
	int nbox;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
	     __FUNCTION__,
	     src_width, src_height, dst_width, dst_height,
	     region_num_rects(dstRegion),
	     REGION_EXTENTS(NULL, dstRegion)->x1,
	     REGION_EXTENTS(NULL, dstRegion)->y1,
	     REGION_EXTENTS(NULL, dstRegion)->x2,
	     REGION_EXTENTS(NULL, dstRegion)->y2));

	assert(priv->gpu_bo);
	memset(&tmp, 0, sizeof(tmp));

	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;

	/* (x, y) position + (u, v)-ish source coordinate per vertex */
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;

	/* Only filter when actually scaling */
	if (src_width == dst_width && src_height == dst_height)
		filter = SAMPLER_FILTER_NEAREST;
	else
		filter = SAMPLER_FILTER_BILINEAR;

	tmp.u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
					       SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
			       NO_BLEND,
			       select_video_kernel(video, frame),
			       2);
	tmp.priv = frame;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_align_vertex(sna, &tmp);
	gen6_emit_video_state(sna, &tmp);

	/* Map destination pixels to normalised source coordinates */
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	/* One RECTLIST rectangle (3 vertices) per clip box */
	box = region_rects(dstRegion);
	nbox = region_num_rects(dstRegion);
	while (nbox--) {
		gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state);

		OUT_VERTEX(box->x2, box->y2);
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(box->x1, box->y2);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(box->x1, box->y1);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

		box++;
	}
	gen4_vertex_flush(sna);

	if (!DAMAGE_IS_ALL(priv->gpu_damage))
		sna_damage_add(&priv->gpu_damage, dstRegion);

	return true;
}
1781
/* Prepare a source/mask channel from picture for hardware compositing.
 * Falls back to software fixup/extraction/conversion helpers whenever
 * the picture cannot be sampled directly (gradients, alpha maps,
 * unsupported repeat/filter/format, oversized pixmaps).  Returns the
 * helper's status: nonzero on success, 0/-1 per the helpers' contracts. */
static int
gen6_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d), precise=%d\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y, precise));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	/* Drawable-less pictures are gradients or other source picts */
	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		/* Imprecise rendering may use a cheaper approximation */
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fixup, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen6_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen6_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	/* A pure integer translation can be folded into the coordinates,
	 * dropping the transform (and any filtering) entirely. */
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat &&
		    (x >= 0 &&
		     y >= 0 &&
		     x + w <= pixmap->drawable.width &&
		     y + h <= pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			/* A fully-cleared pixmap reduces to a solid */
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color));
			}
		}
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen6_get_card_format(picture->format);
	if (channel->card_format == (unsigned)-1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	/* Pixmaps beyond the 3D pipe limits must be subsetted first */
	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	DBG(("%s: pixmap, repeat=%d, filter=%d, transform?=%d [affine? %d], format=%08x\n",
	     __FUNCTION__,
	     channel->repeat, channel->filter,
	     channel->transform != NULL, channel->is_affine,
	     channel->pict_format));
	if (channel->transform) {
#define f2d(x) (((double)(x))/65536.)
		DBG(("%s: transform=[%f %f %f, %f %f %f, %f %f %f] (raw [%x %x %x, %x %x %x, %x %x %x])\n",
		     __FUNCTION__,
		     f2d(channel->transform->matrix[0][0]),
		     f2d(channel->transform->matrix[0][1]),
		     f2d(channel->transform->matrix[0][2]),
		     f2d(channel->transform->matrix[1][0]),
		     f2d(channel->transform->matrix[1][1]),
		     f2d(channel->transform->matrix[1][2]),
		     f2d(channel->transform->matrix[2][0]),
		     f2d(channel->transform->matrix[2][1]),
		     f2d(channel->transform->matrix[2][2]),
		     channel->transform->matrix[0][0],
		     channel->transform->matrix[0][1],
		     channel->transform->matrix[0][2],
		     channel->transform->matrix[1][0],
		     channel->transform->matrix[1][1],
		     channel->transform->matrix[1][2],
		     channel->transform->matrix[2][0],
		     channel->transform->matrix[2][1],
		     channel->transform->matrix[2][2]));
#undef f2d
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}
1917
/* Convert a prepared channel's Render values into gen6 hardware
 * repeat/filter/format encodings. */
inline static void gen6_composite_channel_convert(struct sna_composite_channel *channel)
{
	channel->repeat = gen6_repeat(channel->repeat);
	channel->filter = gen6_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen6_get_card_format(channel->pict_format);
	assert(channel->card_format != (unsigned)-1);
}
1926
/* Finish a composite operation: flush outstanding vertices (and the CA
 * second pass if required), then release the channel bo references. */
static void gen6_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	DBG(("%s\n", __FUNCTION__));

	assert(!sna->render.active);
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen6_magic_ca_pass(sna, op);
	}

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	sna_render_composite_redirect_done(sna, op);
}
1945
/* Select and prepare the destination bo for a composite operation,
 * filling in op->dst.  Returns false if no usable GPU target could be
 * obtained (caller should fall back). */
inline static bool
gen6_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;
	unsigned int hint;

	DBG(("%s: (%d, %d)x(%d, %d), partial?=%d\n", __FUNCTION__, x, y, w, h, partial));

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	/* A zero-sized request means "the whole picture" */
	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	hint = PREFER_GPU | RENDER_GPU;
	if (!need_tiling(sna, op->dst.width, op->dst.height))
		hint |= FORCE_GPU;
	if (!partial) {
		hint |= IGNORE_DAMAGE;
		/* Writing every pixel: existing contents are irrelevant */
		if (w == op->dst.width && h == op->dst.height)
			hint |= REPLACES;
	}

	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	if (hint & REPLACES) {
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		/* Discard any queued-but-unexecuted writes to the target */
		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
	}

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	/* Targets beyond the 3D pipe limits render via a proxy */
	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}
2008
/* Heuristically decide whether this composite should be attempted on
 * the BLT engine instead of the 3D pipe, and if so hand it to
 * sna_blt_composite.  Returns false to let the 3D path proceed. */
static bool
try_blt(struct sna *sna,
	uint8_t op,
	PicturePtr src,
	PicturePtr mask,
	PicturePtr dst,
	int16_t src_x, int16_t src_y,
	int16_t msk_x, int16_t msk_y,
	int16_t dst_x, int16_t dst_y,
	int16_t width, int16_t height,
	unsigned flags,
	struct sna_composite_op *tmp)
{
	struct kgem_bo *bo;

	/* Avoid ring switches: stay on the BLT if already there */
	if (sna->kgem.mode == KGEM_BLT) {
		DBG(("%s: already performing BLT\n", __FUNCTION__));
		goto execute;
	}

	if (too_large(width, height)) {
		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
		     __FUNCTION__, width, height));
		goto execute;
	}

	bo = __sna_drawable_peek_bo(dst->pDrawable);
	if (bo == NULL)
		goto execute;

	if (untiled_tlb_miss(bo))
		goto execute;

	/* Follow the engine that last touched the destination */
	if (bo->rq) {
		if (RQ_IS_BLT(bo->rq))
			goto execute;

		return false;
	}

	/* The BLT cannot address Y-tiled surfaces directly */
	if (bo->tiling == I915_TILING_Y)
		goto upload;

	if (src->pDrawable == dst->pDrawable &&
	    can_switch_to_blt(sna, bo, 0))
		goto execute;

	if (sna_picture_is_solid(src, NULL) && can_switch_to_blt(sna, bo, 0))
		goto execute;

	if (src->pDrawable) {
		struct kgem_bo *s = __sna_drawable_peek_bo(src->pDrawable);
		if (s == NULL)
			goto execute;

		if (prefer_blt_bo(sna, s, bo))
			goto execute;
	}

	if (sna->kgem.ring == KGEM_BLT) {
		DBG(("%s: already performing BLT\n", __FUNCTION__));
		goto execute;
	}

upload:
	flags |= COMPOSITE_UPLOAD;
execute:
	return sna_blt_composite(sna, op,
				 src, dst,
				 src_x, src_y,
				 dst_x, dst_y,
				 width, height,
				 flags, tmp);
}
2083
2084static bool
2085check_gradient(PicturePtr picture, bool precise)
2086{
2087	if (picture->pDrawable)
2088		return false;
2089
2090	switch (picture->pSourcePict->type) {
2091	case SourcePictTypeSolidFill:
2092	case SourcePictTypeLinear:
2093		return false;
2094	default:
2095		return precise;
2096	}
2097}
2098
2099static bool
2100has_alphamap(PicturePtr p)
2101{
2102	return p->alphaMap != NULL;
2103}
2104
2105static bool
2106need_upload(PicturePtr p)
2107{
2108	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
2109}
2110
2111static bool
2112source_is_busy(PixmapPtr pixmap)
2113{
2114	struct sna_pixmap *priv = sna_pixmap(pixmap);
2115	if (priv == NULL || priv->clear)
2116		return false;
2117
2118	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
2119		return true;
2120
2121	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
2122		return true;
2123
2124	return priv->gpu_damage && !priv->cpu_damage;
2125}
2126
/* Decide whether sampling picture @p would force the composite into a
 * software fallback.  The checks are ordered policy: solid colours
 * never fall back, procedural pictures defer to check_gradient(),
 * unsupported repeat modes or formats always fall back, and a source
 * that is busy on the GPU is kept there regardless of the remaining
 * criteria. */
static bool
source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise)
{
	if (sna_picture_is_solid(p, NULL))
		return false;

	if (p->pSourcePict)
		return check_gradient(p, precise);

	if (!gen6_check_repeat(p) || !gen6_check_format(p->format))
		return true;

	/* A busy GPU source is worth keeping on the GPU even if the
	 * filter/alphamap/upload checks below would suggest a fallback. */
	if (pixmap && source_is_busy(pixmap))
		return false;

	return has_alphamap(p) || !gen6_check_filter(p) || need_upload(p);
}
2144
/* Decide whether the whole composite operation should be performed in
 * software (returns true to fall back).  The decision weighs where
 * the pixmaps currently live: if any operand is already resident on
 * the GPU, keep the operation there; if the destination is on the CPU
 * and a source would itself need a fallback, do everything on the
 * CPU instead. */
static bool
gen6_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen6_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask && mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	/* An oversized destination stranded on the CPU cannot be
	 * composited in a single pass; do it in software. */
	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	     __FUNCTION__));
	return dst_use_cpu(dst_pixmap);
}
2235
/* Try to initialise the mask channel @mc by reusing the already
 * prepared source channel @sc, avoiding a second texture setup when
 * the source and mask refer to the same picture (or drawable) at the
 * same offset.  Returns non-zero (true) on success, taking a new
 * reference on the shared bo.  (Declared int, but used as a bool by
 * the caller.) */
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	/* The sampling offsets must agree for the channels to match. */
	if (src_x != msk_x || src_y != msk_y)
		return false;

	/* Identical pictures: clone the channel wholesale. */
	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	/* A solid mask is cheap to set up directly. */
	if (sna_picture_is_solid(mask, &color))
		return gen4_channel_init_solid(sna, mc, color);

	if (sc->is_solid)
		return false;

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	/* Same drawable, but the pictures may still sample it
	 * differently — all of these must match (or be supported)
	 * before the channel can be shared. */
	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen6_check_repeat(mask))
		return false;

	if (!gen6_check_filter(mask))
		return false;

	if (!gen6_check_format(mask->format))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	/* Clone the channel but override the per-picture sampling
	 * parameters with the mask's own. */
	*mc = *sc;
	mc->repeat = gen6_repeat(mask->repeat ? mask->repeatType : RepeatNone);
	mc->filter = gen6_filter(mask->filter);
	mc->pict_format = mask->format;
	mc->card_format = gen6_get_card_format(mask->format);
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}
2290
/* Prepare a general composite operation (op: src [+mask] -> dst) on
 * the gen6 render pipe, filling @tmp with the bound channels, blend
 * state and emitter callbacks.  Routes to the BLT engine, the tiling
 * path, or software fallback when the 3D pipe cannot (or should not)
 * handle the request.  Returns true when @tmp is ready for use. */
static bool
gen6_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      unsigned flags,
		      struct sna_composite_op *tmp)
{
	/* Unsupported Render operator. */
	if (op >= ARRAY_SIZE(gen6_blend_op))
		return false;

	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	/* A mask-less operation may be cheaper on the BLT ring. */
	if (mask == NULL &&
	    try_blt(sna, op,
		    src, mask, dst,
		    src_x, src_y,
		    msk_x, msk_y,
		    dst_x, dst_y,
		    width, height,
		    flags, tmp))
		return true;

	if (gen6_composite_fallback(sna, src, mask, dst))
		goto fallback;

	/* Too large for one pass: split into tiles. */
	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	/* Clearing with the dedicated clear source is just a solid Src. */
	if (op == PictOpClear && src == sna->clear)
		op = PictOpSrc;
	tmp->op = op;
	if (!gen6_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       flags & COMPOSITE_PARTIAL || op > PictOpSrc))
		goto fallback;

	switch (gen6_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		/* Source reduced to nothing: substitute transparent black. */
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		/* Did we just switch rings to prepare the source? */
		if (mask == NULL &&
		    prefer_blt_composite(sna, tmp) &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen6_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	/* Default mask channel: none. */
	tmp->mask.bo = NULL;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value.  We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen6_blend_op[op].src_alpha &&
			    (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver)
					goto cleanup_src;

				/* Two-pass component alpha: OutReverse
				 * first, second pass added later. */
				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen6_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				gen6_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	/* Pack sampler, blend, WM kernel and vertex-emitter selection
	 * into the per-op state word. */
	tmp->u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen6_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
			       gen6_choose_composite_kernel(tmp->op,
							    tmp->mask.bo != NULL,
							    tmp->has_component_alpha,
							    tmp->is_affine),
			       gen4_choose_composite_emitter(sna, tmp));

	tmp->blt   = gen6_render_composite_blt;
	tmp->box   = gen6_render_composite_box;
	tmp->boxes = gen6_render_composite_boxes__blt;
	if (tmp->emit_boxes) {
		tmp->boxes = gen6_render_composite_boxes;
		tmp->thread_boxes = gen6_render_composite_boxes__thread;
	}
	tmp->done  = gen6_render_composite_done;

	/* Make sure all three bos fit in the batch, flushing once if
	 * necessary; give up (and clean up) if they still do not fit. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_align_vertex(sna, tmp);
	gen6_emit_composite_state(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
		tmp->mask.bo = NULL;
	}
cleanup_src:
	if (tmp->src.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
		tmp->src.bo = NULL;
	}
cleanup_dst:
	if (tmp->redirect.real_bo) {
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
		tmp->redirect.real_bo = NULL;
	}
fallback:
	/* Last resort: let the BLT path handle a mask-less operation. */
	return (mask == NULL &&
		sna_blt_composite(sna, op,
				  src, dst,
				  src_x, src_y,
				  dst_x, dst_y,
				  width, height,
				  flags | COMPOSITE_FALLBACK, tmp));
}
2475
2476#if !NO_COMPOSITE_SPANS
/* Emit a single box modulated by a constant opacity: reserve room for
 * one rectangle in the vertex buffer (re-emitting state if the batch
 * had to be restarted) and let the op-specific prim_emit write the
 * vertex data. */
fastcall static void
gen6_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state);
	op->prim_emit(sna, op, box, opacity);
}
2494
/* Emit a run of boxes sharing a single opacity value, splitting the
 * run into chunks of however many rectangles gen6_get_rectangles()
 * can reserve in the current batch.  Expects nbox >= 1. */
static void
gen6_render_composite_spans_boxes(struct sna *sna,
				  const struct sna_composite_spans_op *op,
				  const BoxRec *box, int nbox,
				  float opacity)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y));

	do {
		int nbox_this_time;

		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
						     gen6_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			op->prim_emit(sna, op, box++, opacity);
		} while (--nbox_this_time);
	} while (nbox);
}
2524
/* Threaded spans emission: each element of @box carries its own
 * opacity.  The vertex mutex is held while space is reserved in the
 * shared vertex buffer; a reference is taken on the buffer before the
 * lock is dropped so the reserved range stays valid while emit_boxes
 * fills it concurrently with other threads.
 * NOTE(review): relies on the acquire/release pairing of the
 * sna_vertex_* helpers — confirm against their definitions. */
fastcall static void
gen6_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Reserve our slice of the vertex buffer under the lock. */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* Fill the reserved range without holding the lock. */
		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
2560
2561fastcall static void
2562gen6_render_composite_spans_done(struct sna *sna,
2563				 const struct sna_composite_spans_op *op)
2564{
2565	DBG(("%s()\n", __FUNCTION__));
2566	assert(!sna->render.active);
2567
2568	if (sna->render.vertex_offset)
2569		gen4_vertex_flush(sna);
2570
2571	if (op->base.src.bo)
2572		kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2573
2574	sna_render_composite_redirect_done(sna, &op->base);
2575}
2576
/* Check whether a spans composite (op: src -> dst over an opacity
 * span list) can and should run on the render pipe.  Rectilinear
 * spans are always acceptable; free-form spans are only sent to the
 * GPU when the destination is already busy there (otherwise CPU
 * rasterisation is preferred). */
static bool
gen6_check_composite_spans(struct sna *sna,
			   uint8_t op, PicturePtr src, PicturePtr dst,
			   int16_t width, int16_t height,
			   unsigned flags)
{
	DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
	     __FUNCTION__, op, width, height, flags));

	if (op >= ARRAY_SIZE(gen6_blend_op))
		return false;

	if (gen6_composite_fallback(sna, src, NULL, dst)) {
		DBG(("%s: operation would fallback\n", __FUNCTION__));
		return false;
	}

	/* Tiling only pays off if the destination lives on the GPU. */
	if (need_tiling(sna, width, height) &&
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
		DBG(("%s: fallback, tiled operation not on GPU\n",
		     __FUNCTION__));
		return false;
	}

	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
		struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
		assert(priv);

		/* Take the GPU path only to avoid stalling on a busy bo. */
		if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
			return true;

		if (flags & COMPOSITE_SPANS_INPLACE_HINT)
			return false;

		return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
	}

	return true;
}
2616
/* Prepare a spans composite operation (no mask channel; per-span
 * opacity is folded in by the opacity WM kernel), filling @tmp with
 * the bound source, state word and emitter callbacks.  The caller
 * must already have passed gen6_check_composite_spans(). */
static bool
gen6_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.ring));

	assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags));

	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	if (!gen6_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height, true))
		return false;

	switch (gen6_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		/* Source reduced to nothing: substitute transparent black. */
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		gen6_composite_channel_convert(&tmp->base.src);
		break;
	}
	tmp->base.mask.bo = NULL;

	tmp->base.is_affine = tmp->base.src.is_affine;
	tmp->base.need_magic_ca_pass = false;

	/* Select the opacity WM kernel; note the |!is_affine which
	 * presumably picks the projective variant — confirm against
	 * the kernel enumeration. */
	tmp->base.u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
					      tmp->base.src.repeat,
					      SAMPLER_FILTER_NEAREST,
					      SAMPLER_EXTEND_PAD),
			       gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
			       GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
			       gen4_choose_spans_emitter(sna, tmp));

	tmp->box   = gen6_render_composite_spans_box;
	tmp->boxes = gen6_render_composite_spans_boxes;
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen6_render_composite_spans_boxes__thread;
	tmp->done  = gen6_render_composite_spans_done;

	/* Ensure both bos fit in the batch, flushing once if needed. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_align_vertex(sna, &tmp->base);
	gen6_emit_composite_state(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
2705#endif
2706
/* (Re-)emit the render state needed for a copy: write a two-entry
 * binding table (destination then source) and update the pipeline
 * state.  When the freshly written table is identical to the one
 * already referenced by the batch, the duplicate entry is returned to
 * the surface allocator and the previous table offset is reused. */
static void
gen6_emit_copy_state(struct sna *sna,
		     const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	/* Both table entries fit in one uint64_t; compare against the
	 * table currently in use and dedupe if they match. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	/* The dirty flag rides in the low bit alongside the offset —
	 * decoded by gen6_emit_state(). */
	gen6_emit_state(sna, op, offset | dirty);
}
2738
2739static inline bool prefer_blt_copy(struct sna *sna,
2740				   struct kgem_bo *src_bo,
2741				   struct kgem_bo *dst_bo,
2742				   unsigned flags)
2743{
2744	if (flags & COPY_SYNC)
2745		return false;
2746
2747	if (PREFER_RENDER)
2748		return PREFER_RENDER > 0;
2749
2750	if (sna->kgem.ring == KGEM_BLT)
2751		return true;
2752
2753	if (flags & COPY_DRI && !sna->kgem.has_semaphores)
2754		return false;
2755
2756	if ((flags & COPY_SMALL || src_bo == dst_bo) &&
2757	    can_switch_to_blt(sna, dst_bo, flags))
2758		return true;
2759
2760	if (untiled_tlb_miss(src_bo) ||
2761	    untiled_tlb_miss(dst_bo))
2762		return true;
2763
2764	if (force_blt_ring(sna, dst_bo))
2765		return true;
2766
2767	if (kgem_bo_is_render(dst_bo) ||
2768	    kgem_bo_is_render(src_bo))
2769		return false;
2770
2771	if (flags & COPY_LAST &&
2772            can_switch_to_blt(sna, dst_bo, flags))
2773		return true;
2774
2775	if (prefer_render_ring(sna, dst_bo))
2776		return false;
2777
2778	if (!prefer_blt_ring(sna, dst_bo, flags))
2779		return false;
2780
2781	return prefer_blt_bo(sna, src_bo, dst_bo);
2782}
2783
2784static bool
2785gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
2786		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
2787		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
2788		       const BoxRec *box, int n, unsigned flags)
2789{
2790	struct sna_composite_op tmp;
2791	BoxRec extents;
2792
2793	DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n",
2794	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu,
2795	     src_bo == dst_bo,
2796	     overlaps(sna,
2797		      src_bo, src_dx, src_dy,
2798		      dst_bo, dst_dx, dst_dy,
2799		      box, n, flags, &extents)));
2800
2801	if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
2802	    sna_blt_compare_depth(src, dst) &&
2803	    sna_blt_copy_boxes(sna, alu,
2804			       src_bo, src_dx, src_dy,
2805			       dst_bo, dst_dx, dst_dy,
2806			       dst->bitsPerPixel,
2807			       box, n))
2808		return true;
2809
2810	if (!(alu == GXcopy || alu == GXclear)) {
2811fallback_blt:
2812		if (!sna_blt_compare_depth(src, dst))
2813			return false;
2814
2815		return sna_blt_copy_boxes_fallback(sna, alu,
2816						   src, src_bo, src_dx, src_dy,
2817						   dst, dst_bo, dst_dx, dst_dy,
2818						   box, n);
2819	}
2820
2821	if (overlaps(sna,
2822		     src_bo, src_dx, src_dy,
2823		     dst_bo, dst_dx, dst_dy,
2824		     box, n, flags,
2825		     &extents)) {
2826		bool big = too_large(extents.x2-extents.x1, extents.y2-extents.y1);
2827
2828		if ((big || can_switch_to_blt(sna, dst_bo, flags)) &&
2829		    sna_blt_copy_boxes(sna, alu,
2830				       src_bo, src_dx, src_dy,
2831				       dst_bo, dst_dx, dst_dy,
2832				       dst->bitsPerPixel,
2833				       box, n))
2834			return true;
2835
2836		if (big)
2837			goto fallback_blt;
2838
2839		assert(src_bo == dst_bo);
2840		assert(src->depth == dst->depth);
2841		assert(src->width == dst->width);
2842		assert(src->height == dst->height);
2843		return sna_render_copy_boxes__overlap(sna, alu, dst, dst_bo,
2844						      src_dx, src_dy,
2845						      dst_dx, dst_dy,
2846						      box, n, &extents);
2847	}
2848
2849	if (dst->depth == src->depth) {
2850		tmp.dst.format = sna_render_format_for_depth(dst->depth);
2851		tmp.src.pict_format = tmp.dst.format;
2852	} else {
2853		tmp.dst.format = sna_format_for_depth(dst->depth);
2854		tmp.src.pict_format = sna_format_for_depth(src->depth);
2855	}
2856	if (!gen6_check_format(tmp.src.pict_format))
2857		goto fallback_blt;
2858
2859	tmp.dst.pixmap = (PixmapPtr)dst;
2860	tmp.dst.width  = dst->width;
2861	tmp.dst.height = dst->height;
2862	tmp.dst.bo = dst_bo;
2863	tmp.dst.x = tmp.dst.y = 0;
2864	tmp.damage = NULL;
2865
2866	sna_render_composite_redirect_init(&tmp);
2867	if (too_large(tmp.dst.width, tmp.dst.height)) {
2868		int i;
2869
2870		extents = box[0];
2871		for (i = 1; i < n; i++) {
2872			if (box[i].x1 < extents.x1)
2873				extents.x1 = box[i].x1;
2874			if (box[i].y1 < extents.y1)
2875				extents.y1 = box[i].y1;
2876
2877			if (box[i].x2 > extents.x2)
2878				extents.x2 = box[i].x2;
2879			if (box[i].y2 > extents.y2)
2880				extents.y2 = box[i].y2;
2881		}
2882
2883		if (!sna_render_composite_redirect(sna, &tmp,
2884						   extents.x1 + dst_dx,
2885						   extents.y1 + dst_dy,
2886						   extents.x2 - extents.x1,
2887						   extents.y2 - extents.y1,
2888						   n > 1))
2889			goto fallback_tiled;
2890	}
2891
2892	tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format);
2893	if (too_large(src->width, src->height)) {
2894		int i;
2895
2896		extents = box[0];
2897		for (i = 1; i < n; i++) {
2898			if (box[i].x1 < extents.x1)
2899				extents.x1 = box[i].x1;
2900			if (box[i].y1 < extents.y1)
2901				extents.y1 = box[i].y1;
2902
2903			if (box[i].x2 > extents.x2)
2904				extents.x2 = box[i].x2;
2905			if (box[i].y2 > extents.y2)
2906				extents.y2 = box[i].y2;
2907		}
2908
2909		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
2910					       extents.x1 + src_dx,
2911					       extents.y1 + src_dy,
2912					       extents.x2 - extents.x1,
2913					       extents.y2 - extents.y1)) {
2914			DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__));
2915			goto fallback_tiled_dst;
2916		}
2917	} else {
2918		tmp.src.bo = src_bo;
2919		tmp.src.width  = src->width;
2920		tmp.src.height = src->height;
2921		tmp.src.offset[0] = tmp.src.offset[1] = 0;
2922	}
2923
2924	tmp.mask.bo = NULL;
2925
2926	tmp.floats_per_vertex = 2;
2927	tmp.floats_per_rect = 6;
2928	tmp.need_magic_ca_pass = 0;
2929
2930	tmp.u.gen6.flags = COPY_FLAGS(alu);
2931	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
2932	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER);
2933	assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX);
2934
2935	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
2936	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
2937		kgem_submit(&sna->kgem);
2938		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
2939			DBG(("%s: too large for a single operation\n",
2940			     __FUNCTION__));
2941			if (tmp.src.bo != src_bo)
2942				kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2943			if (tmp.redirect.real_bo)
2944				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2945			goto fallback_blt;
2946		}
2947		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
2948	}
2949
2950	src_dx += tmp.src.offset[0];
2951	src_dy += tmp.src.offset[1];
2952
2953	dst_dx += tmp.dst.x;
2954	dst_dy += tmp.dst.y;
2955
2956	tmp.dst.x = tmp.dst.y = 0;
2957
2958	gen6_align_vertex(sna, &tmp);
2959	gen6_emit_copy_state(sna, &tmp);
2960
2961	do {
2962		int16_t *v;
2963		int n_this_time;
2964
2965		n_this_time = gen6_get_rectangles(sna, &tmp, n,
2966						  gen6_emit_copy_state);
2967		n -= n_this_time;
2968
2969		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
2970		sna->render.vertex_used += 6 * n_this_time;
2971		assert(sna->render.vertex_used <= sna->render.vertex_size);
2972		do {
2973
2974			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
2975			     box->x1 + src_dx, box->y1 + src_dy,
2976			     box->x1 + dst_dx, box->y1 + dst_dy,
2977			     box->x2 - box->x1, box->y2 - box->y1));
2978			v[0] = box->x2 + dst_dx;
2979			v[2] = box->x2 + src_dx;
2980			v[1]  = v[5] = box->y2 + dst_dy;
2981			v[3]  = v[7] = box->y2 + src_dy;
2982			v[8]  = v[4] = box->x1 + dst_dx;
2983			v[10] = v[6] = box->x1 + src_dx;
2984			v[9]  = box->y1 + dst_dy;
2985			v[11] = box->y1 + src_dy;
2986			v += 12; box++;
2987		} while (--n_this_time);
2988	} while (n);
2989
2990	gen4_vertex_flush(sna);
2991	sna_render_composite_redirect_done(sna, &tmp);
2992	if (tmp.src.bo != src_bo)
2993		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2994	return true;
2995
2996fallback_tiled_dst:
2997	if (tmp.redirect.real_bo)
2998		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2999fallback_tiled:
3000	if (sna_blt_compare_depth(src, dst) &&
3001	    sna_blt_copy_boxes(sna, alu,
3002			       src_bo, src_dx, src_dy,
3003			       dst_bo, dst_dx, dst_dy,
3004			       dst->bitsPerPixel,
3005			       box, n))
3006		return true;
3007
3008	return sna_tiling_copy_boxes(sna, alu,
3009				     src, src_bo, src_dx, src_dy,
3010				     dst, dst_bo, dst_dx, dst_dy,
3011				     box, n);
3012}
3013
3014static void
3015gen6_render_copy_blt(struct sna *sna,
3016		     const struct sna_copy_op *op,
3017		     int16_t sx, int16_t sy,
3018		     int16_t w,  int16_t h,
3019		     int16_t dx, int16_t dy)
3020{
3021	int16_t *v;
3022
3023	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state);
3024
3025	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3026	sna->render.vertex_used += 6;
3027	assert(sna->render.vertex_used <= sna->render.vertex_size);
3028
3029	v[0]  = dx+w; v[1]  = dy+h;
3030	v[2]  = sx+w; v[3]  = sy+h;
3031	v[4]  = dx;   v[5]  = dy+h;
3032	v[6]  = sx;   v[7]  = sy+h;
3033	v[8]  = dx;   v[9]  = dy;
3034	v[10] = sx;   v[11] = sy;
3035}
3036
3037static void
3038gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
3039{
3040	DBG(("%s()\n", __FUNCTION__));
3041
3042	assert(!sna->render.active);
3043	if (sna->render.vertex_offset)
3044		gen4_vertex_flush(sna);
3045}
3046
/* Prepare a streamed copy operation between two pixmaps, filling @op
 * with the emitter callbacks (blt/done).  Routes to the BLT engine
 * when the heuristics prefer it or when the render pipe cannot handle
 * the request (non-copy alu, self copy, oversized surfaces). */
static bool
gen6_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *op)
{
	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
	     __FUNCTION__, alu,
	     src->drawable.width, src->drawable.height,
	     dst->drawable.width, dst->drawable.height));

	/* First preference: the BLT ring, when the heuristics say so. */
	if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy(sna, alu,
			 src_bo, dst_bo,
			 dst->drawable.bitsPerPixel,
			 op))
		return true;

	/* The render path only implements copy/clear on distinct,
	 * fits-in-one-pass surfaces; otherwise use the blitter. */
	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
	    too_large(src->drawable.width, src->drawable.height) ||
	    too_large(dst->drawable.width, dst->drawable.height)) {
fallback:
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
			return false;

		return sna_blt_copy(sna, alu, src_bo, dst_bo,
				    dst->drawable.bitsPerPixel,
				    op);
	}

	/* Pick matching render formats for the two depths. */
	if (dst->drawable.depth == src->drawable.depth) {
		op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = op->base.dst.format;
	} else {
		op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
	}
	if (!gen6_check_format(op->base.src.pict_format))
		goto fallback;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.bo = dst_bo;

	op->base.src.bo = src_bo;
	op->base.src.card_format =
		gen6_get_card_format(op->base.src.pict_format);
	op->base.src.width  = src->drawable.width;
	op->base.src.height = src->drawable.height;

	op->base.mask.bo = NULL;

	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen6.flags = COPY_FLAGS(alu);
	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER);
	assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX);

	/* Ensure both bos fit in the batch, flushing once if needed. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_align_vertex(sna, &op->base);
	gen6_emit_copy_state(sna, &op->base);

	op->blt  = gen6_render_copy_blt;
	op->done = gen6_render_copy_done;
	return true;
}
3124
static void
gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	/* Emit the surface/binding state for a solid fill: the destination
	 * surface plus a 1x1 solid-colour source.
	 */
	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, 1, 1,
			     GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
			     false);

	/* If this two-entry binding table is identical to the previous one,
	 * reclaim the space we just reserved and reuse the old table.
	 */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface +=
			sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	/* The dirty flag is folded into the low bit of the offset. */
	gen6_emit_state(sna, op, offset | dirty);
}
3156
static bool
gen6_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       const DrawableRec *dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	/* Fill n boxes with the given colour using the Render operator op.
	 * Tries the BLT for simple GXcopy/GXclear fills, falls back to
	 * tiling for oversized destinations, otherwise renders with a
	 * solid-colour source on the 3D pipeline.
	 */
	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
	     __FUNCTION__, op,
	     color->red, color->green, color->blue, color->alpha, (int)format));

	if (op >= ARRAY_SIZE(gen6_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* BLT path: only PictOpClear/PictOpSrc can be expressed as a raw
	 * pixel fill (GXclear/GXcopy respectively).
	 */
	if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) ||
	    !gen6_check_dst_format(format)) {
		uint8_t alu = GXinvalid;

		if (op <= PictOpSrc) {
			pixel = 0;
			if (op == PictOpClear)
				alu = GXclear;
			else if (sna_get_pixel_from_rgba(&pixel,
							 color->red,
							 color->green,
							 color->blue,
							 color->alpha,
							 format))
				alu = GXcopy;
		}

		if (alu != GXinvalid &&
		    sna_blt_fill_boxes(sna, alu,
				       dst_bo, dst->bitsPerPixel,
				       pixel, box, n))
			return true;

		/* Destination format unusable by RENDER and BLT failed:
		 * nothing more we can do here.
		 */
		if (!gen6_check_dst_format(format))
			return false;
	}

	/* Convert the requested colour to a8r8g8b8 for the solid cache;
	 * PictOpClear degenerates to PictOpSrc with pixel 0.
	 */
	if (op == PictOpClear) {
		pixel = 0;
		op = PictOpSrc;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    PICT_a8r8g8b8))
		return false;

	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
	     __FUNCTION__, pixel, n,
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width  = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	/* Oversized destinations are redirected to a proxy surface covering
	 * just the box extents, or handed to the tiling fallback.
	 */
	sna_render_composite_redirect_init(&tmp);
	if (too_large(dst->width, dst->height)) {
		BoxRec extents;

		boxes_extents(box, n, &extents);
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1, extents.y1,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	tmp.src.bo = sna_render_get_solid(sna, pixel);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS(op, format);
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen6_align_vertex(sna, &tmp);
	gen6_emit_fill_state(sna, &tmp);

	/* Emit the boxes in batches of however many rectangles fit into the
	 * current vertex buffer; each rectangle is three (x,y,s,t) vertices
	 * packed as int16s.
	 */
	do {
		int n_this_time;
		int16_t *v;

		n_this_time = gen6_get_rectangles(sna, &tmp, n,
						  gen6_emit_fill_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {
			DBG(("	(%d, %d), (%d, %d)\n",
			     box->x1, box->y1, box->x2, box->y2));

			v[0] = box->x2;
			v[5] = v[1] = box->y2;
			v[8] = v[4] = box->x1;
			v[9] = box->y1;
			v[2] = v[3]  = v[7]  = 1;
			v[6] = v[10] = v[11] = 0;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}
3293
3294static void
3295gen6_render_op_fill_blt(struct sna *sna,
3296			const struct sna_fill_op *op,
3297			int16_t x, int16_t y, int16_t w, int16_t h)
3298{
3299	int16_t *v;
3300
3301	DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
3302
3303	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);
3304
3305	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3306	sna->render.vertex_used += 6;
3307	assert(sna->render.vertex_used <= sna->render.vertex_size);
3308
3309	v[0] = x+w;
3310	v[4] = v[8] = x;
3311	v[1] = v[5] = y+h;
3312	v[9] = y;
3313
3314	v[2] = v[3]  = v[7]  = 1;
3315	v[6] = v[10] = v[11] = 0;
3316}
3317
3318fastcall static void
3319gen6_render_op_fill_box(struct sna *sna,
3320			const struct sna_fill_op *op,
3321			const BoxRec *box)
3322{
3323	int16_t *v;
3324
3325	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
3326	     box->x1, box->y1, box->x2, box->y2));
3327
3328	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);
3329
3330	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3331	sna->render.vertex_used += 6;
3332	assert(sna->render.vertex_used <= sna->render.vertex_size);
3333
3334	v[0] = box->x2;
3335	v[8] = v[4] = box->x1;
3336	v[5] = v[1] = box->y2;
3337	v[9] = box->y1;
3338
3339	v[7] = v[2]  = v[3]  = 1;
3340	v[6] = v[10] = v[11] = 0;
3341}
3342
3343fastcall static void
3344gen6_render_op_fill_boxes(struct sna *sna,
3345			  const struct sna_fill_op *op,
3346			  const BoxRec *box,
3347			  int nbox)
3348{
3349	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
3350	     box->x1, box->y1, box->x2, box->y2, nbox));
3351
3352	do {
3353		int nbox_this_time;
3354		int16_t *v;
3355
3356		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
3357						     gen6_emit_fill_state);
3358		nbox -= nbox_this_time;
3359
3360		v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
3361		sna->render.vertex_used += 6 * nbox_this_time;
3362		assert(sna->render.vertex_used <= sna->render.vertex_size);
3363
3364		do {
3365			v[0] = box->x2;
3366			v[8] = v[4] = box->x1;
3367			v[5] = v[1] = box->y2;
3368			v[9] = box->y1;
3369			v[7] = v[2]  = v[3]  = 1;
3370			v[6] = v[10] = v[11] = 0;
3371			box++; v += 12;
3372		} while (--nbox_this_time);
3373	} while (nbox);
3374}
3375
3376static void
3377gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op)
3378{
3379	DBG(("%s()\n", __FUNCTION__));
3380
3381	assert(!sna->render.active);
3382	if (sna->render.vertex_offset)
3383		gen4_vertex_flush(sna);
3384	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
3385}
3386
static bool
gen6_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *op)
{
	/* Set up a streamed fill operation: tries the BLT when preferred,
	 * otherwise prepares the RENDER pipeline with a solid source and
	 * installs the blt/box/boxes/done callbacks on op.
	 */
	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));

	if (prefer_blt_fill(sna, dst_bo, flags) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

	/* RENDER can only express GXcopy/GXclear and is limited by the
	 * maximum 3D surface size; otherwise use the BLT regardless.
	 */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    op);

	if (alu == GXclear)
		color = 0;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	op->base.dst.bo = dst_bo;
	op->base.dst.x = op->base.dst.y = 0;

	/* Fetch (or create) the cached 1x1 solid-colour source bo. */
	op->base.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	op->base.mask.bo = NULL;

	op->base.need_magic_ca_pass = false;
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX);

	/* Ensure the destination fits in the batch; after a forced submit
	 * the check is expected to succeed.
	 */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen6_align_vertex(sna, &op->base);
	gen6_emit_fill_state(sna, &op->base);

	op->blt  = gen6_render_op_fill_blt;
	op->box  = gen6_render_op_fill_box;
	op->boxes = gen6_render_op_fill_boxes;
	op->points = NULL;
	op->done = gen6_render_op_fill_done;
	return true;
}
3450
3451static bool
3452gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3453			     uint32_t color,
3454			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
3455			     uint8_t alu)
3456{
3457	BoxRec box;
3458
3459	box.x1 = x1;
3460	box.y1 = y1;
3461	box.x2 = x2;
3462	box.y2 = y2;
3463
3464	return sna_blt_fill_boxes(sna, alu,
3465				  bo, dst->drawable.bitsPerPixel,
3466				  color, &box, 1);
3467}
3468
static bool
gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;
	int16_t *v;

	/* Fill a single rectangle immediately (no streamed op). */

	/* Prefer to use the BLT if already engaged */
	if (prefer_blt_fill(sna, bo, FILL_BOXES) &&
	    gen6_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return gen6_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	if (alu == GXclear)
		color = 0;

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Cached 1x1 solid-colour source for the colour. */
	tmp.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	/* If the bo will not fit even after a flush, release the solid
	 * source and report failure so the caller can fall back.
	 */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen6_align_vertex(sna, &tmp);
	gen6_emit_fill_state(sna, &tmp);

	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);

	DBG(("	(%d, %d), (%d, %d)\n", x1, y1, x2, y2));

	/* Emit the rectangle as three (x, y, s, t) corner vertices. */
	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = x2;
	v[8] = v[4] = x1;
	v[5] = v[1] = y2;
	v[9] = y1;
	v[7] = v[2]  = v[3]  = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
3548
3549static bool
3550gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
3551{
3552	BoxRec box;
3553
3554	box.x1 = 0;
3555	box.y1 = 0;
3556	box.x2 = dst->drawable.width;
3557	box.y2 = dst->drawable.height;
3558
3559	return sna_blt_fill_boxes(sna, GXclear,
3560				  bo, dst->drawable.bitsPerPixel,
3561				  0, &box, 1);
3562}
3563
static bool
gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	struct sna_composite_op tmp;
	int16_t *v;

	/* Clear the whole pixmap to zero, using whichever engine is
	 * currently active (BLT when already on that ring, else RENDER).
	 */
	DBG(("%s: %dx%d\n",
	     __FUNCTION__,
	     dst->drawable.width,
	     dst->drawable.height));

	/* Prefer to use the BLT if, and only if, already engaged */
	if (sna->kgem.ring == KGEM_BLT &&
	    gen6_render_clear_try_blt(sna, dst, bo))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height))
		return gen6_render_clear_try_blt(sna, dst, bo);

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Solid source of pixel 0 (black/transparent). */
	tmp.src.bo = sna_render_get_solid(sna, 0);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	/* Release the solid bo and fail if the batch cannot take the
	 * destination even after a flush.
	 */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen6_align_vertex(sna, &tmp);
	gen6_emit_fill_state(sna, &tmp);

	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);

	/* One full-pixmap rectangle: three (x, y, s, t) corner vertices. */
	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = dst->drawable.width;
	v[5] = v[1] = dst->drawable.height;
	v[8] = v[4] = 0;
	v[9] = 0;

	v[7] = v[2]  = v[3]  = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
3634
static void gen6_render_reset(struct sna *sna)
{
	/* Reset cached gen6 pipeline state after a batch submission so the
	 * next operation re-emits invariants and all state packets.
	 */
	sna->render_state.gen6.needs_invariant = true;
	sna->render_state.gen6.first_state_packet = true;
	sna->render_state.gen6.ve_id = 3 << 2;
	sna->render_state.gen6.last_primitive = -1;

	/* -1 marks each cached state as "unknown" so the next emit cannot
	 * be skipped by the state-deduplication checks.
	 */
	sna->render_state.gen6.num_sf_outputs = 0;
	sna->render_state.gen6.samplers = -1;
	sna->render_state.gen6.blend = -1;
	sna->render_state.gen6.kernel = -1;
	sna->render_state.gen6.drawrect_offset = -1;
	sna->render_state.gen6.drawrect_limit = -1;
	sna->render_state.gen6.surface_table = -1;

	/* Drop the vertex buffer if it can no longer be mapped. */
	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}
3659
static void gen6_render_fini(struct sna *sna)
{
	/* Release the static-stream bo holding kernels and sampler state. */
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo);
}
3664
3665static bool is_gt2(struct sna *sna, int devid)
3666{
3667	return devid & 0x30;
3668}
3669
3670static bool is_mobile(struct sna *sna, int devid)
3671{
3672	return (devid & 0xf) == 0x6;
3673}
3674
static bool gen6_render_setup(struct sna *sna, int devid)
{
	struct gen6_render_state *state = &sna->render_state.gen6;
	struct sna_static_stream general;
	struct gen6_sampler_state *ss;
	int i, j, k, l, m;

	/* One-time setup: build the static bo containing the WM kernels,
	 * the sampler-state table and the blend state.  Returns false if
	 * the bo cannot be created.
	 */
	state->info = &gt1_info;
	if (is_gt2(sna, devid))
		state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */
	state->gt = state->info->gt;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);
	scratch_create(&general);

	/* Upload each WM kernel: precompiled blobs go in slot [1]; the
	 * runtime-compiled ones are built at the 8/16/32-pixel dispatch
	 * widths that were enabled at build time.
	 */
	for (m = 0; m < GEN6_KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			state->wm_kernel[m][1] =
				sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			if (USE_8_PIXEL_DISPATCH) {
				state->wm_kernel[m][0] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 8);
			}

			if (USE_16_PIXEL_DISPATCH) {
				state->wm_kernel[m][1] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 16);
			}

			if (USE_32_PIXEL_DISPATCH) {
				state->wm_kernel[m][2] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 32);
			}
		}
		/* Guarantee at least one dispatch width per kernel. */
		if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) {
			state->wm_kernel[m][1] =
				sna_static_stream_compile_wm(sna, &general,
							     wm_kernels[m].data, 16);
		}
	}

	/* Sampler table: two fixed pairs (copy, fill) followed by one
	 * src/mask sampler pair for every (filter, extend) combination.
	 */
	ss = sna_static_stream_map(&general,
				   2 * sizeof(*ss) *
				   (2 +
				    FILTER_COUNT * EXTEND_COUNT *
				    FILTER_COUNT * EXTEND_COUNT),
				   32);
	state->wm_state = sna_static_stream_offsetof(&general, ss);
	sampler_copy_init(ss); ss += 2;
	sampler_fill_init(ss); ss += 2;
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					sampler_state_init(ss++, i, j);
					sampler_state_init(ss++, k, l);
				}
			}
		}
	}

	state->cc_blend = gen6_composite_create_blend_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}
3753
const char *gen6_render_init(struct sna *sna, const char *backend)
{
	int devid = intel_get_device_id(sna->dev);

	/* Install the gen6 render backend: populate sna->render with the
	 * gen6 entry points.  Returns the previous backend name unchanged
	 * if setup fails, otherwise the gen6 GT name.
	 */
	if (!gen6_render_setup(sna, devid))
		return backend;

	sna->kgem.context_switch = gen6_render_context_switch;
	sna->kgem.retire = gen6_render_retire;
	sna->kgem.expire = gen4_render_expire;

	/* Each feature can be compiled out via the NO_* defines above. */
#if !NO_COMPOSITE
	sna->render.composite = gen6_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif

#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen6_check_composite_spans;
	sna->render.composite_spans = gen6_render_composite_spans;
	if (is_mobile(sna, devid))
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
	sna->render.video = gen6_render_video;

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen6_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen6_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen6_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen6_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen6_render_fill_one;
#endif
#if !NO_FILL_CLEAR
	sna->render.clear = gen6_render_clear;
#endif

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen6_render_reset;
	sna->render.fini = gen6_render_fini;

	sna->render.max_3d_size = GEN6_MAX_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->render_state.gen6.info->name;
}
3806