r600_exa.c revision 166b61b6
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Author: Alex Deucher <alexander.deucher@amd.com>
 *
 */
26b7e1c893Smrg
27b7e1c893Smrg#ifdef HAVE_CONFIG_H
28b7e1c893Smrg#include "config.h"
29b7e1c893Smrg#endif
30b7e1c893Smrg
31b7e1c893Smrg#include "xf86.h"
32b7e1c893Smrg
33b7e1c893Smrg#include "exa.h"
34b7e1c893Smrg
35b7e1c893Smrg#include "radeon.h"
36b7e1c893Smrg#include "radeon_macros.h"
37b7e1c893Smrg#include "radeon_reg.h"
38b7e1c893Smrg#include "r600_shader.h"
39b7e1c893Smrg#include "r600_reg.h"
40b7e1c893Smrg#include "r600_state.h"
410974d292Smrg#include "radeon_exa_shared.h"
42ad43ddacSmrg#include "radeon_vbo.h"
43ad43ddacSmrg
44b7e1c893Smrg/* #define SHOW_VERTEXES */
45b7e1c893Smrg
46ad43ddacSmrgBool
47ad43ddacSmrgR600SetAccelState(ScrnInfoPtr pScrn,
48ad43ddacSmrg		  struct r600_accel_object *src0,
49ad43ddacSmrg		  struct r600_accel_object *src1,
50ad43ddacSmrg		  struct r600_accel_object *dst,
51ad43ddacSmrg		  uint32_t vs_offset, uint32_t ps_offset,
52ad43ddacSmrg		  int rop, Pixel planemask)
53ad43ddacSmrg{
54ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
55ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
56b13dfe66Smrg    uint32_t pitch_align = 0x7, base_align = 0xff;
57b13dfe66Smrg#if defined(XF86DRM_MODE)
58b13dfe66Smrg    int ret;
59b13dfe66Smrg#endif
60ad43ddacSmrg
61ad43ddacSmrg    if (src0) {
62ad43ddacSmrg	memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object));
63ad43ddacSmrg	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
64b13dfe66Smrg#if defined(XF86DRM_MODE)
65b13dfe66Smrg	if (info->cs) {
66b13dfe66Smrg	    pitch_align = drmmode_get_pitch_align(pScrn,
67b13dfe66Smrg						  accel_state->src_obj[0].bpp / 8,
68b13dfe66Smrg						  accel_state->src_obj[0].tiling_flags) - 1;
69b13dfe66Smrg	    base_align = drmmode_get_base_align(pScrn,
70b13dfe66Smrg						accel_state->src_obj[0].bpp / 8,
71b13dfe66Smrg						accel_state->src_obj[0].tiling_flags) - 1;
72b13dfe66Smrg	}
73b13dfe66Smrg#endif
74b13dfe66Smrg	/* bad pitch */
75b13dfe66Smrg	if (accel_state->src_obj[0].pitch & pitch_align)
76b13dfe66Smrg	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
77b13dfe66Smrg
78b13dfe66Smrg	/* bad offset */
79b13dfe66Smrg	if (accel_state->src_obj[0].offset & base_align)
80b13dfe66Smrg	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[0].offset));
81b13dfe66Smrg
82ad43ddacSmrg    } else {
83ad43ddacSmrg	memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
84ad43ddacSmrg	accel_state->src_size[0] = 0;
85ad43ddacSmrg    }
86ad43ddacSmrg
87ad43ddacSmrg    if (src1) {
88ad43ddacSmrg	memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object));
89ad43ddacSmrg	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
90b13dfe66Smrg#if defined(XF86DRM_MODE)
91b13dfe66Smrg	if (info->cs) {
92b13dfe66Smrg	    pitch_align = drmmode_get_pitch_align(pScrn,
93b13dfe66Smrg						  accel_state->src_obj[1].bpp / 8,
94b13dfe66Smrg						  accel_state->src_obj[1].tiling_flags) - 1;
95b13dfe66Smrg	    base_align = drmmode_get_base_align(pScrn,
96b13dfe66Smrg						accel_state->src_obj[1].bpp / 8,
97b13dfe66Smrg						accel_state->src_obj[1].tiling_flags) - 1;
98b13dfe66Smrg	}
99b13dfe66Smrg#endif
100b13dfe66Smrg	/* bad pitch */
101b13dfe66Smrg	if (accel_state->src_obj[1].pitch & pitch_align)
102b13dfe66Smrg	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
103b13dfe66Smrg
104b13dfe66Smrg	/* bad offset */
105b13dfe66Smrg	if (accel_state->src_obj[1].offset & base_align)
106b13dfe66Smrg	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[1].offset));
107ad43ddacSmrg    } else {
108ad43ddacSmrg	memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
109ad43ddacSmrg	accel_state->src_size[1] = 0;
110ad43ddacSmrg    }
111ad43ddacSmrg
112ad43ddacSmrg    if (dst) {
113ad43ddacSmrg	memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object));
114ad43ddacSmrg	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
115b13dfe66Smrg#if defined(XF86DRM_MODE)
116b13dfe66Smrg	if (info->cs) {
117b13dfe66Smrg	    pitch_align = drmmode_get_pitch_align(pScrn,
118b13dfe66Smrg						  accel_state->dst_obj.bpp / 8,
119b13dfe66Smrg						  accel_state->dst_obj.tiling_flags) - 1;
120b13dfe66Smrg	    base_align = drmmode_get_base_align(pScrn,
121b13dfe66Smrg						accel_state->dst_obj.bpp / 8,
122b13dfe66Smrg						accel_state->dst_obj.tiling_flags) - 1;
123b13dfe66Smrg	}
124b13dfe66Smrg#endif
125b13dfe66Smrg	if (accel_state->dst_obj.pitch & pitch_align)
126b13dfe66Smrg	    RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
127b13dfe66Smrg
128b13dfe66Smrg	if (accel_state->dst_obj.offset & base_align)
129b13dfe66Smrg	    RADEON_FALLBACK(("Bad dst offset 0x%08x\n", accel_state->dst_obj.offset));
130ad43ddacSmrg    } else {
131ad43ddacSmrg	memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
132ad43ddacSmrg	accel_state->dst_size = 0;
133ad43ddacSmrg    }
134ad43ddacSmrg
135ad43ddacSmrg    accel_state->rop = rop;
136ad43ddacSmrg    accel_state->planemask = planemask;
137ad43ddacSmrg
138ad43ddacSmrg    accel_state->vs_size = 512;
139ad43ddacSmrg    accel_state->ps_size = 512;
140ad43ddacSmrg#if defined(XF86DRM_MODE)
141ad43ddacSmrg    if (info->cs) {
142ad43ddacSmrg	accel_state->vs_mc_addr = vs_offset;
143ad43ddacSmrg	accel_state->ps_mc_addr = ps_offset;
144ad43ddacSmrg
145ad43ddacSmrg	radeon_cs_space_reset_bos(info->cs);
146ad43ddacSmrg	radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
147ad43ddacSmrg					  RADEON_GEM_DOMAIN_VRAM, 0);
148ad43ddacSmrg	if (accel_state->src_obj[0].bo)
149ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
150ad43ddacSmrg					      accel_state->src_obj[0].domain, 0);
151ad43ddacSmrg	if (accel_state->src_obj[1].bo)
152ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
153ad43ddacSmrg					      accel_state->src_obj[1].domain, 0);
154ad43ddacSmrg	if (accel_state->dst_obj.bo)
155ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
156ad43ddacSmrg					      0, accel_state->dst_obj.domain);
157ad43ddacSmrg	ret = radeon_cs_space_check(info->cs);
158ad43ddacSmrg	if (ret)
159ad43ddacSmrg	    RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
160ad43ddacSmrg
161ad43ddacSmrg    } else
162ad43ddacSmrg#endif
163ad43ddacSmrg    {
164ad43ddacSmrg	accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
165ad43ddacSmrg	    vs_offset;
166ad43ddacSmrg	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
167ad43ddacSmrg	    ps_offset;
168ad43ddacSmrg    }
169ad43ddacSmrg
170ad43ddacSmrg    return TRUE;
171ad43ddacSmrg}
172ad43ddacSmrg
173b7e1c893Smrgstatic void
174b7e1c893SmrgR600DoneSolid(PixmapPtr pPix);
175b7e1c893Smrg
176b7e1c893Smrgstatic Bool
177b7e1c893SmrgR600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
178b7e1c893Smrg{
179b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
180b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
181b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
182b7e1c893Smrg    cb_config_t     cb_conf;
183b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
184b7e1c893Smrg    uint32_t a, r, g, b;
185b7e1c893Smrg    float ps_alu_consts[4];
186ad43ddacSmrg    struct r600_accel_object dst;
187b7e1c893Smrg
1880974d292Smrg    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
189ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype failed\n"));
1900974d292Smrg    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
191ad43ddacSmrg	RADEON_FALLBACK(("invalid planemask\n"));
192b7e1c893Smrg
193ad43ddacSmrg#if defined(XF86DRM_MODE)
194ad43ddacSmrg    if (info->cs) {
195ad43ddacSmrg	dst.offset = 0;
196ad43ddacSmrg	dst.bo = radeon_get_pixmap_bo(pPix);
197166b61b6Smrg	dst.tiling_flags = radeon_get_pixmap_tiling(pPix);
198ad43ddacSmrg    } else
199ad43ddacSmrg#endif
200ad43ddacSmrg    {
201ad43ddacSmrg	dst.offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
202ad43ddacSmrg	dst.bo = NULL;
203ad43ddacSmrg    }
204b7e1c893Smrg
205ad43ddacSmrg    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
206ad43ddacSmrg    dst.width = pPix->drawable.width;
207ad43ddacSmrg    dst.height = pPix->drawable.height;
208ad43ddacSmrg    dst.bpp = pPix->drawable.bitsPerPixel;
209ad43ddacSmrg    dst.domain = RADEON_GEM_DOMAIN_VRAM;
2100974d292Smrg
211ad43ddacSmrg    if (!R600SetAccelState(pScrn,
212ad43ddacSmrg			   NULL,
213ad43ddacSmrg			   NULL,
214ad43ddacSmrg			   &dst,
215ad43ddacSmrg			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
216ad43ddacSmrg			   alu, pm))
217b7e1c893Smrg	return FALSE;
218b7e1c893Smrg
219b7e1c893Smrg    CLEAR (cb_conf);
220b7e1c893Smrg    CLEAR (vs_conf);
221b7e1c893Smrg    CLEAR (ps_conf);
222b7e1c893Smrg
223921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
2240974d292Smrg    radeon_cp_start(pScrn);
225b7e1c893Smrg
226921a55d8Smrg    r600_set_default_state(pScrn, accel_state->ib);
227b7e1c893Smrg
228921a55d8Smrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
229921a55d8Smrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
230921a55d8Smrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
231b7e1c893Smrg
232b7e1c893Smrg    /* Shader */
233b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
2340974d292Smrg    vs_conf.shader_size         = accel_state->vs_size;
235b7e1c893Smrg    vs_conf.num_gprs            = 2;
236b7e1c893Smrg    vs_conf.stack_size          = 0;
237ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
238921a55d8Smrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
239b7e1c893Smrg
240b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
2410974d292Smrg    ps_conf.shader_size         = accel_state->ps_size;
242b7e1c893Smrg    ps_conf.num_gprs            = 1;
243b7e1c893Smrg    ps_conf.stack_size          = 0;
244b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
245b7e1c893Smrg    ps_conf.clamp_consts        = 0;
246b7e1c893Smrg    ps_conf.export_mode         = 2;
247ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
248921a55d8Smrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
249b7e1c893Smrg
250b7e1c893Smrg    cb_conf.id = 0;
251ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
252ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
253ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
254ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
255b7e1c893Smrg
256ad43ddacSmrg    if (accel_state->dst_obj.bpp == 8) {
257b7e1c893Smrg	cb_conf.format = COLOR_8;
258b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
259ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 16) {
260b7e1c893Smrg	cb_conf.format = COLOR_5_6_5;
261b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
262b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
263b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN16;
264b13dfe66Smrg#endif
265b7e1c893Smrg    } else {
266b7e1c893Smrg	cb_conf.format = COLOR_8_8_8_8;
267b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
268b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
269b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN32;
270b13dfe66Smrg#endif
271b7e1c893Smrg    }
272b7e1c893Smrg    cb_conf.source_format = 1;
273b7e1c893Smrg    cb_conf.blend_clamp = 1;
2740974d292Smrg    /* Render setup */
2750974d292Smrg    if (accel_state->planemask & 0x000000ff)
276b13dfe66Smrg	cb_conf.pmask |= 4; /* B */
2770974d292Smrg    if (accel_state->planemask & 0x0000ff00)
278b13dfe66Smrg	cb_conf.pmask |= 2; /* G */
2790974d292Smrg    if (accel_state->planemask & 0x00ff0000)
280b13dfe66Smrg	cb_conf.pmask |= 1; /* R */
2810974d292Smrg    if (accel_state->planemask & 0xff000000)
282b13dfe66Smrg	cb_conf.pmask |= 8; /* A */
283b13dfe66Smrg    cb_conf.rop = accel_state->rop;
284b13dfe66Smrg    if (accel_state->dst_obj.tiling_flags == 0)
285b13dfe66Smrg	cb_conf.array_mode = 1;
286b13dfe66Smrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
287b13dfe66Smrg
288b13dfe66Smrg    r600_set_spi(pScrn, accel_state->ib, 0, 0);
289b7e1c893Smrg
290b7e1c893Smrg    /* PS alu constants */
291ad43ddacSmrg    if (accel_state->dst_obj.bpp == 16) {
292b7e1c893Smrg	r = (fg >> 11) & 0x1f;
293b7e1c893Smrg	g = (fg >> 5) & 0x3f;
294b7e1c893Smrg	b = (fg >> 0) & 0x1f;
295b7e1c893Smrg	ps_alu_consts[0] = (float)r / 31; /* R */
296b7e1c893Smrg	ps_alu_consts[1] = (float)g / 63; /* G */
297b7e1c893Smrg	ps_alu_consts[2] = (float)b / 31; /* B */
298b7e1c893Smrg	ps_alu_consts[3] = 1.0; /* A */
299ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 8) {
300b7e1c893Smrg	a = (fg >> 0) & 0xff;
301b7e1c893Smrg	ps_alu_consts[0] = 0.0; /* R */
302b7e1c893Smrg	ps_alu_consts[1] = 0.0; /* G */
303b7e1c893Smrg	ps_alu_consts[2] = 0.0; /* B */
304b7e1c893Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
305b7e1c893Smrg    } else {
306b7e1c893Smrg	a = (fg >> 24) & 0xff;
307b7e1c893Smrg	r = (fg >> 16) & 0xff;
308b7e1c893Smrg	g = (fg >> 8) & 0xff;
309b7e1c893Smrg	b = (fg >> 0) & 0xff;
310b7e1c893Smrg	ps_alu_consts[0] = (float)r / 255; /* R */
311b7e1c893Smrg	ps_alu_consts[1] = (float)g / 255; /* G */
312b7e1c893Smrg	ps_alu_consts[2] = (float)b / 255; /* B */
313b7e1c893Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
314b7e1c893Smrg    }
315921a55d8Smrg    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
316921a55d8Smrg			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
317b7e1c893Smrg
318ad43ddacSmrg    if (accel_state->vsync)
3190974d292Smrg	RADEONVlineHelperClear(pScrn);
320b7e1c893Smrg
321b7e1c893Smrg    return TRUE;
322b7e1c893Smrg}
323b7e1c893Smrg
324b7e1c893Smrg
325b7e1c893Smrgstatic void
326b7e1c893SmrgR600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
327b7e1c893Smrg{
328b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
329b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
330b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
331b7e1c893Smrg    float *vb;
332b7e1c893Smrg
333ad43ddacSmrg    if (accel_state->vsync)
3340974d292Smrg	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
335b7e1c893Smrg
336921a55d8Smrg    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
337b7e1c893Smrg
338b7e1c893Smrg    vb[0] = (float)x1;
339b7e1c893Smrg    vb[1] = (float)y1;
340b7e1c893Smrg
341b7e1c893Smrg    vb[2] = (float)x1;
342b7e1c893Smrg    vb[3] = (float)y2;
343b7e1c893Smrg
344b7e1c893Smrg    vb[4] = (float)x2;
345b7e1c893Smrg    vb[5] = (float)y2;
346b7e1c893Smrg
347921a55d8Smrg    radeon_vbo_commit(pScrn, &accel_state->vbo);
348b7e1c893Smrg}
349b7e1c893Smrg
350b7e1c893Smrgstatic void
351b7e1c893SmrgR600DoneSolid(PixmapPtr pPix)
352b7e1c893Smrg{
353b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
354b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
355b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
356b7e1c893Smrg
357ad43ddacSmrg    if (accel_state->vsync)
358921a55d8Smrg	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
359921a55d8Smrg				accel_state->vline_crtc,
360921a55d8Smrg				accel_state->vline_y1,
361921a55d8Smrg				accel_state->vline_y2);
362b7e1c893Smrg
363ad43ddacSmrg    r600_finish_op(pScrn, 8);
364b7e1c893Smrg}
365b7e1c893Smrg
366b7e1c893Smrgstatic void
367ad43ddacSmrgR600DoPrepareCopy(ScrnInfoPtr pScrn)
368b7e1c893Smrg{
369b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
370b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
371b7e1c893Smrg    cb_config_t     cb_conf;
372b7e1c893Smrg    tex_resource_t  tex_res;
373b7e1c893Smrg    tex_sampler_t   tex_samp;
374b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
375b7e1c893Smrg
376b7e1c893Smrg    CLEAR (cb_conf);
377b7e1c893Smrg    CLEAR (tex_res);
378b7e1c893Smrg    CLEAR (tex_samp);
379b7e1c893Smrg    CLEAR (vs_conf);
380b7e1c893Smrg    CLEAR (ps_conf);
381b7e1c893Smrg
382921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
3830974d292Smrg    radeon_cp_start(pScrn);
384b7e1c893Smrg
385921a55d8Smrg    r600_set_default_state(pScrn, accel_state->ib);
386b7e1c893Smrg
387921a55d8Smrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
388921a55d8Smrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
389921a55d8Smrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
390b7e1c893Smrg
391b7e1c893Smrg    /* Shader */
392b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
3930974d292Smrg    vs_conf.shader_size         = accel_state->vs_size;
394b7e1c893Smrg    vs_conf.num_gprs            = 2;
395b7e1c893Smrg    vs_conf.stack_size          = 0;
396ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
397921a55d8Smrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
398b7e1c893Smrg
399b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
4000974d292Smrg    ps_conf.shader_size         = accel_state->ps_size;
401b7e1c893Smrg    ps_conf.num_gprs            = 1;
402b7e1c893Smrg    ps_conf.stack_size          = 0;
403b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
404b7e1c893Smrg    ps_conf.clamp_consts        = 0;
405b7e1c893Smrg    ps_conf.export_mode         = 2;
406ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
407921a55d8Smrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
408b7e1c893Smrg
409b7e1c893Smrg    /* Texture */
410b7e1c893Smrg    tex_res.id                  = 0;
411ad43ddacSmrg    tex_res.w                   = accel_state->src_obj[0].width;
412ad43ddacSmrg    tex_res.h                   = accel_state->src_obj[0].height;
413ad43ddacSmrg    tex_res.pitch               = accel_state->src_obj[0].pitch;
414b7e1c893Smrg    tex_res.depth               = 0;
415b7e1c893Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
416ad43ddacSmrg    tex_res.base                = accel_state->src_obj[0].offset;
417ad43ddacSmrg    tex_res.mip_base            = accel_state->src_obj[0].offset;
4180974d292Smrg    tex_res.size                = accel_state->src_size[0];
419ad43ddacSmrg    tex_res.bo                  = accel_state->src_obj[0].bo;
420ad43ddacSmrg    tex_res.mip_bo              = accel_state->src_obj[0].bo;
421ad43ddacSmrg    if (accel_state->src_obj[0].bpp == 8) {
422b7e1c893Smrg	tex_res.format              = FMT_8;
423b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
424b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
425b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
426b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
427ad43ddacSmrg    } else if (accel_state->src_obj[0].bpp == 16) {
428b7e1c893Smrg	tex_res.format              = FMT_5_6_5;
429b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
430b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
431b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
432b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
433b7e1c893Smrg    } else {
434b7e1c893Smrg	tex_res.format              = FMT_8_8_8_8;
435b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
436b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
437b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
438b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
439b7e1c893Smrg    }
440b7e1c893Smrg
441b7e1c893Smrg    tex_res.request_size        = 1;
442b7e1c893Smrg    tex_res.base_level          = 0;
443b7e1c893Smrg    tex_res.last_level          = 0;
444b7e1c893Smrg    tex_res.perf_modulation     = 0;
445b13dfe66Smrg    if (accel_state->src_obj[0].tiling_flags == 0)
446b13dfe66Smrg	tex_res.tile_mode           = 1;
447921a55d8Smrg    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
448b7e1c893Smrg
449b7e1c893Smrg    tex_samp.id                 = 0;
450b7e1c893Smrg    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
451b7e1c893Smrg    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
452b7e1c893Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
453b7e1c893Smrg    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
454b7e1c893Smrg    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
455b13dfe66Smrg    tex_samp.mc_coord_truncate  = 1;
456b7e1c893Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
457b7e1c893Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
458921a55d8Smrg    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
459b7e1c893Smrg
460b7e1c893Smrg    cb_conf.id = 0;
461ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
462ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
463ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
464ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
465ad43ddacSmrg    if (accel_state->dst_obj.bpp == 8) {
466b7e1c893Smrg	cb_conf.format = COLOR_8;
467b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
468ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 16) {
469b7e1c893Smrg	cb_conf.format = COLOR_5_6_5;
470b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
471b7e1c893Smrg    } else {
472b7e1c893Smrg	cb_conf.format = COLOR_8_8_8_8;
473b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
474b7e1c893Smrg    }
475b7e1c893Smrg    cb_conf.source_format = 1;
476b7e1c893Smrg    cb_conf.blend_clamp = 1;
477b7e1c893Smrg
4780974d292Smrg    /* Render setup */
4790974d292Smrg    if (accel_state->planemask & 0x000000ff)
480b13dfe66Smrg	cb_conf.pmask |= 4; /* B */
4810974d292Smrg    if (accel_state->planemask & 0x0000ff00)
482b13dfe66Smrg	cb_conf.pmask |= 2; /* G */
4830974d292Smrg    if (accel_state->planemask & 0x00ff0000)
484b13dfe66Smrg	cb_conf.pmask |= 1; /* R */
4850974d292Smrg    if (accel_state->planemask & 0xff000000)
486b13dfe66Smrg	cb_conf.pmask |= 8; /* A */
487b13dfe66Smrg    cb_conf.rop = accel_state->rop;
488b13dfe66Smrg    if (accel_state->dst_obj.tiling_flags == 0)
489b13dfe66Smrg	cb_conf.array_mode = 1;
490b13dfe66Smrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
491b13dfe66Smrg
492b13dfe66Smrg    r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
493b7e1c893Smrg
494b7e1c893Smrg}
495b7e1c893Smrg
496b7e1c893Smrgstatic void
497b7e1c893SmrgR600DoCopy(ScrnInfoPtr pScrn)
498b7e1c893Smrg{
499ad43ddacSmrg    r600_finish_op(pScrn, 16);
500ad43ddacSmrg}
501ad43ddacSmrg
502ad43ddacSmrgstatic void
503ad43ddacSmrgR600DoCopyVline(PixmapPtr pPix)
504ad43ddacSmrg{
505ad43ddacSmrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
506b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
507b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
508b7e1c893Smrg
509ad43ddacSmrg    if (accel_state->vsync)
510921a55d8Smrg	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
511921a55d8Smrg				accel_state->vline_crtc,
512921a55d8Smrg				accel_state->vline_y1,
513921a55d8Smrg				accel_state->vline_y2);
514b7e1c893Smrg
515ad43ddacSmrg    r600_finish_op(pScrn, 16);
516b7e1c893Smrg}
517b7e1c893Smrg
518b7e1c893Smrgstatic void
519b7e1c893SmrgR600AppendCopyVertex(ScrnInfoPtr pScrn,
520b7e1c893Smrg		     int srcX, int srcY,
521b7e1c893Smrg		     int dstX, int dstY,
522b7e1c893Smrg		     int w, int h)
523b7e1c893Smrg{
524921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
525921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
526b7e1c893Smrg    float *vb;
527b7e1c893Smrg
528921a55d8Smrg    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
529b7e1c893Smrg
530b7e1c893Smrg    vb[0] = (float)dstX;
531b7e1c893Smrg    vb[1] = (float)dstY;
532b7e1c893Smrg    vb[2] = (float)srcX;
533b7e1c893Smrg    vb[3] = (float)srcY;
534b7e1c893Smrg
535b7e1c893Smrg    vb[4] = (float)dstX;
536b7e1c893Smrg    vb[5] = (float)(dstY + h);
537b7e1c893Smrg    vb[6] = (float)srcX;
538b7e1c893Smrg    vb[7] = (float)(srcY + h);
539b7e1c893Smrg
540b7e1c893Smrg    vb[8] = (float)(dstX + w);
541b7e1c893Smrg    vb[9] = (float)(dstY + h);
542b7e1c893Smrg    vb[10] = (float)(srcX + w);
543b7e1c893Smrg    vb[11] = (float)(srcY + h);
544b7e1c893Smrg
545921a55d8Smrg    radeon_vbo_commit(pScrn, &accel_state->vbo);
546b7e1c893Smrg}
547b7e1c893Smrg
548b7e1c893Smrgstatic Bool
549b7e1c893SmrgR600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
550b7e1c893Smrg		int xdir, int ydir,
551b7e1c893Smrg		int rop,
552b7e1c893Smrg		Pixel planemask)
553b7e1c893Smrg{
554b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
555b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
556b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
557ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
558ad43ddacSmrg
5590974d292Smrg    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
560ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype src failed\n"));
5610974d292Smrg    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
562ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype dst failed\n"));
5630974d292Smrg    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
564ad43ddacSmrg	RADEON_FALLBACK(("Invalid planemask\n"));
565ad43ddacSmrg
566ad43ddacSmrg    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
567ad43ddacSmrg    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
568ad43ddacSmrg
569ad43ddacSmrg    accel_state->same_surface = FALSE;
570ad43ddacSmrg
571ad43ddacSmrg#if defined(XF86DRM_MODE)
572ad43ddacSmrg    if (info->cs) {
573ad43ddacSmrg	src_obj.offset = 0;
574ad43ddacSmrg	dst_obj.offset = 0;
575ad43ddacSmrg	src_obj.bo = radeon_get_pixmap_bo(pSrc);
576ad43ddacSmrg	dst_obj.bo = radeon_get_pixmap_bo(pDst);
577166b61b6Smrg	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
578166b61b6Smrg	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
579ad43ddacSmrg	if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
580ad43ddacSmrg	    accel_state->same_surface = TRUE;
581ad43ddacSmrg    } else
582b7e1c893Smrg#endif
583ad43ddacSmrg    {
584ad43ddacSmrg	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
585ad43ddacSmrg	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
586ad43ddacSmrg	if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst))
587ad43ddacSmrg	    accel_state->same_surface = TRUE;
588ad43ddacSmrg	src_obj.bo = NULL;
589ad43ddacSmrg	dst_obj.bo = NULL;
590b7e1c893Smrg    }
591b7e1c893Smrg
592ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
593ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
594ad43ddacSmrg    src_obj.bpp = pSrc->drawable.bitsPerPixel;
595ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
596921a55d8Smrg
597ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
598ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
599ad43ddacSmrg    dst_obj.bpp = pDst->drawable.bitsPerPixel;
600ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
601ad43ddacSmrg
602ad43ddacSmrg    if (!R600SetAccelState(pScrn,
603ad43ddacSmrg			   &src_obj,
604ad43ddacSmrg			   NULL,
605ad43ddacSmrg			   &dst_obj,
606ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
607ad43ddacSmrg			   rop, planemask))
608b7e1c893Smrg	return FALSE;
609b7e1c893Smrg
610ad43ddacSmrg    if (accel_state->same_surface == TRUE) {
61130d12090Smrg#if defined(XF86DRM_MODE)
612b13dfe66Smrg	unsigned height = RADEON_ALIGN(pDst->drawable.height,
613b13dfe66Smrg				       drmmode_get_height_align(pScrn, accel_state->dst_obj.tiling_flags));
61430d12090Smrg#else
61530d12090Smrg	unsigned height = pDst->drawable.height;
61630d12090Smrg#endif
617b13dfe66Smrg	unsigned long size = height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
618b7e1c893Smrg
619ad43ddacSmrg#if defined(XF86DRM_MODE)
620ad43ddacSmrg	if (info->cs) {
621ad43ddacSmrg	    if (accel_state->copy_area_bo) {
622ad43ddacSmrg		radeon_bo_unref(accel_state->copy_area_bo);
623ad43ddacSmrg		accel_state->copy_area_bo = NULL;
624b7e1c893Smrg	    }
625ad43ddacSmrg	    accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
626ad43ddacSmrg						       RADEON_GEM_DOMAIN_VRAM,
627ad43ddacSmrg						       0);
628ad43ddacSmrg	    if (accel_state->copy_area_bo == NULL)
629ad43ddacSmrg		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
630ad43ddacSmrg
631ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
632ad43ddacSmrg					      RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_DOMAIN_VRAM);
633ad43ddacSmrg	    if (radeon_cs_space_check(info->cs)) {
634ad43ddacSmrg		radeon_bo_unref(accel_state->copy_area_bo);
635ad43ddacSmrg		accel_state->copy_area_bo = NULL;
636ad43ddacSmrg		return FALSE;
637ad43ddacSmrg	    }
638ad43ddacSmrg	    accel_state->copy_area = (void*)accel_state->copy_area_bo;
639ad43ddacSmrg	} else
640ad43ddacSmrg#endif
641ad43ddacSmrg	{
642ad43ddacSmrg	    if (accel_state->copy_area) {
643ad43ddacSmrg		exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
644ad43ddacSmrg		accel_state->copy_area = NULL;
645ad43ddacSmrg	    }
646ad43ddacSmrg	    accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
647ad43ddacSmrg	    if (!accel_state->copy_area)
648ad43ddacSmrg		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
649b7e1c893Smrg	}
650ad43ddacSmrg    } else
651ad43ddacSmrg	R600DoPrepareCopy(pScrn);
652b7e1c893Smrg
653ad43ddacSmrg    if (accel_state->vsync)
6540974d292Smrg	RADEONVlineHelperClear(pScrn);
655ad43ddacSmrg
656ad43ddacSmrg    return TRUE;
657b7e1c893Smrg}
658b7e1c893Smrg
659b7e1c893Smrgstatic void
660b7e1c893SmrgR600Copy(PixmapPtr pDst,
661b7e1c893Smrg	 int srcX, int srcY,
662b7e1c893Smrg	 int dstX, int dstY,
663b7e1c893Smrg	 int w, int h)
664b7e1c893Smrg{
665b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
666b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
667b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
668b7e1c893Smrg
669b7e1c893Smrg    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
670b7e1c893Smrg	return;
671b7e1c893Smrg
672ad43ddacSmrg    if (accel_state->vsync)
6730974d292Smrg	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
674b7e1c893Smrg
675ad43ddacSmrg    if (accel_state->same_surface && accel_state->copy_area) {
676ad43ddacSmrg	uint32_t orig_offset, tmp_offset;
677ad43ddacSmrg	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
678ad43ddacSmrg	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
679b13dfe66Smrg	uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags;
680b13dfe66Smrg	uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags;
681ad43ddacSmrg	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
682ad43ddacSmrg
683ad43ddacSmrg#if defined(XF86DRM_MODE)
684ad43ddacSmrg	if (info->cs) {
685ad43ddacSmrg	    tmp_offset = 0;
686ad43ddacSmrg	    orig_offset = 0;
687ad43ddacSmrg	} else
688ad43ddacSmrg#endif
689ad43ddacSmrg	{
690b7e1c893Smrg	    tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset;
691b7e1c893Smrg	    orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
692ad43ddacSmrg	}
693b7e1c893Smrg
694ad43ddacSmrg	/* src to tmp */
695ad43ddacSmrg	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
696ad43ddacSmrg	accel_state->dst_obj.bo = accel_state->copy_area_bo;
697ad43ddacSmrg	accel_state->dst_obj.offset = tmp_offset;
698b13dfe66Smrg	accel_state->dst_obj.tiling_flags = 0;
699ad43ddacSmrg	R600DoPrepareCopy(pScrn);
700b7e1c893Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
701b7e1c893Smrg	R600DoCopy(pScrn);
702ad43ddacSmrg
703ad43ddacSmrg	/* tmp to dst */
704ad43ddacSmrg	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
705ad43ddacSmrg	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
706ad43ddacSmrg	accel_state->src_obj[0].offset = tmp_offset;
707b13dfe66Smrg	accel_state->src_obj[0].tiling_flags = 0;
708ad43ddacSmrg	accel_state->dst_obj.domain = orig_dst_domain;
709ad43ddacSmrg	accel_state->dst_obj.bo = orig_bo;
710ad43ddacSmrg	accel_state->dst_obj.offset = orig_offset;
711b13dfe66Smrg	accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags;
712ad43ddacSmrg	R600DoPrepareCopy(pScrn);
713ad43ddacSmrg	R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
714ad43ddacSmrg	R600DoCopyVline(pDst);
715ad43ddacSmrg
716ad43ddacSmrg	/* restore state */
717ad43ddacSmrg	accel_state->src_obj[0].domain = orig_src_domain;
718ad43ddacSmrg	accel_state->src_obj[0].bo = orig_bo;
719ad43ddacSmrg	accel_state->src_obj[0].offset = orig_offset;
720b13dfe66Smrg	accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags;
721ad43ddacSmrg    } else
722b7e1c893Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
723b7e1c893Smrg
724b7e1c893Smrg}
725b7e1c893Smrg
726b7e1c893Smrgstatic void
727b7e1c893SmrgR600DoneCopy(PixmapPtr pDst)
728b7e1c893Smrg{
729b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
730b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
731b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
732b7e1c893Smrg
733b7e1c893Smrg    if (!accel_state->same_surface)
734ad43ddacSmrg	R600DoCopyVline(pDst);
735b7e1c893Smrg
736b7e1c893Smrg    if (accel_state->copy_area) {
737ad43ddacSmrg	if (!info->cs)
738ad43ddacSmrg	    exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
739b7e1c893Smrg	accel_state->copy_area = NULL;
740b7e1c893Smrg    }
741b7e1c893Smrg
742b7e1c893Smrg}
743b7e1c893Smrg
744b7e1c893Smrgstruct blendinfo {
745b7e1c893Smrg    Bool dst_alpha;
746b7e1c893Smrg    Bool src_alpha;
747b7e1c893Smrg    uint32_t blend_cntl;
748b7e1c893Smrg};
749b7e1c893Smrg
750b7e1c893Smrgstatic struct blendinfo R600BlendOp[] = {
751b7e1c893Smrg    /* Clear */
752b7e1c893Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
753b7e1c893Smrg    /* Src */
754b7e1c893Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
755b7e1c893Smrg    /* Dst */
756b7e1c893Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
757b7e1c893Smrg    /* Over */
758b7e1c893Smrg    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
759b7e1c893Smrg    /* OverReverse */
760b7e1c893Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
761b7e1c893Smrg    /* In */
762b7e1c893Smrg    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
763b7e1c893Smrg    /* InReverse */
764b7e1c893Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
765b7e1c893Smrg    /* Out */
766b7e1c893Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
767b7e1c893Smrg    /* OutReverse */
768b7e1c893Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
769b7e1c893Smrg    /* Atop */
770b7e1c893Smrg    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
771b7e1c893Smrg    /* AtopReverse */
772b7e1c893Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
773b7e1c893Smrg    /* Xor */
774b7e1c893Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
775b7e1c893Smrg    /* Add */
776b7e1c893Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
777b7e1c893Smrg};
778b7e1c893Smrg
779b7e1c893Smrgstruct formatinfo {
780b7e1c893Smrg    unsigned int fmt;
781b7e1c893Smrg    uint32_t card_fmt;
782b7e1c893Smrg};
783b7e1c893Smrg
784b7e1c893Smrgstatic struct formatinfo R600TexFormats[] = {
785b7e1c893Smrg    {PICT_a8r8g8b8,	FMT_8_8_8_8},
786b7e1c893Smrg    {PICT_x8r8g8b8,	FMT_8_8_8_8},
787b7e1c893Smrg    {PICT_a8b8g8r8,	FMT_8_8_8_8},
788b7e1c893Smrg    {PICT_x8b8g8r8,	FMT_8_8_8_8},
789ad43ddacSmrg#ifdef PICT_TYPE_BGRA
790ad43ddacSmrg    {PICT_b8g8r8a8,	FMT_8_8_8_8},
791ad43ddacSmrg    {PICT_b8g8r8x8,	FMT_8_8_8_8},
792ad43ddacSmrg#endif
793b7e1c893Smrg    {PICT_r5g6b5,	FMT_5_6_5},
794b7e1c893Smrg    {PICT_a1r5g5b5,	FMT_1_5_5_5},
795b7e1c893Smrg    {PICT_x1r5g5b5,     FMT_1_5_5_5},
796b7e1c893Smrg    {PICT_a8,		FMT_8},
797b7e1c893Smrg};
798b7e1c893Smrg
799b7e1c893Smrgstatic uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
800b7e1c893Smrg{
801b7e1c893Smrg    uint32_t sblend, dblend;
802b7e1c893Smrg
803b7e1c893Smrg    sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
804b7e1c893Smrg    dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
805b7e1c893Smrg
806b7e1c893Smrg    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
807b7e1c893Smrg     * it as always 1.
808b7e1c893Smrg     */
809b7e1c893Smrg    if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
810b7e1c893Smrg	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
811b7e1c893Smrg	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
812b7e1c893Smrg	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
813b7e1c893Smrg	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
814b7e1c893Smrg    }
815b7e1c893Smrg
816b7e1c893Smrg    /* If the source alpha is being used, then we should only be in a case where
817b7e1c893Smrg     * the source blend factor is 0, and the source blend value is the mask
818b7e1c893Smrg     * channels multiplied by the source picture's alpha.
819b7e1c893Smrg     */
820b7e1c893Smrg    if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
821b7e1c893Smrg	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
822b7e1c893Smrg	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
823b7e1c893Smrg	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
824b7e1c893Smrg	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
825b7e1c893Smrg	}
826b7e1c893Smrg    }
827b7e1c893Smrg
828b7e1c893Smrg    return sblend | dblend;
829b7e1c893Smrg}
830b7e1c893Smrg
831b7e1c893Smrgstatic Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
832b7e1c893Smrg{
833b7e1c893Smrg    switch (pDstPicture->format) {
834b7e1c893Smrg    case PICT_a8r8g8b8:
835b7e1c893Smrg    case PICT_x8r8g8b8:
836ad43ddacSmrg    case PICT_a8b8g8r8:
837ad43ddacSmrg    case PICT_x8b8g8r8:
838ad43ddacSmrg#ifdef PICT_TYPE_BGRA
839ad43ddacSmrg    case PICT_b8g8r8a8:
840ad43ddacSmrg    case PICT_b8g8r8x8:
841ad43ddacSmrg#endif
842b7e1c893Smrg	*dst_format = COLOR_8_8_8_8;
843b7e1c893Smrg	break;
844b7e1c893Smrg    case PICT_r5g6b5:
845b7e1c893Smrg	*dst_format = COLOR_5_6_5;
846b7e1c893Smrg	break;
847b7e1c893Smrg    case PICT_a1r5g5b5:
848b7e1c893Smrg    case PICT_x1r5g5b5:
849b7e1c893Smrg	*dst_format = COLOR_1_5_5_5;
850b7e1c893Smrg	break;
851b7e1c893Smrg    case PICT_a8:
852b7e1c893Smrg	*dst_format = COLOR_8;
853b7e1c893Smrg	break;
854b7e1c893Smrg    default:
855b7e1c893Smrg	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
856b7e1c893Smrg	       (int)pDstPicture->format));
857b7e1c893Smrg    }
858b7e1c893Smrg    return TRUE;
859b7e1c893Smrg}
860b7e1c893Smrg
861b7e1c893Smrgstatic Bool R600CheckCompositeTexture(PicturePtr pPict,
862b7e1c893Smrg				      PicturePtr pDstPict,
863b7e1c893Smrg				      int op,
864b7e1c893Smrg				      int unit)
865b7e1c893Smrg{
866b7e1c893Smrg    int w = pPict->pDrawable->width;
867b7e1c893Smrg    int h = pPict->pDrawable->height;
868ad43ddacSmrg    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
869b7e1c893Smrg    unsigned int i;
870b7e1c893Smrg    int max_tex_w, max_tex_h;
871b7e1c893Smrg
872b7e1c893Smrg    max_tex_w = 8192;
873b7e1c893Smrg    max_tex_h = 8192;
874b7e1c893Smrg
875b7e1c893Smrg    if ((w > max_tex_w) || (h > max_tex_h))
876b7e1c893Smrg	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
877b7e1c893Smrg
878b7e1c893Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
879b7e1c893Smrg	if (R600TexFormats[i].fmt == pPict->format)
880b7e1c893Smrg	    break;
881b7e1c893Smrg    }
882b7e1c893Smrg    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
883b7e1c893Smrg	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
884b7e1c893Smrg			 (int)pPict->format));
885b7e1c893Smrg
886b7e1c893Smrg    if (pPict->filter != PictFilterNearest &&
887b7e1c893Smrg	pPict->filter != PictFilterBilinear)
888b7e1c893Smrg	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
889b7e1c893Smrg
890b7e1c893Smrg    /* for REPEAT_NONE, Render semantics are that sampling outside the source
891b7e1c893Smrg     * picture results in alpha=0 pixels. We can implement this with a border color
892b7e1c893Smrg     * *if* our source texture has an alpha channel, otherwise we need to fall
893b7e1c893Smrg     * back. If we're not transformed then we hope that upper layers have clipped
894b7e1c893Smrg     * rendering to the bounds of the source drawable, in which case it doesn't
895b7e1c893Smrg     * matter. I have not, however, verified that the X server always does such
896b7e1c893Smrg     * clipping.
897b7e1c893Smrg     */
898b7e1c893Smrg    /* FIXME R6xx */
899ad43ddacSmrg    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
900b7e1c893Smrg	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
901b7e1c893Smrg	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
902b7e1c893Smrg    }
903b7e1c893Smrg
904b13dfe66Smrg    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
905921a55d8Smrg	RADEON_FALLBACK(("non-affine transforms not supported\n"));
906921a55d8Smrg
907b7e1c893Smrg    return TRUE;
908b7e1c893Smrg}
909b7e1c893Smrg
910b7e1c893Smrgstatic Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
911b7e1c893Smrg					int unit)
912b7e1c893Smrg{
913b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
914b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
915b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
916b7e1c893Smrg    int w = pPict->pDrawable->width;
917b7e1c893Smrg    int h = pPict->pDrawable->height;
918ad43ddacSmrg    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
919b7e1c893Smrg    unsigned int i;
920b7e1c893Smrg    tex_resource_t  tex_res;
921b7e1c893Smrg    tex_sampler_t   tex_samp;
922b7e1c893Smrg    int pix_r, pix_g, pix_b, pix_a;
923ad43ddacSmrg    float vs_alu_consts[8];
924b7e1c893Smrg
925b7e1c893Smrg    CLEAR (tex_res);
926b7e1c893Smrg    CLEAR (tex_samp);
927b7e1c893Smrg
928b7e1c893Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
929b7e1c893Smrg	if (R600TexFormats[i].fmt == pPict->format)
930b7e1c893Smrg	    break;
931b7e1c893Smrg    }
932b7e1c893Smrg
933b7e1c893Smrg    /* Texture */
934b7e1c893Smrg    tex_res.id                  = unit;
935b7e1c893Smrg    tex_res.w                   = w;
936b7e1c893Smrg    tex_res.h                   = h;
937ad43ddacSmrg    tex_res.pitch               = accel_state->src_obj[unit].pitch;
938b7e1c893Smrg    tex_res.depth               = 0;
939b7e1c893Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
940ad43ddacSmrg    tex_res.base                = accel_state->src_obj[unit].offset;
941ad43ddacSmrg    tex_res.mip_base            = accel_state->src_obj[unit].offset;
9420974d292Smrg    tex_res.size                = accel_state->src_size[unit];
943b7e1c893Smrg    tex_res.format              = R600TexFormats[i].card_fmt;
944ad43ddacSmrg    tex_res.bo                  = accel_state->src_obj[unit].bo;
945ad43ddacSmrg    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
946b7e1c893Smrg    tex_res.request_size        = 1;
947b7e1c893Smrg
948b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
949b13dfe66Smrg    switch (accel_state->src_obj[unit].bpp) {
950b13dfe66Smrg    case 16:
951b13dfe66Smrg	tex_res.endian = SQ_ENDIAN_8IN16;
952b13dfe66Smrg	break;
953b13dfe66Smrg    case 32:
954b13dfe66Smrg	tex_res.endian = SQ_ENDIAN_8IN32;
955b13dfe66Smrg	break;
956b13dfe66Smrg    default :
957b13dfe66Smrg	break;
958b13dfe66Smrg    }
959b13dfe66Smrg#endif
960b13dfe66Smrg
961b7e1c893Smrg    /* component swizzles */
962b7e1c893Smrg    switch (pPict->format) {
963b7e1c893Smrg    case PICT_a1r5g5b5:
964b7e1c893Smrg    case PICT_a8r8g8b8:
965b7e1c893Smrg	pix_r = SQ_SEL_Z; /* R */
966b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
967b7e1c893Smrg	pix_b = SQ_SEL_X; /* B */
968b7e1c893Smrg	pix_a = SQ_SEL_W; /* A */
969b7e1c893Smrg	break;
970b7e1c893Smrg    case PICT_a8b8g8r8:
971b7e1c893Smrg	pix_r = SQ_SEL_X; /* R */
972b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
973b7e1c893Smrg	pix_b = SQ_SEL_Z; /* B */
974b7e1c893Smrg	pix_a = SQ_SEL_W; /* A */
975b7e1c893Smrg	break;
976b7e1c893Smrg    case PICT_x8b8g8r8:
977b7e1c893Smrg	pix_r = SQ_SEL_X; /* R */
978b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
979b7e1c893Smrg	pix_b = SQ_SEL_Z; /* B */
980b7e1c893Smrg	pix_a = SQ_SEL_1; /* A */
981b7e1c893Smrg	break;
982ad43ddacSmrg#ifdef PICT_TYPE_BGRA
983ad43ddacSmrg    case PICT_b8g8r8a8:
984ad43ddacSmrg	pix_r = SQ_SEL_Y; /* R */
985ad43ddacSmrg	pix_g = SQ_SEL_Z; /* G */
986ad43ddacSmrg	pix_b = SQ_SEL_W; /* B */
987ad43ddacSmrg	pix_a = SQ_SEL_X; /* A */
988ad43ddacSmrg	break;
989ad43ddacSmrg    case PICT_b8g8r8x8:
990ad43ddacSmrg	pix_r = SQ_SEL_Y; /* R */
991ad43ddacSmrg	pix_g = SQ_SEL_Z; /* G */
992ad43ddacSmrg	pix_b = SQ_SEL_W; /* B */
993ad43ddacSmrg	pix_a = SQ_SEL_1; /* A */
994ad43ddacSmrg	break;
995ad43ddacSmrg#endif
996b7e1c893Smrg    case PICT_x1r5g5b5:
997b7e1c893Smrg    case PICT_x8r8g8b8:
998b7e1c893Smrg    case PICT_r5g6b5:
999b7e1c893Smrg	pix_r = SQ_SEL_Z; /* R */
1000b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1001b7e1c893Smrg	pix_b = SQ_SEL_X; /* B */
1002b7e1c893Smrg	pix_a = SQ_SEL_1; /* A */
1003b7e1c893Smrg	break;
1004b7e1c893Smrg    case PICT_a8:
1005b7e1c893Smrg	pix_r = SQ_SEL_0; /* R */
1006b7e1c893Smrg	pix_g = SQ_SEL_0; /* G */
1007b7e1c893Smrg	pix_b = SQ_SEL_0; /* B */
1008b7e1c893Smrg	pix_a = SQ_SEL_X; /* A */
1009b7e1c893Smrg	break;
1010b7e1c893Smrg    default:
1011b7e1c893Smrg	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
1012b7e1c893Smrg    }
1013b7e1c893Smrg
1014b7e1c893Smrg    if (unit == 0) {
1015ad43ddacSmrg	if (!accel_state->msk_pic) {
1016b7e1c893Smrg	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
1017b7e1c893Smrg		pix_r = SQ_SEL_0;
1018b7e1c893Smrg		pix_g = SQ_SEL_0;
1019b7e1c893Smrg		pix_b = SQ_SEL_0;
1020b7e1c893Smrg	    }
1021b7e1c893Smrg
1022b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1023b7e1c893Smrg		pix_a = SQ_SEL_1;
1024b7e1c893Smrg	} else {
1025b7e1c893Smrg	    if (accel_state->component_alpha) {
1026b7e1c893Smrg		if (accel_state->src_alpha) {
1027b7e1c893Smrg		    if (PICT_FORMAT_A(pPict->format) == 0) {
1028b7e1c893Smrg			pix_r = SQ_SEL_1;
1029b7e1c893Smrg			pix_g = SQ_SEL_1;
1030b7e1c893Smrg			pix_b = SQ_SEL_1;
1031b7e1c893Smrg			pix_a = SQ_SEL_1;
1032b7e1c893Smrg		    } else {
1033b7e1c893Smrg			pix_r = pix_a;
1034b7e1c893Smrg			pix_g = pix_a;
1035b7e1c893Smrg			pix_b = pix_a;
1036b7e1c893Smrg		    }
1037b7e1c893Smrg		} else {
1038b7e1c893Smrg		    if (PICT_FORMAT_A(pPict->format) == 0)
1039b7e1c893Smrg			pix_a = SQ_SEL_1;
1040b7e1c893Smrg		}
1041b7e1c893Smrg	    } else {
1042b7e1c893Smrg		if (PICT_FORMAT_RGB(pPict->format) == 0) {
1043b7e1c893Smrg		    pix_r = SQ_SEL_0;
1044b7e1c893Smrg		    pix_g = SQ_SEL_0;
1045b7e1c893Smrg		    pix_b = SQ_SEL_0;
1046b7e1c893Smrg		}
1047b7e1c893Smrg
1048b7e1c893Smrg		if (PICT_FORMAT_A(pPict->format) == 0)
1049b7e1c893Smrg		    pix_a = SQ_SEL_1;
1050b7e1c893Smrg	    }
1051b7e1c893Smrg	}
1052b7e1c893Smrg    } else {
1053b7e1c893Smrg	if (accel_state->component_alpha) {
1054b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1055b7e1c893Smrg		pix_a = SQ_SEL_1;
1056b7e1c893Smrg	} else {
1057b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0) {
1058b7e1c893Smrg		pix_r = SQ_SEL_1;
1059b7e1c893Smrg		pix_g = SQ_SEL_1;
1060b7e1c893Smrg		pix_b = SQ_SEL_1;
1061b7e1c893Smrg		pix_a = SQ_SEL_1;
1062b7e1c893Smrg	    } else {
1063b7e1c893Smrg		pix_r = pix_a;
1064b7e1c893Smrg		pix_g = pix_a;
1065b7e1c893Smrg		pix_b = pix_a;
1066b7e1c893Smrg	    }
1067b7e1c893Smrg	}
1068b7e1c893Smrg    }
1069b7e1c893Smrg
1070b7e1c893Smrg    tex_res.dst_sel_x           = pix_r; /* R */
1071b7e1c893Smrg    tex_res.dst_sel_y           = pix_g; /* G */
1072b7e1c893Smrg    tex_res.dst_sel_z           = pix_b; /* B */
1073b7e1c893Smrg    tex_res.dst_sel_w           = pix_a; /* A */
1074b7e1c893Smrg
1075b7e1c893Smrg    tex_res.base_level          = 0;
1076b7e1c893Smrg    tex_res.last_level          = 0;
1077b7e1c893Smrg    tex_res.perf_modulation     = 0;
1078b13dfe66Smrg    if (accel_state->src_obj[unit].tiling_flags == 0)
1079b13dfe66Smrg	tex_res.tile_mode           = 1;
1080921a55d8Smrg    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
1081b7e1c893Smrg
1082b7e1c893Smrg    tex_samp.id                 = unit;
1083b7e1c893Smrg    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1084b7e1c893Smrg
1085ad43ddacSmrg    switch (repeatType) {
1086ad43ddacSmrg    case RepeatNormal:
1087ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_WRAP;
1088ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_WRAP;
1089ad43ddacSmrg	break;
1090ad43ddacSmrg    case RepeatPad:
1091ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1092ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1093ad43ddacSmrg	break;
1094ad43ddacSmrg    case RepeatReflect:
1095ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1096ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1097ad43ddacSmrg	break;
1098ad43ddacSmrg    case RepeatNone:
1099b7e1c893Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1100b7e1c893Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1101ad43ddacSmrg	break;
1102ad43ddacSmrg    default:
1103ad43ddacSmrg	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1104b7e1c893Smrg    }
1105b7e1c893Smrg
1106b7e1c893Smrg    switch (pPict->filter) {
1107b7e1c893Smrg    case PictFilterNearest:
1108b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1109b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1110b13dfe66Smrg	tex_samp.mc_coord_truncate  = 1;
1111b7e1c893Smrg	break;
1112b7e1c893Smrg    case PictFilterBilinear:
1113b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1114b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1115b7e1c893Smrg	break;
1116b7e1c893Smrg    default:
1117b7e1c893Smrg	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1118b7e1c893Smrg    }
1119b7e1c893Smrg
1120b7e1c893Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
1121b7e1c893Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1122b7e1c893Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
1123921a55d8Smrg    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
1124b7e1c893Smrg
1125b7e1c893Smrg    if (pPict->transform != 0) {
1126b7e1c893Smrg	accel_state->is_transform[unit] = TRUE;
1127b7e1c893Smrg	accel_state->transform[unit] = pPict->transform;
1128ad43ddacSmrg
1129ad43ddacSmrg	vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
1130ad43ddacSmrg	vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
1131ad43ddacSmrg	vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
1132ad43ddacSmrg	vs_alu_consts[3] = 1.0 / w;
1133ad43ddacSmrg
1134ad43ddacSmrg	vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
1135ad43ddacSmrg	vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
1136ad43ddacSmrg	vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
1137ad43ddacSmrg	vs_alu_consts[7] = 1.0 / h;
1138ad43ddacSmrg    } else {
1139b7e1c893Smrg	accel_state->is_transform[unit] = FALSE;
1140b7e1c893Smrg
1141ad43ddacSmrg	vs_alu_consts[0] = 1.0;
1142ad43ddacSmrg	vs_alu_consts[1] = 0.0;
1143ad43ddacSmrg	vs_alu_consts[2] = 0.0;
1144ad43ddacSmrg	vs_alu_consts[3] = 1.0 / w;
1145ad43ddacSmrg
1146ad43ddacSmrg	vs_alu_consts[4] = 0.0;
1147ad43ddacSmrg	vs_alu_consts[5] = 1.0;
1148ad43ddacSmrg	vs_alu_consts[6] = 0.0;
1149ad43ddacSmrg	vs_alu_consts[7] = 1.0 / h;
1150ad43ddacSmrg    }
1151ad43ddacSmrg
1152ad43ddacSmrg    /* VS alu constants */
1153921a55d8Smrg    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
1154921a55d8Smrg			sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
1155ad43ddacSmrg
1156b7e1c893Smrg    return TRUE;
1157b7e1c893Smrg}
1158b7e1c893Smrg
1159b7e1c893Smrgstatic Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1160b7e1c893Smrg			       PicturePtr pDstPicture)
1161b7e1c893Smrg{
1162b7e1c893Smrg    uint32_t tmp1;
1163b7e1c893Smrg    PixmapPtr pSrcPixmap, pDstPixmap;
1164b7e1c893Smrg    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1165b7e1c893Smrg
1166b7e1c893Smrg    /* Check for unsupported compositing operations. */
1167b7e1c893Smrg    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1168b7e1c893Smrg	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1169b7e1c893Smrg
1170ad43ddacSmrg    if (!pSrcPicture->pDrawable)
1171ad43ddacSmrg	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1172ad43ddacSmrg
1173b7e1c893Smrg    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1174b7e1c893Smrg
1175b7e1c893Smrg    max_tex_w = 8192;
1176b7e1c893Smrg    max_tex_h = 8192;
1177b7e1c893Smrg    max_dst_w = 8192;
1178b7e1c893Smrg    max_dst_h = 8192;
1179b7e1c893Smrg
1180b7e1c893Smrg    if (pSrcPixmap->drawable.width >= max_tex_w ||
1181b7e1c893Smrg	pSrcPixmap->drawable.height >= max_tex_h) {
1182b7e1c893Smrg	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1183b7e1c893Smrg			 pSrcPixmap->drawable.width,
1184b7e1c893Smrg			 pSrcPixmap->drawable.height));
1185b7e1c893Smrg    }
1186b7e1c893Smrg
1187b7e1c893Smrg    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1188b7e1c893Smrg
1189b7e1c893Smrg    if (pDstPixmap->drawable.width >= max_dst_w ||
1190b7e1c893Smrg	pDstPixmap->drawable.height >= max_dst_h) {
1191b7e1c893Smrg	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1192b7e1c893Smrg			 pDstPixmap->drawable.width,
1193b7e1c893Smrg			 pDstPixmap->drawable.height));
1194b7e1c893Smrg    }
1195b7e1c893Smrg
1196b7e1c893Smrg    if (pMaskPicture) {
1197ad43ddacSmrg	PixmapPtr pMaskPixmap;
1198ad43ddacSmrg
1199ad43ddacSmrg	if (!pMaskPicture->pDrawable)
1200ad43ddacSmrg	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1201ad43ddacSmrg
1202ad43ddacSmrg	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1203b7e1c893Smrg
1204b7e1c893Smrg	if (pMaskPixmap->drawable.width >= max_tex_w ||
1205b7e1c893Smrg	    pMaskPixmap->drawable.height >= max_tex_h) {
1206b7e1c893Smrg	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1207b7e1c893Smrg			     pMaskPixmap->drawable.width,
1208b7e1c893Smrg			     pMaskPixmap->drawable.height));
1209b7e1c893Smrg	}
1210b7e1c893Smrg
1211b7e1c893Smrg	if (pMaskPicture->componentAlpha) {
1212b7e1c893Smrg	    /* Check if it's component alpha that relies on a source alpha and
1213b7e1c893Smrg	     * on the source value.  We can only get one of those into the
1214b7e1c893Smrg	     * single source value that we get to blend with.
1215b7e1c893Smrg	     */
1216b7e1c893Smrg	    if (R600BlendOp[op].src_alpha &&
1217b7e1c893Smrg		(R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1218b7e1c893Smrg		(BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1219b7e1c893Smrg		RADEON_FALLBACK(("Component alpha not supported with source "
1220b7e1c893Smrg				 "alpha and source value blending.\n"));
1221b7e1c893Smrg	    }
1222b7e1c893Smrg	}
1223b7e1c893Smrg
1224b7e1c893Smrg	if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1225b7e1c893Smrg	    return FALSE;
1226b7e1c893Smrg    }
1227b7e1c893Smrg
1228b7e1c893Smrg    if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1229b7e1c893Smrg	return FALSE;
1230b7e1c893Smrg
1231b7e1c893Smrg    if (!R600GetDestFormat(pDstPicture, &tmp1))
1232b7e1c893Smrg	return FALSE;
1233b7e1c893Smrg
1234b7e1c893Smrg    return TRUE;
1235b7e1c893Smrg
1236b7e1c893Smrg}
1237b7e1c893Smrg
1238b7e1c893Smrgstatic Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1239b7e1c893Smrg				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1240b7e1c893Smrg				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1241b7e1c893Smrg{
1242b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1243b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1244b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1245b13dfe66Smrg    uint32_t dst_format;
1246b7e1c893Smrg    cb_config_t cb_conf;
1247b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
1248ad43ddacSmrg    struct r600_accel_object src_obj, mask_obj, dst_obj;
1249b7e1c893Smrg
1250ad43ddacSmrg    if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8)
1251ad43ddacSmrg	return FALSE;
1252ad43ddacSmrg
1253ad43ddacSmrg#if defined(XF86DRM_MODE)
1254ad43ddacSmrg    if (info->cs) {
1255ad43ddacSmrg	src_obj.offset = 0;
1256ad43ddacSmrg	dst_obj.offset = 0;
1257ad43ddacSmrg	src_obj.bo = radeon_get_pixmap_bo(pSrc);
1258ad43ddacSmrg	dst_obj.bo = radeon_get_pixmap_bo(pDst);
1259166b61b6Smrg	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1260166b61b6Smrg	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1261ad43ddacSmrg    } else
1262ad43ddacSmrg#endif
1263ad43ddacSmrg    {
1264ad43ddacSmrg	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1265ad43ddacSmrg	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1266ad43ddacSmrg	src_obj.bo = NULL;
1267ad43ddacSmrg	dst_obj.bo = NULL;
1268ad43ddacSmrg    }
1269ad43ddacSmrg    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1270ad43ddacSmrg    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1271ad43ddacSmrg
1272ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
1273ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
1274ad43ddacSmrg    src_obj.bpp = pSrc->drawable.bitsPerPixel;
1275ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1276ad43ddacSmrg
1277ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
1278ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
1279ad43ddacSmrg    dst_obj.bpp = pDst->drawable.bitsPerPixel;
1280ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1281b7e1c893Smrg
1282b7e1c893Smrg    if (pMask) {
1283ad43ddacSmrg#if defined(XF86DRM_MODE)
1284ad43ddacSmrg	if (info->cs) {
1285ad43ddacSmrg	    mask_obj.offset = 0;
1286ad43ddacSmrg	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
1287166b61b6Smrg	    mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
1288921a55d8Smrg	} else
1289ad43ddacSmrg#endif
1290ad43ddacSmrg	{
1291ad43ddacSmrg	    mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset;
1292ad43ddacSmrg	    mask_obj.bo = NULL;
1293ad43ddacSmrg	}
1294ad43ddacSmrg	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1295ad43ddacSmrg
1296ad43ddacSmrg	mask_obj.width = pMask->drawable.width;
1297ad43ddacSmrg	mask_obj.height = pMask->drawable.height;
1298ad43ddacSmrg	mask_obj.bpp = pMask->drawable.bitsPerPixel;
1299ad43ddacSmrg	mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1300ad43ddacSmrg
1301ad43ddacSmrg	if (!R600SetAccelState(pScrn,
1302ad43ddacSmrg			       &src_obj,
1303ad43ddacSmrg			       &mask_obj,
1304ad43ddacSmrg			       &dst_obj,
13050974d292Smrg			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1306ad43ddacSmrg			       3, 0xffffffff))
1307ad43ddacSmrg	    return FALSE;
1308ad43ddacSmrg
1309ad43ddacSmrg	accel_state->msk_pic = pMaskPicture;
1310b7e1c893Smrg	if (pMaskPicture->componentAlpha) {
1311b7e1c893Smrg	    accel_state->component_alpha = TRUE;
1312b7e1c893Smrg	    if (R600BlendOp[op].src_alpha)
1313b7e1c893Smrg		accel_state->src_alpha = TRUE;
1314b7e1c893Smrg	    else
1315b7e1c893Smrg		accel_state->src_alpha = FALSE;
1316b7e1c893Smrg	} else {
1317b7e1c893Smrg	    accel_state->component_alpha = FALSE;
1318b7e1c893Smrg	    accel_state->src_alpha = FALSE;
1319b7e1c893Smrg	}
1320b7e1c893Smrg    } else {
1321ad43ddacSmrg	if (!R600SetAccelState(pScrn,
1322ad43ddacSmrg			       &src_obj,
1323ad43ddacSmrg			       NULL,
1324ad43ddacSmrg			       &dst_obj,
1325ad43ddacSmrg			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1326ad43ddacSmrg			       3, 0xffffffff))
1327ad43ddacSmrg	    return FALSE;
1328ad43ddacSmrg
1329ad43ddacSmrg	accel_state->msk_pic = NULL;
1330b7e1c893Smrg	accel_state->component_alpha = FALSE;
1331b7e1c893Smrg	accel_state->src_alpha = FALSE;
1332b7e1c893Smrg    }
1333b7e1c893Smrg
1334b7e1c893Smrg    if (!R600GetDestFormat(pDstPicture, &dst_format))
1335b7e1c893Smrg	return FALSE;
1336b7e1c893Smrg
1337b7e1c893Smrg    CLEAR (cb_conf);
1338b7e1c893Smrg    CLEAR (vs_conf);
1339b7e1c893Smrg    CLEAR (ps_conf);
1340b7e1c893Smrg
1341ad43ddacSmrg    if (pMask)
1342921a55d8Smrg        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1343ad43ddacSmrg    else
1344921a55d8Smrg        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1345b7e1c893Smrg
13460974d292Smrg    radeon_cp_start(pScrn);
1347b7e1c893Smrg
1348921a55d8Smrg    r600_set_default_state(pScrn, accel_state->ib);
1349b7e1c893Smrg
1350921a55d8Smrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1351921a55d8Smrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1352921a55d8Smrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1353b7e1c893Smrg
1354b7e1c893Smrg    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
1355ad43ddacSmrg        R600IBDiscard(pScrn, accel_state->ib);
1356ad43ddacSmrg        return FALSE;
1357b7e1c893Smrg    }
1358b7e1c893Smrg
1359b7e1c893Smrg    if (pMask) {
1360ad43ddacSmrg        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
1361ad43ddacSmrg            R600IBDiscard(pScrn, accel_state->ib);
1362ad43ddacSmrg            return FALSE;
1363ad43ddacSmrg        }
1364b7e1c893Smrg    } else
1365ad43ddacSmrg        accel_state->is_transform[1] = FALSE;
1366b7e1c893Smrg
13670974d292Smrg    if (pMask) {
1368921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
1369921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
13700974d292Smrg    } else {
1371921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
1372921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
13730974d292Smrg    }
1374b7e1c893Smrg
1375b7e1c893Smrg    /* Shader */
1376b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
13770974d292Smrg    vs_conf.shader_size         = accel_state->vs_size;
1378921a55d8Smrg    vs_conf.num_gprs            = 5;
1379b7e1c893Smrg    vs_conf.stack_size          = 1;
1380ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
1381921a55d8Smrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1382b7e1c893Smrg
1383b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
13840974d292Smrg    ps_conf.shader_size         = accel_state->ps_size;
1385b7e1c893Smrg    ps_conf.num_gprs            = 3;
13860974d292Smrg    ps_conf.stack_size          = 1;
1387b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
1388b7e1c893Smrg    ps_conf.clamp_consts        = 0;
1389b7e1c893Smrg    ps_conf.export_mode         = 2;
1390ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
1391921a55d8Smrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1392b7e1c893Smrg
1393b7e1c893Smrg    cb_conf.id = 0;
1394ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
1395ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
1396ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
1397b7e1c893Smrg    cb_conf.format = dst_format;
1398ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
1399b7e1c893Smrg
1400b7e1c893Smrg    switch (pDstPicture->format) {
1401b7e1c893Smrg    case PICT_a8r8g8b8:
1402b7e1c893Smrg    case PICT_x8r8g8b8:
1403b7e1c893Smrg    case PICT_a1r5g5b5:
1404b7e1c893Smrg    case PICT_x1r5g5b5:
1405b7e1c893Smrg    default:
1406b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
1407b7e1c893Smrg	break;
1408ad43ddacSmrg    case PICT_a8b8g8r8:
1409ad43ddacSmrg    case PICT_x8b8g8r8:
1410ad43ddacSmrg	cb_conf.comp_swap = 0; /* ABGR */
1411ad43ddacSmrg	break;
1412ad43ddacSmrg#ifdef PICT_TYPE_BGRA
1413ad43ddacSmrg    case PICT_b8g8r8a8:
1414ad43ddacSmrg    case PICT_b8g8r8x8:
1415ad43ddacSmrg	cb_conf.comp_swap = 3; /* BGRA */
1416ad43ddacSmrg	break;
1417ad43ddacSmrg#endif
1418b7e1c893Smrg    case PICT_r5g6b5:
1419b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
1420b7e1c893Smrg	break;
1421b7e1c893Smrg    case PICT_a8:
1422b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
1423b7e1c893Smrg	break;
1424b7e1c893Smrg    }
1425b7e1c893Smrg    cb_conf.source_format = 1;
1426b7e1c893Smrg    cb_conf.blend_clamp = 1;
1427b13dfe66Smrg    cb_conf.blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1428b13dfe66Smrg    cb_conf.blend_enable = 1;
1429b13dfe66Smrg    cb_conf.pmask = 0xf;
1430b13dfe66Smrg    cb_conf.rop = 3;
1431b13dfe66Smrg    if (accel_state->dst_obj.tiling_flags == 0)
1432b13dfe66Smrg	cb_conf.array_mode = 1;
1433b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1434b13dfe66Smrg    switch (dst_obj.bpp) {
1435b13dfe66Smrg    case 16:
1436b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN16;
1437b13dfe66Smrg	break;
1438b13dfe66Smrg    case 32:
1439b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN32;
1440b13dfe66Smrg	break;
1441b13dfe66Smrg    default:
1442b13dfe66Smrg	break;
1443b7e1c893Smrg    }
1444b13dfe66Smrg#endif
1445b13dfe66Smrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
14460974d292Smrg
1447b13dfe66Smrg    if (pMask)
1448b13dfe66Smrg	r600_set_spi(pScrn, accel_state->ib, (2 - 1), 2);
1449b13dfe66Smrg    else
1450b13dfe66Smrg	r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
1451b7e1c893Smrg
1452ad43ddacSmrg    if (accel_state->vsync)
14530974d292Smrg	RADEONVlineHelperClear(pScrn);
1454b7e1c893Smrg
1455b7e1c893Smrg    return TRUE;
1456b7e1c893Smrg}
1457b7e1c893Smrg
1458b7e1c893Smrgstatic void R600Composite(PixmapPtr pDst,
1459b7e1c893Smrg			  int srcX, int srcY,
1460b7e1c893Smrg			  int maskX, int maskY,
1461b7e1c893Smrg			  int dstX, int dstY,
1462b7e1c893Smrg			  int w, int h)
1463b7e1c893Smrg{
1464b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1465b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1466b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1467b7e1c893Smrg    float *vb;
1468b7e1c893Smrg
1469b7e1c893Smrg    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1470b7e1c893Smrg       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1471b7e1c893Smrg
1472ad43ddacSmrg    if (accel_state->vsync)
14730974d292Smrg	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1474b7e1c893Smrg
1475ad43ddacSmrg    if (accel_state->msk_pic) {
1476b7e1c893Smrg
1477921a55d8Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
1478b7e1c893Smrg
1479b7e1c893Smrg	vb[0] = (float)dstX;
1480b7e1c893Smrg	vb[1] = (float)dstY;
1481ad43ddacSmrg	vb[2] = (float)srcX;
1482ad43ddacSmrg	vb[3] = (float)srcY;
1483ad43ddacSmrg	vb[4] = (float)maskX;
1484ad43ddacSmrg	vb[5] = (float)maskY;
1485b7e1c893Smrg
1486b7e1c893Smrg	vb[6] = (float)dstX;
1487b7e1c893Smrg	vb[7] = (float)(dstY + h);
1488ad43ddacSmrg	vb[8] = (float)srcX;
1489ad43ddacSmrg	vb[9] = (float)(srcY + h);
1490ad43ddacSmrg	vb[10] = (float)maskX;
1491ad43ddacSmrg	vb[11] = (float)(maskY + h);
1492b7e1c893Smrg
1493b7e1c893Smrg	vb[12] = (float)(dstX + w);
1494b7e1c893Smrg	vb[13] = (float)(dstY + h);
1495ad43ddacSmrg	vb[14] = (float)(srcX + w);
1496ad43ddacSmrg	vb[15] = (float)(srcY + h);
1497ad43ddacSmrg	vb[16] = (float)(maskX + w);
1498ad43ddacSmrg	vb[17] = (float)(maskY + h);
1499ad43ddacSmrg
1500921a55d8Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
1501b7e1c893Smrg
1502b7e1c893Smrg    } else {
1503b7e1c893Smrg
1504921a55d8Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
1505b7e1c893Smrg
1506b7e1c893Smrg	vb[0] = (float)dstX;
1507b7e1c893Smrg	vb[1] = (float)dstY;
1508ad43ddacSmrg	vb[2] = (float)srcX;
1509ad43ddacSmrg	vb[3] = (float)srcY;
1510b7e1c893Smrg
1511b7e1c893Smrg	vb[4] = (float)dstX;
1512b7e1c893Smrg	vb[5] = (float)(dstY + h);
1513ad43ddacSmrg	vb[6] = (float)srcX;
1514ad43ddacSmrg	vb[7] = (float)(srcY + h);
1515b7e1c893Smrg
1516b7e1c893Smrg	vb[8] = (float)(dstX + w);
1517b7e1c893Smrg	vb[9] = (float)(dstY + h);
1518ad43ddacSmrg	vb[10] = (float)(srcX + w);
1519ad43ddacSmrg	vb[11] = (float)(srcY + h);
1520ad43ddacSmrg
1521921a55d8Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
1522b7e1c893Smrg    }
1523b7e1c893Smrg
1524b7e1c893Smrg
1525b7e1c893Smrg}
1526b7e1c893Smrg
1527b7e1c893Smrgstatic void R600DoneComposite(PixmapPtr pDst)
1528b7e1c893Smrg{
1529b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1530b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1531b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1532ad43ddacSmrg    int vtx_size;
1533b7e1c893Smrg
1534ad43ddacSmrg    if (accel_state->vsync)
1535921a55d8Smrg       r600_cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
1536921a55d8Smrg			       accel_state->vline_crtc,
1537921a55d8Smrg			       accel_state->vline_y1,
1538921a55d8Smrg			       accel_state->vline_y2);
1539b7e1c893Smrg
1540ad43ddacSmrg    vtx_size = accel_state->msk_pic ? 24 : 16;
1541b7e1c893Smrg
1542ad43ddacSmrg    r600_finish_op(pScrn, vtx_size);
1543b7e1c893Smrg}
1544b7e1c893Smrg
1545b7e1c893SmrgBool
1546b7e1c893SmrgR600CopyToVRAM(ScrnInfoPtr pScrn,
1547b7e1c893Smrg	       char *src, int src_pitch,
1548ad43ddacSmrg	       uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_width, uint32_t dst_height, int bpp,
1549b7e1c893Smrg	       int x, int y, int w, int h)
1550b7e1c893Smrg{
1551b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1552ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1553b7e1c893Smrg    uint32_t scratch_mc_addr;
1554b7e1c893Smrg    int wpass = w * (bpp/8);
1555ad43ddacSmrg    int scratch_pitch_bytes = RADEON_ALIGN(wpass, 256);
1556b7e1c893Smrg    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1557b7e1c893Smrg    int scratch_offset = 0, hpass, temph;
1558b7e1c893Smrg    char *dst;
1559b7e1c893Smrg    drmBufPtr scratch;
1560ad43ddacSmrg    struct r600_accel_object scratch_obj, dst_obj;
1561b7e1c893Smrg
1562b7e1c893Smrg    if (dst_pitch & 7)
1563b7e1c893Smrg	return FALSE;
1564b7e1c893Smrg
1565b7e1c893Smrg    if (dst_mc_addr & 0xff)
1566b7e1c893Smrg	return FALSE;
1567b7e1c893Smrg
1568b7e1c893Smrg    scratch = RADEONCPGetBuffer(pScrn);
1569b7e1c893Smrg    if (scratch == NULL)
1570b7e1c893Smrg	return FALSE;
1571b7e1c893Smrg
1572b7e1c893Smrg    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1573b7e1c893Smrg    temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1574b7e1c893Smrg    dst = (char *)scratch->address;
1575b7e1c893Smrg
1576ad43ddacSmrg    scratch_obj.pitch = scratch_pitch;
1577ad43ddacSmrg    scratch_obj.width = w;
1578ad43ddacSmrg    scratch_obj.height = hpass;
1579ad43ddacSmrg    scratch_obj.offset = scratch_mc_addr;
1580ad43ddacSmrg    scratch_obj.bpp = bpp;
1581ad43ddacSmrg    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1582ad43ddacSmrg    scratch_obj.bo = NULL;
1583ad43ddacSmrg
1584ad43ddacSmrg    dst_obj.pitch = dst_pitch;
1585ad43ddacSmrg    dst_obj.width = dst_width;
1586ad43ddacSmrg    dst_obj.height = dst_height;
1587ad43ddacSmrg    dst_obj.offset = dst_mc_addr;
1588ad43ddacSmrg    dst_obj.bo = NULL;
1589ad43ddacSmrg    dst_obj.bpp = bpp;
1590ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1591ad43ddacSmrg
1592ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1593ad43ddacSmrg			   &scratch_obj,
1594ad43ddacSmrg			   NULL,
1595ad43ddacSmrg			   &dst_obj,
1596ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1597ad43ddacSmrg			   3, 0xffffffff))
1598ad43ddacSmrg	return FALSE;
1599ad43ddacSmrg
1600b7e1c893Smrg    /* memcopy from sys to scratch */
1601b7e1c893Smrg    while (temph--) {
1602b7e1c893Smrg	memcpy (dst, src, wpass);
1603b7e1c893Smrg	src += src_pitch;
1604b7e1c893Smrg	dst += scratch_pitch_bytes;
1605b7e1c893Smrg    }
1606b7e1c893Smrg
1607b7e1c893Smrg    while (h) {
1608b7e1c893Smrg	uint32_t offset = scratch_mc_addr + scratch_offset;
1609b7e1c893Smrg	int oldhpass = hpass;
1610b7e1c893Smrg	h -= oldhpass;
1611b7e1c893Smrg	temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1612b7e1c893Smrg
1613b7e1c893Smrg	if (hpass) {
1614b7e1c893Smrg	    scratch_offset = scratch->total/2 - scratch_offset;
1615b7e1c893Smrg	    dst = (char *)scratch->address + scratch_offset;
1616b7e1c893Smrg	    /* wait for the engine to be idle */
1617b7e1c893Smrg	    RADEONWaitForIdleCP(pScrn);
1618b7e1c893Smrg	    //memcopy from sys to scratch
1619b7e1c893Smrg	    while (temph--) {
1620b7e1c893Smrg		memcpy (dst, src, wpass);
1621b7e1c893Smrg		src += src_pitch;
1622b7e1c893Smrg		dst += scratch_pitch_bytes;
1623b7e1c893Smrg	    }
1624b7e1c893Smrg	}
1625b7e1c893Smrg	/* blit from scratch to vram */
1626ad43ddacSmrg	info->accel_state->src_obj[0].height = oldhpass;
1627ad43ddacSmrg	info->accel_state->src_obj[0].offset = offset;
1628ad43ddacSmrg	R600DoPrepareCopy(pScrn);
1629b7e1c893Smrg	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
1630b7e1c893Smrg	R600DoCopy(pScrn);
1631b7e1c893Smrg	y += oldhpass;
1632b7e1c893Smrg    }
1633b7e1c893Smrg
1634b7e1c893Smrg    R600IBDiscard(pScrn, scratch);
1635b7e1c893Smrg
1636b7e1c893Smrg    return TRUE;
1637b7e1c893Smrg}
1638b7e1c893Smrg
1639b7e1c893Smrgstatic Bool
1640b7e1c893SmrgR600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1641b7e1c893Smrg		   char *src, int src_pitch)
1642b7e1c893Smrg{
1643b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1644b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1645b7e1c893Smrg    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1646b7e1c893Smrg    uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1647b7e1c893Smrg    int bpp = pDst->drawable.bitsPerPixel;
1648b7e1c893Smrg
1649b7e1c893Smrg    return R600CopyToVRAM(pScrn,
1650b7e1c893Smrg			  src, src_pitch,
1651ad43ddacSmrg			  dst_pitch, dst_mc_addr, pDst->drawable.width, pDst->drawable.height, bpp,
1652b7e1c893Smrg			  x, y, w, h);
1653b7e1c893Smrg}
1654b7e1c893Smrg
1655b7e1c893Smrgstatic Bool
1656b7e1c893SmrgR600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
1657b7e1c893Smrg		       char *dst, int dst_pitch)
1658b7e1c893Smrg{
1659b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1660b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1661ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1662b7e1c893Smrg    uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1663b7e1c893Smrg    uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1664b7e1c893Smrg    uint32_t src_width = pSrc->drawable.width;
1665b7e1c893Smrg    uint32_t src_height = pSrc->drawable.height;
1666b7e1c893Smrg    int bpp = pSrc->drawable.bitsPerPixel;
1667b7e1c893Smrg    uint32_t scratch_mc_addr;
1668ad43ddacSmrg    int scratch_pitch_bytes = RADEON_ALIGN(dst_pitch, 256);
1669b7e1c893Smrg    int scratch_offset = 0, hpass;
1670b7e1c893Smrg    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1671b7e1c893Smrg    int wpass = w * (bpp/8);
1672b7e1c893Smrg    drmBufPtr scratch;
1673ad43ddacSmrg    struct r600_accel_object scratch_obj, src_obj;
1674b7e1c893Smrg
1675ad43ddacSmrg    /* bad pipe setup in drm prior to 1.32 */
1676ad43ddacSmrg    if (info->dri->pKernelDRMVersion->version_minor < 32) {
1677ad43ddacSmrg	    if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32))
1678ad43ddacSmrg		    return FALSE;
1679ad43ddacSmrg    }
1680c503f109Smrg
1681b7e1c893Smrg    if (src_pitch & 7)
1682b7e1c893Smrg	return FALSE;
1683b7e1c893Smrg
1684b7e1c893Smrg    scratch = RADEONCPGetBuffer(pScrn);
1685b7e1c893Smrg    if (scratch == NULL)
1686b7e1c893Smrg	return FALSE;
1687b7e1c893Smrg
1688b7e1c893Smrg    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1689b7e1c893Smrg    hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1690b7e1c893Smrg
1691ad43ddacSmrg    src_obj.pitch = src_pitch;
1692ad43ddacSmrg    src_obj.width = src_width;
1693ad43ddacSmrg    src_obj.height = src_height;
1694ad43ddacSmrg    src_obj.offset = src_mc_addr;
1695ad43ddacSmrg    src_obj.bo = NULL;
1696ad43ddacSmrg    src_obj.bpp = bpp;
1697ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1698ad43ddacSmrg
1699ad43ddacSmrg    scratch_obj.pitch = scratch_pitch;
1700ad43ddacSmrg    scratch_obj.width = src_width;
1701ad43ddacSmrg    scratch_obj.height = hpass;
1702ad43ddacSmrg    scratch_obj.offset = scratch_mc_addr;
1703ad43ddacSmrg    scratch_obj.bpp = bpp;
1704ad43ddacSmrg    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1705ad43ddacSmrg    scratch_obj.bo = NULL;
1706ad43ddacSmrg
1707ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1708ad43ddacSmrg			   &src_obj,
1709ad43ddacSmrg			   NULL,
1710ad43ddacSmrg			   &scratch_obj,
1711ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1712ad43ddacSmrg			   3, 0xffffffff))
1713ad43ddacSmrg	return FALSE;
1714ad43ddacSmrg
1715b7e1c893Smrg    /* blit from vram to scratch */
1716ad43ddacSmrg    R600DoPrepareCopy(pScrn);
1717b7e1c893Smrg    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1718b7e1c893Smrg    R600DoCopy(pScrn);
1719b7e1c893Smrg
1720b7e1c893Smrg    while (h) {
1721b7e1c893Smrg	char *src = (char *)scratch->address + scratch_offset;
1722b7e1c893Smrg	int oldhpass = hpass;
1723b7e1c893Smrg	h -= oldhpass;
1724b7e1c893Smrg	y += oldhpass;
1725b7e1c893Smrg	hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1726b7e1c893Smrg
1727b7e1c893Smrg	if (hpass) {
1728b7e1c893Smrg	    scratch_offset = scratch->total/2 - scratch_offset;
1729b7e1c893Smrg	    /* blit from vram to scratch */
1730ad43ddacSmrg	    info->accel_state->dst_obj.height = hpass;
1731ad43ddacSmrg	    info->accel_state->dst_obj.offset = scratch_mc_addr + scratch_offset;
1732ad43ddacSmrg	    R600DoPrepareCopy(pScrn);
1733b7e1c893Smrg	    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1734b7e1c893Smrg	    R600DoCopy(pScrn);
1735b7e1c893Smrg	}
1736b7e1c893Smrg
1737b7e1c893Smrg	/* wait for the engine to be idle */
1738b7e1c893Smrg	RADEONWaitForIdleCP(pScrn);
1739b7e1c893Smrg	/* memcopy from scratch to sys */
1740b7e1c893Smrg	while (oldhpass--) {
1741b7e1c893Smrg	    memcpy (dst, src, wpass);
1742b7e1c893Smrg	    dst += dst_pitch;
1743b7e1c893Smrg	    src += scratch_pitch_bytes;
1744b7e1c893Smrg	}
1745b7e1c893Smrg    }
1746b7e1c893Smrg
1747b7e1c893Smrg    R600IBDiscard(pScrn, scratch);
1748b7e1c893Smrg
1749b7e1c893Smrg    return TRUE;
1750b7e1c893Smrg
1751b7e1c893Smrg}
1752b7e1c893Smrg
1753ad43ddacSmrg#if defined(XF86DRM_MODE)
1754ad43ddacSmrg
1755ad43ddacSmrgstatic Bool
1756ad43ddacSmrgR600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
1757ad43ddacSmrg		     char *src, int src_pitch)
1758ad43ddacSmrg{
1759ad43ddacSmrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1760ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1761ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1762ad43ddacSmrg    struct radeon_exa_pixmap_priv *driver_priv;
17630974d292Smrg    struct radeon_bo *scratch = NULL;
17640974d292Smrg    struct radeon_bo *copy_dst;
17650974d292Smrg    unsigned char *dst;
1766ad43ddacSmrg    unsigned size;
1767ad43ddacSmrg    uint32_t dst_domain;
1768ad43ddacSmrg    int bpp = pDst->drawable.bitsPerPixel;
1769b13dfe66Smrg    uint32_t scratch_pitch;
17700974d292Smrg    uint32_t copy_pitch;
1771ad43ddacSmrg    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
17720974d292Smrg    int ret;
17730974d292Smrg    Bool flush = TRUE;
1774ad43ddacSmrg    Bool r;
1775ad43ddacSmrg    int i;
1776ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
1777166b61b6Smrg    uint32_t height, base_align;
1778ad43ddacSmrg
1779ad43ddacSmrg    if (bpp < 8)
1780ad43ddacSmrg	return FALSE;
1781ad43ddacSmrg
1782ad43ddacSmrg    driver_priv = exaGetPixmapDriverPrivate(pDst);
1783921a55d8Smrg    if (!driver_priv || !driver_priv->bo)
1784921a55d8Smrg	return FALSE;
1785921a55d8Smrg
17860974d292Smrg    /* If we know the BO won't be busy, don't bother with a scratch */
17870974d292Smrg    copy_dst = driver_priv->bo;
17880974d292Smrg    copy_pitch = pDst->devKind;
1789166b61b6Smrg    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1790921a55d8Smrg	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1791921a55d8Smrg	    flush = FALSE;
1792921a55d8Smrg	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
1793921a55d8Smrg		goto copy;
1794921a55d8Smrg	}
17950974d292Smrg    }
1796ad43ddacSmrg
1797b13dfe66Smrg    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1798b13dfe66Smrg    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1799b13dfe66Smrg    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1800b13dfe66Smrg    size = scratch_pitch * height * (bpp / 8);
1801b13dfe66Smrg    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1802ad43ddacSmrg    if (scratch == NULL) {
18030974d292Smrg	goto copy;
1804ad43ddacSmrg    }
1805ad43ddacSmrg
1806b13dfe66Smrg    src_obj.pitch = scratch_pitch;
1807ad43ddacSmrg    src_obj.width = w;
1808ad43ddacSmrg    src_obj.height = h;
1809ad43ddacSmrg    src_obj.offset = 0;
1810ad43ddacSmrg    src_obj.bpp = bpp;
1811ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1812ad43ddacSmrg    src_obj.bo = scratch;
1813166b61b6Smrg    src_obj.tiling_flags = 0;
1814ad43ddacSmrg
1815ad43ddacSmrg    dst_obj.pitch = dst_pitch_hw;
1816ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
1817ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
1818ad43ddacSmrg    dst_obj.offset = 0;
1819ad43ddacSmrg    dst_obj.bpp = bpp;
1820ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1821ad43ddacSmrg    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1822166b61b6Smrg    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1823ad43ddacSmrg
1824ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1825ad43ddacSmrg			   &src_obj,
1826ad43ddacSmrg			   NULL,
1827ad43ddacSmrg			   &dst_obj,
1828ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1829ad43ddacSmrg			   3, 0xffffffff)) {
18300974d292Smrg        goto copy;
1831ad43ddacSmrg    }
18320974d292Smrg    copy_dst = scratch;
1833b13dfe66Smrg    copy_pitch = scratch_pitch * (bpp / 8);
18340974d292Smrg    flush = FALSE;
18350974d292Smrg
18360974d292Smrgcopy:
18370974d292Smrg    if (flush)
18380974d292Smrg	radeon_cs_flush_indirect(pScrn);
1839ad43ddacSmrg
18400974d292Smrg    ret = radeon_bo_map(copy_dst, 0);
18410974d292Smrg    if (ret) {
1842ad43ddacSmrg        r = FALSE;
1843ad43ddacSmrg        goto out;
1844ad43ddacSmrg    }
1845ad43ddacSmrg    r = TRUE;
1846ad43ddacSmrg    size = w * bpp / 8;
18470974d292Smrg    dst = copy_dst->ptr;
18480974d292Smrg    if (copy_dst == driver_priv->bo)
18490974d292Smrg	dst += y * copy_pitch + x * bpp / 8;
1850ad43ddacSmrg    for (i = 0; i < h; i++) {
18510974d292Smrg        memcpy(dst + i * copy_pitch, src, size);
1852ad43ddacSmrg        src += src_pitch;
1853ad43ddacSmrg    }
18540974d292Smrg    radeon_bo_unmap(copy_dst);
1855ad43ddacSmrg
18560974d292Smrg    if (copy_dst == scratch) {
18570974d292Smrg	if (info->accel_state->vsync)
18580974d292Smrg	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1859ad43ddacSmrg
18600974d292Smrg	/* blit from gart to vram */
18610974d292Smrg	R600DoPrepareCopy(pScrn);
18620974d292Smrg	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
18630974d292Smrg	R600DoCopyVline(pDst);
18640974d292Smrg    }
1865ad43ddacSmrg
1866ad43ddacSmrgout:
18670974d292Smrg    if (scratch)
18680974d292Smrg	radeon_bo_unref(scratch);
1869ad43ddacSmrg    return r;
1870ad43ddacSmrg}
1871ad43ddacSmrg
1872ad43ddacSmrgstatic Bool
1873ad43ddacSmrgR600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
1874ad43ddacSmrg			 int h, char *dst, int dst_pitch)
1875ad43ddacSmrg{
1876ad43ddacSmrg    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1877ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1878ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1879ad43ddacSmrg    struct radeon_exa_pixmap_priv *driver_priv;
18800974d292Smrg    struct radeon_bo *scratch = NULL;
18810974d292Smrg    struct radeon_bo *copy_src;
1882ad43ddacSmrg    unsigned size;
1883ad43ddacSmrg    uint32_t src_domain = 0;
1884ad43ddacSmrg    int bpp = pSrc->drawable.bitsPerPixel;
1885b13dfe66Smrg    uint32_t scratch_pitch;
18860974d292Smrg    uint32_t copy_pitch;
1887ad43ddacSmrg    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
18880974d292Smrg    int ret;
18890974d292Smrg    Bool flush = FALSE;
1890ad43ddacSmrg    Bool r;
1891ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
1892166b61b6Smrg    uint32_t height, base_align;
1893ad43ddacSmrg
1894ad43ddacSmrg    if (bpp < 8)
1895ad43ddacSmrg	return FALSE;
1896ad43ddacSmrg
1897ad43ddacSmrg    driver_priv = exaGetPixmapDriverPrivate(pSrc);
1898921a55d8Smrg    if (!driver_priv || !driver_priv->bo)
1899921a55d8Smrg	return FALSE;
1900921a55d8Smrg
19010974d292Smrg    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
19020974d292Smrg    copy_src = driver_priv->bo;
19030974d292Smrg    copy_pitch = pSrc->devKind;
1904166b61b6Smrg    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1905921a55d8Smrg	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1906921a55d8Smrg	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
1907921a55d8Smrg	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
1908921a55d8Smrg		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
1909921a55d8Smrg		src_domain = 0;
1910921a55d8Smrg	    else /* A write may be scheduled */
1911921a55d8Smrg		flush = TRUE;
1912921a55d8Smrg	}
1913ad43ddacSmrg
1914921a55d8Smrg	if (!src_domain)
1915921a55d8Smrg	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
1916ad43ddacSmrg
1917921a55d8Smrg	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
1918921a55d8Smrg	    goto copy;
1919921a55d8Smrg    }
1920ad43ddacSmrg
1921b13dfe66Smrg    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1922b13dfe66Smrg    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1923b13dfe66Smrg    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1924b13dfe66Smrg    size = scratch_pitch * height * (bpp / 8);
1925b13dfe66Smrg    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1926ad43ddacSmrg    if (scratch == NULL) {
19270974d292Smrg	goto copy;
1928ad43ddacSmrg    }
1929ad43ddacSmrg    radeon_cs_space_reset_bos(info->cs);
1930ad43ddacSmrg    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
1931ad43ddacSmrg				      RADEON_GEM_DOMAIN_VRAM, 0);
1932ad43ddacSmrg    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
1933ad43ddacSmrg    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
1934ad43ddacSmrg    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1935ad43ddacSmrg    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
19360974d292Smrg    ret = radeon_cs_space_check(info->cs);
19370974d292Smrg    if (ret) {
19380974d292Smrg        goto copy;
1939ad43ddacSmrg    }
1940ad43ddacSmrg
1941ad43ddacSmrg    src_obj.pitch = src_pitch_hw;
1942ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
1943ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
1944ad43ddacSmrg    src_obj.offset = 0;
1945ad43ddacSmrg    src_obj.bpp = bpp;
1946ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1947ad43ddacSmrg    src_obj.bo = radeon_get_pixmap_bo(pSrc);
1948166b61b6Smrg    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1949921a55d8Smrg
1950b13dfe66Smrg    dst_obj.pitch = scratch_pitch;
1951ad43ddacSmrg    dst_obj.width = w;
1952ad43ddacSmrg    dst_obj.height = h;
1953ad43ddacSmrg    dst_obj.offset = 0;
1954ad43ddacSmrg    dst_obj.bo = scratch;
1955ad43ddacSmrg    dst_obj.bpp = bpp;
1956ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1957166b61b6Smrg    dst_obj.tiling_flags = 0;
1958ad43ddacSmrg
1959ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1960ad43ddacSmrg			   &src_obj,
1961ad43ddacSmrg			   NULL,
1962ad43ddacSmrg			   &dst_obj,
1963ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1964ad43ddacSmrg			   3, 0xffffffff)) {
19650974d292Smrg        goto copy;
1966ad43ddacSmrg    }
1967ad43ddacSmrg
1968ad43ddacSmrg    /* blit from vram to gart */
1969ad43ddacSmrg    R600DoPrepareCopy(pScrn);
1970ad43ddacSmrg    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
1971ad43ddacSmrg    R600DoCopy(pScrn);
19720974d292Smrg    copy_src = scratch;
1973b13dfe66Smrg    copy_pitch = scratch_pitch * (bpp / 8);
19740974d292Smrg    flush = TRUE;
1975ad43ddacSmrg
19760974d292Smrgcopy:
19770974d292Smrg    if (flush && info->cs)
1978ad43ddacSmrg	radeon_cs_flush_indirect(pScrn);
1979ad43ddacSmrg
19800974d292Smrg    ret = radeon_bo_map(copy_src, 0);
19810974d292Smrg    if (ret) {
19820974d292Smrg	ErrorF("failed to map pixmap: %d\n", ret);
1983ad43ddacSmrg        r = FALSE;
1984ad43ddacSmrg        goto out;
1985ad43ddacSmrg    }
1986ad43ddacSmrg    r = TRUE;
1987ad43ddacSmrg    w *= bpp / 8;
19880974d292Smrg    if (copy_src == driver_priv->bo)
19890974d292Smrg	size = y * copy_pitch + x * bpp / 8;
19900974d292Smrg    else
19910974d292Smrg	size = 0;
1992ad43ddacSmrg    while (h--) {
19930974d292Smrg        memcpy(dst, copy_src->ptr + size, w);
19940974d292Smrg        size += copy_pitch;
1995ad43ddacSmrg        dst += dst_pitch;
1996ad43ddacSmrg    }
19970974d292Smrg    radeon_bo_unmap(copy_src);
1998ad43ddacSmrgout:
19990974d292Smrg    if (scratch)
20000974d292Smrg	radeon_bo_unref(scratch);
2001ad43ddacSmrg    return r;
2002ad43ddacSmrg}
2003ad43ddacSmrg#endif
2004ad43ddacSmrg
2005b7e1c893Smrgstatic int
2006b7e1c893SmrgR600MarkSync(ScreenPtr pScreen)
2007b7e1c893Smrg{
2008b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
2009b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2010b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2011b7e1c893Smrg
2012b7e1c893Smrg    return ++accel_state->exaSyncMarker;
2013b7e1c893Smrg
2014b7e1c893Smrg}
2015b7e1c893Smrg
2016b7e1c893Smrgstatic void
2017b7e1c893SmrgR600Sync(ScreenPtr pScreen, int marker)
2018b7e1c893Smrg{
2019b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
2020b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2021b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2022b7e1c893Smrg
2023b7e1c893Smrg    if (accel_state->exaMarkerSynced != marker) {
2024ad43ddacSmrg#ifdef XF86DRM_MODE
2025ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2026ad43ddacSmrg	if (!info->cs)
2027ad43ddacSmrg#endif
2028ad43ddacSmrg#endif
2029ad43ddacSmrg	    RADEONWaitForIdleCP(pScrn);
2030b7e1c893Smrg	accel_state->exaMarkerSynced = marker;
2031b7e1c893Smrg    }
2032b7e1c893Smrg
2033b7e1c893Smrg}
2034b7e1c893Smrg
2035b7e1c893Smrgstatic Bool
2036b7e1c893SmrgR600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
2037b7e1c893Smrg{
2038b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2039b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2040b7e1c893Smrg
2041b7e1c893Smrg    /* 512 bytes per shader for now */
2042b7e1c893Smrg    int size = 512 * 9;
2043b7e1c893Smrg
2044b7e1c893Smrg    accel_state->shaders = NULL;
2045b7e1c893Smrg
2046ad43ddacSmrg#ifdef XF86DRM_MODE
2047ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2048ad43ddacSmrg    if (info->cs) {
2049ad43ddacSmrg	accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
2050ad43ddacSmrg						 RADEON_GEM_DOMAIN_VRAM, 0);
2051ad43ddacSmrg	if (accel_state->shaders_bo == NULL) {
2052ad43ddacSmrg	    ErrorF("Allocating shader failed\n");
2053ad43ddacSmrg	    return FALSE;
2054ad43ddacSmrg	}
2055ad43ddacSmrg	return TRUE;
2056ad43ddacSmrg    } else
2057ad43ddacSmrg#endif
2058ad43ddacSmrg#endif
2059ad43ddacSmrg    {
2060ad43ddacSmrg	accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
2061ad43ddacSmrg						 TRUE, NULL, NULL);
2062ad43ddacSmrg
2063ad43ddacSmrg	if (accel_state->shaders == NULL)
2064ad43ddacSmrg	    return FALSE;
2065ad43ddacSmrg    }
2066b7e1c893Smrg
2067b7e1c893Smrg    return TRUE;
2068b7e1c893Smrg}
2069b7e1c893Smrg
2070b7e1c893SmrgBool
2071b7e1c893SmrgR600LoadShaders(ScrnInfoPtr pScrn)
2072b7e1c893Smrg{
2073b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2074b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2075b7e1c893Smrg    RADEONChipFamily ChipSet = info->ChipFamily;
2076b7e1c893Smrg    uint32_t *shader;
2077ad43ddacSmrg#ifdef XF86DRM_MODE
2078ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2079ad43ddacSmrg    int ret;
2080ad43ddacSmrg
2081ad43ddacSmrg    if (info->cs) {
2082ad43ddacSmrg	ret = radeon_bo_map(accel_state->shaders_bo, 1);
2083ad43ddacSmrg	if (ret) {
2084ad43ddacSmrg	    FatalError("failed to map shader %d\n", ret);
2085ad43ddacSmrg	    return FALSE;
2086ad43ddacSmrg	}
2087ad43ddacSmrg	shader = accel_state->shaders_bo->ptr;
2088ad43ddacSmrg    } else
2089ad43ddacSmrg#endif
2090ad43ddacSmrg#endif
2091ad43ddacSmrg	shader = (pointer)((char *)info->FB + accel_state->shaders->offset);
2092b7e1c893Smrg
2093b7e1c893Smrg    /*  solid vs --------------------------------------- */
2094b7e1c893Smrg    accel_state->solid_vs_offset = 0;
2095b7e1c893Smrg    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
2096b7e1c893Smrg
2097b7e1c893Smrg    /*  solid ps --------------------------------------- */
2098b7e1c893Smrg    accel_state->solid_ps_offset = 512;
2099b7e1c893Smrg    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
2100b7e1c893Smrg
2101b7e1c893Smrg    /*  copy vs --------------------------------------- */
2102b7e1c893Smrg    accel_state->copy_vs_offset = 1024;
2103b7e1c893Smrg    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
2104b7e1c893Smrg
2105b7e1c893Smrg    /*  copy ps --------------------------------------- */
2106b7e1c893Smrg    accel_state->copy_ps_offset = 1536;
2107b7e1c893Smrg    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2108b7e1c893Smrg
2109b7e1c893Smrg    /*  comp vs --------------------------------------- */
2110b7e1c893Smrg    accel_state->comp_vs_offset = 2048;
2111b7e1c893Smrg    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2112b7e1c893Smrg
2113b7e1c893Smrg    /*  comp ps --------------------------------------- */
2114b7e1c893Smrg    accel_state->comp_ps_offset = 2560;
2115b7e1c893Smrg    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2116b7e1c893Smrg
2117b7e1c893Smrg    /*  xv vs --------------------------------------- */
21180974d292Smrg    accel_state->xv_vs_offset = 3072;
2119b7e1c893Smrg    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2120b7e1c893Smrg
2121b7e1c893Smrg    /*  xv ps --------------------------------------- */
21220974d292Smrg    accel_state->xv_ps_offset = 3584;
2123b7e1c893Smrg    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2124b7e1c893Smrg
2125ad43ddacSmrg#ifdef XF86DRM_MODE
2126ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2127ad43ddacSmrg    if (info->cs) {
2128ad43ddacSmrg	radeon_bo_unmap(accel_state->shaders_bo);
2129ad43ddacSmrg    }
2130ad43ddacSmrg#endif
2131ad43ddacSmrg#endif
2132ad43ddacSmrg
2133b7e1c893Smrg    return TRUE;
2134b7e1c893Smrg}
2135b7e1c893Smrg
2136b7e1c893Smrgstatic Bool
2137b7e1c893SmrgR600PrepareAccess(PixmapPtr pPix, int index)
2138b7e1c893Smrg{
2139b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2140b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2141b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
2142b7e1c893Smrg
2143b7e1c893Smrg    /* flush HDP read/write caches */
2144b7e1c893Smrg    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2145b7e1c893Smrg
2146b7e1c893Smrg    return TRUE;
2147b7e1c893Smrg}
2148b7e1c893Smrg
2149b7e1c893Smrgstatic void
2150b7e1c893SmrgR600FinishAccess(PixmapPtr pPix, int index)
2151b7e1c893Smrg{
2152b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2153b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2154b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
2155b7e1c893Smrg
2156b7e1c893Smrg    /* flush HDP read/write caches */
2157b7e1c893Smrg    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2158b7e1c893Smrg
2159b7e1c893Smrg}
2160b7e1c893Smrg
2161b7e1c893SmrgBool
2162b7e1c893SmrgR600DrawInit(ScreenPtr pScreen)
2163b7e1c893Smrg{
2164b7e1c893Smrg    ScrnInfoPtr pScrn =  xf86Screens[pScreen->myNum];
2165b7e1c893Smrg    RADEONInfoPtr info   = RADEONPTR(pScrn);
2166b7e1c893Smrg
2167b7e1c893Smrg    if (info->accel_state->exa == NULL) {
2168b7e1c893Smrg	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2169b7e1c893Smrg	return FALSE;
2170b7e1c893Smrg    }
2171b7e1c893Smrg
2172b7e1c893Smrg    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2173b7e1c893Smrg    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2174b7e1c893Smrg
2175b7e1c893Smrg    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2176b7e1c893Smrg    info->accel_state->exa->Solid = R600Solid;
2177b7e1c893Smrg    info->accel_state->exa->DoneSolid = R600DoneSolid;
2178b7e1c893Smrg
2179b7e1c893Smrg    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2180b7e1c893Smrg    info->accel_state->exa->Copy = R600Copy;
2181b7e1c893Smrg    info->accel_state->exa->DoneCopy = R600DoneCopy;
2182b7e1c893Smrg
2183b7e1c893Smrg    info->accel_state->exa->MarkSync = R600MarkSync;
2184b7e1c893Smrg    info->accel_state->exa->WaitMarker = R600Sync;
2185b7e1c893Smrg
2186ad43ddacSmrg#ifdef XF86DRM_MODE
2187ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2188ad43ddacSmrg    if (info->cs) {
2189ad43ddacSmrg	info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
2190ad43ddacSmrg	info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
2191ad43ddacSmrg	info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
2192ad43ddacSmrg	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
2193ad43ddacSmrg	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
2194ad43ddacSmrg	info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
2195ad43ddacSmrg	info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
21960974d292Smrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5)
21970974d292Smrg        info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
21980974d292Smrg#endif
2199ad43ddacSmrg    } else
2200ad43ddacSmrg#endif
2201ad43ddacSmrg#endif
2202ad43ddacSmrg    {
2203ad43ddacSmrg	info->accel_state->exa->PrepareAccess = R600PrepareAccess;
2204ad43ddacSmrg	info->accel_state->exa->FinishAccess = R600FinishAccess;
2205ad43ddacSmrg
2206ad43ddacSmrg	/* AGP seems to have problems with gart transfers */
2207ad43ddacSmrg	if (info->accelDFS) {
2208ad43ddacSmrg	    info->accel_state->exa->UploadToScreen = R600UploadToScreen;
2209ad43ddacSmrg	    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen;
2210ad43ddacSmrg	}
2211b7e1c893Smrg    }
2212b7e1c893Smrg
2213b7e1c893Smrg    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
2214b7e1c893Smrg#ifdef EXA_SUPPORTS_PREPARE_AUX
2215b7e1c893Smrg    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
2216ad43ddacSmrg#endif
2217ad43ddacSmrg
2218ad43ddacSmrg#ifdef XF86DRM_MODE
2219ad43ddacSmrg#ifdef EXA_HANDLES_PIXMAPS
2220ad43ddacSmrg    if (info->cs) {
2221ad43ddacSmrg	info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
2222ad43ddacSmrg#ifdef EXA_MIXED_PIXMAPS
2223ad43ddacSmrg	info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
2224ad43ddacSmrg#endif
2225ad43ddacSmrg    }
2226ad43ddacSmrg#endif
2227b7e1c893Smrg#endif
2228b7e1c893Smrg    info->accel_state->exa->pixmapOffsetAlign = 256;
2229b7e1c893Smrg    info->accel_state->exa->pixmapPitchAlign = 256;
2230b7e1c893Smrg
2231b7e1c893Smrg    info->accel_state->exa->CheckComposite = R600CheckComposite;
2232b7e1c893Smrg    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2233b7e1c893Smrg    info->accel_state->exa->Composite = R600Composite;
2234b7e1c893Smrg    info->accel_state->exa->DoneComposite = R600DoneComposite;
2235b7e1c893Smrg
2236b7e1c893Smrg#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
2237b7e1c893Smrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
2238b7e1c893Smrg
2239b7e1c893Smrg    info->accel_state->exa->maxPitchBytes = 32768;
2240b7e1c893Smrg    info->accel_state->exa->maxX = 8192;
2241b7e1c893Smrg#else
2242b7e1c893Smrg    info->accel_state->exa->maxX = 8192;
2243b7e1c893Smrg#endif
2244b7e1c893Smrg    info->accel_state->exa->maxY = 8192;
2245b7e1c893Smrg
2246b7e1c893Smrg    /* not supported yet */
2247ad43ddacSmrg    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2248ad43ddacSmrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2249ad43ddacSmrg	info->accel_state->vsync = TRUE;
2250ad43ddacSmrg    } else
2251ad43ddacSmrg	info->accel_state->vsync = FALSE;
2252b7e1c893Smrg
2253b7e1c893Smrg    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
22542f39173dSmrg	free(info->accel_state->exa);
2255b7e1c893Smrg	return FALSE;
2256b7e1c893Smrg    }
2257b7e1c893Smrg
2258ad43ddacSmrg#ifdef XF86DRM_MODE
2259ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2260ad43ddacSmrg    if (!info->cs)
2261ad43ddacSmrg#endif
2262ad43ddacSmrg#endif
2263ad43ddacSmrg	if (!info->gartLocation)
2264ad43ddacSmrg	    return FALSE;
2265b7e1c893Smrg
2266b7e1c893Smrg    info->accel_state->XInited3D = FALSE;
2267b7e1c893Smrg    info->accel_state->copy_area = NULL;
2268ad43ddacSmrg    info->accel_state->src_obj[0].bo = NULL;
2269ad43ddacSmrg    info->accel_state->src_obj[1].bo = NULL;
2270ad43ddacSmrg    info->accel_state->dst_obj.bo = NULL;
2271ad43ddacSmrg    info->accel_state->copy_area_bo = NULL;
2272921a55d8Smrg    info->accel_state->vbo.vb_start_op = -1;
22730974d292Smrg    info->accel_state->finish_op = r600_finish_op;
2274921a55d8Smrg    info->accel_state->vbo.verts_per_op = 3;
22750974d292Smrg    RADEONVlineHelperClear(pScrn);
2276ad43ddacSmrg
2277ad43ddacSmrg#ifdef XF86DRM_MODE
2278ad43ddacSmrg    radeon_vbo_init_lists(pScrn);
2279ad43ddacSmrg#endif
2280b7e1c893Smrg
2281b7e1c893Smrg    if (!R600AllocShaders(pScrn, pScreen))
2282b7e1c893Smrg	return FALSE;
2283b7e1c893Smrg
2284b7e1c893Smrg    if (!R600LoadShaders(pScrn))
2285b7e1c893Smrg	return FALSE;
2286b7e1c893Smrg
2287b7e1c893Smrg    exaMarkSync(pScreen);
2288b7e1c893Smrg
2289b7e1c893Smrg    return TRUE;
2290b7e1c893Smrg
2291b7e1c893Smrg}
2292b7e1c893Smrg
2293