r600_exa.c revision c135eceb
1b7e1c893Smrg/*
2b7e1c893Smrg * Copyright 2008 Advanced Micro Devices, Inc.
3b7e1c893Smrg *
4b7e1c893Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b7e1c893Smrg * copy of this software and associated documentation files (the "Software"),
6b7e1c893Smrg * to deal in the Software without restriction, including without limitation
7b7e1c893Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b7e1c893Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b7e1c893Smrg * Software is furnished to do so, subject to the following conditions:
10b7e1c893Smrg *
11b7e1c893Smrg * The above copyright notice and this permission notice (including the next
12b7e1c893Smrg * paragraph) shall be included in all copies or substantial portions of the
13b7e1c893Smrg * Software.
14b7e1c893Smrg *
15b7e1c893Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b7e1c893Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b7e1c893Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b7e1c893Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b7e1c893Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b7e1c893Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b7e1c893Smrg * SOFTWARE.
22b7e1c893Smrg *
23b7e1c893Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24b7e1c893Smrg *
25b7e1c893Smrg */
26b7e1c893Smrg
27b7e1c893Smrg#ifdef HAVE_CONFIG_H
28b7e1c893Smrg#include "config.h"
29b7e1c893Smrg#endif
30b7e1c893Smrg
31b7e1c893Smrg#include "xf86.h"
32b7e1c893Smrg
33b7e1c893Smrg#include "exa.h"
34b7e1c893Smrg
35b7e1c893Smrg#include "radeon.h"
36b7e1c893Smrg#include "radeon_macros.h"
37b7e1c893Smrg#include "radeon_reg.h"
38b7e1c893Smrg#include "r600_shader.h"
39b7e1c893Smrg#include "r600_reg.h"
40b7e1c893Smrg#include "r600_state.h"
410974d292Smrg#include "radeon_exa_shared.h"
42ad43ddacSmrg#include "radeon_vbo.h"
43ad43ddacSmrg
44b7e1c893Smrg/* #define SHOW_VERTEXES */
45b7e1c893Smrg
46ad43ddacSmrgBool
47ad43ddacSmrgR600SetAccelState(ScrnInfoPtr pScrn,
48ad43ddacSmrg		  struct r600_accel_object *src0,
49ad43ddacSmrg		  struct r600_accel_object *src1,
50ad43ddacSmrg		  struct r600_accel_object *dst,
51ad43ddacSmrg		  uint32_t vs_offset, uint32_t ps_offset,
52ad43ddacSmrg		  int rop, Pixel planemask)
53ad43ddacSmrg{
54ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
55ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
56b13dfe66Smrg    uint32_t pitch_align = 0x7, base_align = 0xff;
57b13dfe66Smrg#if defined(XF86DRM_MODE)
58b13dfe66Smrg    int ret;
59b13dfe66Smrg#endif
60ad43ddacSmrg
61ad43ddacSmrg    if (src0) {
62ad43ddacSmrg	memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object));
63ad43ddacSmrg	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
64b13dfe66Smrg#if defined(XF86DRM_MODE)
65f3a0071aSrjs	if (info->cs && src0->surface) {
66f3a0071aSrjs		accel_state->src_size[0] = src0->surface->bo_size;
67b13dfe66Smrg	}
68b13dfe66Smrg#endif
69f3a0071aSrjs
70b13dfe66Smrg	/* bad pitch */
71b13dfe66Smrg	if (accel_state->src_obj[0].pitch & pitch_align)
72b13dfe66Smrg	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
73b13dfe66Smrg
74b13dfe66Smrg	/* bad offset */
75b13dfe66Smrg	if (accel_state->src_obj[0].offset & base_align)
76b13dfe66Smrg	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[0].offset));
77b13dfe66Smrg
78ad43ddacSmrg    } else {
79ad43ddacSmrg	memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
80ad43ddacSmrg	accel_state->src_size[0] = 0;
81ad43ddacSmrg    }
82ad43ddacSmrg
83ad43ddacSmrg    if (src1) {
84ad43ddacSmrg	memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object));
85ad43ddacSmrg	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
86b13dfe66Smrg#if defined(XF86DRM_MODE)
87f3a0071aSrjs	if (info->cs && src1->surface) {
88f3a0071aSrjs		accel_state->src_size[1] = src1->surface->bo_size;
89b13dfe66Smrg	}
90b13dfe66Smrg#endif
91f3a0071aSrjs
92b13dfe66Smrg	/* bad pitch */
93b13dfe66Smrg	if (accel_state->src_obj[1].pitch & pitch_align)
94b13dfe66Smrg	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
95b13dfe66Smrg
96b13dfe66Smrg	/* bad offset */
97b13dfe66Smrg	if (accel_state->src_obj[1].offset & base_align)
98b13dfe66Smrg	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[1].offset));
99ad43ddacSmrg    } else {
100ad43ddacSmrg	memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
101ad43ddacSmrg	accel_state->src_size[1] = 0;
102ad43ddacSmrg    }
103ad43ddacSmrg
104ad43ddacSmrg    if (dst) {
105ad43ddacSmrg	memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object));
106ad43ddacSmrg	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
107b13dfe66Smrg#if defined(XF86DRM_MODE)
108f3a0071aSrjs	if (info->cs && dst->surface) {
109f3a0071aSrjs		accel_state->dst_size = dst->surface->bo_size;
110f3a0071aSrjs	} else
111b13dfe66Smrg#endif
112f3a0071aSrjs	{
113f3a0071aSrjs		accel_state->dst_obj.tiling_flags = 0;
114f3a0071aSrjs	}
115b13dfe66Smrg	if (accel_state->dst_obj.pitch & pitch_align)
116b13dfe66Smrg	    RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
117b13dfe66Smrg
118b13dfe66Smrg	if (accel_state->dst_obj.offset & base_align)
119b13dfe66Smrg	    RADEON_FALLBACK(("Bad dst offset 0x%08x\n", accel_state->dst_obj.offset));
120ad43ddacSmrg    } else {
121ad43ddacSmrg	memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
122ad43ddacSmrg	accel_state->dst_size = 0;
123ad43ddacSmrg    }
124ad43ddacSmrg
125f3a0071aSrjs#ifdef XF86DRM_MODE
126f3a0071aSrjs    if (info->cs && CS_FULL(info->cs))
127f3a0071aSrjs	radeon_cs_flush_indirect(pScrn);
128f3a0071aSrjs#endif
129f3a0071aSrjs
130ad43ddacSmrg    accel_state->rop = rop;
131ad43ddacSmrg    accel_state->planemask = planemask;
132ad43ddacSmrg
133ad43ddacSmrg    accel_state->vs_size = 512;
134ad43ddacSmrg    accel_state->ps_size = 512;
135ad43ddacSmrg#if defined(XF86DRM_MODE)
136ad43ddacSmrg    if (info->cs) {
137ad43ddacSmrg	accel_state->vs_mc_addr = vs_offset;
138ad43ddacSmrg	accel_state->ps_mc_addr = ps_offset;
139ad43ddacSmrg
140ad43ddacSmrg	radeon_cs_space_reset_bos(info->cs);
141ad43ddacSmrg	radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
142ad43ddacSmrg					  RADEON_GEM_DOMAIN_VRAM, 0);
143ad43ddacSmrg	if (accel_state->src_obj[0].bo)
144ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
145ad43ddacSmrg					      accel_state->src_obj[0].domain, 0);
146ad43ddacSmrg	if (accel_state->src_obj[1].bo)
147ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
148ad43ddacSmrg					      accel_state->src_obj[1].domain, 0);
149ad43ddacSmrg	if (accel_state->dst_obj.bo)
150ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
151ad43ddacSmrg					      0, accel_state->dst_obj.domain);
152ad43ddacSmrg	ret = radeon_cs_space_check(info->cs);
153ad43ddacSmrg	if (ret)
154ad43ddacSmrg	    RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
155ad43ddacSmrg
156ad43ddacSmrg    } else
157ad43ddacSmrg#endif
158ad43ddacSmrg    {
159ad43ddacSmrg	accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
160ad43ddacSmrg	    vs_offset;
161ad43ddacSmrg	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
162ad43ddacSmrg	    ps_offset;
163ad43ddacSmrg    }
164ad43ddacSmrg
165ad43ddacSmrg    return TRUE;
166ad43ddacSmrg}
167ad43ddacSmrg
168b7e1c893Smrgstatic Bool
169b7e1c893SmrgR600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
170b7e1c893Smrg{
171c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
172b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
173b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
174b7e1c893Smrg    cb_config_t     cb_conf;
175b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
176b7e1c893Smrg    uint32_t a, r, g, b;
177b7e1c893Smrg    float ps_alu_consts[4];
178ad43ddacSmrg    struct r600_accel_object dst;
179b7e1c893Smrg
1800974d292Smrg    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
181ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype failed\n"));
1820974d292Smrg    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
183ad43ddacSmrg	RADEON_FALLBACK(("invalid planemask\n"));
184b7e1c893Smrg
185ad43ddacSmrg#if defined(XF86DRM_MODE)
186ad43ddacSmrg    if (info->cs) {
187ad43ddacSmrg	dst.offset = 0;
188ad43ddacSmrg	dst.bo = radeon_get_pixmap_bo(pPix);
189166b61b6Smrg	dst.tiling_flags = radeon_get_pixmap_tiling(pPix);
190f3a0071aSrjs	dst.surface = radeon_get_pixmap_surface(pPix);
191ad43ddacSmrg    } else
192ad43ddacSmrg#endif
193ad43ddacSmrg    {
194ad43ddacSmrg	dst.offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
195ad43ddacSmrg	dst.bo = NULL;
196ad43ddacSmrg    }
197b7e1c893Smrg
198ad43ddacSmrg    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
199ad43ddacSmrg    dst.width = pPix->drawable.width;
200ad43ddacSmrg    dst.height = pPix->drawable.height;
201ad43ddacSmrg    dst.bpp = pPix->drawable.bitsPerPixel;
202ad43ddacSmrg    dst.domain = RADEON_GEM_DOMAIN_VRAM;
2030974d292Smrg
204ad43ddacSmrg    if (!R600SetAccelState(pScrn,
205ad43ddacSmrg			   NULL,
206ad43ddacSmrg			   NULL,
207ad43ddacSmrg			   &dst,
208ad43ddacSmrg			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
209ad43ddacSmrg			   alu, pm))
210b7e1c893Smrg	return FALSE;
211b7e1c893Smrg
212b7e1c893Smrg    CLEAR (cb_conf);
213b7e1c893Smrg    CLEAR (vs_conf);
214b7e1c893Smrg    CLEAR (ps_conf);
215b7e1c893Smrg
216921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
2170974d292Smrg    radeon_cp_start(pScrn);
218b7e1c893Smrg
219921a55d8Smrg    r600_set_default_state(pScrn, accel_state->ib);
220b7e1c893Smrg
221921a55d8Smrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
222921a55d8Smrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
223921a55d8Smrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
224b7e1c893Smrg
225b7e1c893Smrg    /* Shader */
226b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
2270974d292Smrg    vs_conf.shader_size         = accel_state->vs_size;
228b7e1c893Smrg    vs_conf.num_gprs            = 2;
229b7e1c893Smrg    vs_conf.stack_size          = 0;
230ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
231921a55d8Smrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
232b7e1c893Smrg
233b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
2340974d292Smrg    ps_conf.shader_size         = accel_state->ps_size;
235b7e1c893Smrg    ps_conf.num_gprs            = 1;
236b7e1c893Smrg    ps_conf.stack_size          = 0;
237b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
238b7e1c893Smrg    ps_conf.clamp_consts        = 0;
239b7e1c893Smrg    ps_conf.export_mode         = 2;
240ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
241921a55d8Smrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
242b7e1c893Smrg
243b7e1c893Smrg    cb_conf.id = 0;
244ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
245ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
246ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
247ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
248f3a0071aSrjs#ifdef XF86DRM_MODE
249f3a0071aSrjs    cb_conf.surface = accel_state->dst_obj.surface;
250f3a0071aSrjs#endif
251b7e1c893Smrg
252ad43ddacSmrg    if (accel_state->dst_obj.bpp == 8) {
253b7e1c893Smrg	cb_conf.format = COLOR_8;
254b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
255ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 16) {
256b7e1c893Smrg	cb_conf.format = COLOR_5_6_5;
257b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
258b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
259b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN16;
260b13dfe66Smrg#endif
261b7e1c893Smrg    } else {
262b7e1c893Smrg	cb_conf.format = COLOR_8_8_8_8;
263b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
264b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
265b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN32;
266b13dfe66Smrg#endif
267b7e1c893Smrg    }
268b7e1c893Smrg    cb_conf.source_format = 1;
269b7e1c893Smrg    cb_conf.blend_clamp = 1;
2700974d292Smrg    /* Render setup */
2710974d292Smrg    if (accel_state->planemask & 0x000000ff)
272b13dfe66Smrg	cb_conf.pmask |= 4; /* B */
2730974d292Smrg    if (accel_state->planemask & 0x0000ff00)
274b13dfe66Smrg	cb_conf.pmask |= 2; /* G */
2750974d292Smrg    if (accel_state->planemask & 0x00ff0000)
276b13dfe66Smrg	cb_conf.pmask |= 1; /* R */
2770974d292Smrg    if (accel_state->planemask & 0xff000000)
278b13dfe66Smrg	cb_conf.pmask |= 8; /* A */
279b13dfe66Smrg    cb_conf.rop = accel_state->rop;
280b13dfe66Smrg    if (accel_state->dst_obj.tiling_flags == 0)
281f3a0071aSrjs	cb_conf.array_mode = 0;
282b13dfe66Smrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
283b13dfe66Smrg
284b13dfe66Smrg    r600_set_spi(pScrn, accel_state->ib, 0, 0);
285b7e1c893Smrg
286b7e1c893Smrg    /* PS alu constants */
287ad43ddacSmrg    if (accel_state->dst_obj.bpp == 16) {
288b7e1c893Smrg	r = (fg >> 11) & 0x1f;
289b7e1c893Smrg	g = (fg >> 5) & 0x3f;
290b7e1c893Smrg	b = (fg >> 0) & 0x1f;
291b7e1c893Smrg	ps_alu_consts[0] = (float)r / 31; /* R */
292b7e1c893Smrg	ps_alu_consts[1] = (float)g / 63; /* G */
293b7e1c893Smrg	ps_alu_consts[2] = (float)b / 31; /* B */
294b7e1c893Smrg	ps_alu_consts[3] = 1.0; /* A */
295ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 8) {
296b7e1c893Smrg	a = (fg >> 0) & 0xff;
297b7e1c893Smrg	ps_alu_consts[0] = 0.0; /* R */
298b7e1c893Smrg	ps_alu_consts[1] = 0.0; /* G */
299b7e1c893Smrg	ps_alu_consts[2] = 0.0; /* B */
300b7e1c893Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
301b7e1c893Smrg    } else {
302b7e1c893Smrg	a = (fg >> 24) & 0xff;
303b7e1c893Smrg	r = (fg >> 16) & 0xff;
304b7e1c893Smrg	g = (fg >> 8) & 0xff;
305b7e1c893Smrg	b = (fg >> 0) & 0xff;
306b7e1c893Smrg	ps_alu_consts[0] = (float)r / 255; /* R */
307b7e1c893Smrg	ps_alu_consts[1] = (float)g / 255; /* G */
308b7e1c893Smrg	ps_alu_consts[2] = (float)b / 255; /* B */
309b7e1c893Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
310b7e1c893Smrg    }
311921a55d8Smrg    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
312921a55d8Smrg			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
313b7e1c893Smrg
314ad43ddacSmrg    if (accel_state->vsync)
3150974d292Smrg	RADEONVlineHelperClear(pScrn);
316b7e1c893Smrg
317f3a0071aSrjs    accel_state->dst_pix = pPix;
318f3a0071aSrjs    accel_state->fg = fg;
319f3a0071aSrjs
320b7e1c893Smrg    return TRUE;
321b7e1c893Smrg}
322b7e1c893Smrg
323f3a0071aSrjsstatic void
324f3a0071aSrjsR600DoneSolid(PixmapPtr pPix)
325f3a0071aSrjs{
326c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
327f3a0071aSrjs    RADEONInfoPtr info = RADEONPTR(pScrn);
328f3a0071aSrjs    struct radeon_accel_state *accel_state = info->accel_state;
329f3a0071aSrjs
330f3a0071aSrjs    if (accel_state->vsync)
331f3a0071aSrjs	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
332f3a0071aSrjs				accel_state->vline_crtc,
333f3a0071aSrjs				accel_state->vline_y1,
334f3a0071aSrjs				accel_state->vline_y2);
335f3a0071aSrjs
336f3a0071aSrjs    r600_finish_op(pScrn, 8);
337f3a0071aSrjs}
338b7e1c893Smrg
339b7e1c893Smrgstatic void
340b7e1c893SmrgR600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
341b7e1c893Smrg{
342c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
343b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
344b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
345b7e1c893Smrg    float *vb;
346b7e1c893Smrg
347f3a0071aSrjs#ifdef XF86DRM_MODE
348f3a0071aSrjs    if (info->cs && CS_FULL(info->cs)) {
349f3a0071aSrjs	R600DoneSolid(info->accel_state->dst_pix);
350f3a0071aSrjs	radeon_cs_flush_indirect(pScrn);
351f3a0071aSrjs	R600PrepareSolid(accel_state->dst_pix,
352f3a0071aSrjs			 accel_state->rop,
353f3a0071aSrjs			 accel_state->planemask,
354f3a0071aSrjs			 accel_state->fg);
355f3a0071aSrjs    }
356f3a0071aSrjs#endif
357f3a0071aSrjs
358ad43ddacSmrg    if (accel_state->vsync)
3590974d292Smrg	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
360b7e1c893Smrg
361921a55d8Smrg    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
362b7e1c893Smrg
363b7e1c893Smrg    vb[0] = (float)x1;
364b7e1c893Smrg    vb[1] = (float)y1;
365b7e1c893Smrg
366b7e1c893Smrg    vb[2] = (float)x1;
367b7e1c893Smrg    vb[3] = (float)y2;
368b7e1c893Smrg
369b7e1c893Smrg    vb[4] = (float)x2;
370b7e1c893Smrg    vb[5] = (float)y2;
371b7e1c893Smrg
372921a55d8Smrg    radeon_vbo_commit(pScrn, &accel_state->vbo);
373b7e1c893Smrg}
374b7e1c893Smrg
375b7e1c893Smrgstatic void
376ad43ddacSmrgR600DoPrepareCopy(ScrnInfoPtr pScrn)
377b7e1c893Smrg{
378b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
379b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
380b7e1c893Smrg    cb_config_t     cb_conf;
381b7e1c893Smrg    tex_resource_t  tex_res;
382b7e1c893Smrg    tex_sampler_t   tex_samp;
383b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
384b7e1c893Smrg
385b7e1c893Smrg    CLEAR (cb_conf);
386b7e1c893Smrg    CLEAR (tex_res);
387b7e1c893Smrg    CLEAR (tex_samp);
388b7e1c893Smrg    CLEAR (vs_conf);
389b7e1c893Smrg    CLEAR (ps_conf);
390b7e1c893Smrg
391921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
3920974d292Smrg    radeon_cp_start(pScrn);
393b7e1c893Smrg
394921a55d8Smrg    r600_set_default_state(pScrn, accel_state->ib);
395b7e1c893Smrg
396921a55d8Smrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
397921a55d8Smrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
398921a55d8Smrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
399b7e1c893Smrg
400b7e1c893Smrg    /* Shader */
401b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
4020974d292Smrg    vs_conf.shader_size         = accel_state->vs_size;
403b7e1c893Smrg    vs_conf.num_gprs            = 2;
404b7e1c893Smrg    vs_conf.stack_size          = 0;
405ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
406921a55d8Smrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
407b7e1c893Smrg
408b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
4090974d292Smrg    ps_conf.shader_size         = accel_state->ps_size;
410b7e1c893Smrg    ps_conf.num_gprs            = 1;
411b7e1c893Smrg    ps_conf.stack_size          = 0;
412b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
413b7e1c893Smrg    ps_conf.clamp_consts        = 0;
414b7e1c893Smrg    ps_conf.export_mode         = 2;
415ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
416921a55d8Smrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
417b7e1c893Smrg
418b7e1c893Smrg    /* Texture */
419b7e1c893Smrg    tex_res.id                  = 0;
420ad43ddacSmrg    tex_res.w                   = accel_state->src_obj[0].width;
421ad43ddacSmrg    tex_res.h                   = accel_state->src_obj[0].height;
422ad43ddacSmrg    tex_res.pitch               = accel_state->src_obj[0].pitch;
423b7e1c893Smrg    tex_res.depth               = 0;
424b7e1c893Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
425ad43ddacSmrg    tex_res.base                = accel_state->src_obj[0].offset;
426ad43ddacSmrg    tex_res.mip_base            = accel_state->src_obj[0].offset;
4270974d292Smrg    tex_res.size                = accel_state->src_size[0];
428ad43ddacSmrg    tex_res.bo                  = accel_state->src_obj[0].bo;
429ad43ddacSmrg    tex_res.mip_bo              = accel_state->src_obj[0].bo;
430f3a0071aSrjs#ifdef XF86DRM_MODE
431f3a0071aSrjs    tex_res.surface             = accel_state->src_obj[0].surface;
432f3a0071aSrjs#endif
433ad43ddacSmrg    if (accel_state->src_obj[0].bpp == 8) {
434b7e1c893Smrg	tex_res.format              = FMT_8;
435b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
436b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
437b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
438b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
439ad43ddacSmrg    } else if (accel_state->src_obj[0].bpp == 16) {
440b7e1c893Smrg	tex_res.format              = FMT_5_6_5;
441b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
442b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
443b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
444b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
445b7e1c893Smrg    } else {
446b7e1c893Smrg	tex_res.format              = FMT_8_8_8_8;
447b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
448b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
449b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
450b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
451b7e1c893Smrg    }
452b7e1c893Smrg
453b7e1c893Smrg    tex_res.request_size        = 1;
454b7e1c893Smrg    tex_res.base_level          = 0;
455b7e1c893Smrg    tex_res.last_level          = 0;
456b7e1c893Smrg    tex_res.perf_modulation     = 0;
457b13dfe66Smrg    if (accel_state->src_obj[0].tiling_flags == 0)
458b13dfe66Smrg	tex_res.tile_mode           = 1;
459921a55d8Smrg    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
460b7e1c893Smrg
461b7e1c893Smrg    tex_samp.id                 = 0;
462b7e1c893Smrg    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
463b7e1c893Smrg    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
464b7e1c893Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
465b7e1c893Smrg    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
466b7e1c893Smrg    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
467b13dfe66Smrg    tex_samp.mc_coord_truncate  = 1;
468b7e1c893Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
469b7e1c893Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
470921a55d8Smrg    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
471b7e1c893Smrg
472b7e1c893Smrg    cb_conf.id = 0;
473ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
474ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
475ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
476ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
477f3a0071aSrjs#ifdef XF86DRM_MODE
478f3a0071aSrjs    cb_conf.surface = accel_state->dst_obj.surface;
479f3a0071aSrjs#endif
480ad43ddacSmrg    if (accel_state->dst_obj.bpp == 8) {
481b7e1c893Smrg	cb_conf.format = COLOR_8;
482b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
483ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 16) {
484b7e1c893Smrg	cb_conf.format = COLOR_5_6_5;
485b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
486b7e1c893Smrg    } else {
487b7e1c893Smrg	cb_conf.format = COLOR_8_8_8_8;
488b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
489b7e1c893Smrg    }
490b7e1c893Smrg    cb_conf.source_format = 1;
491b7e1c893Smrg    cb_conf.blend_clamp = 1;
492b7e1c893Smrg
4930974d292Smrg    /* Render setup */
4940974d292Smrg    if (accel_state->planemask & 0x000000ff)
495b13dfe66Smrg	cb_conf.pmask |= 4; /* B */
4960974d292Smrg    if (accel_state->planemask & 0x0000ff00)
497b13dfe66Smrg	cb_conf.pmask |= 2; /* G */
4980974d292Smrg    if (accel_state->planemask & 0x00ff0000)
499b13dfe66Smrg	cb_conf.pmask |= 1; /* R */
5000974d292Smrg    if (accel_state->planemask & 0xff000000)
501b13dfe66Smrg	cb_conf.pmask |= 8; /* A */
502b13dfe66Smrg    cb_conf.rop = accel_state->rop;
503b13dfe66Smrg    if (accel_state->dst_obj.tiling_flags == 0)
504f3a0071aSrjs	cb_conf.array_mode = 0;
505b13dfe66Smrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
506b13dfe66Smrg
507b13dfe66Smrg    r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
508b7e1c893Smrg
509b7e1c893Smrg}
510b7e1c893Smrg
511b7e1c893Smrgstatic void
512b7e1c893SmrgR600DoCopy(ScrnInfoPtr pScrn)
513b7e1c893Smrg{
514ad43ddacSmrg    r600_finish_op(pScrn, 16);
515ad43ddacSmrg}
516ad43ddacSmrg
517ad43ddacSmrgstatic void
518ad43ddacSmrgR600DoCopyVline(PixmapPtr pPix)
519ad43ddacSmrg{
520c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
521b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
522b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
523b7e1c893Smrg
524ad43ddacSmrg    if (accel_state->vsync)
525921a55d8Smrg	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
526921a55d8Smrg				accel_state->vline_crtc,
527921a55d8Smrg				accel_state->vline_y1,
528921a55d8Smrg				accel_state->vline_y2);
529b7e1c893Smrg
530ad43ddacSmrg    r600_finish_op(pScrn, 16);
531b7e1c893Smrg}
532b7e1c893Smrg
533b7e1c893Smrgstatic void
534b7e1c893SmrgR600AppendCopyVertex(ScrnInfoPtr pScrn,
535b7e1c893Smrg		     int srcX, int srcY,
536b7e1c893Smrg		     int dstX, int dstY,
537b7e1c893Smrg		     int w, int h)
538b7e1c893Smrg{
539921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
540921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
541b7e1c893Smrg    float *vb;
542b7e1c893Smrg
543921a55d8Smrg    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
544b7e1c893Smrg
545b7e1c893Smrg    vb[0] = (float)dstX;
546b7e1c893Smrg    vb[1] = (float)dstY;
547b7e1c893Smrg    vb[2] = (float)srcX;
548b7e1c893Smrg    vb[3] = (float)srcY;
549b7e1c893Smrg
550b7e1c893Smrg    vb[4] = (float)dstX;
551b7e1c893Smrg    vb[5] = (float)(dstY + h);
552b7e1c893Smrg    vb[6] = (float)srcX;
553b7e1c893Smrg    vb[7] = (float)(srcY + h);
554b7e1c893Smrg
555b7e1c893Smrg    vb[8] = (float)(dstX + w);
556b7e1c893Smrg    vb[9] = (float)(dstY + h);
557b7e1c893Smrg    vb[10] = (float)(srcX + w);
558b7e1c893Smrg    vb[11] = (float)(srcY + h);
559b7e1c893Smrg
560921a55d8Smrg    radeon_vbo_commit(pScrn, &accel_state->vbo);
561b7e1c893Smrg}
562b7e1c893Smrg
563b7e1c893Smrgstatic Bool
564b7e1c893SmrgR600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
565b7e1c893Smrg		int xdir, int ydir,
566b7e1c893Smrg		int rop,
567b7e1c893Smrg		Pixel planemask)
568b7e1c893Smrg{
569c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
570b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
571b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
572ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
573ad43ddacSmrg
5740974d292Smrg    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
575ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype src failed\n"));
5760974d292Smrg    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
577ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype dst failed\n"));
5780974d292Smrg    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
579ad43ddacSmrg	RADEON_FALLBACK(("Invalid planemask\n"));
580ad43ddacSmrg
581ad43ddacSmrg    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
582ad43ddacSmrg    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
583ad43ddacSmrg
584ad43ddacSmrg    accel_state->same_surface = FALSE;
585ad43ddacSmrg
586ad43ddacSmrg#if defined(XF86DRM_MODE)
587ad43ddacSmrg    if (info->cs) {
588ad43ddacSmrg	src_obj.offset = 0;
589ad43ddacSmrg	dst_obj.offset = 0;
590ad43ddacSmrg	src_obj.bo = radeon_get_pixmap_bo(pSrc);
591ad43ddacSmrg	dst_obj.bo = radeon_get_pixmap_bo(pDst);
592166b61b6Smrg	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
593166b61b6Smrg	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
594f3a0071aSrjs	src_obj.surface = radeon_get_pixmap_surface(pSrc);
595f3a0071aSrjs	dst_obj.surface = radeon_get_pixmap_surface(pDst);
596ad43ddacSmrg	if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
597ad43ddacSmrg	    accel_state->same_surface = TRUE;
598ad43ddacSmrg    } else
599b7e1c893Smrg#endif
600ad43ddacSmrg    {
601ad43ddacSmrg	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
602ad43ddacSmrg	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
603ad43ddacSmrg	if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst))
604ad43ddacSmrg	    accel_state->same_surface = TRUE;
605ad43ddacSmrg	src_obj.bo = NULL;
606ad43ddacSmrg	dst_obj.bo = NULL;
607b7e1c893Smrg    }
608b7e1c893Smrg
609ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
610ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
611ad43ddacSmrg    src_obj.bpp = pSrc->drawable.bitsPerPixel;
612ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
613921a55d8Smrg
614ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
615ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
616ad43ddacSmrg    dst_obj.bpp = pDst->drawable.bitsPerPixel;
617ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
618ad43ddacSmrg
619ad43ddacSmrg    if (!R600SetAccelState(pScrn,
620ad43ddacSmrg			   &src_obj,
621ad43ddacSmrg			   NULL,
622ad43ddacSmrg			   &dst_obj,
623ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
624ad43ddacSmrg			   rop, planemask))
625b7e1c893Smrg	return FALSE;
626b7e1c893Smrg
627ad43ddacSmrg    if (accel_state->same_surface == TRUE) {
62830d12090Smrg#if defined(XF86DRM_MODE)
629f3a0071aSrjs	unsigned long size = accel_state->dst_obj.surface->bo_size;
630f3a0071aSrjs	unsigned long align = accel_state->dst_obj.surface->bo_alignment;
63130d12090Smrg#else
63230d12090Smrg	unsigned height = pDst->drawable.height;
633b13dfe66Smrg	unsigned long size = height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
634f3a0071aSrjs#endif
635b7e1c893Smrg
636ad43ddacSmrg#if defined(XF86DRM_MODE)
637ad43ddacSmrg	if (info->cs) {
638ad43ddacSmrg	    if (accel_state->copy_area_bo) {
639ad43ddacSmrg		radeon_bo_unref(accel_state->copy_area_bo);
640ad43ddacSmrg		accel_state->copy_area_bo = NULL;
641b7e1c893Smrg	    }
642f3a0071aSrjs	    accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, align,
643ad43ddacSmrg						       RADEON_GEM_DOMAIN_VRAM,
644ad43ddacSmrg						       0);
645ad43ddacSmrg	    if (accel_state->copy_area_bo == NULL)
646ad43ddacSmrg		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
647ad43ddacSmrg
648ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
649f3a0071aSrjs					      0, RADEON_GEM_DOMAIN_VRAM);
650ad43ddacSmrg	    if (radeon_cs_space_check(info->cs)) {
651ad43ddacSmrg		radeon_bo_unref(accel_state->copy_area_bo);
652ad43ddacSmrg		accel_state->copy_area_bo = NULL;
653ad43ddacSmrg		return FALSE;
654ad43ddacSmrg	    }
655ad43ddacSmrg	    accel_state->copy_area = (void*)accel_state->copy_area_bo;
656ad43ddacSmrg	} else
657ad43ddacSmrg#endif
658ad43ddacSmrg	{
659ad43ddacSmrg	    if (accel_state->copy_area) {
660ad43ddacSmrg		exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
661ad43ddacSmrg		accel_state->copy_area = NULL;
662ad43ddacSmrg	    }
663ad43ddacSmrg	    accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
664ad43ddacSmrg	    if (!accel_state->copy_area)
665ad43ddacSmrg		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
666b7e1c893Smrg	}
667ad43ddacSmrg    } else
668ad43ddacSmrg	R600DoPrepareCopy(pScrn);
669b7e1c893Smrg
670ad43ddacSmrg    if (accel_state->vsync)
6710974d292Smrg	RADEONVlineHelperClear(pScrn);
672ad43ddacSmrg
673f3a0071aSrjs    accel_state->dst_pix = pDst;
674f3a0071aSrjs    accel_state->src_pix = pSrc;
675f3a0071aSrjs    accel_state->xdir = xdir;
676f3a0071aSrjs    accel_state->ydir = ydir;
677f3a0071aSrjs
678ad43ddacSmrg    return TRUE;
679b7e1c893Smrg}
680b7e1c893Smrg
681f3a0071aSrjsstatic void
682f3a0071aSrjsR600DoneCopy(PixmapPtr pDst)
683f3a0071aSrjs{
684c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
685f3a0071aSrjs    RADEONInfoPtr info = RADEONPTR(pScrn);
686f3a0071aSrjs    struct radeon_accel_state *accel_state = info->accel_state;
687f3a0071aSrjs
688f3a0071aSrjs    if (!accel_state->same_surface)
689f3a0071aSrjs	R600DoCopyVline(pDst);
690f3a0071aSrjs
691f3a0071aSrjs    if (accel_state->copy_area) {
692f3a0071aSrjs	if (!info->cs)
693f3a0071aSrjs	    exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
694f3a0071aSrjs	accel_state->copy_area = NULL;
695f3a0071aSrjs    }
696f3a0071aSrjs
697f3a0071aSrjs}
698f3a0071aSrjs
699b7e1c893Smrgstatic void
700b7e1c893SmrgR600Copy(PixmapPtr pDst,
701b7e1c893Smrg	 int srcX, int srcY,
702b7e1c893Smrg	 int dstX, int dstY,
703b7e1c893Smrg	 int w, int h)
704b7e1c893Smrg{
705c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
706b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
707b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
708b7e1c893Smrg
709b7e1c893Smrg    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
710b7e1c893Smrg	return;
711b7e1c893Smrg
712f3a0071aSrjs#ifdef XF86DRM_MODE
713f3a0071aSrjs    if (info->cs && CS_FULL(info->cs)) {
714f3a0071aSrjs	R600DoneCopy(info->accel_state->dst_pix);
715f3a0071aSrjs	radeon_cs_flush_indirect(pScrn);
716f3a0071aSrjs	R600PrepareCopy(accel_state->src_pix,
717f3a0071aSrjs			accel_state->dst_pix,
718f3a0071aSrjs			accel_state->xdir,
719f3a0071aSrjs			accel_state->ydir,
720f3a0071aSrjs			accel_state->rop,
721f3a0071aSrjs			accel_state->planemask);
722f3a0071aSrjs    }
723f3a0071aSrjs#endif
724f3a0071aSrjs
725ad43ddacSmrg    if (accel_state->vsync)
7260974d292Smrg	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
727b7e1c893Smrg
728ad43ddacSmrg    if (accel_state->same_surface && accel_state->copy_area) {
729ad43ddacSmrg	uint32_t orig_offset, tmp_offset;
730ad43ddacSmrg	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
731ad43ddacSmrg	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
732b13dfe66Smrg	uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags;
733b13dfe66Smrg	uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags;
734ad43ddacSmrg	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
735f3a0071aSrjs	int orig_rop = accel_state->rop;
736ad43ddacSmrg
737ad43ddacSmrg#if defined(XF86DRM_MODE)
738ad43ddacSmrg	if (info->cs) {
739ad43ddacSmrg	    tmp_offset = 0;
740ad43ddacSmrg	    orig_offset = 0;
741ad43ddacSmrg	} else
742ad43ddacSmrg#endif
743ad43ddacSmrg	{
744b7e1c893Smrg	    tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset;
745b7e1c893Smrg	    orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
746ad43ddacSmrg	}
747b7e1c893Smrg
748ad43ddacSmrg	/* src to tmp */
749ad43ddacSmrg	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
750ad43ddacSmrg	accel_state->dst_obj.bo = accel_state->copy_area_bo;
751ad43ddacSmrg	accel_state->dst_obj.offset = tmp_offset;
752b13dfe66Smrg	accel_state->dst_obj.tiling_flags = 0;
753f3a0071aSrjs	accel_state->rop = 3;
754ad43ddacSmrg	R600DoPrepareCopy(pScrn);
755b7e1c893Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
756b7e1c893Smrg	R600DoCopy(pScrn);
757ad43ddacSmrg
758ad43ddacSmrg	/* tmp to dst */
759ad43ddacSmrg	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
760ad43ddacSmrg	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
761ad43ddacSmrg	accel_state->src_obj[0].offset = tmp_offset;
762b13dfe66Smrg	accel_state->src_obj[0].tiling_flags = 0;
763ad43ddacSmrg	accel_state->dst_obj.domain = orig_dst_domain;
764ad43ddacSmrg	accel_state->dst_obj.bo = orig_bo;
765ad43ddacSmrg	accel_state->dst_obj.offset = orig_offset;
766b13dfe66Smrg	accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags;
767f3a0071aSrjs	accel_state->rop = orig_rop;
768ad43ddacSmrg	R600DoPrepareCopy(pScrn);
769ad43ddacSmrg	R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
770ad43ddacSmrg	R600DoCopyVline(pDst);
771ad43ddacSmrg
772ad43ddacSmrg	/* restore state */
773ad43ddacSmrg	accel_state->src_obj[0].domain = orig_src_domain;
774ad43ddacSmrg	accel_state->src_obj[0].bo = orig_bo;
775ad43ddacSmrg	accel_state->src_obj[0].offset = orig_offset;
776b13dfe66Smrg	accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags;
777ad43ddacSmrg    } else
778b7e1c893Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
779b7e1c893Smrg
780b7e1c893Smrg}
781b7e1c893Smrg
782b7e1c893Smrgstruct blendinfo {
783b7e1c893Smrg    Bool dst_alpha;
784b7e1c893Smrg    Bool src_alpha;
785b7e1c893Smrg    uint32_t blend_cntl;
786b7e1c893Smrg};
787b7e1c893Smrg
788b7e1c893Smrgstatic struct blendinfo R600BlendOp[] = {
789b7e1c893Smrg    /* Clear */
790b7e1c893Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
791b7e1c893Smrg    /* Src */
792b7e1c893Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
793b7e1c893Smrg    /* Dst */
794b7e1c893Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
795b7e1c893Smrg    /* Over */
796b7e1c893Smrg    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
797b7e1c893Smrg    /* OverReverse */
798b7e1c893Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
799b7e1c893Smrg    /* In */
800b7e1c893Smrg    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
801b7e1c893Smrg    /* InReverse */
802b7e1c893Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
803b7e1c893Smrg    /* Out */
804b7e1c893Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
805b7e1c893Smrg    /* OutReverse */
806b7e1c893Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
807b7e1c893Smrg    /* Atop */
808b7e1c893Smrg    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
809b7e1c893Smrg    /* AtopReverse */
810b7e1c893Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
811b7e1c893Smrg    /* Xor */
812b7e1c893Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
813b7e1c893Smrg    /* Add */
814b7e1c893Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
815b7e1c893Smrg};
816b7e1c893Smrg
/* Pairs a picture format code with the texture format the card is given. */
struct formatinfo {
    unsigned int fmt;		/* picture format code (PICT_*) */
    uint32_t card_fmt;		/* matching hardware texture format (FMT_*) */
};
821b7e1c893Smrg
822b7e1c893Smrgstatic struct formatinfo R600TexFormats[] = {
823b7e1c893Smrg    {PICT_a8r8g8b8,	FMT_8_8_8_8},
824b7e1c893Smrg    {PICT_x8r8g8b8,	FMT_8_8_8_8},
825b7e1c893Smrg    {PICT_a8b8g8r8,	FMT_8_8_8_8},
826b7e1c893Smrg    {PICT_x8b8g8r8,	FMT_8_8_8_8},
827ad43ddacSmrg#ifdef PICT_TYPE_BGRA
828ad43ddacSmrg    {PICT_b8g8r8a8,	FMT_8_8_8_8},
829ad43ddacSmrg    {PICT_b8g8r8x8,	FMT_8_8_8_8},
830ad43ddacSmrg#endif
831b7e1c893Smrg    {PICT_r5g6b5,	FMT_5_6_5},
832b7e1c893Smrg    {PICT_a1r5g5b5,	FMT_1_5_5_5},
833b7e1c893Smrg    {PICT_x1r5g5b5,     FMT_1_5_5_5},
834b7e1c893Smrg    {PICT_a8,		FMT_8},
835b7e1c893Smrg};
836b7e1c893Smrg
837b7e1c893Smrgstatic uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
838b7e1c893Smrg{
839b7e1c893Smrg    uint32_t sblend, dblend;
840b7e1c893Smrg
841b7e1c893Smrg    sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
842b7e1c893Smrg    dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
843b7e1c893Smrg
844b7e1c893Smrg    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
845b7e1c893Smrg     * it as always 1.
846b7e1c893Smrg     */
847b7e1c893Smrg    if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
848b7e1c893Smrg	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
849b7e1c893Smrg	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
850b7e1c893Smrg	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
851b7e1c893Smrg	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
852b7e1c893Smrg    }
853b7e1c893Smrg
854b7e1c893Smrg    /* If the source alpha is being used, then we should only be in a case where
855b7e1c893Smrg     * the source blend factor is 0, and the source blend value is the mask
856b7e1c893Smrg     * channels multiplied by the source picture's alpha.
857b7e1c893Smrg     */
858b7e1c893Smrg    if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
859b7e1c893Smrg	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
860b7e1c893Smrg	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
861b7e1c893Smrg	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
862b7e1c893Smrg	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
863b7e1c893Smrg	}
864b7e1c893Smrg    }
865b7e1c893Smrg
866b7e1c893Smrg    return sblend | dblend;
867b7e1c893Smrg}
868b7e1c893Smrg
869b7e1c893Smrgstatic Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
870b7e1c893Smrg{
871b7e1c893Smrg    switch (pDstPicture->format) {
872b7e1c893Smrg    case PICT_a8r8g8b8:
873b7e1c893Smrg    case PICT_x8r8g8b8:
874ad43ddacSmrg    case PICT_a8b8g8r8:
875ad43ddacSmrg    case PICT_x8b8g8r8:
876ad43ddacSmrg#ifdef PICT_TYPE_BGRA
877ad43ddacSmrg    case PICT_b8g8r8a8:
878ad43ddacSmrg    case PICT_b8g8r8x8:
879ad43ddacSmrg#endif
880b7e1c893Smrg	*dst_format = COLOR_8_8_8_8;
881b7e1c893Smrg	break;
882b7e1c893Smrg    case PICT_r5g6b5:
883b7e1c893Smrg	*dst_format = COLOR_5_6_5;
884b7e1c893Smrg	break;
885b7e1c893Smrg    case PICT_a1r5g5b5:
886b7e1c893Smrg    case PICT_x1r5g5b5:
887b7e1c893Smrg	*dst_format = COLOR_1_5_5_5;
888b7e1c893Smrg	break;
889b7e1c893Smrg    case PICT_a8:
890b7e1c893Smrg	*dst_format = COLOR_8;
891b7e1c893Smrg	break;
892b7e1c893Smrg    default:
893b7e1c893Smrg	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
894b7e1c893Smrg	       (int)pDstPicture->format));
895b7e1c893Smrg    }
896b7e1c893Smrg    return TRUE;
897b7e1c893Smrg}
898b7e1c893Smrg
899b7e1c893Smrgstatic Bool R600CheckCompositeTexture(PicturePtr pPict,
900b7e1c893Smrg				      PicturePtr pDstPict,
901b7e1c893Smrg				      int op,
902b7e1c893Smrg				      int unit)
903b7e1c893Smrg{
904ad43ddacSmrg    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
905b7e1c893Smrg    unsigned int i;
906b7e1c893Smrg
907b7e1c893Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
908b7e1c893Smrg	if (R600TexFormats[i].fmt == pPict->format)
909b7e1c893Smrg	    break;
910b7e1c893Smrg    }
911b7e1c893Smrg    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
912b7e1c893Smrg	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
913b7e1c893Smrg			 (int)pPict->format));
914b7e1c893Smrg
915b7e1c893Smrg    if (pPict->filter != PictFilterNearest &&
916b7e1c893Smrg	pPict->filter != PictFilterBilinear)
917b7e1c893Smrg	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
918b7e1c893Smrg
919b7e1c893Smrg    /* for REPEAT_NONE, Render semantics are that sampling outside the source
920b7e1c893Smrg     * picture results in alpha=0 pixels. We can implement this with a border color
921b7e1c893Smrg     * *if* our source texture has an alpha channel, otherwise we need to fall
922b7e1c893Smrg     * back. If we're not transformed then we hope that upper layers have clipped
923b7e1c893Smrg     * rendering to the bounds of the source drawable, in which case it doesn't
924b7e1c893Smrg     * matter. I have not, however, verified that the X server always does such
925b7e1c893Smrg     * clipping.
926b7e1c893Smrg     */
927b7e1c893Smrg    /* FIXME R6xx */
928ad43ddacSmrg    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
929b7e1c893Smrg	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
930b7e1c893Smrg	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
931b7e1c893Smrg    }
932b7e1c893Smrg
933b13dfe66Smrg    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
934921a55d8Smrg	RADEON_FALLBACK(("non-affine transforms not supported\n"));
935921a55d8Smrg
936b7e1c893Smrg    return TRUE;
937b7e1c893Smrg}
938b7e1c893Smrg
939b7e1c893Smrgstatic Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
940b7e1c893Smrg					int unit)
941b7e1c893Smrg{
942c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
943b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
944b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
945c135ecebSveego    unsigned int repeatType;
946b7e1c893Smrg    unsigned int i;
947b7e1c893Smrg    tex_resource_t  tex_res;
948b7e1c893Smrg    tex_sampler_t   tex_samp;
949b7e1c893Smrg    int pix_r, pix_g, pix_b, pix_a;
950ad43ddacSmrg    float vs_alu_consts[8];
951b7e1c893Smrg
952b7e1c893Smrg    CLEAR (tex_res);
953b7e1c893Smrg    CLEAR (tex_samp);
954b7e1c893Smrg
955b7e1c893Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
956b7e1c893Smrg	if (R600TexFormats[i].fmt == pPict->format)
957b7e1c893Smrg	    break;
958b7e1c893Smrg    }
959b7e1c893Smrg
960b7e1c893Smrg    /* Texture */
961c135ecebSveego    if (pPict->pDrawable) {
962c135ecebSveego	tex_res.w               = pPict->pDrawable->width;
963c135ecebSveego	tex_res.h               = pPict->pDrawable->height;
964c135ecebSveego	repeatType              = pPict->repeat ? pPict->repeatType : RepeatNone;
965c135ecebSveego    } else {
966c135ecebSveego	tex_res.w               = 1;
967c135ecebSveego	tex_res.h               = 1;
968c135ecebSveego	repeatType              = RepeatNormal;
969c135ecebSveego    }
970b7e1c893Smrg    tex_res.id                  = unit;
971ad43ddacSmrg    tex_res.pitch               = accel_state->src_obj[unit].pitch;
972b7e1c893Smrg    tex_res.depth               = 0;
973b7e1c893Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
974ad43ddacSmrg    tex_res.base                = accel_state->src_obj[unit].offset;
975ad43ddacSmrg    tex_res.mip_base            = accel_state->src_obj[unit].offset;
9760974d292Smrg    tex_res.size                = accel_state->src_size[unit];
977b7e1c893Smrg    tex_res.format              = R600TexFormats[i].card_fmt;
978ad43ddacSmrg    tex_res.bo                  = accel_state->src_obj[unit].bo;
979ad43ddacSmrg    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
980f3a0071aSrjs#ifdef XF86DRM_MODE
981f3a0071aSrjs    tex_res.surface             = accel_state->src_obj[unit].surface;
982f3a0071aSrjs#endif
983b7e1c893Smrg    tex_res.request_size        = 1;
984b7e1c893Smrg
985b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
986b13dfe66Smrg    switch (accel_state->src_obj[unit].bpp) {
987b13dfe66Smrg    case 16:
988b13dfe66Smrg	tex_res.endian = SQ_ENDIAN_8IN16;
989b13dfe66Smrg	break;
990b13dfe66Smrg    case 32:
991b13dfe66Smrg	tex_res.endian = SQ_ENDIAN_8IN32;
992b13dfe66Smrg	break;
993b13dfe66Smrg    default :
994b13dfe66Smrg	break;
995b13dfe66Smrg    }
996b13dfe66Smrg#endif
997b13dfe66Smrg
998b7e1c893Smrg    /* component swizzles */
999b7e1c893Smrg    switch (pPict->format) {
1000b7e1c893Smrg    case PICT_a1r5g5b5:
1001b7e1c893Smrg    case PICT_a8r8g8b8:
1002b7e1c893Smrg	pix_r = SQ_SEL_Z; /* R */
1003b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1004b7e1c893Smrg	pix_b = SQ_SEL_X; /* B */
1005b7e1c893Smrg	pix_a = SQ_SEL_W; /* A */
1006b7e1c893Smrg	break;
1007b7e1c893Smrg    case PICT_a8b8g8r8:
1008b7e1c893Smrg	pix_r = SQ_SEL_X; /* R */
1009b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1010b7e1c893Smrg	pix_b = SQ_SEL_Z; /* B */
1011b7e1c893Smrg	pix_a = SQ_SEL_W; /* A */
1012b7e1c893Smrg	break;
1013b7e1c893Smrg    case PICT_x8b8g8r8:
1014b7e1c893Smrg	pix_r = SQ_SEL_X; /* R */
1015b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1016b7e1c893Smrg	pix_b = SQ_SEL_Z; /* B */
1017b7e1c893Smrg	pix_a = SQ_SEL_1; /* A */
1018b7e1c893Smrg	break;
1019ad43ddacSmrg#ifdef PICT_TYPE_BGRA
1020ad43ddacSmrg    case PICT_b8g8r8a8:
1021ad43ddacSmrg	pix_r = SQ_SEL_Y; /* R */
1022ad43ddacSmrg	pix_g = SQ_SEL_Z; /* G */
1023ad43ddacSmrg	pix_b = SQ_SEL_W; /* B */
1024ad43ddacSmrg	pix_a = SQ_SEL_X; /* A */
1025ad43ddacSmrg	break;
1026ad43ddacSmrg    case PICT_b8g8r8x8:
1027ad43ddacSmrg	pix_r = SQ_SEL_Y; /* R */
1028ad43ddacSmrg	pix_g = SQ_SEL_Z; /* G */
1029ad43ddacSmrg	pix_b = SQ_SEL_W; /* B */
1030ad43ddacSmrg	pix_a = SQ_SEL_1; /* A */
1031ad43ddacSmrg	break;
1032ad43ddacSmrg#endif
1033b7e1c893Smrg    case PICT_x1r5g5b5:
1034b7e1c893Smrg    case PICT_x8r8g8b8:
1035b7e1c893Smrg    case PICT_r5g6b5:
1036b7e1c893Smrg	pix_r = SQ_SEL_Z; /* R */
1037b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1038b7e1c893Smrg	pix_b = SQ_SEL_X; /* B */
1039b7e1c893Smrg	pix_a = SQ_SEL_1; /* A */
1040b7e1c893Smrg	break;
1041b7e1c893Smrg    case PICT_a8:
1042b7e1c893Smrg	pix_r = SQ_SEL_0; /* R */
1043b7e1c893Smrg	pix_g = SQ_SEL_0; /* G */
1044b7e1c893Smrg	pix_b = SQ_SEL_0; /* B */
1045b7e1c893Smrg	pix_a = SQ_SEL_X; /* A */
1046b7e1c893Smrg	break;
1047b7e1c893Smrg    default:
1048b7e1c893Smrg	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
1049b7e1c893Smrg    }
1050b7e1c893Smrg
1051b7e1c893Smrg    if (unit == 0) {
1052ad43ddacSmrg	if (!accel_state->msk_pic) {
1053b7e1c893Smrg	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
1054b7e1c893Smrg		pix_r = SQ_SEL_0;
1055b7e1c893Smrg		pix_g = SQ_SEL_0;
1056b7e1c893Smrg		pix_b = SQ_SEL_0;
1057b7e1c893Smrg	    }
1058b7e1c893Smrg
1059b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1060b7e1c893Smrg		pix_a = SQ_SEL_1;
1061b7e1c893Smrg	} else {
1062b7e1c893Smrg	    if (accel_state->component_alpha) {
1063b7e1c893Smrg		if (accel_state->src_alpha) {
1064b7e1c893Smrg		    if (PICT_FORMAT_A(pPict->format) == 0) {
1065b7e1c893Smrg			pix_r = SQ_SEL_1;
1066b7e1c893Smrg			pix_g = SQ_SEL_1;
1067b7e1c893Smrg			pix_b = SQ_SEL_1;
1068b7e1c893Smrg			pix_a = SQ_SEL_1;
1069b7e1c893Smrg		    } else {
1070b7e1c893Smrg			pix_r = pix_a;
1071b7e1c893Smrg			pix_g = pix_a;
1072b7e1c893Smrg			pix_b = pix_a;
1073b7e1c893Smrg		    }
1074b7e1c893Smrg		} else {
1075b7e1c893Smrg		    if (PICT_FORMAT_A(pPict->format) == 0)
1076b7e1c893Smrg			pix_a = SQ_SEL_1;
1077b7e1c893Smrg		}
1078b7e1c893Smrg	    } else {
1079b7e1c893Smrg		if (PICT_FORMAT_RGB(pPict->format) == 0) {
1080b7e1c893Smrg		    pix_r = SQ_SEL_0;
1081b7e1c893Smrg		    pix_g = SQ_SEL_0;
1082b7e1c893Smrg		    pix_b = SQ_SEL_0;
1083b7e1c893Smrg		}
1084b7e1c893Smrg
1085b7e1c893Smrg		if (PICT_FORMAT_A(pPict->format) == 0)
1086b7e1c893Smrg		    pix_a = SQ_SEL_1;
1087b7e1c893Smrg	    }
1088b7e1c893Smrg	}
1089b7e1c893Smrg    } else {
1090b7e1c893Smrg	if (accel_state->component_alpha) {
1091b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1092b7e1c893Smrg		pix_a = SQ_SEL_1;
1093b7e1c893Smrg	} else {
1094b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0) {
1095b7e1c893Smrg		pix_r = SQ_SEL_1;
1096b7e1c893Smrg		pix_g = SQ_SEL_1;
1097b7e1c893Smrg		pix_b = SQ_SEL_1;
1098b7e1c893Smrg		pix_a = SQ_SEL_1;
1099b7e1c893Smrg	    } else {
1100b7e1c893Smrg		pix_r = pix_a;
1101b7e1c893Smrg		pix_g = pix_a;
1102b7e1c893Smrg		pix_b = pix_a;
1103b7e1c893Smrg	    }
1104b7e1c893Smrg	}
1105b7e1c893Smrg    }
1106b7e1c893Smrg
1107b7e1c893Smrg    tex_res.dst_sel_x           = pix_r; /* R */
1108b7e1c893Smrg    tex_res.dst_sel_y           = pix_g; /* G */
1109b7e1c893Smrg    tex_res.dst_sel_z           = pix_b; /* B */
1110b7e1c893Smrg    tex_res.dst_sel_w           = pix_a; /* A */
1111b7e1c893Smrg
1112b7e1c893Smrg    tex_res.base_level          = 0;
1113b7e1c893Smrg    tex_res.last_level          = 0;
1114b7e1c893Smrg    tex_res.perf_modulation     = 0;
1115b13dfe66Smrg    if (accel_state->src_obj[unit].tiling_flags == 0)
1116b13dfe66Smrg	tex_res.tile_mode           = 1;
1117921a55d8Smrg    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
1118b7e1c893Smrg
1119b7e1c893Smrg    tex_samp.id                 = unit;
1120b7e1c893Smrg    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1121b7e1c893Smrg
1122ad43ddacSmrg    switch (repeatType) {
1123ad43ddacSmrg    case RepeatNormal:
1124ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_WRAP;
1125ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_WRAP;
1126ad43ddacSmrg	break;
1127ad43ddacSmrg    case RepeatPad:
1128ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1129ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1130ad43ddacSmrg	break;
1131ad43ddacSmrg    case RepeatReflect:
1132ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1133ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1134ad43ddacSmrg	break;
1135ad43ddacSmrg    case RepeatNone:
1136b7e1c893Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1137b7e1c893Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1138ad43ddacSmrg	break;
1139ad43ddacSmrg    default:
1140ad43ddacSmrg	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1141b7e1c893Smrg    }
1142b7e1c893Smrg
1143b7e1c893Smrg    switch (pPict->filter) {
1144b7e1c893Smrg    case PictFilterNearest:
1145b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1146b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1147b13dfe66Smrg	tex_samp.mc_coord_truncate  = 1;
1148b7e1c893Smrg	break;
1149b7e1c893Smrg    case PictFilterBilinear:
1150b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1151b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1152b7e1c893Smrg	break;
1153b7e1c893Smrg    default:
1154b7e1c893Smrg	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1155b7e1c893Smrg    }
1156b7e1c893Smrg
1157b7e1c893Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
1158b7e1c893Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1159b7e1c893Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
1160921a55d8Smrg    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
1161b7e1c893Smrg
1162b7e1c893Smrg    if (pPict->transform != 0) {
1163b7e1c893Smrg	accel_state->is_transform[unit] = TRUE;
1164b7e1c893Smrg	accel_state->transform[unit] = pPict->transform;
1165ad43ddacSmrg
1166ad43ddacSmrg	vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
1167ad43ddacSmrg	vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
1168ad43ddacSmrg	vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
1169c135ecebSveego	vs_alu_consts[3] = 1.0 / tex_res.w;
1170ad43ddacSmrg
1171ad43ddacSmrg	vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
1172ad43ddacSmrg	vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
1173ad43ddacSmrg	vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
1174c135ecebSveego	vs_alu_consts[7] = 1.0 / tex_res.h;
1175ad43ddacSmrg    } else {
1176b7e1c893Smrg	accel_state->is_transform[unit] = FALSE;
1177b7e1c893Smrg
1178ad43ddacSmrg	vs_alu_consts[0] = 1.0;
1179ad43ddacSmrg	vs_alu_consts[1] = 0.0;
1180ad43ddacSmrg	vs_alu_consts[2] = 0.0;
1181c135ecebSveego	vs_alu_consts[3] = 1.0 / tex_res.w;
1182ad43ddacSmrg
1183ad43ddacSmrg	vs_alu_consts[4] = 0.0;
1184ad43ddacSmrg	vs_alu_consts[5] = 1.0;
1185ad43ddacSmrg	vs_alu_consts[6] = 0.0;
1186c135ecebSveego	vs_alu_consts[7] = 1.0 / tex_res.h;
1187ad43ddacSmrg    }
1188ad43ddacSmrg
1189ad43ddacSmrg    /* VS alu constants */
1190921a55d8Smrg    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
1191921a55d8Smrg			sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
1192ad43ddacSmrg
1193b7e1c893Smrg    return TRUE;
1194b7e1c893Smrg}
1195b7e1c893Smrg
1196b7e1c893Smrgstatic Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1197b7e1c893Smrg			       PicturePtr pDstPicture)
1198b7e1c893Smrg{
1199b7e1c893Smrg    uint32_t tmp1;
1200b7e1c893Smrg    PixmapPtr pSrcPixmap, pDstPixmap;
1201b7e1c893Smrg
1202b7e1c893Smrg    /* Check for unsupported compositing operations. */
1203b7e1c893Smrg    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1204b7e1c893Smrg	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1205b7e1c893Smrg
1206c135ecebSveego    if (pSrcPicture->pDrawable) {
1207c135ecebSveego	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1208ad43ddacSmrg
1209c135ecebSveego	if (pSrcPixmap->drawable.width >= 8192 ||
1210c135ecebSveego	    pSrcPixmap->drawable.height >= 8192) {
1211c135ecebSveego	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1212c135ecebSveego			     pSrcPixmap->drawable.width,
1213c135ecebSveego			     pSrcPixmap->drawable.height));
1214c135ecebSveego	}
1215b7e1c893Smrg
1216c135ecebSveego	if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1217c135ecebSveego	    return FALSE;
1218c135ecebSveego    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
1219c135ecebSveego	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1220b7e1c893Smrg
1221b7e1c893Smrg    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1222b7e1c893Smrg
1223c135ecebSveego    if (pDstPixmap->drawable.width >= 8192 ||
1224c135ecebSveego	pDstPixmap->drawable.height >= 8192) {
1225b7e1c893Smrg	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1226b7e1c893Smrg			 pDstPixmap->drawable.width,
1227b7e1c893Smrg			 pDstPixmap->drawable.height));
1228b7e1c893Smrg    }
1229b7e1c893Smrg
1230b7e1c893Smrg    if (pMaskPicture) {
1231ad43ddacSmrg	PixmapPtr pMaskPixmap;
1232ad43ddacSmrg
1233c135ecebSveego	if (pMaskPicture->pDrawable) {
1234c135ecebSveego	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1235b7e1c893Smrg
1236c135ecebSveego	    if (pMaskPixmap->drawable.width >= 8192 ||
1237c135ecebSveego		pMaskPixmap->drawable.height >= 8192) {
1238c135ecebSveego	      RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1239c135ecebSveego			       pMaskPixmap->drawable.width,
1240c135ecebSveego			       pMaskPixmap->drawable.height));
1241c135ecebSveego	    }
1242b7e1c893Smrg
1243c135ecebSveego	    if (pMaskPicture->componentAlpha) {
1244c135ecebSveego		/* Check if it's component alpha that relies on a source alpha and
1245c135ecebSveego		 * on the source value.  We can only get one of those into the
1246c135ecebSveego		 * single source value that we get to blend with.
1247c135ecebSveego		 */
1248c135ecebSveego		if (R600BlendOp[op].src_alpha &&
1249c135ecebSveego		    (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1250c135ecebSveego		    (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1251c135ecebSveego		    RADEON_FALLBACK(("Component alpha not supported with source "
1252c135ecebSveego				     "alpha and source value blending.\n"));
1253c135ecebSveego		}
1254b7e1c893Smrg	    }
1255b7e1c893Smrg
1256c135ecebSveego	    if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1257c135ecebSveego		return FALSE;
1258c135ecebSveego	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
1259c135ecebSveego	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1260b7e1c893Smrg    }
1261b7e1c893Smrg
1262b7e1c893Smrg    if (!R600GetDestFormat(pDstPicture, &tmp1))
1263b7e1c893Smrg	return FALSE;
1264b7e1c893Smrg
1265b7e1c893Smrg    return TRUE;
1266b7e1c893Smrg
1267b7e1c893Smrg}
1268b7e1c893Smrg
1269b7e1c893Smrgstatic Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1270b7e1c893Smrg				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1271b7e1c893Smrg				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1272b7e1c893Smrg{
1273c135ecebSveego    ScreenPtr pScreen = pDst->drawable.pScreen;
1274c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1275b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1276b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1277b13dfe66Smrg    uint32_t dst_format;
1278b7e1c893Smrg    cb_config_t cb_conf;
1279b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
1280ad43ddacSmrg    struct r600_accel_object src_obj, mask_obj, dst_obj;
1281b7e1c893Smrg
1282c135ecebSveego    if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8))
1283ad43ddacSmrg	return FALSE;
1284ad43ddacSmrg
1285c135ecebSveego    if (!pSrc) {
1286c135ecebSveego	pSrc = RADEONSolidPixmap(pScreen, pSrcPicture->pSourcePict->solidFill.color);
1287c135ecebSveego	if (!pSrc)
1288c135ecebSveego	    RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
1289c135ecebSveego    }
1290c135ecebSveego
1291ad43ddacSmrg#if defined(XF86DRM_MODE)
1292ad43ddacSmrg    if (info->cs) {
1293ad43ddacSmrg	src_obj.offset = 0;
1294ad43ddacSmrg	dst_obj.offset = 0;
1295ad43ddacSmrg	dst_obj.bo = radeon_get_pixmap_bo(pDst);
1296c135ecebSveego	src_obj.bo = radeon_get_pixmap_bo(pSrc);
1297166b61b6Smrg	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1298166b61b6Smrg	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1299f3a0071aSrjs	dst_obj.surface = radeon_get_pixmap_surface(pDst);
1300f3a0071aSrjs	src_obj.surface = radeon_get_pixmap_surface(pSrc);
1301ad43ddacSmrg    } else
1302ad43ddacSmrg#endif
1303ad43ddacSmrg    {
1304ad43ddacSmrg	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1305ad43ddacSmrg	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1306ad43ddacSmrg	src_obj.bo = NULL;
1307ad43ddacSmrg	dst_obj.bo = NULL;
1308ad43ddacSmrg    }
1309ad43ddacSmrg    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1310ad43ddacSmrg    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1311ad43ddacSmrg
1312ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
1313ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
1314ad43ddacSmrg    src_obj.bpp = pSrc->drawable.bitsPerPixel;
1315ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1316ad43ddacSmrg
1317ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
1318ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
1319ad43ddacSmrg    dst_obj.bpp = pDst->drawable.bitsPerPixel;
1320ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1321b7e1c893Smrg
1322c135ecebSveego    if (pMaskPicture) {
1323c135ecebSveego	if (!pMask) {
1324c135ecebSveego	    pMask = RADEONSolidPixmap(pScreen, pMaskPicture->pSourcePict->solidFill.color);
1325c135ecebSveego	    if (!pMask) {
1326c135ecebSveego		if (!pSrcPicture->pDrawable)
1327c135ecebSveego		    pScreen->DestroyPixmap(pSrc);
1328c135ecebSveego		RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
1329c135ecebSveego	    }
1330c135ecebSveego	}
1331c135ecebSveego
1332ad43ddacSmrg#if defined(XF86DRM_MODE)
1333ad43ddacSmrg	if (info->cs) {
1334ad43ddacSmrg	    mask_obj.offset = 0;
1335ad43ddacSmrg	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
1336166b61b6Smrg	    mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
1337f3a0071aSrjs	    mask_obj.surface = radeon_get_pixmap_surface(pMask);
1338921a55d8Smrg	} else
1339ad43ddacSmrg#endif
1340ad43ddacSmrg	{
1341ad43ddacSmrg	    mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset;
1342ad43ddacSmrg	    mask_obj.bo = NULL;
1343ad43ddacSmrg	}
1344ad43ddacSmrg	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1345ad43ddacSmrg
1346ad43ddacSmrg	mask_obj.width = pMask->drawable.width;
1347ad43ddacSmrg	mask_obj.height = pMask->drawable.height;
1348ad43ddacSmrg	mask_obj.bpp = pMask->drawable.bitsPerPixel;
1349ad43ddacSmrg	mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1350ad43ddacSmrg
1351ad43ddacSmrg	if (!R600SetAccelState(pScrn,
1352ad43ddacSmrg			       &src_obj,
1353ad43ddacSmrg			       &mask_obj,
1354ad43ddacSmrg			       &dst_obj,
13550974d292Smrg			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1356ad43ddacSmrg			       3, 0xffffffff))
1357ad43ddacSmrg	    return FALSE;
1358ad43ddacSmrg
1359ad43ddacSmrg	accel_state->msk_pic = pMaskPicture;
1360b7e1c893Smrg	if (pMaskPicture->componentAlpha) {
1361b7e1c893Smrg	    accel_state->component_alpha = TRUE;
1362b7e1c893Smrg	    if (R600BlendOp[op].src_alpha)
1363b7e1c893Smrg		accel_state->src_alpha = TRUE;
1364b7e1c893Smrg	    else
1365b7e1c893Smrg		accel_state->src_alpha = FALSE;
1366b7e1c893Smrg	} else {
1367b7e1c893Smrg	    accel_state->component_alpha = FALSE;
1368b7e1c893Smrg	    accel_state->src_alpha = FALSE;
1369b7e1c893Smrg	}
1370b7e1c893Smrg    } else {
1371ad43ddacSmrg	if (!R600SetAccelState(pScrn,
1372ad43ddacSmrg			       &src_obj,
1373ad43ddacSmrg			       NULL,
1374ad43ddacSmrg			       &dst_obj,
1375ad43ddacSmrg			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1376ad43ddacSmrg			       3, 0xffffffff))
1377ad43ddacSmrg	    return FALSE;
1378ad43ddacSmrg
1379ad43ddacSmrg	accel_state->msk_pic = NULL;
1380b7e1c893Smrg	accel_state->component_alpha = FALSE;
1381b7e1c893Smrg	accel_state->src_alpha = FALSE;
1382b7e1c893Smrg    }
1383b7e1c893Smrg
1384b7e1c893Smrg    if (!R600GetDestFormat(pDstPicture, &dst_format))
1385b7e1c893Smrg	return FALSE;
1386b7e1c893Smrg
1387b7e1c893Smrg    CLEAR (cb_conf);
1388b7e1c893Smrg    CLEAR (vs_conf);
1389b7e1c893Smrg    CLEAR (ps_conf);
1390b7e1c893Smrg
1391ad43ddacSmrg    if (pMask)
1392921a55d8Smrg        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1393ad43ddacSmrg    else
1394921a55d8Smrg        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1395b7e1c893Smrg
13960974d292Smrg    radeon_cp_start(pScrn);
1397b7e1c893Smrg
1398921a55d8Smrg    r600_set_default_state(pScrn, accel_state->ib);
1399b7e1c893Smrg
1400921a55d8Smrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1401921a55d8Smrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1402921a55d8Smrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1403b7e1c893Smrg
1404b7e1c893Smrg    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
1405ad43ddacSmrg        R600IBDiscard(pScrn, accel_state->ib);
1406ad43ddacSmrg        return FALSE;
1407b7e1c893Smrg    }
1408b7e1c893Smrg
1409b7e1c893Smrg    if (pMask) {
1410ad43ddacSmrg        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
1411ad43ddacSmrg            R600IBDiscard(pScrn, accel_state->ib);
1412ad43ddacSmrg            return FALSE;
1413ad43ddacSmrg        }
1414b7e1c893Smrg    } else
1415ad43ddacSmrg        accel_state->is_transform[1] = FALSE;
1416b7e1c893Smrg
14170974d292Smrg    if (pMask) {
1418921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
1419921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
14200974d292Smrg    } else {
1421921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
1422921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
14230974d292Smrg    }
1424b7e1c893Smrg
1425b7e1c893Smrg    /* Shader */
1426b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
14270974d292Smrg    vs_conf.shader_size         = accel_state->vs_size;
1428921a55d8Smrg    vs_conf.num_gprs            = 5;
1429b7e1c893Smrg    vs_conf.stack_size          = 1;
1430ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
1431921a55d8Smrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1432b7e1c893Smrg
1433b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
14340974d292Smrg    ps_conf.shader_size         = accel_state->ps_size;
1435b7e1c893Smrg    ps_conf.num_gprs            = 3;
14360974d292Smrg    ps_conf.stack_size          = 1;
1437b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
1438b7e1c893Smrg    ps_conf.clamp_consts        = 0;
1439b7e1c893Smrg    ps_conf.export_mode         = 2;
1440ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
1441921a55d8Smrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1442b7e1c893Smrg
1443b7e1c893Smrg    cb_conf.id = 0;
1444ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
1445ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
1446ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
1447b7e1c893Smrg    cb_conf.format = dst_format;
1448ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
1449f3a0071aSrjs#ifdef XF86DRM_MODE
1450f3a0071aSrjs    cb_conf.surface = accel_state->dst_obj.surface;
1451f3a0071aSrjs#endif
1452b7e1c893Smrg
1453b7e1c893Smrg    switch (pDstPicture->format) {
1454b7e1c893Smrg    case PICT_a8r8g8b8:
1455b7e1c893Smrg    case PICT_x8r8g8b8:
1456b7e1c893Smrg    case PICT_a1r5g5b5:
1457b7e1c893Smrg    case PICT_x1r5g5b5:
1458b7e1c893Smrg    default:
1459b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
1460b7e1c893Smrg	break;
1461ad43ddacSmrg    case PICT_a8b8g8r8:
1462ad43ddacSmrg    case PICT_x8b8g8r8:
1463ad43ddacSmrg	cb_conf.comp_swap = 0; /* ABGR */
1464ad43ddacSmrg	break;
1465ad43ddacSmrg#ifdef PICT_TYPE_BGRA
1466ad43ddacSmrg    case PICT_b8g8r8a8:
1467ad43ddacSmrg    case PICT_b8g8r8x8:
1468ad43ddacSmrg	cb_conf.comp_swap = 3; /* BGRA */
1469ad43ddacSmrg	break;
1470ad43ddacSmrg#endif
1471b7e1c893Smrg    case PICT_r5g6b5:
1472b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
1473b7e1c893Smrg	break;
1474b7e1c893Smrg    case PICT_a8:
1475b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
1476b7e1c893Smrg	break;
1477b7e1c893Smrg    }
1478b7e1c893Smrg    cb_conf.source_format = 1;
1479b7e1c893Smrg    cb_conf.blend_clamp = 1;
1480b13dfe66Smrg    cb_conf.blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1481b13dfe66Smrg    cb_conf.blend_enable = 1;
1482b13dfe66Smrg    cb_conf.pmask = 0xf;
1483b13dfe66Smrg    cb_conf.rop = 3;
1484b13dfe66Smrg    if (accel_state->dst_obj.tiling_flags == 0)
1485f3a0071aSrjs	cb_conf.array_mode = 0;
1486b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1487b13dfe66Smrg    switch (dst_obj.bpp) {
1488b13dfe66Smrg    case 16:
1489b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN16;
1490b13dfe66Smrg	break;
1491b13dfe66Smrg    case 32:
1492b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN32;
1493b13dfe66Smrg	break;
1494b13dfe66Smrg    default:
1495b13dfe66Smrg	break;
1496b7e1c893Smrg    }
1497b13dfe66Smrg#endif
1498b13dfe66Smrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
14990974d292Smrg
1500b13dfe66Smrg    if (pMask)
1501b13dfe66Smrg	r600_set_spi(pScrn, accel_state->ib, (2 - 1), 2);
1502b13dfe66Smrg    else
1503b13dfe66Smrg	r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
1504b7e1c893Smrg
1505ad43ddacSmrg    if (accel_state->vsync)
15060974d292Smrg	RADEONVlineHelperClear(pScrn);
1507b7e1c893Smrg
1508f3a0071aSrjs    accel_state->composite_op = op;
1509f3a0071aSrjs    accel_state->dst_pic = pDstPicture;
1510f3a0071aSrjs    accel_state->src_pic = pSrcPicture;
1511f3a0071aSrjs    accel_state->dst_pix = pDst;
1512f3a0071aSrjs    accel_state->msk_pix = pMask;
1513f3a0071aSrjs    accel_state->src_pix = pSrc;
1514f3a0071aSrjs
1515b7e1c893Smrg    return TRUE;
1516b7e1c893Smrg}
1517b7e1c893Smrg
1518c135ecebSveegostatic void R600FinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst,
1519c135ecebSveego				struct radeon_accel_state *accel_state)
1520f3a0071aSrjs{
1521f3a0071aSrjs    int vtx_size;
1522f3a0071aSrjs
1523f3a0071aSrjs    if (accel_state->vsync)
1524f3a0071aSrjs       r600_cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
1525f3a0071aSrjs			       accel_state->vline_crtc,
1526f3a0071aSrjs			       accel_state->vline_y1,
1527f3a0071aSrjs			       accel_state->vline_y2);
1528f3a0071aSrjs
1529f3a0071aSrjs    vtx_size = accel_state->msk_pic ? 24 : 16;
1530f3a0071aSrjs
1531f3a0071aSrjs    r600_finish_op(pScrn, vtx_size);
1532f3a0071aSrjs}
1533f3a0071aSrjs
1534c135ecebSveegostatic void R600DoneComposite(PixmapPtr pDst)
1535c135ecebSveego{
1536c135ecebSveego    ScreenPtr pScreen = pDst->drawable.pScreen;
1537c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1538c135ecebSveego    RADEONInfoPtr info = RADEONPTR(pScrn);
1539c135ecebSveego    struct radeon_accel_state *accel_state = info->accel_state;
1540c135ecebSveego
1541c135ecebSveego    R600FinishComposite(pScrn, pDst, accel_state);
1542c135ecebSveego
1543c135ecebSveego    if (!accel_state->src_pic->pDrawable)
1544c135ecebSveego	pScreen->DestroyPixmap(accel_state->src_pix);
1545c135ecebSveego
1546c135ecebSveego    if (accel_state->msk_pic && !accel_state->msk_pic->pDrawable)
1547c135ecebSveego	pScreen->DestroyPixmap(accel_state->msk_pix);
1548c135ecebSveego}
1549c135ecebSveego
1550b7e1c893Smrgstatic void R600Composite(PixmapPtr pDst,
1551b7e1c893Smrg			  int srcX, int srcY,
1552b7e1c893Smrg			  int maskX, int maskY,
1553b7e1c893Smrg			  int dstX, int dstY,
1554b7e1c893Smrg			  int w, int h)
1555b7e1c893Smrg{
1556c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1557b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1558b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1559b7e1c893Smrg    float *vb;
1560b7e1c893Smrg
1561b7e1c893Smrg    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1562b7e1c893Smrg       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1563b7e1c893Smrg
1564f3a0071aSrjs#ifdef XF86DRM_MODE
1565f3a0071aSrjs    if (info->cs && CS_FULL(info->cs)) {
1566c135ecebSveego	R600FinishComposite(pScrn, pDst, info->accel_state);
1567f3a0071aSrjs	radeon_cs_flush_indirect(pScrn);
1568f3a0071aSrjs	R600PrepareComposite(info->accel_state->composite_op,
1569f3a0071aSrjs			     info->accel_state->src_pic,
1570f3a0071aSrjs			     info->accel_state->msk_pic,
1571f3a0071aSrjs			     info->accel_state->dst_pic,
1572f3a0071aSrjs			     info->accel_state->src_pix,
1573f3a0071aSrjs			     info->accel_state->msk_pix,
1574f3a0071aSrjs			     info->accel_state->dst_pix);
1575f3a0071aSrjs    }
1576f3a0071aSrjs#endif
1577f3a0071aSrjs
1578ad43ddacSmrg    if (accel_state->vsync)
15790974d292Smrg	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1580b7e1c893Smrg
1581ad43ddacSmrg    if (accel_state->msk_pic) {
1582b7e1c893Smrg
1583921a55d8Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
1584b7e1c893Smrg
1585b7e1c893Smrg	vb[0] = (float)dstX;
1586b7e1c893Smrg	vb[1] = (float)dstY;
1587ad43ddacSmrg	vb[2] = (float)srcX;
1588ad43ddacSmrg	vb[3] = (float)srcY;
1589ad43ddacSmrg	vb[4] = (float)maskX;
1590ad43ddacSmrg	vb[5] = (float)maskY;
1591b7e1c893Smrg
1592b7e1c893Smrg	vb[6] = (float)dstX;
1593b7e1c893Smrg	vb[7] = (float)(dstY + h);
1594ad43ddacSmrg	vb[8] = (float)srcX;
1595ad43ddacSmrg	vb[9] = (float)(srcY + h);
1596ad43ddacSmrg	vb[10] = (float)maskX;
1597ad43ddacSmrg	vb[11] = (float)(maskY + h);
1598b7e1c893Smrg
1599b7e1c893Smrg	vb[12] = (float)(dstX + w);
1600b7e1c893Smrg	vb[13] = (float)(dstY + h);
1601ad43ddacSmrg	vb[14] = (float)(srcX + w);
1602ad43ddacSmrg	vb[15] = (float)(srcY + h);
1603ad43ddacSmrg	vb[16] = (float)(maskX + w);
1604ad43ddacSmrg	vb[17] = (float)(maskY + h);
1605ad43ddacSmrg
1606921a55d8Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
1607b7e1c893Smrg
1608b7e1c893Smrg    } else {
1609b7e1c893Smrg
1610921a55d8Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
1611b7e1c893Smrg
1612b7e1c893Smrg	vb[0] = (float)dstX;
1613b7e1c893Smrg	vb[1] = (float)dstY;
1614ad43ddacSmrg	vb[2] = (float)srcX;
1615ad43ddacSmrg	vb[3] = (float)srcY;
1616b7e1c893Smrg
1617b7e1c893Smrg	vb[4] = (float)dstX;
1618b7e1c893Smrg	vb[5] = (float)(dstY + h);
1619ad43ddacSmrg	vb[6] = (float)srcX;
1620ad43ddacSmrg	vb[7] = (float)(srcY + h);
1621b7e1c893Smrg
1622b7e1c893Smrg	vb[8] = (float)(dstX + w);
1623b7e1c893Smrg	vb[9] = (float)(dstY + h);
1624ad43ddacSmrg	vb[10] = (float)(srcX + w);
1625ad43ddacSmrg	vb[11] = (float)(srcY + h);
1626ad43ddacSmrg
1627921a55d8Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
1628b7e1c893Smrg    }
1629b7e1c893Smrg
1630b7e1c893Smrg
1631b7e1c893Smrg}
1632b7e1c893Smrg
1633b7e1c893SmrgBool
1634b7e1c893SmrgR600CopyToVRAM(ScrnInfoPtr pScrn,
1635b7e1c893Smrg	       char *src, int src_pitch,
1636ad43ddacSmrg	       uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_width, uint32_t dst_height, int bpp,
1637b7e1c893Smrg	       int x, int y, int w, int h)
1638b7e1c893Smrg{
1639b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1640ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1641b7e1c893Smrg    uint32_t scratch_mc_addr;
1642b7e1c893Smrg    int wpass = w * (bpp/8);
1643ad43ddacSmrg    int scratch_pitch_bytes = RADEON_ALIGN(wpass, 256);
1644b7e1c893Smrg    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1645b7e1c893Smrg    int scratch_offset = 0, hpass, temph;
1646b7e1c893Smrg    char *dst;
1647b7e1c893Smrg    drmBufPtr scratch;
1648ad43ddacSmrg    struct r600_accel_object scratch_obj, dst_obj;
1649b7e1c893Smrg
1650b7e1c893Smrg    if (dst_pitch & 7)
1651b7e1c893Smrg	return FALSE;
1652b7e1c893Smrg
1653b7e1c893Smrg    if (dst_mc_addr & 0xff)
1654b7e1c893Smrg	return FALSE;
1655b7e1c893Smrg
1656b7e1c893Smrg    scratch = RADEONCPGetBuffer(pScrn);
1657b7e1c893Smrg    if (scratch == NULL)
1658b7e1c893Smrg	return FALSE;
1659b7e1c893Smrg
1660b7e1c893Smrg    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1661b7e1c893Smrg    temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1662b7e1c893Smrg    dst = (char *)scratch->address;
1663b7e1c893Smrg
1664ad43ddacSmrg    scratch_obj.pitch = scratch_pitch;
1665ad43ddacSmrg    scratch_obj.width = w;
1666ad43ddacSmrg    scratch_obj.height = hpass;
1667ad43ddacSmrg    scratch_obj.offset = scratch_mc_addr;
1668ad43ddacSmrg    scratch_obj.bpp = bpp;
1669ad43ddacSmrg    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1670ad43ddacSmrg    scratch_obj.bo = NULL;
1671ad43ddacSmrg
1672ad43ddacSmrg    dst_obj.pitch = dst_pitch;
1673ad43ddacSmrg    dst_obj.width = dst_width;
1674ad43ddacSmrg    dst_obj.height = dst_height;
1675ad43ddacSmrg    dst_obj.offset = dst_mc_addr;
1676ad43ddacSmrg    dst_obj.bo = NULL;
1677ad43ddacSmrg    dst_obj.bpp = bpp;
1678ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1679ad43ddacSmrg
1680ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1681ad43ddacSmrg			   &scratch_obj,
1682ad43ddacSmrg			   NULL,
1683ad43ddacSmrg			   &dst_obj,
1684ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1685ad43ddacSmrg			   3, 0xffffffff))
1686ad43ddacSmrg	return FALSE;
1687ad43ddacSmrg
1688b7e1c893Smrg    /* memcopy from sys to scratch */
1689b7e1c893Smrg    while (temph--) {
1690b7e1c893Smrg	memcpy (dst, src, wpass);
1691b7e1c893Smrg	src += src_pitch;
1692b7e1c893Smrg	dst += scratch_pitch_bytes;
1693b7e1c893Smrg    }
1694b7e1c893Smrg
1695b7e1c893Smrg    while (h) {
1696b7e1c893Smrg	uint32_t offset = scratch_mc_addr + scratch_offset;
1697b7e1c893Smrg	int oldhpass = hpass;
1698b7e1c893Smrg	h -= oldhpass;
1699b7e1c893Smrg	temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1700b7e1c893Smrg
1701b7e1c893Smrg	if (hpass) {
1702b7e1c893Smrg	    scratch_offset = scratch->total/2 - scratch_offset;
1703b7e1c893Smrg	    dst = (char *)scratch->address + scratch_offset;
1704b7e1c893Smrg	    /* wait for the engine to be idle */
1705b7e1c893Smrg	    RADEONWaitForIdleCP(pScrn);
1706b7e1c893Smrg	    //memcopy from sys to scratch
1707b7e1c893Smrg	    while (temph--) {
1708b7e1c893Smrg		memcpy (dst, src, wpass);
1709b7e1c893Smrg		src += src_pitch;
1710b7e1c893Smrg		dst += scratch_pitch_bytes;
1711b7e1c893Smrg	    }
1712b7e1c893Smrg	}
1713b7e1c893Smrg	/* blit from scratch to vram */
1714ad43ddacSmrg	info->accel_state->src_obj[0].height = oldhpass;
1715ad43ddacSmrg	info->accel_state->src_obj[0].offset = offset;
1716ad43ddacSmrg	R600DoPrepareCopy(pScrn);
1717b7e1c893Smrg	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
1718b7e1c893Smrg	R600DoCopy(pScrn);
1719b7e1c893Smrg	y += oldhpass;
1720b7e1c893Smrg    }
1721b7e1c893Smrg
1722b7e1c893Smrg    R600IBDiscard(pScrn, scratch);
1723b7e1c893Smrg
1724b7e1c893Smrg    return TRUE;
1725b7e1c893Smrg}
1726b7e1c893Smrg
1727b7e1c893Smrgstatic Bool
1728b7e1c893SmrgR600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1729b7e1c893Smrg		   char *src, int src_pitch)
1730b7e1c893Smrg{
1731c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1732b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1733b7e1c893Smrg    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1734b7e1c893Smrg    uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1735b7e1c893Smrg    int bpp = pDst->drawable.bitsPerPixel;
1736b7e1c893Smrg
1737b7e1c893Smrg    return R600CopyToVRAM(pScrn,
1738b7e1c893Smrg			  src, src_pitch,
1739ad43ddacSmrg			  dst_pitch, dst_mc_addr, pDst->drawable.width, pDst->drawable.height, bpp,
1740b7e1c893Smrg			  x, y, w, h);
1741b7e1c893Smrg}
1742b7e1c893Smrg
1743b7e1c893Smrgstatic Bool
1744b7e1c893SmrgR600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
1745b7e1c893Smrg		       char *dst, int dst_pitch)
1746b7e1c893Smrg{
1747c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
1748b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1749ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1750b7e1c893Smrg    uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1751b7e1c893Smrg    uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1752b7e1c893Smrg    uint32_t src_width = pSrc->drawable.width;
1753b7e1c893Smrg    uint32_t src_height = pSrc->drawable.height;
1754b7e1c893Smrg    int bpp = pSrc->drawable.bitsPerPixel;
1755b7e1c893Smrg    uint32_t scratch_mc_addr;
1756ad43ddacSmrg    int scratch_pitch_bytes = RADEON_ALIGN(dst_pitch, 256);
1757b7e1c893Smrg    int scratch_offset = 0, hpass;
1758b7e1c893Smrg    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1759b7e1c893Smrg    int wpass = w * (bpp/8);
1760b7e1c893Smrg    drmBufPtr scratch;
1761ad43ddacSmrg    struct r600_accel_object scratch_obj, src_obj;
1762b7e1c893Smrg
1763ad43ddacSmrg    /* bad pipe setup in drm prior to 1.32 */
1764ad43ddacSmrg    if (info->dri->pKernelDRMVersion->version_minor < 32) {
1765ad43ddacSmrg	    if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32))
1766ad43ddacSmrg		    return FALSE;
1767ad43ddacSmrg    }
1768c503f109Smrg
1769b7e1c893Smrg    if (src_pitch & 7)
1770b7e1c893Smrg	return FALSE;
1771b7e1c893Smrg
1772b7e1c893Smrg    scratch = RADEONCPGetBuffer(pScrn);
1773b7e1c893Smrg    if (scratch == NULL)
1774b7e1c893Smrg	return FALSE;
1775b7e1c893Smrg
1776b7e1c893Smrg    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1777b7e1c893Smrg    hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1778b7e1c893Smrg
1779ad43ddacSmrg    src_obj.pitch = src_pitch;
1780ad43ddacSmrg    src_obj.width = src_width;
1781ad43ddacSmrg    src_obj.height = src_height;
1782ad43ddacSmrg    src_obj.offset = src_mc_addr;
1783ad43ddacSmrg    src_obj.bo = NULL;
1784ad43ddacSmrg    src_obj.bpp = bpp;
1785ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1786ad43ddacSmrg
1787ad43ddacSmrg    scratch_obj.pitch = scratch_pitch;
1788ad43ddacSmrg    scratch_obj.width = src_width;
1789ad43ddacSmrg    scratch_obj.height = hpass;
1790ad43ddacSmrg    scratch_obj.offset = scratch_mc_addr;
1791ad43ddacSmrg    scratch_obj.bpp = bpp;
1792ad43ddacSmrg    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1793ad43ddacSmrg    scratch_obj.bo = NULL;
1794ad43ddacSmrg
1795ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1796ad43ddacSmrg			   &src_obj,
1797ad43ddacSmrg			   NULL,
1798ad43ddacSmrg			   &scratch_obj,
1799ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1800ad43ddacSmrg			   3, 0xffffffff))
1801ad43ddacSmrg	return FALSE;
1802ad43ddacSmrg
1803b7e1c893Smrg    /* blit from vram to scratch */
1804ad43ddacSmrg    R600DoPrepareCopy(pScrn);
1805b7e1c893Smrg    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1806b7e1c893Smrg    R600DoCopy(pScrn);
1807b7e1c893Smrg
1808b7e1c893Smrg    while (h) {
1809b7e1c893Smrg	char *src = (char *)scratch->address + scratch_offset;
1810b7e1c893Smrg	int oldhpass = hpass;
1811b7e1c893Smrg	h -= oldhpass;
1812b7e1c893Smrg	y += oldhpass;
1813b7e1c893Smrg	hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1814b7e1c893Smrg
1815b7e1c893Smrg	if (hpass) {
1816b7e1c893Smrg	    scratch_offset = scratch->total/2 - scratch_offset;
1817b7e1c893Smrg	    /* blit from vram to scratch */
1818ad43ddacSmrg	    info->accel_state->dst_obj.height = hpass;
1819ad43ddacSmrg	    info->accel_state->dst_obj.offset = scratch_mc_addr + scratch_offset;
1820ad43ddacSmrg	    R600DoPrepareCopy(pScrn);
1821b7e1c893Smrg	    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1822b7e1c893Smrg	    R600DoCopy(pScrn);
1823b7e1c893Smrg	}
1824b7e1c893Smrg
1825b7e1c893Smrg	/* wait for the engine to be idle */
1826b7e1c893Smrg	RADEONWaitForIdleCP(pScrn);
1827b7e1c893Smrg	/* memcopy from scratch to sys */
1828b7e1c893Smrg	while (oldhpass--) {
1829b7e1c893Smrg	    memcpy (dst, src, wpass);
1830b7e1c893Smrg	    dst += dst_pitch;
1831b7e1c893Smrg	    src += scratch_pitch_bytes;
1832b7e1c893Smrg	}
1833b7e1c893Smrg    }
1834b7e1c893Smrg
1835b7e1c893Smrg    R600IBDiscard(pScrn, scratch);
1836b7e1c893Smrg
1837b7e1c893Smrg    return TRUE;
1838b7e1c893Smrg
1839b7e1c893Smrg}
1840b7e1c893Smrg
1841ad43ddacSmrg#if defined(XF86DRM_MODE)
1842ad43ddacSmrg
1843ad43ddacSmrgstatic Bool
1844ad43ddacSmrgR600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
1845ad43ddacSmrg		     char *src, int src_pitch)
1846ad43ddacSmrg{
1847c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1848ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1849ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1850ad43ddacSmrg    struct radeon_exa_pixmap_priv *driver_priv;
18510974d292Smrg    struct radeon_bo *scratch = NULL;
18520974d292Smrg    struct radeon_bo *copy_dst;
18530974d292Smrg    unsigned char *dst;
1854ad43ddacSmrg    unsigned size;
1855ad43ddacSmrg    uint32_t dst_domain;
1856ad43ddacSmrg    int bpp = pDst->drawable.bitsPerPixel;
1857b13dfe66Smrg    uint32_t scratch_pitch;
18580974d292Smrg    uint32_t copy_pitch;
1859ad43ddacSmrg    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
18600974d292Smrg    int ret;
18610974d292Smrg    Bool flush = TRUE;
1862ad43ddacSmrg    Bool r;
1863ad43ddacSmrg    int i;
1864ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
1865166b61b6Smrg    uint32_t height, base_align;
1866ad43ddacSmrg
1867ad43ddacSmrg    if (bpp < 8)
1868ad43ddacSmrg	return FALSE;
1869ad43ddacSmrg
1870ad43ddacSmrg    driver_priv = exaGetPixmapDriverPrivate(pDst);
1871921a55d8Smrg    if (!driver_priv || !driver_priv->bo)
1872921a55d8Smrg	return FALSE;
1873921a55d8Smrg
18740974d292Smrg    /* If we know the BO won't be busy, don't bother with a scratch */
18750974d292Smrg    copy_dst = driver_priv->bo;
18760974d292Smrg    copy_pitch = pDst->devKind;
1877166b61b6Smrg    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1878921a55d8Smrg	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1879921a55d8Smrg	    flush = FALSE;
1880921a55d8Smrg	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
1881921a55d8Smrg		goto copy;
1882921a55d8Smrg	}
18830974d292Smrg    }
1884ad43ddacSmrg
1885b13dfe66Smrg    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1886b13dfe66Smrg    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1887b13dfe66Smrg    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1888b13dfe66Smrg    size = scratch_pitch * height * (bpp / 8);
1889b13dfe66Smrg    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1890ad43ddacSmrg    if (scratch == NULL) {
18910974d292Smrg	goto copy;
1892ad43ddacSmrg    }
1893ad43ddacSmrg
1894b13dfe66Smrg    src_obj.pitch = scratch_pitch;
1895ad43ddacSmrg    src_obj.width = w;
1896ad43ddacSmrg    src_obj.height = h;
1897ad43ddacSmrg    src_obj.offset = 0;
1898ad43ddacSmrg    src_obj.bpp = bpp;
1899ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1900ad43ddacSmrg    src_obj.bo = scratch;
1901166b61b6Smrg    src_obj.tiling_flags = 0;
1902f3a0071aSrjs#ifdef XF86DRM_MODE
1903f3a0071aSrjs    src_obj.surface = NULL;
1904f3a0071aSrjs#endif
1905ad43ddacSmrg
1906ad43ddacSmrg    dst_obj.pitch = dst_pitch_hw;
1907ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
1908ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
1909ad43ddacSmrg    dst_obj.offset = 0;
1910ad43ddacSmrg    dst_obj.bpp = bpp;
1911ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1912ad43ddacSmrg    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1913166b61b6Smrg    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1914f3a0071aSrjs#ifdef XF86DRM_MODE
1915f3a0071aSrjs    dst_obj.surface = radeon_get_pixmap_surface(pDst);
1916f3a0071aSrjs#endif
1917ad43ddacSmrg
1918ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1919ad43ddacSmrg			   &src_obj,
1920ad43ddacSmrg			   NULL,
1921ad43ddacSmrg			   &dst_obj,
1922ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1923ad43ddacSmrg			   3, 0xffffffff)) {
19240974d292Smrg        goto copy;
1925ad43ddacSmrg    }
19260974d292Smrg    copy_dst = scratch;
1927b13dfe66Smrg    copy_pitch = scratch_pitch * (bpp / 8);
19280974d292Smrg    flush = FALSE;
19290974d292Smrg
19300974d292Smrgcopy:
19310974d292Smrg    if (flush)
19320974d292Smrg	radeon_cs_flush_indirect(pScrn);
1933ad43ddacSmrg
19340974d292Smrg    ret = radeon_bo_map(copy_dst, 0);
19350974d292Smrg    if (ret) {
1936ad43ddacSmrg        r = FALSE;
1937ad43ddacSmrg        goto out;
1938ad43ddacSmrg    }
1939ad43ddacSmrg    r = TRUE;
1940ad43ddacSmrg    size = w * bpp / 8;
19410974d292Smrg    dst = copy_dst->ptr;
19420974d292Smrg    if (copy_dst == driver_priv->bo)
19430974d292Smrg	dst += y * copy_pitch + x * bpp / 8;
1944ad43ddacSmrg    for (i = 0; i < h; i++) {
19450974d292Smrg        memcpy(dst + i * copy_pitch, src, size);
1946ad43ddacSmrg        src += src_pitch;
1947ad43ddacSmrg    }
19480974d292Smrg    radeon_bo_unmap(copy_dst);
1949ad43ddacSmrg
19500974d292Smrg    if (copy_dst == scratch) {
19510974d292Smrg	if (info->accel_state->vsync)
19520974d292Smrg	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1953ad43ddacSmrg
19540974d292Smrg	/* blit from gart to vram */
19550974d292Smrg	R600DoPrepareCopy(pScrn);
19560974d292Smrg	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
19570974d292Smrg	R600DoCopyVline(pDst);
19580974d292Smrg    }
1959ad43ddacSmrg
1960ad43ddacSmrgout:
19610974d292Smrg    if (scratch)
19620974d292Smrg	radeon_bo_unref(scratch);
1963ad43ddacSmrg    return r;
1964ad43ddacSmrg}
1965ad43ddacSmrg
1966ad43ddacSmrgstatic Bool
1967ad43ddacSmrgR600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
1968ad43ddacSmrg			 int h, char *dst, int dst_pitch)
1969ad43ddacSmrg{
1970c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
1971ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1972ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1973ad43ddacSmrg    struct radeon_exa_pixmap_priv *driver_priv;
19740974d292Smrg    struct radeon_bo *scratch = NULL;
19750974d292Smrg    struct radeon_bo *copy_src;
1976ad43ddacSmrg    unsigned size;
1977ad43ddacSmrg    uint32_t src_domain = 0;
1978ad43ddacSmrg    int bpp = pSrc->drawable.bitsPerPixel;
1979b13dfe66Smrg    uint32_t scratch_pitch;
19800974d292Smrg    uint32_t copy_pitch;
1981ad43ddacSmrg    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
19820974d292Smrg    int ret;
19830974d292Smrg    Bool flush = FALSE;
1984ad43ddacSmrg    Bool r;
1985ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
1986166b61b6Smrg    uint32_t height, base_align;
1987ad43ddacSmrg
1988ad43ddacSmrg    if (bpp < 8)
1989ad43ddacSmrg	return FALSE;
1990ad43ddacSmrg
1991ad43ddacSmrg    driver_priv = exaGetPixmapDriverPrivate(pSrc);
1992921a55d8Smrg    if (!driver_priv || !driver_priv->bo)
1993921a55d8Smrg	return FALSE;
1994921a55d8Smrg
19950974d292Smrg    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
19960974d292Smrg    copy_src = driver_priv->bo;
19970974d292Smrg    copy_pitch = pSrc->devKind;
1998166b61b6Smrg    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1999921a55d8Smrg	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
2000921a55d8Smrg	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
2001921a55d8Smrg	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
2002921a55d8Smrg		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
2003921a55d8Smrg		src_domain = 0;
2004921a55d8Smrg	    else /* A write may be scheduled */
2005921a55d8Smrg		flush = TRUE;
2006921a55d8Smrg	}
2007ad43ddacSmrg
2008921a55d8Smrg	if (!src_domain)
2009921a55d8Smrg	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
2010ad43ddacSmrg
2011921a55d8Smrg	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
2012921a55d8Smrg	    goto copy;
2013921a55d8Smrg    }
2014ad43ddacSmrg
2015b13dfe66Smrg    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
2016b13dfe66Smrg    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
2017b13dfe66Smrg    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
2018b13dfe66Smrg    size = scratch_pitch * height * (bpp / 8);
2019b13dfe66Smrg    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
2020ad43ddacSmrg    if (scratch == NULL) {
20210974d292Smrg	goto copy;
2022ad43ddacSmrg    }
2023ad43ddacSmrg    radeon_cs_space_reset_bos(info->cs);
2024ad43ddacSmrg    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
2025ad43ddacSmrg				      RADEON_GEM_DOMAIN_VRAM, 0);
2026ad43ddacSmrg    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
2027ad43ddacSmrg    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
2028ad43ddacSmrg    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
2029ad43ddacSmrg    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
20300974d292Smrg    ret = radeon_cs_space_check(info->cs);
20310974d292Smrg    if (ret) {
20320974d292Smrg        goto copy;
2033ad43ddacSmrg    }
2034ad43ddacSmrg
2035ad43ddacSmrg    src_obj.pitch = src_pitch_hw;
2036ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
2037ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
2038ad43ddacSmrg    src_obj.offset = 0;
2039ad43ddacSmrg    src_obj.bpp = bpp;
2040ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
2041ad43ddacSmrg    src_obj.bo = radeon_get_pixmap_bo(pSrc);
2042166b61b6Smrg    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
2043f3a0071aSrjs#ifdef XF86DRM_MODE
2044f3a0071aSrjs    src_obj.surface = radeon_get_pixmap_surface(pSrc);
2045f3a0071aSrjs#endif
2046921a55d8Smrg
2047b13dfe66Smrg    dst_obj.pitch = scratch_pitch;
2048ad43ddacSmrg    dst_obj.width = w;
2049ad43ddacSmrg    dst_obj.height = h;
2050ad43ddacSmrg    dst_obj.offset = 0;
2051ad43ddacSmrg    dst_obj.bo = scratch;
2052ad43ddacSmrg    dst_obj.bpp = bpp;
2053ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
2054166b61b6Smrg    dst_obj.tiling_flags = 0;
2055f3a0071aSrjs#ifdef XF86DRM_MODE
2056f3a0071aSrjs    dst_obj.surface = NULL;
2057f3a0071aSrjs#endif
2058ad43ddacSmrg
2059ad43ddacSmrg    if (!R600SetAccelState(pScrn,
2060ad43ddacSmrg			   &src_obj,
2061ad43ddacSmrg			   NULL,
2062ad43ddacSmrg			   &dst_obj,
2063ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
2064ad43ddacSmrg			   3, 0xffffffff)) {
20650974d292Smrg        goto copy;
2066ad43ddacSmrg    }
2067ad43ddacSmrg
2068ad43ddacSmrg    /* blit from vram to gart */
2069ad43ddacSmrg    R600DoPrepareCopy(pScrn);
2070ad43ddacSmrg    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
2071ad43ddacSmrg    R600DoCopy(pScrn);
20720974d292Smrg    copy_src = scratch;
2073b13dfe66Smrg    copy_pitch = scratch_pitch * (bpp / 8);
20740974d292Smrg    flush = TRUE;
2075ad43ddacSmrg
20760974d292Smrgcopy:
20770974d292Smrg    if (flush && info->cs)
2078ad43ddacSmrg	radeon_cs_flush_indirect(pScrn);
2079ad43ddacSmrg
20800974d292Smrg    ret = radeon_bo_map(copy_src, 0);
20810974d292Smrg    if (ret) {
20820974d292Smrg	ErrorF("failed to map pixmap: %d\n", ret);
2083ad43ddacSmrg        r = FALSE;
2084ad43ddacSmrg        goto out;
2085ad43ddacSmrg    }
2086ad43ddacSmrg    r = TRUE;
2087ad43ddacSmrg    w *= bpp / 8;
20880974d292Smrg    if (copy_src == driver_priv->bo)
20890974d292Smrg	size = y * copy_pitch + x * bpp / 8;
20900974d292Smrg    else
20910974d292Smrg	size = 0;
2092ad43ddacSmrg    while (h--) {
20930974d292Smrg        memcpy(dst, copy_src->ptr + size, w);
20940974d292Smrg        size += copy_pitch;
2095ad43ddacSmrg        dst += dst_pitch;
2096ad43ddacSmrg    }
20970974d292Smrg    radeon_bo_unmap(copy_src);
2098ad43ddacSmrgout:
20990974d292Smrg    if (scratch)
21000974d292Smrg	radeon_bo_unref(scratch);
2101ad43ddacSmrg    return r;
2102ad43ddacSmrg}
2103ad43ddacSmrg#endif
2104ad43ddacSmrg
2105b7e1c893Smrgstatic int
2106b7e1c893SmrgR600MarkSync(ScreenPtr pScreen)
2107b7e1c893Smrg{
2108c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2109b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2110b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2111b7e1c893Smrg
2112b7e1c893Smrg    return ++accel_state->exaSyncMarker;
2113b7e1c893Smrg
2114b7e1c893Smrg}
2115b7e1c893Smrg
2116b7e1c893Smrgstatic void
2117b7e1c893SmrgR600Sync(ScreenPtr pScreen, int marker)
2118b7e1c893Smrg{
2119c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2120b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2121b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2122b7e1c893Smrg
2123b7e1c893Smrg    if (accel_state->exaMarkerSynced != marker) {
2124ad43ddacSmrg#ifdef XF86DRM_MODE
2125ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2126ad43ddacSmrg	if (!info->cs)
2127ad43ddacSmrg#endif
2128ad43ddacSmrg#endif
2129ad43ddacSmrg	    RADEONWaitForIdleCP(pScrn);
2130b7e1c893Smrg	accel_state->exaMarkerSynced = marker;
2131b7e1c893Smrg    }
2132b7e1c893Smrg
2133b7e1c893Smrg}
2134b7e1c893Smrg
2135b7e1c893Smrgstatic Bool
2136b7e1c893SmrgR600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
2137b7e1c893Smrg{
2138b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2139b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2140b7e1c893Smrg
2141b7e1c893Smrg    /* 512 bytes per shader for now */
2142b7e1c893Smrg    int size = 512 * 9;
2143b7e1c893Smrg
2144b7e1c893Smrg    accel_state->shaders = NULL;
2145b7e1c893Smrg
2146ad43ddacSmrg#ifdef XF86DRM_MODE
2147ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2148ad43ddacSmrg    if (info->cs) {
2149ad43ddacSmrg	accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
2150ad43ddacSmrg						 RADEON_GEM_DOMAIN_VRAM, 0);
2151ad43ddacSmrg	if (accel_state->shaders_bo == NULL) {
2152ad43ddacSmrg	    ErrorF("Allocating shader failed\n");
2153ad43ddacSmrg	    return FALSE;
2154ad43ddacSmrg	}
2155ad43ddacSmrg	return TRUE;
2156ad43ddacSmrg    } else
2157ad43ddacSmrg#endif
2158ad43ddacSmrg#endif
2159ad43ddacSmrg    {
2160ad43ddacSmrg	accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
2161ad43ddacSmrg						 TRUE, NULL, NULL);
2162ad43ddacSmrg
2163ad43ddacSmrg	if (accel_state->shaders == NULL)
2164ad43ddacSmrg	    return FALSE;
2165ad43ddacSmrg    }
2166b7e1c893Smrg
2167b7e1c893Smrg    return TRUE;
2168b7e1c893Smrg}
2169b7e1c893Smrg
2170b7e1c893SmrgBool
2171b7e1c893SmrgR600LoadShaders(ScrnInfoPtr pScrn)
2172b7e1c893Smrg{
2173b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2174b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2175b7e1c893Smrg    RADEONChipFamily ChipSet = info->ChipFamily;
2176b7e1c893Smrg    uint32_t *shader;
2177ad43ddacSmrg#ifdef XF86DRM_MODE
2178ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2179ad43ddacSmrg    int ret;
2180ad43ddacSmrg
2181ad43ddacSmrg    if (info->cs) {
2182ad43ddacSmrg	ret = radeon_bo_map(accel_state->shaders_bo, 1);
2183ad43ddacSmrg	if (ret) {
2184ad43ddacSmrg	    FatalError("failed to map shader %d\n", ret);
2185ad43ddacSmrg	    return FALSE;
2186ad43ddacSmrg	}
2187ad43ddacSmrg	shader = accel_state->shaders_bo->ptr;
2188ad43ddacSmrg    } else
2189ad43ddacSmrg#endif
2190ad43ddacSmrg#endif
2191ad43ddacSmrg	shader = (pointer)((char *)info->FB + accel_state->shaders->offset);
2192b7e1c893Smrg
2193b7e1c893Smrg    /*  solid vs --------------------------------------- */
2194b7e1c893Smrg    accel_state->solid_vs_offset = 0;
2195b7e1c893Smrg    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
2196b7e1c893Smrg
2197b7e1c893Smrg    /*  solid ps --------------------------------------- */
2198b7e1c893Smrg    accel_state->solid_ps_offset = 512;
2199b7e1c893Smrg    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
2200b7e1c893Smrg
2201b7e1c893Smrg    /*  copy vs --------------------------------------- */
2202b7e1c893Smrg    accel_state->copy_vs_offset = 1024;
2203b7e1c893Smrg    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
2204b7e1c893Smrg
2205b7e1c893Smrg    /*  copy ps --------------------------------------- */
2206b7e1c893Smrg    accel_state->copy_ps_offset = 1536;
2207b7e1c893Smrg    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2208b7e1c893Smrg
2209b7e1c893Smrg    /*  comp vs --------------------------------------- */
2210b7e1c893Smrg    accel_state->comp_vs_offset = 2048;
2211b7e1c893Smrg    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2212b7e1c893Smrg
2213b7e1c893Smrg    /*  comp ps --------------------------------------- */
2214b7e1c893Smrg    accel_state->comp_ps_offset = 2560;
2215b7e1c893Smrg    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2216b7e1c893Smrg
2217b7e1c893Smrg    /*  xv vs --------------------------------------- */
22180974d292Smrg    accel_state->xv_vs_offset = 3072;
2219b7e1c893Smrg    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2220b7e1c893Smrg
2221b7e1c893Smrg    /*  xv ps --------------------------------------- */
22220974d292Smrg    accel_state->xv_ps_offset = 3584;
2223b7e1c893Smrg    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2224b7e1c893Smrg
2225ad43ddacSmrg#ifdef XF86DRM_MODE
2226ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2227ad43ddacSmrg    if (info->cs) {
2228ad43ddacSmrg	radeon_bo_unmap(accel_state->shaders_bo);
2229ad43ddacSmrg    }
2230ad43ddacSmrg#endif
2231ad43ddacSmrg#endif
2232ad43ddacSmrg
2233b7e1c893Smrg    return TRUE;
2234b7e1c893Smrg}
2235b7e1c893Smrg
2236b7e1c893Smrgstatic Bool
2237b7e1c893SmrgR600PrepareAccess(PixmapPtr pPix, int index)
2238b7e1c893Smrg{
2239c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
2240b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2241b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
2242b7e1c893Smrg
2243b7e1c893Smrg    /* flush HDP read/write caches */
2244b7e1c893Smrg    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2245b7e1c893Smrg
2246b7e1c893Smrg    return TRUE;
2247b7e1c893Smrg}
2248b7e1c893Smrg
2249b7e1c893Smrgstatic void
2250b7e1c893SmrgR600FinishAccess(PixmapPtr pPix, int index)
2251b7e1c893Smrg{
2252c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
2253b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2254b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
2255b7e1c893Smrg
2256b7e1c893Smrg    /* flush HDP read/write caches */
2257b7e1c893Smrg    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2258b7e1c893Smrg
2259b7e1c893Smrg}
2260b7e1c893Smrg
2261b7e1c893SmrgBool
2262b7e1c893SmrgR600DrawInit(ScreenPtr pScreen)
2263b7e1c893Smrg{
2264c135ecebSveego    ScrnInfoPtr pScrn =  xf86ScreenToScrn(pScreen);
2265b7e1c893Smrg    RADEONInfoPtr info   = RADEONPTR(pScrn);
2266b7e1c893Smrg
2267b7e1c893Smrg    if (info->accel_state->exa == NULL) {
2268b7e1c893Smrg	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2269b7e1c893Smrg	return FALSE;
2270b7e1c893Smrg    }
2271b7e1c893Smrg
2272b7e1c893Smrg    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2273b7e1c893Smrg    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2274b7e1c893Smrg
2275b7e1c893Smrg    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2276b7e1c893Smrg    info->accel_state->exa->Solid = R600Solid;
2277b7e1c893Smrg    info->accel_state->exa->DoneSolid = R600DoneSolid;
2278b7e1c893Smrg
2279b7e1c893Smrg    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2280b7e1c893Smrg    info->accel_state->exa->Copy = R600Copy;
2281b7e1c893Smrg    info->accel_state->exa->DoneCopy = R600DoneCopy;
2282b7e1c893Smrg
2283b7e1c893Smrg    info->accel_state->exa->MarkSync = R600MarkSync;
2284b7e1c893Smrg    info->accel_state->exa->WaitMarker = R600Sync;
2285b7e1c893Smrg
2286ad43ddacSmrg#ifdef XF86DRM_MODE
2287ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2288ad43ddacSmrg    if (info->cs) {
2289ad43ddacSmrg	info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
2290ad43ddacSmrg	info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
2291ad43ddacSmrg	info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
2292ad43ddacSmrg	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
2293ad43ddacSmrg	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
2294ad43ddacSmrg	info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
2295ad43ddacSmrg	info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
22960974d292Smrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5)
22970974d292Smrg        info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
22980974d292Smrg#endif
2299ad43ddacSmrg    } else
2300ad43ddacSmrg#endif
2301ad43ddacSmrg#endif
2302ad43ddacSmrg    {
2303ad43ddacSmrg	info->accel_state->exa->PrepareAccess = R600PrepareAccess;
2304ad43ddacSmrg	info->accel_state->exa->FinishAccess = R600FinishAccess;
2305ad43ddacSmrg
2306ad43ddacSmrg	/* AGP seems to have problems with gart transfers */
2307ad43ddacSmrg	if (info->accelDFS) {
2308ad43ddacSmrg	    info->accel_state->exa->UploadToScreen = R600UploadToScreen;
2309ad43ddacSmrg	    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen;
2310ad43ddacSmrg	}
2311b7e1c893Smrg    }
2312b7e1c893Smrg
2313b7e1c893Smrg    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
2314b7e1c893Smrg#ifdef EXA_SUPPORTS_PREPARE_AUX
2315b7e1c893Smrg    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
2316ad43ddacSmrg#endif
2317ad43ddacSmrg
2318ad43ddacSmrg#ifdef XF86DRM_MODE
2319ad43ddacSmrg#ifdef EXA_HANDLES_PIXMAPS
2320ad43ddacSmrg    if (info->cs) {
2321ad43ddacSmrg	info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
2322ad43ddacSmrg#ifdef EXA_MIXED_PIXMAPS
2323ad43ddacSmrg	info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
2324ad43ddacSmrg#endif
2325ad43ddacSmrg    }
2326ad43ddacSmrg#endif
2327b7e1c893Smrg#endif
2328b7e1c893Smrg    info->accel_state->exa->pixmapOffsetAlign = 256;
2329b7e1c893Smrg    info->accel_state->exa->pixmapPitchAlign = 256;
2330b7e1c893Smrg
2331b7e1c893Smrg    info->accel_state->exa->CheckComposite = R600CheckComposite;
2332b7e1c893Smrg    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2333b7e1c893Smrg    info->accel_state->exa->Composite = R600Composite;
2334b7e1c893Smrg    info->accel_state->exa->DoneComposite = R600DoneComposite;
2335b7e1c893Smrg
2336b7e1c893Smrg#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
2337b7e1c893Smrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
2338b7e1c893Smrg
2339b7e1c893Smrg    info->accel_state->exa->maxPitchBytes = 32768;
2340b7e1c893Smrg    info->accel_state->exa->maxX = 8192;
2341b7e1c893Smrg#else
2342b7e1c893Smrg    info->accel_state->exa->maxX = 8192;
2343b7e1c893Smrg#endif
2344b7e1c893Smrg    info->accel_state->exa->maxY = 8192;
2345b7e1c893Smrg
2346b7e1c893Smrg    /* not supported yet */
2347ad43ddacSmrg    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2348ad43ddacSmrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2349ad43ddacSmrg	info->accel_state->vsync = TRUE;
2350ad43ddacSmrg    } else
2351ad43ddacSmrg	info->accel_state->vsync = FALSE;
2352b7e1c893Smrg
2353b7e1c893Smrg    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
23542f39173dSmrg	free(info->accel_state->exa);
2355b7e1c893Smrg	return FALSE;
2356b7e1c893Smrg    }
2357b7e1c893Smrg
2358ad43ddacSmrg#ifdef XF86DRM_MODE
2359ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2360ad43ddacSmrg    if (!info->cs)
2361ad43ddacSmrg#endif
2362ad43ddacSmrg#endif
2363ad43ddacSmrg	if (!info->gartLocation)
2364ad43ddacSmrg	    return FALSE;
2365b7e1c893Smrg
2366b7e1c893Smrg    info->accel_state->XInited3D = FALSE;
2367b7e1c893Smrg    info->accel_state->copy_area = NULL;
2368ad43ddacSmrg    info->accel_state->src_obj[0].bo = NULL;
2369ad43ddacSmrg    info->accel_state->src_obj[1].bo = NULL;
2370ad43ddacSmrg    info->accel_state->dst_obj.bo = NULL;
2371ad43ddacSmrg    info->accel_state->copy_area_bo = NULL;
2372921a55d8Smrg    info->accel_state->vbo.vb_start_op = -1;
23730974d292Smrg    info->accel_state->finish_op = r600_finish_op;
2374921a55d8Smrg    info->accel_state->vbo.verts_per_op = 3;
23750974d292Smrg    RADEONVlineHelperClear(pScrn);
2376ad43ddacSmrg
2377ad43ddacSmrg#ifdef XF86DRM_MODE
2378ad43ddacSmrg    radeon_vbo_init_lists(pScrn);
2379ad43ddacSmrg#endif
2380b7e1c893Smrg
2381b7e1c893Smrg    if (!R600AllocShaders(pScrn, pScreen))
2382b7e1c893Smrg	return FALSE;
2383b7e1c893Smrg
2384b7e1c893Smrg    if (!R600LoadShaders(pScrn))
2385b7e1c893Smrg	return FALSE;
2386b7e1c893Smrg
2387b7e1c893Smrg    exaMarkSync(pScreen);
2388b7e1c893Smrg
2389b7e1c893Smrg    return TRUE;
2390b7e1c893Smrg
2391b7e1c893Smrg}
2392b7e1c893Smrg
2393