r600_exa.c revision c73da4db
1b7e1c893Smrg/*
2b7e1c893Smrg * Copyright 2008 Advanced Micro Devices, Inc.
3b7e1c893Smrg *
4b7e1c893Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b7e1c893Smrg * copy of this software and associated documentation files (the "Software"),
6b7e1c893Smrg * to deal in the Software without restriction, including without limitation
7b7e1c893Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b7e1c893Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b7e1c893Smrg * Software is furnished to do so, subject to the following conditions:
10b7e1c893Smrg *
11b7e1c893Smrg * The above copyright notice and this permission notice (including the next
12b7e1c893Smrg * paragraph) shall be included in all copies or substantial portions of the
13b7e1c893Smrg * Software.
14b7e1c893Smrg *
15b7e1c893Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b7e1c893Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b7e1c893Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b7e1c893Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b7e1c893Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b7e1c893Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b7e1c893Smrg * SOFTWARE.
22b7e1c893Smrg *
23b7e1c893Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24b7e1c893Smrg *
25b7e1c893Smrg */
26b7e1c893Smrg
27b7e1c893Smrg#ifdef HAVE_CONFIG_H
28b7e1c893Smrg#include "config.h"
29b7e1c893Smrg#endif
30b7e1c893Smrg
31b7e1c893Smrg#include "xf86.h"
32b7e1c893Smrg
33b7e1c893Smrg#include "exa.h"
34b7e1c893Smrg
35b7e1c893Smrg#include "radeon.h"
36b7e1c893Smrg#include "radeon_macros.h"
37b7e1c893Smrg#include "radeon_reg.h"
38b7e1c893Smrg#include "r600_shader.h"
39b7e1c893Smrg#include "r600_reg.h"
40b7e1c893Smrg#include "r600_state.h"
410974d292Smrg#include "radeon_exa_shared.h"
42ad43ddacSmrg#include "radeon_vbo.h"
43ad43ddacSmrg
44b7e1c893Smrg/* #define SHOW_VERTEXES */
45b7e1c893Smrg
46ad43ddacSmrgBool
47ad43ddacSmrgR600SetAccelState(ScrnInfoPtr pScrn,
48ad43ddacSmrg		  struct r600_accel_object *src0,
49ad43ddacSmrg		  struct r600_accel_object *src1,
50ad43ddacSmrg		  struct r600_accel_object *dst,
51ad43ddacSmrg		  uint32_t vs_offset, uint32_t ps_offset,
52ad43ddacSmrg		  int rop, Pixel planemask)
53ad43ddacSmrg{
54ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
55ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
56b13dfe66Smrg    uint32_t pitch_align = 0x7, base_align = 0xff;
57b13dfe66Smrg#if defined(XF86DRM_MODE)
58b13dfe66Smrg    int ret;
59b13dfe66Smrg#endif
60ad43ddacSmrg
61ad43ddacSmrg    if (src0) {
62ad43ddacSmrg	memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object));
63ad43ddacSmrg	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
64b13dfe66Smrg#if defined(XF86DRM_MODE)
65f3a0071aSrjs	if (info->cs && src0->surface) {
66f3a0071aSrjs		accel_state->src_size[0] = src0->surface->bo_size;
67b13dfe66Smrg	}
68b13dfe66Smrg#endif
69f3a0071aSrjs
70b13dfe66Smrg	/* bad pitch */
71b13dfe66Smrg	if (accel_state->src_obj[0].pitch & pitch_align)
72b13dfe66Smrg	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
73b13dfe66Smrg
74b13dfe66Smrg	/* bad offset */
75b13dfe66Smrg	if (accel_state->src_obj[0].offset & base_align)
76b13dfe66Smrg	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[0].offset));
77b13dfe66Smrg
78ad43ddacSmrg    } else {
79ad43ddacSmrg	memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
80ad43ddacSmrg	accel_state->src_size[0] = 0;
81ad43ddacSmrg    }
82ad43ddacSmrg
83ad43ddacSmrg    if (src1) {
84ad43ddacSmrg	memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object));
85ad43ddacSmrg	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
86b13dfe66Smrg#if defined(XF86DRM_MODE)
87f3a0071aSrjs	if (info->cs && src1->surface) {
88f3a0071aSrjs		accel_state->src_size[1] = src1->surface->bo_size;
89b13dfe66Smrg	}
90b13dfe66Smrg#endif
91f3a0071aSrjs
92b13dfe66Smrg	/* bad pitch */
93b13dfe66Smrg	if (accel_state->src_obj[1].pitch & pitch_align)
94b13dfe66Smrg	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
95b13dfe66Smrg
96b13dfe66Smrg	/* bad offset */
97b13dfe66Smrg	if (accel_state->src_obj[1].offset & base_align)
98b13dfe66Smrg	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[1].offset));
99ad43ddacSmrg    } else {
100ad43ddacSmrg	memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
101ad43ddacSmrg	accel_state->src_size[1] = 0;
102ad43ddacSmrg    }
103ad43ddacSmrg
104ad43ddacSmrg    if (dst) {
105ad43ddacSmrg	memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object));
106ad43ddacSmrg	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
107b13dfe66Smrg#if defined(XF86DRM_MODE)
108f3a0071aSrjs	if (info->cs && dst->surface) {
109f3a0071aSrjs		accel_state->dst_size = dst->surface->bo_size;
110f3a0071aSrjs	} else
111b13dfe66Smrg#endif
112f3a0071aSrjs	{
113f3a0071aSrjs		accel_state->dst_obj.tiling_flags = 0;
114f3a0071aSrjs	}
115b13dfe66Smrg	if (accel_state->dst_obj.pitch & pitch_align)
116b13dfe66Smrg	    RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
117b13dfe66Smrg
118b13dfe66Smrg	if (accel_state->dst_obj.offset & base_align)
119b13dfe66Smrg	    RADEON_FALLBACK(("Bad dst offset 0x%08x\n", accel_state->dst_obj.offset));
120ad43ddacSmrg    } else {
121ad43ddacSmrg	memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
122ad43ddacSmrg	accel_state->dst_size = 0;
123ad43ddacSmrg    }
124ad43ddacSmrg
125f3a0071aSrjs#ifdef XF86DRM_MODE
126f3a0071aSrjs    if (info->cs && CS_FULL(info->cs))
127f3a0071aSrjs	radeon_cs_flush_indirect(pScrn);
128f3a0071aSrjs#endif
129f3a0071aSrjs
130ad43ddacSmrg    accel_state->rop = rop;
131ad43ddacSmrg    accel_state->planemask = planemask;
132ad43ddacSmrg
133ad43ddacSmrg    accel_state->vs_size = 512;
134ad43ddacSmrg    accel_state->ps_size = 512;
135ad43ddacSmrg#if defined(XF86DRM_MODE)
136ad43ddacSmrg    if (info->cs) {
137ad43ddacSmrg	accel_state->vs_mc_addr = vs_offset;
138ad43ddacSmrg	accel_state->ps_mc_addr = ps_offset;
139ad43ddacSmrg
140ad43ddacSmrg	radeon_cs_space_reset_bos(info->cs);
141ad43ddacSmrg	radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
142ad43ddacSmrg					  RADEON_GEM_DOMAIN_VRAM, 0);
143ad43ddacSmrg	if (accel_state->src_obj[0].bo)
144ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
145ad43ddacSmrg					      accel_state->src_obj[0].domain, 0);
146ad43ddacSmrg	if (accel_state->src_obj[1].bo)
147ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
148ad43ddacSmrg					      accel_state->src_obj[1].domain, 0);
149ad43ddacSmrg	if (accel_state->dst_obj.bo)
150ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
151ad43ddacSmrg					      0, accel_state->dst_obj.domain);
152ad43ddacSmrg	ret = radeon_cs_space_check(info->cs);
153ad43ddacSmrg	if (ret)
154ad43ddacSmrg	    RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
155ad43ddacSmrg
156ad43ddacSmrg    } else
157ad43ddacSmrg#endif
158ad43ddacSmrg    {
159ad43ddacSmrg	accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
160ad43ddacSmrg	    vs_offset;
161ad43ddacSmrg	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
162ad43ddacSmrg	    ps_offset;
163ad43ddacSmrg    }
164ad43ddacSmrg
165ad43ddacSmrg    return TRUE;
166ad43ddacSmrg}
167ad43ddacSmrg
168b7e1c893Smrgstatic Bool
169b7e1c893SmrgR600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
170b7e1c893Smrg{
171c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
172b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
173b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
174b7e1c893Smrg    cb_config_t     cb_conf;
175b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
176b7e1c893Smrg    uint32_t a, r, g, b;
177b7e1c893Smrg    float ps_alu_consts[4];
178ad43ddacSmrg    struct r600_accel_object dst;
179b7e1c893Smrg
1800974d292Smrg    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
181ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype failed\n"));
1820974d292Smrg    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
183ad43ddacSmrg	RADEON_FALLBACK(("invalid planemask\n"));
184b7e1c893Smrg
185ad43ddacSmrg#if defined(XF86DRM_MODE)
186ad43ddacSmrg    if (info->cs) {
187ad43ddacSmrg	dst.offset = 0;
188ad43ddacSmrg	dst.bo = radeon_get_pixmap_bo(pPix);
189166b61b6Smrg	dst.tiling_flags = radeon_get_pixmap_tiling(pPix);
190f3a0071aSrjs	dst.surface = radeon_get_pixmap_surface(pPix);
191ad43ddacSmrg    } else
192ad43ddacSmrg#endif
193ad43ddacSmrg    {
194ad43ddacSmrg	dst.offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
195ad43ddacSmrg	dst.bo = NULL;
196ad43ddacSmrg    }
197b7e1c893Smrg
198ad43ddacSmrg    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
199ad43ddacSmrg    dst.width = pPix->drawable.width;
200ad43ddacSmrg    dst.height = pPix->drawable.height;
201ad43ddacSmrg    dst.bpp = pPix->drawable.bitsPerPixel;
202ad43ddacSmrg    dst.domain = RADEON_GEM_DOMAIN_VRAM;
2030974d292Smrg
204ad43ddacSmrg    if (!R600SetAccelState(pScrn,
205ad43ddacSmrg			   NULL,
206ad43ddacSmrg			   NULL,
207ad43ddacSmrg			   &dst,
208ad43ddacSmrg			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
209ad43ddacSmrg			   alu, pm))
210b7e1c893Smrg	return FALSE;
211b7e1c893Smrg
212b7e1c893Smrg    CLEAR (cb_conf);
213b7e1c893Smrg    CLEAR (vs_conf);
214b7e1c893Smrg    CLEAR (ps_conf);
215b7e1c893Smrg
216921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
2170974d292Smrg    radeon_cp_start(pScrn);
218b7e1c893Smrg
219921a55d8Smrg    r600_set_default_state(pScrn, accel_state->ib);
220b7e1c893Smrg
221921a55d8Smrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
222921a55d8Smrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
223921a55d8Smrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
224b7e1c893Smrg
225b7e1c893Smrg    /* Shader */
226b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
2270974d292Smrg    vs_conf.shader_size         = accel_state->vs_size;
228b7e1c893Smrg    vs_conf.num_gprs            = 2;
229b7e1c893Smrg    vs_conf.stack_size          = 0;
230ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
231921a55d8Smrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
232b7e1c893Smrg
233b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
2340974d292Smrg    ps_conf.shader_size         = accel_state->ps_size;
235b7e1c893Smrg    ps_conf.num_gprs            = 1;
236b7e1c893Smrg    ps_conf.stack_size          = 0;
237b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
238b7e1c893Smrg    ps_conf.clamp_consts        = 0;
239b7e1c893Smrg    ps_conf.export_mode         = 2;
240ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
241921a55d8Smrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
242b7e1c893Smrg
243b7e1c893Smrg    cb_conf.id = 0;
244ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
245ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
246ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
247ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
248f3a0071aSrjs#ifdef XF86DRM_MODE
249c73da4dbSmrg    if (info->cs)
250c73da4dbSmrg        cb_conf.surface = accel_state->dst_obj.surface;
251f3a0071aSrjs#endif
252b7e1c893Smrg
253ad43ddacSmrg    if (accel_state->dst_obj.bpp == 8) {
254b7e1c893Smrg	cb_conf.format = COLOR_8;
255b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
256ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 16) {
257b7e1c893Smrg	cb_conf.format = COLOR_5_6_5;
258b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
259b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
260b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN16;
261b13dfe66Smrg#endif
262b7e1c893Smrg    } else {
263b7e1c893Smrg	cb_conf.format = COLOR_8_8_8_8;
264b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
265b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
266b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN32;
267b13dfe66Smrg#endif
268b7e1c893Smrg    }
269b7e1c893Smrg    cb_conf.source_format = 1;
270b7e1c893Smrg    cb_conf.blend_clamp = 1;
2710974d292Smrg    /* Render setup */
2720974d292Smrg    if (accel_state->planemask & 0x000000ff)
273b13dfe66Smrg	cb_conf.pmask |= 4; /* B */
2740974d292Smrg    if (accel_state->planemask & 0x0000ff00)
275b13dfe66Smrg	cb_conf.pmask |= 2; /* G */
2760974d292Smrg    if (accel_state->planemask & 0x00ff0000)
277b13dfe66Smrg	cb_conf.pmask |= 1; /* R */
2780974d292Smrg    if (accel_state->planemask & 0xff000000)
279b13dfe66Smrg	cb_conf.pmask |= 8; /* A */
280b13dfe66Smrg    cb_conf.rop = accel_state->rop;
281b13dfe66Smrg    if (accel_state->dst_obj.tiling_flags == 0)
282f3a0071aSrjs	cb_conf.array_mode = 0;
283b13dfe66Smrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
284b13dfe66Smrg
285b13dfe66Smrg    r600_set_spi(pScrn, accel_state->ib, 0, 0);
286b7e1c893Smrg
287b7e1c893Smrg    /* PS alu constants */
288ad43ddacSmrg    if (accel_state->dst_obj.bpp == 16) {
289b7e1c893Smrg	r = (fg >> 11) & 0x1f;
290b7e1c893Smrg	g = (fg >> 5) & 0x3f;
291b7e1c893Smrg	b = (fg >> 0) & 0x1f;
292b7e1c893Smrg	ps_alu_consts[0] = (float)r / 31; /* R */
293b7e1c893Smrg	ps_alu_consts[1] = (float)g / 63; /* G */
294b7e1c893Smrg	ps_alu_consts[2] = (float)b / 31; /* B */
295b7e1c893Smrg	ps_alu_consts[3] = 1.0; /* A */
296ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 8) {
297b7e1c893Smrg	a = (fg >> 0) & 0xff;
298b7e1c893Smrg	ps_alu_consts[0] = 0.0; /* R */
299b7e1c893Smrg	ps_alu_consts[1] = 0.0; /* G */
300b7e1c893Smrg	ps_alu_consts[2] = 0.0; /* B */
301b7e1c893Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
302b7e1c893Smrg    } else {
303b7e1c893Smrg	a = (fg >> 24) & 0xff;
304b7e1c893Smrg	r = (fg >> 16) & 0xff;
305b7e1c893Smrg	g = (fg >> 8) & 0xff;
306b7e1c893Smrg	b = (fg >> 0) & 0xff;
307b7e1c893Smrg	ps_alu_consts[0] = (float)r / 255; /* R */
308b7e1c893Smrg	ps_alu_consts[1] = (float)g / 255; /* G */
309b7e1c893Smrg	ps_alu_consts[2] = (float)b / 255; /* B */
310b7e1c893Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
311b7e1c893Smrg    }
312921a55d8Smrg    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
313921a55d8Smrg			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
314b7e1c893Smrg
315ad43ddacSmrg    if (accel_state->vsync)
3160974d292Smrg	RADEONVlineHelperClear(pScrn);
317b7e1c893Smrg
318f3a0071aSrjs    accel_state->dst_pix = pPix;
319f3a0071aSrjs    accel_state->fg = fg;
320f3a0071aSrjs
321b7e1c893Smrg    return TRUE;
322b7e1c893Smrg}
323b7e1c893Smrg
324f3a0071aSrjsstatic void
325f3a0071aSrjsR600DoneSolid(PixmapPtr pPix)
326f3a0071aSrjs{
327c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
328f3a0071aSrjs    RADEONInfoPtr info = RADEONPTR(pScrn);
329f3a0071aSrjs    struct radeon_accel_state *accel_state = info->accel_state;
330f3a0071aSrjs
331f3a0071aSrjs    if (accel_state->vsync)
332f3a0071aSrjs	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
333f3a0071aSrjs				accel_state->vline_crtc,
334f3a0071aSrjs				accel_state->vline_y1,
335f3a0071aSrjs				accel_state->vline_y2);
336f3a0071aSrjs
337f3a0071aSrjs    r600_finish_op(pScrn, 8);
338f3a0071aSrjs}
339b7e1c893Smrg
340b7e1c893Smrgstatic void
341b7e1c893SmrgR600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
342b7e1c893Smrg{
343c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
344b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
345b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
346b7e1c893Smrg    float *vb;
347b7e1c893Smrg
348f3a0071aSrjs#ifdef XF86DRM_MODE
349f3a0071aSrjs    if (info->cs && CS_FULL(info->cs)) {
350f3a0071aSrjs	R600DoneSolid(info->accel_state->dst_pix);
351f3a0071aSrjs	radeon_cs_flush_indirect(pScrn);
352f3a0071aSrjs	R600PrepareSolid(accel_state->dst_pix,
353f3a0071aSrjs			 accel_state->rop,
354f3a0071aSrjs			 accel_state->planemask,
355f3a0071aSrjs			 accel_state->fg);
356f3a0071aSrjs    }
357f3a0071aSrjs#endif
358f3a0071aSrjs
359ad43ddacSmrg    if (accel_state->vsync)
3600974d292Smrg	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
361b7e1c893Smrg
362921a55d8Smrg    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
363b7e1c893Smrg
364b7e1c893Smrg    vb[0] = (float)x1;
365b7e1c893Smrg    vb[1] = (float)y1;
366b7e1c893Smrg
367b7e1c893Smrg    vb[2] = (float)x1;
368b7e1c893Smrg    vb[3] = (float)y2;
369b7e1c893Smrg
370b7e1c893Smrg    vb[4] = (float)x2;
371b7e1c893Smrg    vb[5] = (float)y2;
372b7e1c893Smrg
373921a55d8Smrg    radeon_vbo_commit(pScrn, &accel_state->vbo);
374b7e1c893Smrg}
375b7e1c893Smrg
376b7e1c893Smrgstatic void
377ad43ddacSmrgR600DoPrepareCopy(ScrnInfoPtr pScrn)
378b7e1c893Smrg{
379b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
380b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
381b7e1c893Smrg    cb_config_t     cb_conf;
382b7e1c893Smrg    tex_resource_t  tex_res;
383b7e1c893Smrg    tex_sampler_t   tex_samp;
384b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
385b7e1c893Smrg
386b7e1c893Smrg    CLEAR (cb_conf);
387b7e1c893Smrg    CLEAR (tex_res);
388b7e1c893Smrg    CLEAR (tex_samp);
389b7e1c893Smrg    CLEAR (vs_conf);
390b7e1c893Smrg    CLEAR (ps_conf);
391b7e1c893Smrg
392921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
3930974d292Smrg    radeon_cp_start(pScrn);
394b7e1c893Smrg
395921a55d8Smrg    r600_set_default_state(pScrn, accel_state->ib);
396b7e1c893Smrg
397921a55d8Smrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
398921a55d8Smrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
399921a55d8Smrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
400b7e1c893Smrg
401b7e1c893Smrg    /* Shader */
402b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
4030974d292Smrg    vs_conf.shader_size         = accel_state->vs_size;
404b7e1c893Smrg    vs_conf.num_gprs            = 2;
405b7e1c893Smrg    vs_conf.stack_size          = 0;
406ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
407921a55d8Smrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
408b7e1c893Smrg
409b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
4100974d292Smrg    ps_conf.shader_size         = accel_state->ps_size;
411b7e1c893Smrg    ps_conf.num_gprs            = 1;
412b7e1c893Smrg    ps_conf.stack_size          = 0;
413b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
414b7e1c893Smrg    ps_conf.clamp_consts        = 0;
415b7e1c893Smrg    ps_conf.export_mode         = 2;
416ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
417921a55d8Smrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
418b7e1c893Smrg
419b7e1c893Smrg    /* Texture */
420b7e1c893Smrg    tex_res.id                  = 0;
421ad43ddacSmrg    tex_res.w                   = accel_state->src_obj[0].width;
422ad43ddacSmrg    tex_res.h                   = accel_state->src_obj[0].height;
423ad43ddacSmrg    tex_res.pitch               = accel_state->src_obj[0].pitch;
424b7e1c893Smrg    tex_res.depth               = 0;
425b7e1c893Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
426ad43ddacSmrg    tex_res.base                = accel_state->src_obj[0].offset;
427ad43ddacSmrg    tex_res.mip_base            = accel_state->src_obj[0].offset;
4280974d292Smrg    tex_res.size                = accel_state->src_size[0];
429ad43ddacSmrg    tex_res.bo                  = accel_state->src_obj[0].bo;
430ad43ddacSmrg    tex_res.mip_bo              = accel_state->src_obj[0].bo;
431f3a0071aSrjs#ifdef XF86DRM_MODE
432c73da4dbSmrg    if (info->cs)
433c73da4dbSmrg        tex_res.surface             = accel_state->src_obj[0].surface;
434f3a0071aSrjs#endif
435ad43ddacSmrg    if (accel_state->src_obj[0].bpp == 8) {
436b7e1c893Smrg	tex_res.format              = FMT_8;
437b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
438b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
439b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
440b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
441ad43ddacSmrg    } else if (accel_state->src_obj[0].bpp == 16) {
442b7e1c893Smrg	tex_res.format              = FMT_5_6_5;
443b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
444b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
445b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
446b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
447b7e1c893Smrg    } else {
448b7e1c893Smrg	tex_res.format              = FMT_8_8_8_8;
449b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
450b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
451b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
452b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
453b7e1c893Smrg    }
454b7e1c893Smrg
455b7e1c893Smrg    tex_res.request_size        = 1;
456b7e1c893Smrg    tex_res.base_level          = 0;
457b7e1c893Smrg    tex_res.last_level          = 0;
458b7e1c893Smrg    tex_res.perf_modulation     = 0;
459b13dfe66Smrg    if (accel_state->src_obj[0].tiling_flags == 0)
460b13dfe66Smrg	tex_res.tile_mode           = 1;
461921a55d8Smrg    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
462b7e1c893Smrg
463b7e1c893Smrg    tex_samp.id                 = 0;
464b7e1c893Smrg    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
465b7e1c893Smrg    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
466b7e1c893Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
467b7e1c893Smrg    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
468b7e1c893Smrg    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
469b13dfe66Smrg    tex_samp.mc_coord_truncate  = 1;
470b7e1c893Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
471b7e1c893Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
472921a55d8Smrg    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
473b7e1c893Smrg
474b7e1c893Smrg    cb_conf.id = 0;
475ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
476ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
477ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
478ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
479f3a0071aSrjs#ifdef XF86DRM_MODE
480c73da4dbSmrg    if (info->cs)
481c73da4dbSmrg        cb_conf.surface = accel_state->dst_obj.surface;
482f3a0071aSrjs#endif
483ad43ddacSmrg    if (accel_state->dst_obj.bpp == 8) {
484b7e1c893Smrg	cb_conf.format = COLOR_8;
485b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
486ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 16) {
487b7e1c893Smrg	cb_conf.format = COLOR_5_6_5;
488b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
489b7e1c893Smrg    } else {
490b7e1c893Smrg	cb_conf.format = COLOR_8_8_8_8;
491b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
492b7e1c893Smrg    }
493b7e1c893Smrg    cb_conf.source_format = 1;
494b7e1c893Smrg    cb_conf.blend_clamp = 1;
495b7e1c893Smrg
4960974d292Smrg    /* Render setup */
4970974d292Smrg    if (accel_state->planemask & 0x000000ff)
498b13dfe66Smrg	cb_conf.pmask |= 4; /* B */
4990974d292Smrg    if (accel_state->planemask & 0x0000ff00)
500b13dfe66Smrg	cb_conf.pmask |= 2; /* G */
5010974d292Smrg    if (accel_state->planemask & 0x00ff0000)
502b13dfe66Smrg	cb_conf.pmask |= 1; /* R */
5030974d292Smrg    if (accel_state->planemask & 0xff000000)
504b13dfe66Smrg	cb_conf.pmask |= 8; /* A */
505b13dfe66Smrg    cb_conf.rop = accel_state->rop;
506b13dfe66Smrg    if (accel_state->dst_obj.tiling_flags == 0)
507f3a0071aSrjs	cb_conf.array_mode = 0;
508b13dfe66Smrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
509b13dfe66Smrg
510b13dfe66Smrg    r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
511b7e1c893Smrg
512b7e1c893Smrg}
513b7e1c893Smrg
514b7e1c893Smrgstatic void
515b7e1c893SmrgR600DoCopy(ScrnInfoPtr pScrn)
516b7e1c893Smrg{
517ad43ddacSmrg    r600_finish_op(pScrn, 16);
518ad43ddacSmrg}
519ad43ddacSmrg
520ad43ddacSmrgstatic void
521ad43ddacSmrgR600DoCopyVline(PixmapPtr pPix)
522ad43ddacSmrg{
523c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
524b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
525b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
526b7e1c893Smrg
527ad43ddacSmrg    if (accel_state->vsync)
528921a55d8Smrg	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
529921a55d8Smrg				accel_state->vline_crtc,
530921a55d8Smrg				accel_state->vline_y1,
531921a55d8Smrg				accel_state->vline_y2);
532b7e1c893Smrg
533ad43ddacSmrg    r600_finish_op(pScrn, 16);
534b7e1c893Smrg}
535b7e1c893Smrg
536b7e1c893Smrgstatic void
537b7e1c893SmrgR600AppendCopyVertex(ScrnInfoPtr pScrn,
538b7e1c893Smrg		     int srcX, int srcY,
539b7e1c893Smrg		     int dstX, int dstY,
540b7e1c893Smrg		     int w, int h)
541b7e1c893Smrg{
542921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
543921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
544b7e1c893Smrg    float *vb;
545b7e1c893Smrg
546921a55d8Smrg    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
547b7e1c893Smrg
548b7e1c893Smrg    vb[0] = (float)dstX;
549b7e1c893Smrg    vb[1] = (float)dstY;
550b7e1c893Smrg    vb[2] = (float)srcX;
551b7e1c893Smrg    vb[3] = (float)srcY;
552b7e1c893Smrg
553b7e1c893Smrg    vb[4] = (float)dstX;
554b7e1c893Smrg    vb[5] = (float)(dstY + h);
555b7e1c893Smrg    vb[6] = (float)srcX;
556b7e1c893Smrg    vb[7] = (float)(srcY + h);
557b7e1c893Smrg
558b7e1c893Smrg    vb[8] = (float)(dstX + w);
559b7e1c893Smrg    vb[9] = (float)(dstY + h);
560b7e1c893Smrg    vb[10] = (float)(srcX + w);
561b7e1c893Smrg    vb[11] = (float)(srcY + h);
562b7e1c893Smrg
563921a55d8Smrg    radeon_vbo_commit(pScrn, &accel_state->vbo);
564b7e1c893Smrg}
565b7e1c893Smrg
566b7e1c893Smrgstatic Bool
567b7e1c893SmrgR600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
568b7e1c893Smrg		int xdir, int ydir,
569b7e1c893Smrg		int rop,
570b7e1c893Smrg		Pixel planemask)
571b7e1c893Smrg{
572c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
573b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
574b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
575ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
576ad43ddacSmrg
5770974d292Smrg    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
578ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype src failed\n"));
5790974d292Smrg    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
580ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype dst failed\n"));
5810974d292Smrg    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
582ad43ddacSmrg	RADEON_FALLBACK(("Invalid planemask\n"));
583ad43ddacSmrg
584ad43ddacSmrg    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
585ad43ddacSmrg    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
586ad43ddacSmrg
587ad43ddacSmrg    accel_state->same_surface = FALSE;
588ad43ddacSmrg
589ad43ddacSmrg#if defined(XF86DRM_MODE)
590ad43ddacSmrg    if (info->cs) {
591ad43ddacSmrg	src_obj.offset = 0;
592ad43ddacSmrg	dst_obj.offset = 0;
593ad43ddacSmrg	src_obj.bo = radeon_get_pixmap_bo(pSrc);
594ad43ddacSmrg	dst_obj.bo = radeon_get_pixmap_bo(pDst);
595166b61b6Smrg	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
596166b61b6Smrg	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
597f3a0071aSrjs	src_obj.surface = radeon_get_pixmap_surface(pSrc);
598f3a0071aSrjs	dst_obj.surface = radeon_get_pixmap_surface(pDst);
599ad43ddacSmrg	if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
600ad43ddacSmrg	    accel_state->same_surface = TRUE;
601ad43ddacSmrg    } else
602b7e1c893Smrg#endif
603ad43ddacSmrg    {
604ad43ddacSmrg	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
605ad43ddacSmrg	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
606ad43ddacSmrg	if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst))
607ad43ddacSmrg	    accel_state->same_surface = TRUE;
608ad43ddacSmrg	src_obj.bo = NULL;
609ad43ddacSmrg	dst_obj.bo = NULL;
610b7e1c893Smrg    }
611b7e1c893Smrg
612ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
613ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
614ad43ddacSmrg    src_obj.bpp = pSrc->drawable.bitsPerPixel;
615ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
616921a55d8Smrg
617ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
618ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
619ad43ddacSmrg    dst_obj.bpp = pDst->drawable.bitsPerPixel;
620ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
621ad43ddacSmrg
622ad43ddacSmrg    if (!R600SetAccelState(pScrn,
623ad43ddacSmrg			   &src_obj,
624ad43ddacSmrg			   NULL,
625ad43ddacSmrg			   &dst_obj,
626ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
627ad43ddacSmrg			   rop, planemask))
628b7e1c893Smrg	return FALSE;
629b7e1c893Smrg
630ad43ddacSmrg    if (accel_state->same_surface == TRUE) {
631ad43ddacSmrg#if defined(XF86DRM_MODE)
632ad43ddacSmrg	if (info->cs) {
633c73da4dbSmrg	    unsigned long size = accel_state->dst_obj.surface->bo_size;
634c73da4dbSmrg	    unsigned long align = accel_state->dst_obj.surface->bo_alignment;
635c73da4dbSmrg
636ad43ddacSmrg	    if (accel_state->copy_area_bo) {
637ad43ddacSmrg		radeon_bo_unref(accel_state->copy_area_bo);
638ad43ddacSmrg		accel_state->copy_area_bo = NULL;
639b7e1c893Smrg	    }
640f3a0071aSrjs	    accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, align,
641ad43ddacSmrg						       RADEON_GEM_DOMAIN_VRAM,
642ad43ddacSmrg						       0);
643ad43ddacSmrg	    if (accel_state->copy_area_bo == NULL)
644ad43ddacSmrg		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
645ad43ddacSmrg
646ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
647f3a0071aSrjs					      0, RADEON_GEM_DOMAIN_VRAM);
648ad43ddacSmrg	    if (radeon_cs_space_check(info->cs)) {
649ad43ddacSmrg		radeon_bo_unref(accel_state->copy_area_bo);
650ad43ddacSmrg		accel_state->copy_area_bo = NULL;
651ad43ddacSmrg		return FALSE;
652ad43ddacSmrg	    }
653ad43ddacSmrg	    accel_state->copy_area = (void*)accel_state->copy_area_bo;
654ad43ddacSmrg	} else
655ad43ddacSmrg#endif
656ad43ddacSmrg	{
657c73da4dbSmrg	    unsigned height = pDst->drawable.height;
658c73da4dbSmrg	    unsigned long size = height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
659c73da4dbSmrg
660ad43ddacSmrg	    if (accel_state->copy_area) {
661ad43ddacSmrg		exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
662ad43ddacSmrg		accel_state->copy_area = NULL;
663ad43ddacSmrg	    }
664ad43ddacSmrg	    accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
665ad43ddacSmrg	    if (!accel_state->copy_area)
666ad43ddacSmrg		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
667b7e1c893Smrg	}
668ad43ddacSmrg    } else
669ad43ddacSmrg	R600DoPrepareCopy(pScrn);
670b7e1c893Smrg
671ad43ddacSmrg    if (accel_state->vsync)
6720974d292Smrg	RADEONVlineHelperClear(pScrn);
673ad43ddacSmrg
674f3a0071aSrjs    accel_state->dst_pix = pDst;
675f3a0071aSrjs    accel_state->src_pix = pSrc;
676f3a0071aSrjs    accel_state->xdir = xdir;
677f3a0071aSrjs    accel_state->ydir = ydir;
678f3a0071aSrjs
679ad43ddacSmrg    return TRUE;
680b7e1c893Smrg}
681b7e1c893Smrg
/*
 * R600DoneCopy - finish a copy sequence targeting pDst.
 *
 * When the copy was not flagged as a same-surface copy, the queued work is
 * handed to R600DoCopyVline().  Afterwards any temporary copy area recorded
 * in the accel state is dropped: without a command stream (info->cs is NULL)
 * it is returned through exaOffscreenFree(); with a command stream only the
 * pointer is cleared here (accel_state->copy_area_bo is not touched by this
 * function).
 */
682f3a0071aSrjsstatic void
683f3a0071aSrjsR600DoneCopy(PixmapPtr pDst)
684f3a0071aSrjs{
685c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
686f3a0071aSrjs    RADEONInfoPtr info = RADEONPTR(pScrn);
687f3a0071aSrjs    struct radeon_accel_state *accel_state = info->accel_state;
688f3a0071aSrjs
    /* Same-surface copies are submitted per rectangle inside R600Copy(). */
689f3a0071aSrjs    if (!accel_state->same_surface)
690f3a0071aSrjs	R600DoCopyVline(pDst);
691f3a0071aSrjs
692f3a0071aSrjs    if (accel_state->copy_area) {
	/* Only the non-CS path obtained copy_area from the EXA offscreen allocator. */
693f3a0071aSrjs	if (!info->cs)
694f3a0071aSrjs	    exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
695f3a0071aSrjs	accel_state->copy_area = NULL;
696f3a0071aSrjs    }
697f3a0071aSrjs
698f3a0071aSrjs}
699f3a0071aSrjs
/*
 * R600Copy - queue one rectangle of a previously prepared copy.
 *
 * pDst        destination pixmap
 * srcX, srcY  top-left corner of the source rectangle
 * dstX, dstY  top-left corner of the destination rectangle
 * w, h        rectangle size
 *
 * A same-surface copy onto identical coordinates is skipped.  For a
 * same-surface copy with a temporary copy area available, the rectangle is
 * moved in two passes (source -> temporary, temporary -> destination) and
 * the source/destination object state is put back afterwards.  In every
 * other case a single copy vertex is appended and the draw is issued later
 * from R600DoneCopy().
 */
700b7e1c893Smrgstatic void
701b7e1c893SmrgR600Copy(PixmapPtr pDst,
702b7e1c893Smrg	 int srcX, int srcY,
703b7e1c893Smrg	 int dstX, int dstY,
704b7e1c893Smrg	 int w, int h)
705b7e1c893Smrg{
706c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
707b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
708b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
709b7e1c893Smrg
    /* Copying a region onto itself within one surface is a no-op. */
710b7e1c893Smrg    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
711b7e1c893Smrg	return;
712b7e1c893Smrg
713f3a0071aSrjs#ifdef XF86DRM_MODE
    /*
     * Command stream is full: close the current copy, flush, and re-run the
     * prepare step with the parameters saved in the accel state.
     * NOTE(review): the return value of R600PrepareCopy() is ignored here;
     * confirm that a failed re-prepare cannot leave stale state behind.
     */
714f3a0071aSrjs    if (info->cs && CS_FULL(info->cs)) {
715f3a0071aSrjs	R600DoneCopy(info->accel_state->dst_pix);
716f3a0071aSrjs	radeon_cs_flush_indirect(pScrn);
717f3a0071aSrjs	R600PrepareCopy(accel_state->src_pix,
718f3a0071aSrjs			accel_state->dst_pix,
719f3a0071aSrjs			accel_state->xdir,
720f3a0071aSrjs			accel_state->ydir,
721f3a0071aSrjs			accel_state->rop,
722f3a0071aSrjs			accel_state->planemask);
723f3a0071aSrjs    }
724f3a0071aSrjs#endif
725f3a0071aSrjs
    /* Pass the destination rectangle to the vline helper when vsync is on. */
726ad43ddacSmrg    if (accel_state->vsync)
7270974d292Smrg	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
728b7e1c893Smrg
729ad43ddacSmrg    if (accel_state->same_surface && accel_state->copy_area) {
	/* Snapshot everything the two-pass copy below overwrites. */
730ad43ddacSmrg	uint32_t orig_offset, tmp_offset;
731ad43ddacSmrg	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
732ad43ddacSmrg	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
733b13dfe66Smrg	uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags;
734b13dfe66Smrg	uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags;
735ad43ddacSmrg	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
736f3a0071aSrjs	int orig_rop = accel_state->rop;
737ad43ddacSmrg
738ad43ddacSmrg#if defined(XF86DRM_MODE)
	/* With a command stream both surfaces are addressed at offset 0. */
739ad43ddacSmrg	if (info->cs) {
740ad43ddacSmrg	    tmp_offset = 0;
741ad43ddacSmrg	    orig_offset = 0;
742ad43ddacSmrg	} else
743ad43ddacSmrg#endif
744ad43ddacSmrg	{
	    /* Otherwise offsets are built from fbLocation and fbOffset. */
745b7e1c893Smrg	    tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset;
746b7e1c893Smrg	    orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
747ad43ddacSmrg	}
748b7e1c893Smrg
	/*
	 * First pass writes into the temporary surface with rop forced to 3
	 * (presumably GXcopy - confirm) and tiling flags cleared; the caller's
	 * rop is applied in the second pass only.
	 */
749ad43ddacSmrg	/* src to tmp */
750ad43ddacSmrg	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
751ad43ddacSmrg	accel_state->dst_obj.bo = accel_state->copy_area_bo;
752ad43ddacSmrg	accel_state->dst_obj.offset = tmp_offset;
753b13dfe66Smrg	accel_state->dst_obj.tiling_flags = 0;
754f3a0071aSrjs	accel_state->rop = 3;
755ad43ddacSmrg	R600DoPrepareCopy(pScrn);
756b7e1c893Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
757b7e1c893Smrg	R600DoCopy(pScrn);
758ad43ddacSmrg
	/*
	 * Second pass reads the temporary surface at the destination
	 * coordinates (where the first pass placed the pixels) and writes the
	 * real destination with the original rop.
	 */
759ad43ddacSmrg	/* tmp to dst */
760ad43ddacSmrg	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
761ad43ddacSmrg	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
762ad43ddacSmrg	accel_state->src_obj[0].offset = tmp_offset;
763b13dfe66Smrg	accel_state->src_obj[0].tiling_flags = 0;
764ad43ddacSmrg	accel_state->dst_obj.domain = orig_dst_domain;
765ad43ddacSmrg	accel_state->dst_obj.bo = orig_bo;
766ad43ddacSmrg	accel_state->dst_obj.offset = orig_offset;
767b13dfe66Smrg	accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags;
768f3a0071aSrjs	accel_state->rop = orig_rop;
769ad43ddacSmrg	R600DoPrepareCopy(pScrn);
770ad43ddacSmrg	R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
771ad43ddacSmrg	R600DoCopyVline(pDst);
772ad43ddacSmrg
	/*
	 * The source object gets the bo/offset saved from the destination;
	 * presumably both are the same surface whenever same_surface is set.
	 */
773ad43ddacSmrg	/* restore state */
774ad43ddacSmrg	accel_state->src_obj[0].domain = orig_src_domain;
775ad43ddacSmrg	accel_state->src_obj[0].bo = orig_bo;
776ad43ddacSmrg	accel_state->src_obj[0].offset = orig_offset;
777b13dfe66Smrg	accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags;
778ad43ddacSmrg    } else
779b7e1c893Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
780b7e1c893Smrg
781b7e1c893Smrg}
782b7e1c893Smrg
/*
 * One row of the blend-operator table (R600BlendOp).
 *
 * dst_alpha   set when the operator's source factor reads destination alpha
 *             (consulted by R600GetBlendCntl when the destination format
 *             has no alpha bits)
 * src_alpha   set when the operator's destination factor reads source alpha
 *             (consulted for component-alpha masks)
 * blend_cntl  source and destination colour blend factors, already shifted
 *             into their COLOR_SRCBLEND / COLOR_DESTBLEND positions
 */
783b7e1c893Smrgstruct blendinfo {
784b7e1c893Smrg    Bool dst_alpha;
785b7e1c893Smrg    Bool src_alpha;
786b7e1c893Smrg    uint32_t blend_cntl;
787b7e1c893Smrg};
788b7e1c893Smrg
/*
 * Blend factors per composite operator.  The table is indexed directly by
 * the composite op (R600BlendOp[op]); R600CheckComposite() rejects any op at
 * or beyond the number of entries, so the order of the rows is significant.
 * Each row is { dst_alpha, src_alpha, blend_cntl }.
 */
789b7e1c893Smrgstatic struct blendinfo R600BlendOp[] = {
790b7e1c893Smrg    /* Clear */
791b7e1c893Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
792b7e1c893Smrg    /* Src */
793b7e1c893Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
794b7e1c893Smrg    /* Dst */
795b7e1c893Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
796b7e1c893Smrg    /* Over */
797b7e1c893Smrg    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
798b7e1c893Smrg    /* OverReverse */
799b7e1c893Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
800b7e1c893Smrg    /* In */
801b7e1c893Smrg    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
802b7e1c893Smrg    /* InReverse */
803b7e1c893Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
804b7e1c893Smrg    /* Out */
805b7e1c893Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
806b7e1c893Smrg    /* OutReverse */
807b7e1c893Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
808b7e1c893Smrg    /* Atop */
809b7e1c893Smrg    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
810b7e1c893Smrg    /* AtopReverse */
811b7e1c893Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
812b7e1c893Smrg    /* Xor */
813b7e1c893Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
814b7e1c893Smrg    /* Add */
815b7e1c893Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
816b7e1c893Smrg};
817b7e1c893Smrg
/*
 * Maps a picture format to a texture format value.
 *
 * fmt       picture format, compared against PicturePtr->format
 * card_fmt  FMT_* value stored into tex_resource_t.format
 */
818b7e1c893Smrgstruct formatinfo {
819b7e1c893Smrg    unsigned int fmt;
820b7e1c893Smrg    uint32_t card_fmt;
821b7e1c893Smrg};
822b7e1c893Smrg
/*
 * Picture formats accepted as textures and the FMT_* value used for each.
 * The table is searched linearly by R600CheckCompositeTexture() and
 * R600TextureSetup(); channel ordering is not encoded here but in the
 * swizzle switch of R600TextureSetup().  The BGRA entries exist only when
 * PICT_TYPE_BGRA is defined.
 */
823b7e1c893Smrgstatic struct formatinfo R600TexFormats[] = {
824b7e1c893Smrg    {PICT_a8r8g8b8,	FMT_8_8_8_8},
825b7e1c893Smrg    {PICT_x8r8g8b8,	FMT_8_8_8_8},
826b7e1c893Smrg    {PICT_a8b8g8r8,	FMT_8_8_8_8},
827b7e1c893Smrg    {PICT_x8b8g8r8,	FMT_8_8_8_8},
828ad43ddacSmrg#ifdef PICT_TYPE_BGRA
829ad43ddacSmrg    {PICT_b8g8r8a8,	FMT_8_8_8_8},
830ad43ddacSmrg    {PICT_b8g8r8x8,	FMT_8_8_8_8},
831ad43ddacSmrg#endif
832b7e1c893Smrg    {PICT_r5g6b5,	FMT_5_6_5},
833b7e1c893Smrg    {PICT_a1r5g5b5,	FMT_1_5_5_5},
834b7e1c893Smrg    {PICT_x1r5g5b5,     FMT_1_5_5_5},
835b7e1c893Smrg    {PICT_a8,		FMT_8},
836b7e1c893Smrg};
837b7e1c893Smrg
/*
 * R600GetBlendCntl - compute the colour blend factors for a composite op.
 *
 * op          composite operator; used directly as an index into R600BlendOp
 * pMask       mask picture, or NULL when compositing without a mask
 * dst_format  value whose alpha width is tested with PICT_FORMAT_A()
 *
 * Returns the source and destination blend factors OR-ed together, after
 * adjusting them for an alpha-less destination and for component-alpha
 * masks.
 *
 * NOTE(review): op is not range-checked here; the visible bound check lives
 * in R600CheckComposite().
 */
838b7e1c893Smrgstatic uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
839b7e1c893Smrg{
840b7e1c893Smrg    uint32_t sblend, dblend;
841b7e1c893Smrg
    /* Split the table entry into its source and destination factor fields. */
842b7e1c893Smrg    sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
843b7e1c893Smrg    dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
844b7e1c893Smrg
845b7e1c893Smrg    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
846b7e1c893Smrg     * it as always 1.
847b7e1c893Smrg     */
848b7e1c893Smrg    if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
849b7e1c893Smrg	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
850b7e1c893Smrg	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
851b7e1c893Smrg	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
852b7e1c893Smrg	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
853b7e1c893Smrg    }
854b7e1c893Smrg
855b7e1c893Smrg    /* If the source alpha is being used, then we should only be in a case where
856b7e1c893Smrg     * the source blend factor is 0, and the source blend value is the mask
857b7e1c893Smrg     * channels multiplied by the source picture's alpha.
858b7e1c893Smrg     */
859b7e1c893Smrg    if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
	/* Swap the alpha-based destination factor for its per-colour variant. */
860b7e1c893Smrg	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
861b7e1c893Smrg	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
862b7e1c893Smrg	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
863b7e1c893Smrg	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
864b7e1c893Smrg	}
865b7e1c893Smrg    }
866b7e1c893Smrg
867b7e1c893Smrg    return sblend | dblend;
868b7e1c893Smrg}
869b7e1c893Smrg
/*
 * R600GetDestFormat - translate a destination picture format into a
 * COLOR_* value.
 *
 * pDstPicture  destination picture; only its format field is read
 * dst_format   output; written only for supported formats
 *
 * Returns TRUE when the format is supported.  Unsupported formats go
 * through RADEON_FALLBACK (defined elsewhere); callers in this file treat a
 * FALSE result as "cannot accelerate".
 */
870b7e1c893Smrgstatic Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
871b7e1c893Smrg{
872b7e1c893Smrg    switch (pDstPicture->format) {
    /* Every 32-bit format shares one COLOR_* value. */
873b7e1c893Smrg    case PICT_a8r8g8b8:
874b7e1c893Smrg    case PICT_x8r8g8b8:
875ad43ddacSmrg    case PICT_a8b8g8r8:
876ad43ddacSmrg    case PICT_x8b8g8r8:
877ad43ddacSmrg#ifdef PICT_TYPE_BGRA
878ad43ddacSmrg    case PICT_b8g8r8a8:
879ad43ddacSmrg    case PICT_b8g8r8x8:
880ad43ddacSmrg#endif
881b7e1c893Smrg	*dst_format = COLOR_8_8_8_8;
882b7e1c893Smrg	break;
883b7e1c893Smrg    case PICT_r5g6b5:
884b7e1c893Smrg	*dst_format = COLOR_5_6_5;
885b7e1c893Smrg	break;
886b7e1c893Smrg    case PICT_a1r5g5b5:
887b7e1c893Smrg    case PICT_x1r5g5b5:
888b7e1c893Smrg	*dst_format = COLOR_1_5_5_5;
889b7e1c893Smrg	break;
890b7e1c893Smrg    case PICT_a8:
891b7e1c893Smrg	*dst_format = COLOR_8;
892b7e1c893Smrg	break;
893b7e1c893Smrg    default:
894b7e1c893Smrg	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
895b7e1c893Smrg	       (int)pDstPicture->format));
896b7e1c893Smrg    }
897b7e1c893Smrg    return TRUE;
898b7e1c893Smrg}
899b7e1c893Smrg
/*
 * R600CheckCompositeTexture - decide whether a picture can be used as a
 * texture for an accelerated composite.
 *
 * pPict     source or mask picture to examine
 * pDstPict  destination picture (only its format's alpha width is used)
 * op        composite operator
 * unit      texture unit (0 for source, 1 for mask at the visible call
 *           sites).  NOTE(review): not referenced in the body.
 *
 * Returns TRUE when the picture's format, filter, repeat mode and transform
 * are all supported; every rejection goes through RADEON_FALLBACK.
 */
900b7e1c893Smrgstatic Bool R600CheckCompositeTexture(PicturePtr pPict,
901b7e1c893Smrg				      PicturePtr pDstPict,
902b7e1c893Smrg				      int op,
903b7e1c893Smrg				      int unit)
904b7e1c893Smrg{
905ad43ddacSmrg    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
906b7e1c893Smrg    unsigned int i;
907b7e1c893Smrg
    /* The format must appear in R600TexFormats. */
908b7e1c893Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
909b7e1c893Smrg	if (R600TexFormats[i].fmt == pPict->format)
910b7e1c893Smrg	    break;
911b7e1c893Smrg    }
912b7e1c893Smrg    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
913b7e1c893Smrg	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
914b7e1c893Smrg			 (int)pPict->format));
915b7e1c893Smrg
    /* Only nearest and bilinear filtering are accepted. */
916b7e1c893Smrg    if (pPict->filter != PictFilterNearest &&
917b7e1c893Smrg	pPict->filter != PictFilterBilinear)
918b7e1c893Smrg	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
919b7e1c893Smrg
920b7e1c893Smrg    /* for REPEAT_NONE, Render semantics are that sampling outside the source
921b7e1c893Smrg     * picture results in alpha=0 pixels. We can implement this with a border color
922b7e1c893Smrg     * *if* our source texture has an alpha channel, otherwise we need to fall
923b7e1c893Smrg     * back. If we're not transformed then we hope that upper layers have clipped
924b7e1c893Smrg     * rendering to the bounds of the source drawable, in which case it doesn't
925b7e1c893Smrg     * matter. I have not, however, verified that the X server always does such
926b7e1c893Smrg     * clipping.
927b7e1c893Smrg     */
928b7e1c893Smrg    /* FIXME R6xx */
    /*
     * A transformed, non-repeating, alpha-less source is accepted only for
     * Src/Clear onto a destination that has no alpha bits either.
     */
929ad43ddacSmrg    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
930b7e1c893Smrg	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
931b7e1c893Smrg	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
932b7e1c893Smrg    }
933b7e1c893Smrg
934b13dfe66Smrg    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
935921a55d8Smrg	RADEON_FALLBACK(("non-affine transforms not supported\n"));
936921a55d8Smrg
937b7e1c893Smrg    return TRUE;
938b7e1c893Smrg}
939b7e1c893Smrg
/*
 * R600TextureSetup - emit texture resource, sampler and vertex-shader
 * constants for one texture unit of a composite operation.
 *
 * pPict  picture bound to this unit (format, repeat, filter, transform)
 * pPix   pixmap backing the picture; used here only to find the screen
 * unit   texture unit index; selects accel_state->src_obj[unit] and the
 *        swizzle rules below (unit 0 is treated as the source, any other
 *        unit as the mask)
 *
 * Returns TRUE on success.  Unsupported format, repeat type or filter go
 * through RADEON_FALLBACK.
 *
 * NOTE(review): when pPict->format matches no R600TexFormats entry, the
 * search loop finishes with i equal to the table size and
 * R600TexFormats[i].card_fmt is read one element past the end before the
 * swizzle switch rejects the format.  Harmless only if every caller has
 * already validated the format - confirm.
 */
940b7e1c893Smrgstatic Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
941b7e1c893Smrg					int unit)
942b7e1c893Smrg{
943c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
944b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
945b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
946c135ecebSveego    unsigned int repeatType;
947b7e1c893Smrg    unsigned int i;
948b7e1c893Smrg    tex_resource_t  tex_res;
949b7e1c893Smrg    tex_sampler_t   tex_samp;
950b7e1c893Smrg    int pix_r, pix_g, pix_b, pix_a;
951ad43ddacSmrg    float vs_alu_consts[8];
952b7e1c893Smrg
953b7e1c893Smrg    CLEAR (tex_res);
954b7e1c893Smrg    CLEAR (tex_samp);
955b7e1c893Smrg
    /* Locate the table row for this format; i indexes R600TexFormats below. */
956b7e1c893Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
957b7e1c893Smrg	if (R600TexFormats[i].fmt == pPict->format)
958b7e1c893Smrg	    break;
959b7e1c893Smrg    }
960b7e1c893Smrg
961b7e1c893Smrg    /* Texture */
    /*
     * Pictures without a drawable are described as a 1x1 texture with
     * RepeatNormal; otherwise size and repeat mode come from the picture.
     */
962c135ecebSveego    if (pPict->pDrawable) {
963c135ecebSveego	tex_res.w               = pPict->pDrawable->width;
964c135ecebSveego	tex_res.h               = pPict->pDrawable->height;
965c135ecebSveego	repeatType              = pPict->repeat ? pPict->repeatType : RepeatNone;
966c135ecebSveego    } else {
967c135ecebSveego	tex_res.w               = 1;
968c135ecebSveego	tex_res.h               = 1;
969c135ecebSveego	repeatType              = RepeatNormal;
970c135ecebSveego    }
971b7e1c893Smrg    tex_res.id                  = unit;
972ad43ddacSmrg    tex_res.pitch               = accel_state->src_obj[unit].pitch;
973b7e1c893Smrg    tex_res.depth               = 0;
974b7e1c893Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
975ad43ddacSmrg    tex_res.base                = accel_state->src_obj[unit].offset;
976ad43ddacSmrg    tex_res.mip_base            = accel_state->src_obj[unit].offset;
9770974d292Smrg    tex_res.size                = accel_state->src_size[unit];
978b7e1c893Smrg    tex_res.format              = R600TexFormats[i].card_fmt;
979ad43ddacSmrg    tex_res.bo                  = accel_state->src_obj[unit].bo;
980ad43ddacSmrg    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
981f3a0071aSrjs#ifdef XF86DRM_MODE
    /* The surface description is only filled in on the command-stream path. */
982c73da4dbSmrg    if (info->cs)
983c73da4dbSmrg        tex_res.surface             = accel_state->src_obj[unit].surface;
984f3a0071aSrjs#endif
985b7e1c893Smrg    tex_res.request_size        = 1;
986b7e1c893Smrg
987b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
    /* Big-endian builds pick an endian swap mode from the source bpp. */
988b13dfe66Smrg    switch (accel_state->src_obj[unit].bpp) {
989b13dfe66Smrg    case 16:
990b13dfe66Smrg	tex_res.endian = SQ_ENDIAN_8IN16;
991b13dfe66Smrg	break;
992b13dfe66Smrg    case 32:
993b13dfe66Smrg	tex_res.endian = SQ_ENDIAN_8IN32;
994b13dfe66Smrg	break;
995b13dfe66Smrg    default :
996b13dfe66Smrg	break;
997b13dfe66Smrg    }
998b13dfe66Smrg#endif
999b13dfe66Smrg
1000b7e1c893Smrg    /* component swizzles */
    /*
     * Choose which texture channel feeds each of R/G/B/A for this format;
     * formats without alpha bits select constant 1 for alpha.
     */
1001b7e1c893Smrg    switch (pPict->format) {
1002b7e1c893Smrg    case PICT_a1r5g5b5:
1003b7e1c893Smrg    case PICT_a8r8g8b8:
1004b7e1c893Smrg	pix_r = SQ_SEL_Z; /* R */
1005b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1006b7e1c893Smrg	pix_b = SQ_SEL_X; /* B */
1007b7e1c893Smrg	pix_a = SQ_SEL_W; /* A */
1008b7e1c893Smrg	break;
1009b7e1c893Smrg    case PICT_a8b8g8r8:
1010b7e1c893Smrg	pix_r = SQ_SEL_X; /* R */
1011b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1012b7e1c893Smrg	pix_b = SQ_SEL_Z; /* B */
1013b7e1c893Smrg	pix_a = SQ_SEL_W; /* A */
1014b7e1c893Smrg	break;
1015b7e1c893Smrg    case PICT_x8b8g8r8:
1016b7e1c893Smrg	pix_r = SQ_SEL_X; /* R */
1017b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1018b7e1c893Smrg	pix_b = SQ_SEL_Z; /* B */
1019b7e1c893Smrg	pix_a = SQ_SEL_1; /* A */
1020b7e1c893Smrg	break;
1021ad43ddacSmrg#ifdef PICT_TYPE_BGRA
1022ad43ddacSmrg    case PICT_b8g8r8a8:
1023ad43ddacSmrg	pix_r = SQ_SEL_Y; /* R */
1024ad43ddacSmrg	pix_g = SQ_SEL_Z; /* G */
1025ad43ddacSmrg	pix_b = SQ_SEL_W; /* B */
1026ad43ddacSmrg	pix_a = SQ_SEL_X; /* A */
1027ad43ddacSmrg	break;
1028ad43ddacSmrg    case PICT_b8g8r8x8:
1029ad43ddacSmrg	pix_r = SQ_SEL_Y; /* R */
1030ad43ddacSmrg	pix_g = SQ_SEL_Z; /* G */
1031ad43ddacSmrg	pix_b = SQ_SEL_W; /* B */
1032ad43ddacSmrg	pix_a = SQ_SEL_1; /* A */
1033ad43ddacSmrg	break;
1034ad43ddacSmrg#endif
1035b7e1c893Smrg    case PICT_x1r5g5b5:
1036b7e1c893Smrg    case PICT_x8r8g8b8:
1037b7e1c893Smrg    case PICT_r5g6b5:
1038b7e1c893Smrg	pix_r = SQ_SEL_Z; /* R */
1039b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1040b7e1c893Smrg	pix_b = SQ_SEL_X; /* B */
1041b7e1c893Smrg	pix_a = SQ_SEL_1; /* A */
1042b7e1c893Smrg	break;
1043b7e1c893Smrg    case PICT_a8:
1044b7e1c893Smrg	pix_r = SQ_SEL_0; /* R */
1045b7e1c893Smrg	pix_g = SQ_SEL_0; /* G */
1046b7e1c893Smrg	pix_b = SQ_SEL_0; /* B */
1047b7e1c893Smrg	pix_a = SQ_SEL_X; /* A */
1048b7e1c893Smrg	break;
1049b7e1c893Smrg    default:
1050b7e1c893Smrg	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
1051b7e1c893Smrg    }
1052b7e1c893Smrg
    /*
     * Adjust the swizzles for the unit's role.
     * Unit 0: missing RGB reads as 0 and missing alpha as 1, except that
     * with a component-alpha mask whose operator uses source alpha the
     * alpha channel is replicated into RGB (all ones if there is no alpha).
     * Other units: with component alpha only a missing alpha is forced to 1;
     * otherwise alpha is replicated into RGB (all ones if there is no alpha).
     */
1053b7e1c893Smrg    if (unit == 0) {
1054ad43ddacSmrg	if (!accel_state->msk_pic) {
1055b7e1c893Smrg	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
1056b7e1c893Smrg		pix_r = SQ_SEL_0;
1057b7e1c893Smrg		pix_g = SQ_SEL_0;
1058b7e1c893Smrg		pix_b = SQ_SEL_0;
1059b7e1c893Smrg	    }
1060b7e1c893Smrg
1061b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1062b7e1c893Smrg		pix_a = SQ_SEL_1;
1063b7e1c893Smrg	} else {
1064b7e1c893Smrg	    if (accel_state->component_alpha) {
1065b7e1c893Smrg		if (accel_state->src_alpha) {
1066b7e1c893Smrg		    if (PICT_FORMAT_A(pPict->format) == 0) {
1067b7e1c893Smrg			pix_r = SQ_SEL_1;
1068b7e1c893Smrg			pix_g = SQ_SEL_1;
1069b7e1c893Smrg			pix_b = SQ_SEL_1;
1070b7e1c893Smrg			pix_a = SQ_SEL_1;
1071b7e1c893Smrg		    } else {
1072b7e1c893Smrg			pix_r = pix_a;
1073b7e1c893Smrg			pix_g = pix_a;
1074b7e1c893Smrg			pix_b = pix_a;
1075b7e1c893Smrg		    }
1076b7e1c893Smrg		} else {
1077b7e1c893Smrg		    if (PICT_FORMAT_A(pPict->format) == 0)
1078b7e1c893Smrg			pix_a = SQ_SEL_1;
1079b7e1c893Smrg		}
1080b7e1c893Smrg	    } else {
1081b7e1c893Smrg		if (PICT_FORMAT_RGB(pPict->format) == 0) {
1082b7e1c893Smrg		    pix_r = SQ_SEL_0;
1083b7e1c893Smrg		    pix_g = SQ_SEL_0;
1084b7e1c893Smrg		    pix_b = SQ_SEL_0;
1085b7e1c893Smrg		}
1086b7e1c893Smrg
1087b7e1c893Smrg		if (PICT_FORMAT_A(pPict->format) == 0)
1088b7e1c893Smrg		    pix_a = SQ_SEL_1;
1089b7e1c893Smrg	    }
1090b7e1c893Smrg	}
1091b7e1c893Smrg    } else {
1092b7e1c893Smrg	if (accel_state->component_alpha) {
1093b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1094b7e1c893Smrg		pix_a = SQ_SEL_1;
1095b7e1c893Smrg	} else {
1096b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0) {
1097b7e1c893Smrg		pix_r = SQ_SEL_1;
1098b7e1c893Smrg		pix_g = SQ_SEL_1;
1099b7e1c893Smrg		pix_b = SQ_SEL_1;
1100b7e1c893Smrg		pix_a = SQ_SEL_1;
1101b7e1c893Smrg	    } else {
1102b7e1c893Smrg		pix_r = pix_a;
1103b7e1c893Smrg		pix_g = pix_a;
1104b7e1c893Smrg		pix_b = pix_a;
1105b7e1c893Smrg	    }
1106b7e1c893Smrg	}
1107b7e1c893Smrg    }
1108b7e1c893Smrg
1109b7e1c893Smrg    tex_res.dst_sel_x           = pix_r; /* R */
1110b7e1c893Smrg    tex_res.dst_sel_y           = pix_g; /* G */
1111b7e1c893Smrg    tex_res.dst_sel_z           = pix_b; /* B */
1112b7e1c893Smrg    tex_res.dst_sel_w           = pix_a; /* A */
1113b7e1c893Smrg
1114b7e1c893Smrg    tex_res.base_level          = 0;
1115b7e1c893Smrg    tex_res.last_level          = 0;
1116b7e1c893Smrg    tex_res.perf_modulation     = 0;
    /* tile_mode stays 0 (from CLEAR) unless the source has no tiling flags. */
1117b13dfe66Smrg    if (accel_state->src_obj[unit].tiling_flags == 0)
1118b13dfe66Smrg	tex_res.tile_mode           = 1;
1119921a55d8Smrg    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
1120b7e1c893Smrg
1121b7e1c893Smrg    tex_samp.id                 = unit;
1122b7e1c893Smrg    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1123b7e1c893Smrg
    /* Translate the picture's repeat mode into sampler clamp modes. */
1124ad43ddacSmrg    switch (repeatType) {
1125ad43ddacSmrg    case RepeatNormal:
1126ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_WRAP;
1127ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_WRAP;
1128ad43ddacSmrg	break;
1129ad43ddacSmrg    case RepeatPad:
1130ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1131ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1132ad43ddacSmrg	break;
1133ad43ddacSmrg    case RepeatReflect:
1134ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1135ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1136ad43ddacSmrg	break;
1137ad43ddacSmrg    case RepeatNone:
1138b7e1c893Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1139b7e1c893Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1140ad43ddacSmrg	break;
1141ad43ddacSmrg    default:
1142ad43ddacSmrg	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1143b7e1c893Smrg    }
1144b7e1c893Smrg
    /* Nearest filtering additionally sets mc_coord_truncate. */
1145b7e1c893Smrg    switch (pPict->filter) {
1146b7e1c893Smrg    case PictFilterNearest:
1147b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1148b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1149b13dfe66Smrg	tex_samp.mc_coord_truncate  = 1;
1150b7e1c893Smrg	break;
1151b7e1c893Smrg    case PictFilterBilinear:
1152b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1153b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1154b7e1c893Smrg	break;
1155b7e1c893Smrg    default:
1156b7e1c893Smrg	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1157b7e1c893Smrg    }
1158b7e1c893Smrg
1159b7e1c893Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
1160b7e1c893Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1161b7e1c893Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
1162921a55d8Smrg    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
1163b7e1c893Smrg
    /*
     * Vertex-shader constants: the first two rows of the picture transform
     * (identity rows when there is no transform), each followed by the
     * reciprocal of the texture width or height.
     */
1164b7e1c893Smrg    if (pPict->transform != 0) {
1165b7e1c893Smrg	accel_state->is_transform[unit] = TRUE;
1166b7e1c893Smrg	accel_state->transform[unit] = pPict->transform;
1167ad43ddacSmrg
1168ad43ddacSmrg	vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
1169ad43ddacSmrg	vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
1170ad43ddacSmrg	vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
1171c135ecebSveego	vs_alu_consts[3] = 1.0 / tex_res.w;
1172ad43ddacSmrg
1173ad43ddacSmrg	vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
1174ad43ddacSmrg	vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
1175ad43ddacSmrg	vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
1176c135ecebSveego	vs_alu_consts[7] = 1.0 / tex_res.h;
1177ad43ddacSmrg    } else {
1178b7e1c893Smrg	accel_state->is_transform[unit] = FALSE;
1179b7e1c893Smrg
1180ad43ddacSmrg	vs_alu_consts[0] = 1.0;
1181ad43ddacSmrg	vs_alu_consts[1] = 0.0;
1182ad43ddacSmrg	vs_alu_consts[2] = 0.0;
1183c135ecebSveego	vs_alu_consts[3] = 1.0 / tex_res.w;
1184ad43ddacSmrg
1185ad43ddacSmrg	vs_alu_consts[4] = 0.0;
1186ad43ddacSmrg	vs_alu_consts[5] = 1.0;
1187ad43ddacSmrg	vs_alu_consts[6] = 0.0;
1188c135ecebSveego	vs_alu_consts[7] = 1.0 / tex_res.h;
1189ad43ddacSmrg    }
1190ad43ddacSmrg
1191ad43ddacSmrg    /* VS alu constants */
    /* Each unit's constants start at SQ_ALU_CONSTANT_vs + unit * 2. */
1192921a55d8Smrg    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
1193921a55d8Smrg			sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
1194ad43ddacSmrg
1195b7e1c893Smrg    return TRUE;
1196b7e1c893Smrg}
1197b7e1c893Smrg
/*
 * R600CheckComposite - decide whether a composite request can be
 * accelerated.
 *
 * op            composite operator
 * pSrcPicture   source picture
 * pMaskPicture  mask picture, or NULL
 * pDstPicture   destination picture
 *
 * Returns TRUE when the operator is in R600BlendOp, every drawable-backed
 * picture is smaller than 8192 in both dimensions and passes
 * R600CheckCompositeTexture(), pictures without a drawable are solid fills,
 * and the destination format is supported.  Otherwise the request is
 * rejected (RADEON_FALLBACK or return FALSE).
 *
 * NOTE(review): only the upper bound of op is checked; a negative op would
 * index R600BlendOp out of range - confirm callers never pass one.
 */
1198b7e1c893Smrgstatic Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1199b7e1c893Smrg			       PicturePtr pDstPicture)
1200b7e1c893Smrg{
1201b7e1c893Smrg    uint32_t tmp1;
1202b7e1c893Smrg    PixmapPtr pSrcPixmap, pDstPixmap;
1203b7e1c893Smrg
1204b7e1c893Smrg    /* Check for unsupported compositing operations. */
1205b7e1c893Smrg    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1206b7e1c893Smrg	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1207b7e1c893Smrg
    /* Source: size limit plus texture checks, or a solid fill. */
1208c135ecebSveego    if (pSrcPicture->pDrawable) {
1209c135ecebSveego	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1210ad43ddacSmrg
1211c135ecebSveego	if (pSrcPixmap->drawable.width >= 8192 ||
1212c135ecebSveego	    pSrcPixmap->drawable.height >= 8192) {
1213c135ecebSveego	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1214c135ecebSveego			     pSrcPixmap->drawable.width,
1215c135ecebSveego			     pSrcPixmap->drawable.height));
1216c135ecebSveego	}
1217b7e1c893Smrg
1218c135ecebSveego	if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1219c135ecebSveego	    return FALSE;
1220c135ecebSveego    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
1221c135ecebSveego	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1222b7e1c893Smrg
1223b7e1c893Smrg    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1224b7e1c893Smrg
1225c135ecebSveego    if (pDstPixmap->drawable.width >= 8192 ||
1226c135ecebSveego	pDstPixmap->drawable.height >= 8192) {
1227b7e1c893Smrg	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1228b7e1c893Smrg			 pDstPixmap->drawable.width,
1229b7e1c893Smrg			 pDstPixmap->drawable.height));
1230b7e1c893Smrg    }
1231b7e1c893Smrg
    /*
     * Mask: same checks as the source.  The component-alpha restriction is
     * evaluated only for drawable-backed masks.
     * NOTE(review): a solid-fill mask with componentAlpha skips that
     * restriction - confirm this is intended.
     */
1232b7e1c893Smrg    if (pMaskPicture) {
1233ad43ddacSmrg	PixmapPtr pMaskPixmap;
1234ad43ddacSmrg
1235c135ecebSveego	if (pMaskPicture->pDrawable) {
1236c135ecebSveego	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1237b7e1c893Smrg
1238c135ecebSveego	    if (pMaskPixmap->drawable.width >= 8192 ||
1239c135ecebSveego		pMaskPixmap->drawable.height >= 8192) {
1240c135ecebSveego	      RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1241c135ecebSveego			       pMaskPixmap->drawable.width,
1242c135ecebSveego			       pMaskPixmap->drawable.height));
1243c135ecebSveego	    }
1244b7e1c893Smrg
1245c135ecebSveego	    if (pMaskPicture->componentAlpha) {
1246c135ecebSveego		/* Check if it's component alpha that relies on a source alpha and
1247c135ecebSveego		 * on the source value.  We can only get one of those into the
1248c135ecebSveego		 * single source value that we get to blend with.
1249c135ecebSveego		 */
1250c135ecebSveego		if (R600BlendOp[op].src_alpha &&
1251c135ecebSveego		    (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1252c135ecebSveego		    (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1253c135ecebSveego		    RADEON_FALLBACK(("Component alpha not supported with source "
1254c135ecebSveego				     "alpha and source value blending.\n"));
1255c135ecebSveego		}
1256b7e1c893Smrg	    }
1257b7e1c893Smrg
1258c135ecebSveego	    if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1259c135ecebSveego		return FALSE;
1260c135ecebSveego	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
1261c135ecebSveego	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1262b7e1c893Smrg    }
1263b7e1c893Smrg
    /* tmp1 is a throw-away output; only the success flag is used here. */
1264b7e1c893Smrg    if (!R600GetDestFormat(pDstPicture, &tmp1))
1265b7e1c893Smrg	return FALSE;
1266b7e1c893Smrg
1267b7e1c893Smrg    return TRUE;
1268b7e1c893Smrg
1269b7e1c893Smrg}
1270b7e1c893Smrg
1271b7e1c893Smrgstatic Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1272b7e1c893Smrg				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1273b7e1c893Smrg				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1274b7e1c893Smrg{
1275c135ecebSveego    ScreenPtr pScreen = pDst->drawable.pScreen;
1276c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1277b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1278b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1279b13dfe66Smrg    uint32_t dst_format;
1280b7e1c893Smrg    cb_config_t cb_conf;
1281b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
1282ad43ddacSmrg    struct r600_accel_object src_obj, mask_obj, dst_obj;
1283b7e1c893Smrg
1284c135ecebSveego    if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8))
1285ad43ddacSmrg	return FALSE;
1286ad43ddacSmrg
1287c135ecebSveego    if (!pSrc) {
1288c135ecebSveego	pSrc = RADEONSolidPixmap(pScreen, pSrcPicture->pSourcePict->solidFill.color);
1289c135ecebSveego	if (!pSrc)
1290c135ecebSveego	    RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
1291c135ecebSveego    }
1292c135ecebSveego
1293ad43ddacSmrg#if defined(XF86DRM_MODE)
1294ad43ddacSmrg    if (info->cs) {
1295ad43ddacSmrg	src_obj.offset = 0;
1296ad43ddacSmrg	dst_obj.offset = 0;
1297ad43ddacSmrg	dst_obj.bo = radeon_get_pixmap_bo(pDst);
1298c135ecebSveego	src_obj.bo = radeon_get_pixmap_bo(pSrc);
1299166b61b6Smrg	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1300166b61b6Smrg	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1301f3a0071aSrjs	dst_obj.surface = radeon_get_pixmap_surface(pDst);
1302f3a0071aSrjs	src_obj.surface = radeon_get_pixmap_surface(pSrc);
1303ad43ddacSmrg    } else
1304ad43ddacSmrg#endif
1305ad43ddacSmrg    {
1306ad43ddacSmrg	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1307ad43ddacSmrg	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1308ad43ddacSmrg	src_obj.bo = NULL;
1309ad43ddacSmrg	dst_obj.bo = NULL;
1310ad43ddacSmrg    }
1311ad43ddacSmrg    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1312ad43ddacSmrg    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1313ad43ddacSmrg
1314ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
1315ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
1316ad43ddacSmrg    src_obj.bpp = pSrc->drawable.bitsPerPixel;
1317ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1318ad43ddacSmrg
1319ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
1320ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
1321ad43ddacSmrg    dst_obj.bpp = pDst->drawable.bitsPerPixel;
1322ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1323b7e1c893Smrg
1324c135ecebSveego    if (pMaskPicture) {
1325c135ecebSveego	if (!pMask) {
1326c135ecebSveego	    pMask = RADEONSolidPixmap(pScreen, pMaskPicture->pSourcePict->solidFill.color);
1327c135ecebSveego	    if (!pMask) {
1328c135ecebSveego		if (!pSrcPicture->pDrawable)
1329c135ecebSveego		    pScreen->DestroyPixmap(pSrc);
1330c135ecebSveego		RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
1331c135ecebSveego	    }
1332c135ecebSveego	}
1333c135ecebSveego
1334ad43ddacSmrg#if defined(XF86DRM_MODE)
1335ad43ddacSmrg	if (info->cs) {
1336ad43ddacSmrg	    mask_obj.offset = 0;
1337ad43ddacSmrg	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
1338166b61b6Smrg	    mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
1339f3a0071aSrjs	    mask_obj.surface = radeon_get_pixmap_surface(pMask);
1340921a55d8Smrg	} else
1341ad43ddacSmrg#endif
1342ad43ddacSmrg	{
1343ad43ddacSmrg	    mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset;
1344ad43ddacSmrg	    mask_obj.bo = NULL;
1345ad43ddacSmrg	}
1346ad43ddacSmrg	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1347ad43ddacSmrg
1348ad43ddacSmrg	mask_obj.width = pMask->drawable.width;
1349ad43ddacSmrg	mask_obj.height = pMask->drawable.height;
1350ad43ddacSmrg	mask_obj.bpp = pMask->drawable.bitsPerPixel;
1351ad43ddacSmrg	mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1352ad43ddacSmrg
1353ad43ddacSmrg	if (!R600SetAccelState(pScrn,
1354ad43ddacSmrg			       &src_obj,
1355ad43ddacSmrg			       &mask_obj,
1356ad43ddacSmrg			       &dst_obj,
13570974d292Smrg			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1358ad43ddacSmrg			       3, 0xffffffff))
1359ad43ddacSmrg	    return FALSE;
1360ad43ddacSmrg
1361ad43ddacSmrg	accel_state->msk_pic = pMaskPicture;
1362b7e1c893Smrg	if (pMaskPicture->componentAlpha) {
1363b7e1c893Smrg	    accel_state->component_alpha = TRUE;
1364b7e1c893Smrg	    if (R600BlendOp[op].src_alpha)
1365b7e1c893Smrg		accel_state->src_alpha = TRUE;
1366b7e1c893Smrg	    else
1367b7e1c893Smrg		accel_state->src_alpha = FALSE;
1368b7e1c893Smrg	} else {
1369b7e1c893Smrg	    accel_state->component_alpha = FALSE;
1370b7e1c893Smrg	    accel_state->src_alpha = FALSE;
1371b7e1c893Smrg	}
1372b7e1c893Smrg    } else {
1373ad43ddacSmrg	if (!R600SetAccelState(pScrn,
1374ad43ddacSmrg			       &src_obj,
1375ad43ddacSmrg			       NULL,
1376ad43ddacSmrg			       &dst_obj,
1377ad43ddacSmrg			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1378ad43ddacSmrg			       3, 0xffffffff))
1379ad43ddacSmrg	    return FALSE;
1380ad43ddacSmrg
1381ad43ddacSmrg	accel_state->msk_pic = NULL;
1382b7e1c893Smrg	accel_state->component_alpha = FALSE;
1383b7e1c893Smrg	accel_state->src_alpha = FALSE;
1384b7e1c893Smrg    }
1385b7e1c893Smrg
1386b7e1c893Smrg    if (!R600GetDestFormat(pDstPicture, &dst_format))
1387b7e1c893Smrg	return FALSE;
1388b7e1c893Smrg
1389b7e1c893Smrg    CLEAR (cb_conf);
1390b7e1c893Smrg    CLEAR (vs_conf);
1391b7e1c893Smrg    CLEAR (ps_conf);
1392b7e1c893Smrg
1393ad43ddacSmrg    if (pMask)
1394921a55d8Smrg        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1395ad43ddacSmrg    else
1396921a55d8Smrg        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1397b7e1c893Smrg
13980974d292Smrg    radeon_cp_start(pScrn);
1399b7e1c893Smrg
1400921a55d8Smrg    r600_set_default_state(pScrn, accel_state->ib);
1401b7e1c893Smrg
1402921a55d8Smrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1403921a55d8Smrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1404921a55d8Smrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1405b7e1c893Smrg
1406b7e1c893Smrg    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
1407ad43ddacSmrg        R600IBDiscard(pScrn, accel_state->ib);
1408ad43ddacSmrg        return FALSE;
1409b7e1c893Smrg    }
1410b7e1c893Smrg
1411b7e1c893Smrg    if (pMask) {
1412ad43ddacSmrg        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
1413ad43ddacSmrg            R600IBDiscard(pScrn, accel_state->ib);
1414ad43ddacSmrg            return FALSE;
1415ad43ddacSmrg        }
1416b7e1c893Smrg    } else
1417ad43ddacSmrg        accel_state->is_transform[1] = FALSE;
1418b7e1c893Smrg
14190974d292Smrg    if (pMask) {
1420921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
1421921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
14220974d292Smrg    } else {
1423921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
1424921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
14250974d292Smrg    }
1426b7e1c893Smrg
1427b7e1c893Smrg    /* Shader */
1428b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
14290974d292Smrg    vs_conf.shader_size         = accel_state->vs_size;
1430921a55d8Smrg    vs_conf.num_gprs            = 5;
1431b7e1c893Smrg    vs_conf.stack_size          = 1;
1432ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
1433921a55d8Smrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1434b7e1c893Smrg
1435b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
14360974d292Smrg    ps_conf.shader_size         = accel_state->ps_size;
1437b7e1c893Smrg    ps_conf.num_gprs            = 3;
14380974d292Smrg    ps_conf.stack_size          = 1;
1439b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
1440b7e1c893Smrg    ps_conf.clamp_consts        = 0;
1441b7e1c893Smrg    ps_conf.export_mode         = 2;
1442ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
1443921a55d8Smrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1444b7e1c893Smrg
1445b7e1c893Smrg    cb_conf.id = 0;
1446ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
1447ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
1448ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
1449b7e1c893Smrg    cb_conf.format = dst_format;
1450ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
1451f3a0071aSrjs#ifdef XF86DRM_MODE
1452c73da4dbSmrg    if (info->cs)
1453c73da4dbSmrg        cb_conf.surface = accel_state->dst_obj.surface;
1454f3a0071aSrjs#endif
1455b7e1c893Smrg
1456b7e1c893Smrg    switch (pDstPicture->format) {
1457b7e1c893Smrg    case PICT_a8r8g8b8:
1458b7e1c893Smrg    case PICT_x8r8g8b8:
1459b7e1c893Smrg    case PICT_a1r5g5b5:
1460b7e1c893Smrg    case PICT_x1r5g5b5:
1461b7e1c893Smrg    default:
1462b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
1463b7e1c893Smrg	break;
1464ad43ddacSmrg    case PICT_a8b8g8r8:
1465ad43ddacSmrg    case PICT_x8b8g8r8:
1466ad43ddacSmrg	cb_conf.comp_swap = 0; /* ABGR */
1467ad43ddacSmrg	break;
1468ad43ddacSmrg#ifdef PICT_TYPE_BGRA
1469ad43ddacSmrg    case PICT_b8g8r8a8:
1470ad43ddacSmrg    case PICT_b8g8r8x8:
1471ad43ddacSmrg	cb_conf.comp_swap = 3; /* BGRA */
1472ad43ddacSmrg	break;
1473ad43ddacSmrg#endif
1474b7e1c893Smrg    case PICT_r5g6b5:
1475b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
1476b7e1c893Smrg	break;
1477b7e1c893Smrg    case PICT_a8:
1478b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
1479b7e1c893Smrg	break;
1480b7e1c893Smrg    }
1481b7e1c893Smrg    cb_conf.source_format = 1;
1482b7e1c893Smrg    cb_conf.blend_clamp = 1;
1483b13dfe66Smrg    cb_conf.blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1484b13dfe66Smrg    cb_conf.blend_enable = 1;
1485b13dfe66Smrg    cb_conf.pmask = 0xf;
1486b13dfe66Smrg    cb_conf.rop = 3;
1487b13dfe66Smrg    if (accel_state->dst_obj.tiling_flags == 0)
1488f3a0071aSrjs	cb_conf.array_mode = 0;
1489b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1490b13dfe66Smrg    switch (dst_obj.bpp) {
1491b13dfe66Smrg    case 16:
1492b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN16;
1493b13dfe66Smrg	break;
1494b13dfe66Smrg    case 32:
1495b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN32;
1496b13dfe66Smrg	break;
1497b13dfe66Smrg    default:
1498b13dfe66Smrg	break;
1499b7e1c893Smrg    }
1500b13dfe66Smrg#endif
1501b13dfe66Smrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
15020974d292Smrg
1503b13dfe66Smrg    if (pMask)
1504b13dfe66Smrg	r600_set_spi(pScrn, accel_state->ib, (2 - 1), 2);
1505b13dfe66Smrg    else
1506b13dfe66Smrg	r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
1507b7e1c893Smrg
1508ad43ddacSmrg    if (accel_state->vsync)
15090974d292Smrg	RADEONVlineHelperClear(pScrn);
1510b7e1c893Smrg
1511f3a0071aSrjs    accel_state->composite_op = op;
1512f3a0071aSrjs    accel_state->dst_pic = pDstPicture;
1513f3a0071aSrjs    accel_state->src_pic = pSrcPicture;
1514f3a0071aSrjs    accel_state->dst_pix = pDst;
1515f3a0071aSrjs    accel_state->msk_pix = pMask;
1516f3a0071aSrjs    accel_state->src_pix = pSrc;
1517f3a0071aSrjs
1518b7e1c893Smrg    return TRUE;
1519b7e1c893Smrg}
1520b7e1c893Smrg
1521c135ecebSveegostatic void R600FinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst,
1522c135ecebSveego				struct radeon_accel_state *accel_state)
1523f3a0071aSrjs{
1524f3a0071aSrjs    int vtx_size;
1525f3a0071aSrjs
1526f3a0071aSrjs    if (accel_state->vsync)
1527f3a0071aSrjs       r600_cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
1528f3a0071aSrjs			       accel_state->vline_crtc,
1529f3a0071aSrjs			       accel_state->vline_y1,
1530f3a0071aSrjs			       accel_state->vline_y2);
1531f3a0071aSrjs
1532f3a0071aSrjs    vtx_size = accel_state->msk_pic ? 24 : 16;
1533f3a0071aSrjs
1534f3a0071aSrjs    r600_finish_op(pScrn, vtx_size);
1535f3a0071aSrjs}
1536f3a0071aSrjs
1537c135ecebSveegostatic void R600DoneComposite(PixmapPtr pDst)
1538c135ecebSveego{
1539c135ecebSveego    ScreenPtr pScreen = pDst->drawable.pScreen;
1540c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1541c135ecebSveego    RADEONInfoPtr info = RADEONPTR(pScrn);
1542c135ecebSveego    struct radeon_accel_state *accel_state = info->accel_state;
1543c135ecebSveego
1544c135ecebSveego    R600FinishComposite(pScrn, pDst, accel_state);
1545c135ecebSveego
1546c135ecebSveego    if (!accel_state->src_pic->pDrawable)
1547c135ecebSveego	pScreen->DestroyPixmap(accel_state->src_pix);
1548c135ecebSveego
1549c135ecebSveego    if (accel_state->msk_pic && !accel_state->msk_pic->pDrawable)
1550c135ecebSveego	pScreen->DestroyPixmap(accel_state->msk_pix);
1551c135ecebSveego}
1552c135ecebSveego
1553b7e1c893Smrgstatic void R600Composite(PixmapPtr pDst,
1554b7e1c893Smrg			  int srcX, int srcY,
1555b7e1c893Smrg			  int maskX, int maskY,
1556b7e1c893Smrg			  int dstX, int dstY,
1557b7e1c893Smrg			  int w, int h)
1558b7e1c893Smrg{
1559c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1560b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1561b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1562b7e1c893Smrg    float *vb;
1563b7e1c893Smrg
1564b7e1c893Smrg    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1565b7e1c893Smrg       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1566b7e1c893Smrg
1567f3a0071aSrjs#ifdef XF86DRM_MODE
1568f3a0071aSrjs    if (info->cs && CS_FULL(info->cs)) {
1569c135ecebSveego	R600FinishComposite(pScrn, pDst, info->accel_state);
1570f3a0071aSrjs	radeon_cs_flush_indirect(pScrn);
1571f3a0071aSrjs	R600PrepareComposite(info->accel_state->composite_op,
1572f3a0071aSrjs			     info->accel_state->src_pic,
1573f3a0071aSrjs			     info->accel_state->msk_pic,
1574f3a0071aSrjs			     info->accel_state->dst_pic,
1575f3a0071aSrjs			     info->accel_state->src_pix,
1576f3a0071aSrjs			     info->accel_state->msk_pix,
1577f3a0071aSrjs			     info->accel_state->dst_pix);
1578f3a0071aSrjs    }
1579f3a0071aSrjs#endif
1580f3a0071aSrjs
1581ad43ddacSmrg    if (accel_state->vsync)
15820974d292Smrg	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1583b7e1c893Smrg
1584ad43ddacSmrg    if (accel_state->msk_pic) {
1585b7e1c893Smrg
1586921a55d8Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
1587b7e1c893Smrg
1588b7e1c893Smrg	vb[0] = (float)dstX;
1589b7e1c893Smrg	vb[1] = (float)dstY;
1590ad43ddacSmrg	vb[2] = (float)srcX;
1591ad43ddacSmrg	vb[3] = (float)srcY;
1592ad43ddacSmrg	vb[4] = (float)maskX;
1593ad43ddacSmrg	vb[5] = (float)maskY;
1594b7e1c893Smrg
1595b7e1c893Smrg	vb[6] = (float)dstX;
1596b7e1c893Smrg	vb[7] = (float)(dstY + h);
1597ad43ddacSmrg	vb[8] = (float)srcX;
1598ad43ddacSmrg	vb[9] = (float)(srcY + h);
1599ad43ddacSmrg	vb[10] = (float)maskX;
1600ad43ddacSmrg	vb[11] = (float)(maskY + h);
1601b7e1c893Smrg
1602b7e1c893Smrg	vb[12] = (float)(dstX + w);
1603b7e1c893Smrg	vb[13] = (float)(dstY + h);
1604ad43ddacSmrg	vb[14] = (float)(srcX + w);
1605ad43ddacSmrg	vb[15] = (float)(srcY + h);
1606ad43ddacSmrg	vb[16] = (float)(maskX + w);
1607ad43ddacSmrg	vb[17] = (float)(maskY + h);
1608ad43ddacSmrg
1609921a55d8Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
1610b7e1c893Smrg
1611b7e1c893Smrg    } else {
1612b7e1c893Smrg
1613921a55d8Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
1614b7e1c893Smrg
1615b7e1c893Smrg	vb[0] = (float)dstX;
1616b7e1c893Smrg	vb[1] = (float)dstY;
1617ad43ddacSmrg	vb[2] = (float)srcX;
1618ad43ddacSmrg	vb[3] = (float)srcY;
1619b7e1c893Smrg
1620b7e1c893Smrg	vb[4] = (float)dstX;
1621b7e1c893Smrg	vb[5] = (float)(dstY + h);
1622ad43ddacSmrg	vb[6] = (float)srcX;
1623ad43ddacSmrg	vb[7] = (float)(srcY + h);
1624b7e1c893Smrg
1625b7e1c893Smrg	vb[8] = (float)(dstX + w);
1626b7e1c893Smrg	vb[9] = (float)(dstY + h);
1627ad43ddacSmrg	vb[10] = (float)(srcX + w);
1628ad43ddacSmrg	vb[11] = (float)(srcY + h);
1629ad43ddacSmrg
1630921a55d8Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
1631b7e1c893Smrg    }
1632b7e1c893Smrg
1633b7e1c893Smrg
1634b7e1c893Smrg}
1635b7e1c893Smrg
1636b7e1c893SmrgBool
1637b7e1c893SmrgR600CopyToVRAM(ScrnInfoPtr pScrn,
1638b7e1c893Smrg	       char *src, int src_pitch,
1639ad43ddacSmrg	       uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_width, uint32_t dst_height, int bpp,
1640b7e1c893Smrg	       int x, int y, int w, int h)
1641b7e1c893Smrg{
1642b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1643ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1644b7e1c893Smrg    uint32_t scratch_mc_addr;
1645b7e1c893Smrg    int wpass = w * (bpp/8);
1646ad43ddacSmrg    int scratch_pitch_bytes = RADEON_ALIGN(wpass, 256);
1647b7e1c893Smrg    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1648b7e1c893Smrg    int scratch_offset = 0, hpass, temph;
1649b7e1c893Smrg    char *dst;
1650b7e1c893Smrg    drmBufPtr scratch;
1651ad43ddacSmrg    struct r600_accel_object scratch_obj, dst_obj;
1652b7e1c893Smrg
1653b7e1c893Smrg    if (dst_pitch & 7)
1654b7e1c893Smrg	return FALSE;
1655b7e1c893Smrg
1656b7e1c893Smrg    if (dst_mc_addr & 0xff)
1657b7e1c893Smrg	return FALSE;
1658b7e1c893Smrg
1659b7e1c893Smrg    scratch = RADEONCPGetBuffer(pScrn);
1660b7e1c893Smrg    if (scratch == NULL)
1661b7e1c893Smrg	return FALSE;
1662b7e1c893Smrg
1663b7e1c893Smrg    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1664b7e1c893Smrg    temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1665b7e1c893Smrg    dst = (char *)scratch->address;
1666b7e1c893Smrg
1667ad43ddacSmrg    scratch_obj.pitch = scratch_pitch;
1668ad43ddacSmrg    scratch_obj.width = w;
1669ad43ddacSmrg    scratch_obj.height = hpass;
1670ad43ddacSmrg    scratch_obj.offset = scratch_mc_addr;
1671ad43ddacSmrg    scratch_obj.bpp = bpp;
1672ad43ddacSmrg    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1673ad43ddacSmrg    scratch_obj.bo = NULL;
1674ad43ddacSmrg
1675ad43ddacSmrg    dst_obj.pitch = dst_pitch;
1676ad43ddacSmrg    dst_obj.width = dst_width;
1677ad43ddacSmrg    dst_obj.height = dst_height;
1678ad43ddacSmrg    dst_obj.offset = dst_mc_addr;
1679ad43ddacSmrg    dst_obj.bo = NULL;
1680ad43ddacSmrg    dst_obj.bpp = bpp;
1681ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1682ad43ddacSmrg
1683ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1684ad43ddacSmrg			   &scratch_obj,
1685ad43ddacSmrg			   NULL,
1686ad43ddacSmrg			   &dst_obj,
1687ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1688ad43ddacSmrg			   3, 0xffffffff))
1689ad43ddacSmrg	return FALSE;
1690ad43ddacSmrg
1691b7e1c893Smrg    /* memcopy from sys to scratch */
1692b7e1c893Smrg    while (temph--) {
1693b7e1c893Smrg	memcpy (dst, src, wpass);
1694b7e1c893Smrg	src += src_pitch;
1695b7e1c893Smrg	dst += scratch_pitch_bytes;
1696b7e1c893Smrg    }
1697b7e1c893Smrg
1698b7e1c893Smrg    while (h) {
1699b7e1c893Smrg	uint32_t offset = scratch_mc_addr + scratch_offset;
1700b7e1c893Smrg	int oldhpass = hpass;
1701b7e1c893Smrg	h -= oldhpass;
1702b7e1c893Smrg	temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1703b7e1c893Smrg
1704b7e1c893Smrg	if (hpass) {
1705b7e1c893Smrg	    scratch_offset = scratch->total/2 - scratch_offset;
1706b7e1c893Smrg	    dst = (char *)scratch->address + scratch_offset;
1707b7e1c893Smrg	    /* wait for the engine to be idle */
1708b7e1c893Smrg	    RADEONWaitForIdleCP(pScrn);
1709b7e1c893Smrg	    //memcopy from sys to scratch
1710b7e1c893Smrg	    while (temph--) {
1711b7e1c893Smrg		memcpy (dst, src, wpass);
1712b7e1c893Smrg		src += src_pitch;
1713b7e1c893Smrg		dst += scratch_pitch_bytes;
1714b7e1c893Smrg	    }
1715b7e1c893Smrg	}
1716b7e1c893Smrg	/* blit from scratch to vram */
1717ad43ddacSmrg	info->accel_state->src_obj[0].height = oldhpass;
1718ad43ddacSmrg	info->accel_state->src_obj[0].offset = offset;
1719ad43ddacSmrg	R600DoPrepareCopy(pScrn);
1720b7e1c893Smrg	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
1721b7e1c893Smrg	R600DoCopy(pScrn);
1722b7e1c893Smrg	y += oldhpass;
1723b7e1c893Smrg    }
1724b7e1c893Smrg
1725b7e1c893Smrg    R600IBDiscard(pScrn, scratch);
1726b7e1c893Smrg
1727b7e1c893Smrg    return TRUE;
1728b7e1c893Smrg}
1729b7e1c893Smrg
1730b7e1c893Smrgstatic Bool
1731b7e1c893SmrgR600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1732b7e1c893Smrg		   char *src, int src_pitch)
1733b7e1c893Smrg{
1734c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1735b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1736b7e1c893Smrg    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1737b7e1c893Smrg    uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1738b7e1c893Smrg    int bpp = pDst->drawable.bitsPerPixel;
1739b7e1c893Smrg
1740b7e1c893Smrg    return R600CopyToVRAM(pScrn,
1741b7e1c893Smrg			  src, src_pitch,
1742ad43ddacSmrg			  dst_pitch, dst_mc_addr, pDst->drawable.width, pDst->drawable.height, bpp,
1743b7e1c893Smrg			  x, y, w, h);
1744b7e1c893Smrg}
1745b7e1c893Smrg
1746b7e1c893Smrgstatic Bool
1747b7e1c893SmrgR600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
1748b7e1c893Smrg		       char *dst, int dst_pitch)
1749b7e1c893Smrg{
1750c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
1751b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1752ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1753b7e1c893Smrg    uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1754b7e1c893Smrg    uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1755b7e1c893Smrg    uint32_t src_width = pSrc->drawable.width;
1756b7e1c893Smrg    uint32_t src_height = pSrc->drawable.height;
1757b7e1c893Smrg    int bpp = pSrc->drawable.bitsPerPixel;
1758b7e1c893Smrg    uint32_t scratch_mc_addr;
1759ad43ddacSmrg    int scratch_pitch_bytes = RADEON_ALIGN(dst_pitch, 256);
1760b7e1c893Smrg    int scratch_offset = 0, hpass;
1761b7e1c893Smrg    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1762b7e1c893Smrg    int wpass = w * (bpp/8);
1763b7e1c893Smrg    drmBufPtr scratch;
1764ad43ddacSmrg    struct r600_accel_object scratch_obj, src_obj;
1765b7e1c893Smrg
1766ad43ddacSmrg    /* bad pipe setup in drm prior to 1.32 */
1767ad43ddacSmrg    if (info->dri->pKernelDRMVersion->version_minor < 32) {
1768ad43ddacSmrg	    if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32))
1769ad43ddacSmrg		    return FALSE;
1770ad43ddacSmrg    }
1771c503f109Smrg
1772b7e1c893Smrg    if (src_pitch & 7)
1773b7e1c893Smrg	return FALSE;
1774b7e1c893Smrg
1775b7e1c893Smrg    scratch = RADEONCPGetBuffer(pScrn);
1776b7e1c893Smrg    if (scratch == NULL)
1777b7e1c893Smrg	return FALSE;
1778b7e1c893Smrg
1779b7e1c893Smrg    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1780b7e1c893Smrg    hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1781b7e1c893Smrg
1782ad43ddacSmrg    src_obj.pitch = src_pitch;
1783ad43ddacSmrg    src_obj.width = src_width;
1784ad43ddacSmrg    src_obj.height = src_height;
1785ad43ddacSmrg    src_obj.offset = src_mc_addr;
1786ad43ddacSmrg    src_obj.bo = NULL;
1787ad43ddacSmrg    src_obj.bpp = bpp;
1788ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1789ad43ddacSmrg
1790ad43ddacSmrg    scratch_obj.pitch = scratch_pitch;
1791ad43ddacSmrg    scratch_obj.width = src_width;
1792ad43ddacSmrg    scratch_obj.height = hpass;
1793ad43ddacSmrg    scratch_obj.offset = scratch_mc_addr;
1794ad43ddacSmrg    scratch_obj.bpp = bpp;
1795ad43ddacSmrg    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1796ad43ddacSmrg    scratch_obj.bo = NULL;
1797ad43ddacSmrg
1798ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1799ad43ddacSmrg			   &src_obj,
1800ad43ddacSmrg			   NULL,
1801ad43ddacSmrg			   &scratch_obj,
1802ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1803ad43ddacSmrg			   3, 0xffffffff))
1804ad43ddacSmrg	return FALSE;
1805ad43ddacSmrg
1806b7e1c893Smrg    /* blit from vram to scratch */
1807ad43ddacSmrg    R600DoPrepareCopy(pScrn);
1808b7e1c893Smrg    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1809b7e1c893Smrg    R600DoCopy(pScrn);
1810b7e1c893Smrg
1811b7e1c893Smrg    while (h) {
1812b7e1c893Smrg	char *src = (char *)scratch->address + scratch_offset;
1813b7e1c893Smrg	int oldhpass = hpass;
1814b7e1c893Smrg	h -= oldhpass;
1815b7e1c893Smrg	y += oldhpass;
1816b7e1c893Smrg	hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1817b7e1c893Smrg
1818b7e1c893Smrg	if (hpass) {
1819b7e1c893Smrg	    scratch_offset = scratch->total/2 - scratch_offset;
1820b7e1c893Smrg	    /* blit from vram to scratch */
1821ad43ddacSmrg	    info->accel_state->dst_obj.height = hpass;
1822ad43ddacSmrg	    info->accel_state->dst_obj.offset = scratch_mc_addr + scratch_offset;
1823ad43ddacSmrg	    R600DoPrepareCopy(pScrn);
1824b7e1c893Smrg	    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1825b7e1c893Smrg	    R600DoCopy(pScrn);
1826b7e1c893Smrg	}
1827b7e1c893Smrg
1828b7e1c893Smrg	/* wait for the engine to be idle */
1829b7e1c893Smrg	RADEONWaitForIdleCP(pScrn);
1830b7e1c893Smrg	/* memcopy from scratch to sys */
1831b7e1c893Smrg	while (oldhpass--) {
1832b7e1c893Smrg	    memcpy (dst, src, wpass);
1833b7e1c893Smrg	    dst += dst_pitch;
1834b7e1c893Smrg	    src += scratch_pitch_bytes;
1835b7e1c893Smrg	}
1836b7e1c893Smrg    }
1837b7e1c893Smrg
1838b7e1c893Smrg    R600IBDiscard(pScrn, scratch);
1839b7e1c893Smrg
1840b7e1c893Smrg    return TRUE;
1841b7e1c893Smrg
1842b7e1c893Smrg}
1843b7e1c893Smrg
1844ad43ddacSmrg#if defined(XF86DRM_MODE)
1845ad43ddacSmrg
1846ad43ddacSmrgstatic Bool
1847ad43ddacSmrgR600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
1848ad43ddacSmrg		     char *src, int src_pitch)
1849ad43ddacSmrg{
1850c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1851ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1852ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1853ad43ddacSmrg    struct radeon_exa_pixmap_priv *driver_priv;
18540974d292Smrg    struct radeon_bo *scratch = NULL;
18550974d292Smrg    struct radeon_bo *copy_dst;
18560974d292Smrg    unsigned char *dst;
1857ad43ddacSmrg    unsigned size;
1858ad43ddacSmrg    uint32_t dst_domain;
1859ad43ddacSmrg    int bpp = pDst->drawable.bitsPerPixel;
1860b13dfe66Smrg    uint32_t scratch_pitch;
18610974d292Smrg    uint32_t copy_pitch;
1862ad43ddacSmrg    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
18630974d292Smrg    int ret;
18640974d292Smrg    Bool flush = TRUE;
1865ad43ddacSmrg    Bool r;
1866ad43ddacSmrg    int i;
1867ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
1868166b61b6Smrg    uint32_t height, base_align;
1869ad43ddacSmrg
1870ad43ddacSmrg    if (bpp < 8)
1871ad43ddacSmrg	return FALSE;
1872ad43ddacSmrg
1873ad43ddacSmrg    driver_priv = exaGetPixmapDriverPrivate(pDst);
1874921a55d8Smrg    if (!driver_priv || !driver_priv->bo)
1875921a55d8Smrg	return FALSE;
1876921a55d8Smrg
18770974d292Smrg    /* If we know the BO won't be busy, don't bother with a scratch */
18780974d292Smrg    copy_dst = driver_priv->bo;
18790974d292Smrg    copy_pitch = pDst->devKind;
1880166b61b6Smrg    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1881921a55d8Smrg	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1882921a55d8Smrg	    flush = FALSE;
1883921a55d8Smrg	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
1884921a55d8Smrg		goto copy;
1885921a55d8Smrg	}
18860974d292Smrg    }
1887ad43ddacSmrg
1888b13dfe66Smrg    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1889b13dfe66Smrg    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1890b13dfe66Smrg    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1891b13dfe66Smrg    size = scratch_pitch * height * (bpp / 8);
1892b13dfe66Smrg    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1893ad43ddacSmrg    if (scratch == NULL) {
18940974d292Smrg	goto copy;
1895ad43ddacSmrg    }
1896ad43ddacSmrg
1897b13dfe66Smrg    src_obj.pitch = scratch_pitch;
1898ad43ddacSmrg    src_obj.width = w;
1899ad43ddacSmrg    src_obj.height = h;
1900ad43ddacSmrg    src_obj.offset = 0;
1901ad43ddacSmrg    src_obj.bpp = bpp;
1902ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1903ad43ddacSmrg    src_obj.bo = scratch;
1904166b61b6Smrg    src_obj.tiling_flags = 0;
1905f3a0071aSrjs    src_obj.surface = NULL;
1906ad43ddacSmrg
1907ad43ddacSmrg    dst_obj.pitch = dst_pitch_hw;
1908ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
1909ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
1910ad43ddacSmrg    dst_obj.offset = 0;
1911ad43ddacSmrg    dst_obj.bpp = bpp;
1912ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1913ad43ddacSmrg    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1914166b61b6Smrg    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1915f3a0071aSrjs    dst_obj.surface = radeon_get_pixmap_surface(pDst);
1916ad43ddacSmrg
1917ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1918ad43ddacSmrg			   &src_obj,
1919ad43ddacSmrg			   NULL,
1920ad43ddacSmrg			   &dst_obj,
1921ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1922ad43ddacSmrg			   3, 0xffffffff)) {
19230974d292Smrg        goto copy;
1924ad43ddacSmrg    }
19250974d292Smrg    copy_dst = scratch;
1926b13dfe66Smrg    copy_pitch = scratch_pitch * (bpp / 8);
19270974d292Smrg    flush = FALSE;
19280974d292Smrg
19290974d292Smrgcopy:
19300974d292Smrg    if (flush)
19310974d292Smrg	radeon_cs_flush_indirect(pScrn);
1932ad43ddacSmrg
19330974d292Smrg    ret = radeon_bo_map(copy_dst, 0);
19340974d292Smrg    if (ret) {
1935ad43ddacSmrg        r = FALSE;
1936ad43ddacSmrg        goto out;
1937ad43ddacSmrg    }
1938ad43ddacSmrg    r = TRUE;
1939ad43ddacSmrg    size = w * bpp / 8;
19400974d292Smrg    dst = copy_dst->ptr;
19410974d292Smrg    if (copy_dst == driver_priv->bo)
19420974d292Smrg	dst += y * copy_pitch + x * bpp / 8;
1943ad43ddacSmrg    for (i = 0; i < h; i++) {
19440974d292Smrg        memcpy(dst + i * copy_pitch, src, size);
1945ad43ddacSmrg        src += src_pitch;
1946ad43ddacSmrg    }
19470974d292Smrg    radeon_bo_unmap(copy_dst);
1948ad43ddacSmrg
19490974d292Smrg    if (copy_dst == scratch) {
19500974d292Smrg	if (info->accel_state->vsync)
19510974d292Smrg	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1952ad43ddacSmrg
19530974d292Smrg	/* blit from gart to vram */
19540974d292Smrg	R600DoPrepareCopy(pScrn);
19550974d292Smrg	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
19560974d292Smrg	R600DoCopyVline(pDst);
19570974d292Smrg    }
1958ad43ddacSmrg
1959ad43ddacSmrgout:
19600974d292Smrg    if (scratch)
19610974d292Smrg	radeon_bo_unref(scratch);
1962ad43ddacSmrg    return r;
1963ad43ddacSmrg}
1964ad43ddacSmrg
1965ad43ddacSmrgstatic Bool
1966ad43ddacSmrgR600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
1967ad43ddacSmrg			 int h, char *dst, int dst_pitch)
1968ad43ddacSmrg{
1969c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
1970ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1971ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1972ad43ddacSmrg    struct radeon_exa_pixmap_priv *driver_priv;
19730974d292Smrg    struct radeon_bo *scratch = NULL;
19740974d292Smrg    struct radeon_bo *copy_src;
1975ad43ddacSmrg    unsigned size;
1976ad43ddacSmrg    uint32_t src_domain = 0;
1977ad43ddacSmrg    int bpp = pSrc->drawable.bitsPerPixel;
1978b13dfe66Smrg    uint32_t scratch_pitch;
19790974d292Smrg    uint32_t copy_pitch;
1980ad43ddacSmrg    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
19810974d292Smrg    int ret;
19820974d292Smrg    Bool flush = FALSE;
1983ad43ddacSmrg    Bool r;
1984ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
1985166b61b6Smrg    uint32_t height, base_align;
1986ad43ddacSmrg
1987ad43ddacSmrg    if (bpp < 8)
1988ad43ddacSmrg	return FALSE;
1989ad43ddacSmrg
1990ad43ddacSmrg    driver_priv = exaGetPixmapDriverPrivate(pSrc);
1991921a55d8Smrg    if (!driver_priv || !driver_priv->bo)
1992921a55d8Smrg	return FALSE;
1993921a55d8Smrg
19940974d292Smrg    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
19950974d292Smrg    copy_src = driver_priv->bo;
19960974d292Smrg    copy_pitch = pSrc->devKind;
1997166b61b6Smrg    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1998921a55d8Smrg	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1999921a55d8Smrg	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
2000921a55d8Smrg	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
2001921a55d8Smrg		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
2002921a55d8Smrg		src_domain = 0;
2003921a55d8Smrg	    else /* A write may be scheduled */
2004921a55d8Smrg		flush = TRUE;
2005921a55d8Smrg	}
2006ad43ddacSmrg
2007921a55d8Smrg	if (!src_domain)
2008921a55d8Smrg	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
2009ad43ddacSmrg
2010921a55d8Smrg	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
2011921a55d8Smrg	    goto copy;
2012921a55d8Smrg    }
2013ad43ddacSmrg
2014b13dfe66Smrg    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
2015b13dfe66Smrg    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
2016b13dfe66Smrg    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
2017b13dfe66Smrg    size = scratch_pitch * height * (bpp / 8);
2018b13dfe66Smrg    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
2019ad43ddacSmrg    if (scratch == NULL) {
20200974d292Smrg	goto copy;
2021ad43ddacSmrg    }
2022ad43ddacSmrg    radeon_cs_space_reset_bos(info->cs);
2023ad43ddacSmrg    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
2024ad43ddacSmrg				      RADEON_GEM_DOMAIN_VRAM, 0);
2025ad43ddacSmrg    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
2026ad43ddacSmrg    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
2027ad43ddacSmrg    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
2028ad43ddacSmrg    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
20290974d292Smrg    ret = radeon_cs_space_check(info->cs);
20300974d292Smrg    if (ret) {
20310974d292Smrg        goto copy;
2032ad43ddacSmrg    }
2033ad43ddacSmrg
2034ad43ddacSmrg    src_obj.pitch = src_pitch_hw;
2035ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
2036ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
2037ad43ddacSmrg    src_obj.offset = 0;
2038ad43ddacSmrg    src_obj.bpp = bpp;
2039ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
2040ad43ddacSmrg    src_obj.bo = radeon_get_pixmap_bo(pSrc);
2041166b61b6Smrg    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
2042f3a0071aSrjs    src_obj.surface = radeon_get_pixmap_surface(pSrc);
2043921a55d8Smrg
2044b13dfe66Smrg    dst_obj.pitch = scratch_pitch;
2045ad43ddacSmrg    dst_obj.width = w;
2046ad43ddacSmrg    dst_obj.height = h;
2047ad43ddacSmrg    dst_obj.offset = 0;
2048ad43ddacSmrg    dst_obj.bo = scratch;
2049ad43ddacSmrg    dst_obj.bpp = bpp;
2050ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
2051166b61b6Smrg    dst_obj.tiling_flags = 0;
2052f3a0071aSrjs    dst_obj.surface = NULL;
2053ad43ddacSmrg
2054ad43ddacSmrg    if (!R600SetAccelState(pScrn,
2055ad43ddacSmrg			   &src_obj,
2056ad43ddacSmrg			   NULL,
2057ad43ddacSmrg			   &dst_obj,
2058ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
2059ad43ddacSmrg			   3, 0xffffffff)) {
20600974d292Smrg        goto copy;
2061ad43ddacSmrg    }
2062ad43ddacSmrg
2063ad43ddacSmrg    /* blit from vram to gart */
2064ad43ddacSmrg    R600DoPrepareCopy(pScrn);
2065ad43ddacSmrg    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
2066ad43ddacSmrg    R600DoCopy(pScrn);
20670974d292Smrg    copy_src = scratch;
2068b13dfe66Smrg    copy_pitch = scratch_pitch * (bpp / 8);
20690974d292Smrg    flush = TRUE;
2070ad43ddacSmrg
20710974d292Smrgcopy:
20720974d292Smrg    if (flush && info->cs)
2073ad43ddacSmrg	radeon_cs_flush_indirect(pScrn);
2074ad43ddacSmrg
20750974d292Smrg    ret = radeon_bo_map(copy_src, 0);
20760974d292Smrg    if (ret) {
20770974d292Smrg	ErrorF("failed to map pixmap: %d\n", ret);
2078ad43ddacSmrg        r = FALSE;
2079ad43ddacSmrg        goto out;
2080ad43ddacSmrg    }
2081ad43ddacSmrg    r = TRUE;
2082ad43ddacSmrg    w *= bpp / 8;
20830974d292Smrg    if (copy_src == driver_priv->bo)
20840974d292Smrg	size = y * copy_pitch + x * bpp / 8;
20850974d292Smrg    else
20860974d292Smrg	size = 0;
2087ad43ddacSmrg    while (h--) {
20880974d292Smrg        memcpy(dst, copy_src->ptr + size, w);
20890974d292Smrg        size += copy_pitch;
2090ad43ddacSmrg        dst += dst_pitch;
2091ad43ddacSmrg    }
20920974d292Smrg    radeon_bo_unmap(copy_src);
2093ad43ddacSmrgout:
20940974d292Smrg    if (scratch)
20950974d292Smrg	radeon_bo_unref(scratch);
2096ad43ddacSmrg    return r;
2097ad43ddacSmrg}
2098ad43ddacSmrg#endif
2099ad43ddacSmrg
2100b7e1c893Smrgstatic int
2101b7e1c893SmrgR600MarkSync(ScreenPtr pScreen)
2102b7e1c893Smrg{
2103c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2104b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2105b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2106b7e1c893Smrg
2107b7e1c893Smrg    return ++accel_state->exaSyncMarker;
2108b7e1c893Smrg
2109b7e1c893Smrg}
2110b7e1c893Smrg
2111b7e1c893Smrgstatic void
2112b7e1c893SmrgR600Sync(ScreenPtr pScreen, int marker)
2113b7e1c893Smrg{
2114c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2115b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2116b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2117b7e1c893Smrg
2118b7e1c893Smrg    if (accel_state->exaMarkerSynced != marker) {
2119ad43ddacSmrg#ifdef XF86DRM_MODE
2120ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2121ad43ddacSmrg	if (!info->cs)
2122ad43ddacSmrg#endif
2123ad43ddacSmrg#endif
2124ad43ddacSmrg	    RADEONWaitForIdleCP(pScrn);
2125b7e1c893Smrg	accel_state->exaMarkerSynced = marker;
2126b7e1c893Smrg    }
2127b7e1c893Smrg
2128b7e1c893Smrg}
2129b7e1c893Smrg
2130b7e1c893Smrgstatic Bool
2131b7e1c893SmrgR600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
2132b7e1c893Smrg{
2133b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2134b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2135b7e1c893Smrg
2136b7e1c893Smrg    /* 512 bytes per shader for now */
2137b7e1c893Smrg    int size = 512 * 9;
2138b7e1c893Smrg
2139b7e1c893Smrg    accel_state->shaders = NULL;
2140b7e1c893Smrg
2141ad43ddacSmrg#ifdef XF86DRM_MODE
2142ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2143ad43ddacSmrg    if (info->cs) {
2144ad43ddacSmrg	accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
2145ad43ddacSmrg						 RADEON_GEM_DOMAIN_VRAM, 0);
2146ad43ddacSmrg	if (accel_state->shaders_bo == NULL) {
2147ad43ddacSmrg	    ErrorF("Allocating shader failed\n");
2148ad43ddacSmrg	    return FALSE;
2149ad43ddacSmrg	}
2150ad43ddacSmrg	return TRUE;
2151ad43ddacSmrg    } else
2152ad43ddacSmrg#endif
2153ad43ddacSmrg#endif
2154ad43ddacSmrg    {
2155ad43ddacSmrg	accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
2156ad43ddacSmrg						 TRUE, NULL, NULL);
2157ad43ddacSmrg
2158ad43ddacSmrg	if (accel_state->shaders == NULL)
2159ad43ddacSmrg	    return FALSE;
2160ad43ddacSmrg    }
2161b7e1c893Smrg
2162b7e1c893Smrg    return TRUE;
2163b7e1c893Smrg}
2164b7e1c893Smrg
2165b7e1c893SmrgBool
2166b7e1c893SmrgR600LoadShaders(ScrnInfoPtr pScrn)
2167b7e1c893Smrg{
2168b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2169b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2170b7e1c893Smrg    RADEONChipFamily ChipSet = info->ChipFamily;
2171b7e1c893Smrg    uint32_t *shader;
2172ad43ddacSmrg#ifdef XF86DRM_MODE
2173ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2174ad43ddacSmrg    int ret;
2175ad43ddacSmrg
2176ad43ddacSmrg    if (info->cs) {
2177ad43ddacSmrg	ret = radeon_bo_map(accel_state->shaders_bo, 1);
2178ad43ddacSmrg	if (ret) {
2179ad43ddacSmrg	    FatalError("failed to map shader %d\n", ret);
2180ad43ddacSmrg	    return FALSE;
2181ad43ddacSmrg	}
2182ad43ddacSmrg	shader = accel_state->shaders_bo->ptr;
2183ad43ddacSmrg    } else
2184ad43ddacSmrg#endif
2185ad43ddacSmrg#endif
2186ad43ddacSmrg	shader = (pointer)((char *)info->FB + accel_state->shaders->offset);
2187b7e1c893Smrg
2188b7e1c893Smrg    /*  solid vs --------------------------------------- */
2189b7e1c893Smrg    accel_state->solid_vs_offset = 0;
2190b7e1c893Smrg    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
2191b7e1c893Smrg
2192b7e1c893Smrg    /*  solid ps --------------------------------------- */
2193b7e1c893Smrg    accel_state->solid_ps_offset = 512;
2194b7e1c893Smrg    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
2195b7e1c893Smrg
2196b7e1c893Smrg    /*  copy vs --------------------------------------- */
2197b7e1c893Smrg    accel_state->copy_vs_offset = 1024;
2198b7e1c893Smrg    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
2199b7e1c893Smrg
2200b7e1c893Smrg    /*  copy ps --------------------------------------- */
2201b7e1c893Smrg    accel_state->copy_ps_offset = 1536;
2202b7e1c893Smrg    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2203b7e1c893Smrg
2204b7e1c893Smrg    /*  comp vs --------------------------------------- */
2205b7e1c893Smrg    accel_state->comp_vs_offset = 2048;
2206b7e1c893Smrg    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2207b7e1c893Smrg
2208b7e1c893Smrg    /*  comp ps --------------------------------------- */
2209b7e1c893Smrg    accel_state->comp_ps_offset = 2560;
2210b7e1c893Smrg    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2211b7e1c893Smrg
2212b7e1c893Smrg    /*  xv vs --------------------------------------- */
22130974d292Smrg    accel_state->xv_vs_offset = 3072;
2214b7e1c893Smrg    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2215b7e1c893Smrg
2216b7e1c893Smrg    /*  xv ps --------------------------------------- */
22170974d292Smrg    accel_state->xv_ps_offset = 3584;
2218b7e1c893Smrg    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2219b7e1c893Smrg
2220ad43ddacSmrg#ifdef XF86DRM_MODE
2221ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2222ad43ddacSmrg    if (info->cs) {
2223ad43ddacSmrg	radeon_bo_unmap(accel_state->shaders_bo);
2224ad43ddacSmrg    }
2225ad43ddacSmrg#endif
2226ad43ddacSmrg#endif
2227ad43ddacSmrg
2228b7e1c893Smrg    return TRUE;
2229b7e1c893Smrg}
2230b7e1c893Smrg
2231b7e1c893Smrgstatic Bool
2232b7e1c893SmrgR600PrepareAccess(PixmapPtr pPix, int index)
2233b7e1c893Smrg{
2234c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
2235b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2236b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
2237b7e1c893Smrg
2238b7e1c893Smrg    /* flush HDP read/write caches */
2239b7e1c893Smrg    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2240b7e1c893Smrg
2241b7e1c893Smrg    return TRUE;
2242b7e1c893Smrg}
2243b7e1c893Smrg
2244b7e1c893Smrgstatic void
2245b7e1c893SmrgR600FinishAccess(PixmapPtr pPix, int index)
2246b7e1c893Smrg{
2247c135ecebSveego    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
2248b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2249b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
2250b7e1c893Smrg
2251b7e1c893Smrg    /* flush HDP read/write caches */
2252b7e1c893Smrg    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2253b7e1c893Smrg
2254b7e1c893Smrg}
2255b7e1c893Smrg
2256b7e1c893SmrgBool
2257b7e1c893SmrgR600DrawInit(ScreenPtr pScreen)
2258b7e1c893Smrg{
2259c135ecebSveego    ScrnInfoPtr pScrn =  xf86ScreenToScrn(pScreen);
2260b7e1c893Smrg    RADEONInfoPtr info   = RADEONPTR(pScrn);
2261b7e1c893Smrg
2262b7e1c893Smrg    if (info->accel_state->exa == NULL) {
2263b7e1c893Smrg	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2264b7e1c893Smrg	return FALSE;
2265b7e1c893Smrg    }
2266b7e1c893Smrg
2267b7e1c893Smrg    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2268b7e1c893Smrg    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2269b7e1c893Smrg
2270b7e1c893Smrg    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2271b7e1c893Smrg    info->accel_state->exa->Solid = R600Solid;
2272b7e1c893Smrg    info->accel_state->exa->DoneSolid = R600DoneSolid;
2273b7e1c893Smrg
2274b7e1c893Smrg    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2275b7e1c893Smrg    info->accel_state->exa->Copy = R600Copy;
2276b7e1c893Smrg    info->accel_state->exa->DoneCopy = R600DoneCopy;
2277b7e1c893Smrg
2278b7e1c893Smrg    info->accel_state->exa->MarkSync = R600MarkSync;
2279b7e1c893Smrg    info->accel_state->exa->WaitMarker = R600Sync;
2280b7e1c893Smrg
2281ad43ddacSmrg#ifdef XF86DRM_MODE
2282ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2283ad43ddacSmrg    if (info->cs) {
2284ad43ddacSmrg	info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
2285ad43ddacSmrg	info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
2286ad43ddacSmrg	info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
2287ad43ddacSmrg	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
2288ad43ddacSmrg	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
2289ad43ddacSmrg	info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
2290ad43ddacSmrg	info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
22910974d292Smrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5)
22920974d292Smrg        info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
22930974d292Smrg#endif
2294ad43ddacSmrg    } else
2295ad43ddacSmrg#endif
2296ad43ddacSmrg#endif
2297ad43ddacSmrg    {
2298ad43ddacSmrg	info->accel_state->exa->PrepareAccess = R600PrepareAccess;
2299ad43ddacSmrg	info->accel_state->exa->FinishAccess = R600FinishAccess;
2300ad43ddacSmrg
2301ad43ddacSmrg	/* AGP seems to have problems with gart transfers */
2302ad43ddacSmrg	if (info->accelDFS) {
2303ad43ddacSmrg	    info->accel_state->exa->UploadToScreen = R600UploadToScreen;
2304ad43ddacSmrg	    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen;
2305ad43ddacSmrg	}
2306b7e1c893Smrg    }
2307b7e1c893Smrg
2308b7e1c893Smrg    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
2309b7e1c893Smrg#ifdef EXA_SUPPORTS_PREPARE_AUX
2310b7e1c893Smrg    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
2311ad43ddacSmrg#endif
2312ad43ddacSmrg
2313ad43ddacSmrg#ifdef XF86DRM_MODE
2314ad43ddacSmrg#ifdef EXA_HANDLES_PIXMAPS
2315ad43ddacSmrg    if (info->cs) {
2316ad43ddacSmrg	info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
2317ad43ddacSmrg#ifdef EXA_MIXED_PIXMAPS
2318ad43ddacSmrg	info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
2319ad43ddacSmrg#endif
2320ad43ddacSmrg    }
2321ad43ddacSmrg#endif
2322b7e1c893Smrg#endif
2323b7e1c893Smrg    info->accel_state->exa->pixmapOffsetAlign = 256;
2324b7e1c893Smrg    info->accel_state->exa->pixmapPitchAlign = 256;
2325b7e1c893Smrg
2326b7e1c893Smrg    info->accel_state->exa->CheckComposite = R600CheckComposite;
2327b7e1c893Smrg    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2328b7e1c893Smrg    info->accel_state->exa->Composite = R600Composite;
2329b7e1c893Smrg    info->accel_state->exa->DoneComposite = R600DoneComposite;
2330b7e1c893Smrg
2331b7e1c893Smrg#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
2332b7e1c893Smrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
2333b7e1c893Smrg
2334b7e1c893Smrg    info->accel_state->exa->maxPitchBytes = 32768;
2335b7e1c893Smrg    info->accel_state->exa->maxX = 8192;
2336b7e1c893Smrg#else
2337b7e1c893Smrg    info->accel_state->exa->maxX = 8192;
2338b7e1c893Smrg#endif
2339b7e1c893Smrg    info->accel_state->exa->maxY = 8192;
2340b7e1c893Smrg
2341b7e1c893Smrg    /* not supported yet */
2342ad43ddacSmrg    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2343ad43ddacSmrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2344ad43ddacSmrg	info->accel_state->vsync = TRUE;
2345ad43ddacSmrg    } else
2346ad43ddacSmrg	info->accel_state->vsync = FALSE;
2347b7e1c893Smrg
2348b7e1c893Smrg    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
23492f39173dSmrg	free(info->accel_state->exa);
2350b7e1c893Smrg	return FALSE;
2351b7e1c893Smrg    }
2352b7e1c893Smrg
2353ad43ddacSmrg#ifdef XF86DRM_MODE
2354ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2355ad43ddacSmrg    if (!info->cs)
2356ad43ddacSmrg#endif
2357ad43ddacSmrg#endif
2358ad43ddacSmrg	if (!info->gartLocation)
2359ad43ddacSmrg	    return FALSE;
2360b7e1c893Smrg
2361b7e1c893Smrg    info->accel_state->XInited3D = FALSE;
2362b7e1c893Smrg    info->accel_state->copy_area = NULL;
2363ad43ddacSmrg    info->accel_state->src_obj[0].bo = NULL;
2364ad43ddacSmrg    info->accel_state->src_obj[1].bo = NULL;
2365ad43ddacSmrg    info->accel_state->dst_obj.bo = NULL;
2366ad43ddacSmrg    info->accel_state->copy_area_bo = NULL;
2367921a55d8Smrg    info->accel_state->vbo.vb_start_op = -1;
23680974d292Smrg    info->accel_state->finish_op = r600_finish_op;
2369921a55d8Smrg    info->accel_state->vbo.verts_per_op = 3;
23700974d292Smrg    RADEONVlineHelperClear(pScrn);
2371ad43ddacSmrg
2372ad43ddacSmrg#ifdef XF86DRM_MODE
2373ad43ddacSmrg    radeon_vbo_init_lists(pScrn);
2374ad43ddacSmrg#endif
2375b7e1c893Smrg
2376b7e1c893Smrg    if (!R600AllocShaders(pScrn, pScreen))
2377b7e1c893Smrg	return FALSE;
2378b7e1c893Smrg
2379b7e1c893Smrg    if (!R600LoadShaders(pScrn))
2380b7e1c893Smrg	return FALSE;
2381b7e1c893Smrg
2382b7e1c893Smrg    exaMarkSync(pScreen);
2383b7e1c893Smrg
2384b7e1c893Smrg    return TRUE;
2385b7e1c893Smrg
2386b7e1c893Smrg}
2387b7e1c893Smrg
2388