r600_exa.c revision 30d12090
1b7e1c893Smrg/*
2b7e1c893Smrg * Copyright 2008 Advanced Micro Devices, Inc.
3b7e1c893Smrg *
4b7e1c893Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b7e1c893Smrg * copy of this software and associated documentation files (the "Software"),
6b7e1c893Smrg * to deal in the Software without restriction, including without limitation
7b7e1c893Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b7e1c893Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b7e1c893Smrg * Software is furnished to do so, subject to the following conditions:
10b7e1c893Smrg *
11b7e1c893Smrg * The above copyright notice and this permission notice (including the next
12b7e1c893Smrg * paragraph) shall be included in all copies or substantial portions of the
13b7e1c893Smrg * Software.
14b7e1c893Smrg *
15b7e1c893Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b7e1c893Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b7e1c893Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b7e1c893Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b7e1c893Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b7e1c893Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b7e1c893Smrg * SOFTWARE.
22b7e1c893Smrg *
23b7e1c893Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24b7e1c893Smrg *
25b7e1c893Smrg */
26b7e1c893Smrg
27b7e1c893Smrg#ifdef HAVE_CONFIG_H
28b7e1c893Smrg#include "config.h"
29b7e1c893Smrg#endif
30b7e1c893Smrg
31b7e1c893Smrg#include "xf86.h"
32b7e1c893Smrg
33b7e1c893Smrg#include "exa.h"
34b7e1c893Smrg
35b7e1c893Smrg#include "radeon.h"
36b7e1c893Smrg#include "radeon_macros.h"
37b7e1c893Smrg#include "radeon_reg.h"
38b7e1c893Smrg#include "r600_shader.h"
39b7e1c893Smrg#include "r600_reg.h"
40b7e1c893Smrg#include "r600_state.h"
410974d292Smrg#include "radeon_exa_shared.h"
42ad43ddacSmrg#include "radeon_vbo.h"
43ad43ddacSmrg
44b7e1c893Smrg/* #define SHOW_VERTEXES */
45b7e1c893Smrg
46ad43ddacSmrgBool
47ad43ddacSmrgR600SetAccelState(ScrnInfoPtr pScrn,
48ad43ddacSmrg		  struct r600_accel_object *src0,
49ad43ddacSmrg		  struct r600_accel_object *src1,
50ad43ddacSmrg		  struct r600_accel_object *dst,
51ad43ddacSmrg		  uint32_t vs_offset, uint32_t ps_offset,
52ad43ddacSmrg		  int rop, Pixel planemask)
53ad43ddacSmrg{
54ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
55ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
56b13dfe66Smrg    uint32_t pitch = 0;
57b13dfe66Smrg    uint32_t pitch_align = 0x7, base_align = 0xff;
58b13dfe66Smrg#if defined(XF86DRM_MODE)
59b13dfe66Smrg    int ret;
60b13dfe66Smrg#endif
61ad43ddacSmrg
62ad43ddacSmrg    if (src0) {
63ad43ddacSmrg	memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object));
64ad43ddacSmrg	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
65b13dfe66Smrg#if defined(XF86DRM_MODE)
66b13dfe66Smrg	if (info->cs) {
67b13dfe66Smrg	    ret = radeon_bo_get_tiling(accel_state->src_obj[0].bo,
68b13dfe66Smrg				       &accel_state->src_obj[0].tiling_flags,
69b13dfe66Smrg				       &pitch);
70b13dfe66Smrg	    if (ret)
71b13dfe66Smrg		RADEON_FALLBACK(("src0 radeon_bo_get_tiling failed\n"));
72b13dfe66Smrg	    pitch_align = drmmode_get_pitch_align(pScrn,
73b13dfe66Smrg						  accel_state->src_obj[0].bpp / 8,
74b13dfe66Smrg						  accel_state->src_obj[0].tiling_flags) - 1;
75b13dfe66Smrg	    base_align = drmmode_get_base_align(pScrn,
76b13dfe66Smrg						accel_state->src_obj[0].bpp / 8,
77b13dfe66Smrg						accel_state->src_obj[0].tiling_flags) - 1;
78b13dfe66Smrg	}
79b13dfe66Smrg#endif
80b13dfe66Smrg	/* bad pitch */
81b13dfe66Smrg	if (accel_state->src_obj[0].pitch & pitch_align)
82b13dfe66Smrg	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
83b13dfe66Smrg
84b13dfe66Smrg	/* bad offset */
85b13dfe66Smrg	if (accel_state->src_obj[0].offset & base_align)
86b13dfe66Smrg	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[0].offset));
87b13dfe66Smrg
88ad43ddacSmrg    } else {
89ad43ddacSmrg	memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
90ad43ddacSmrg	accel_state->src_size[0] = 0;
91ad43ddacSmrg    }
92ad43ddacSmrg
93ad43ddacSmrg    if (src1) {
94ad43ddacSmrg	memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object));
95ad43ddacSmrg	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
96b13dfe66Smrg#if defined(XF86DRM_MODE)
97b13dfe66Smrg	if (info->cs) {
98b13dfe66Smrg	    ret = radeon_bo_get_tiling(accel_state->src_obj[1].bo,
99b13dfe66Smrg				       &accel_state->src_obj[1].tiling_flags,
100b13dfe66Smrg				       &pitch);
101b13dfe66Smrg	    if (ret)
102b13dfe66Smrg		RADEON_FALLBACK(("src1 radeon_bo_get_tiling failed\n"));
103b13dfe66Smrg	    pitch_align = drmmode_get_pitch_align(pScrn,
104b13dfe66Smrg						  accel_state->src_obj[1].bpp / 8,
105b13dfe66Smrg						  accel_state->src_obj[1].tiling_flags) - 1;
106b13dfe66Smrg	    base_align = drmmode_get_base_align(pScrn,
107b13dfe66Smrg						accel_state->src_obj[1].bpp / 8,
108b13dfe66Smrg						accel_state->src_obj[1].tiling_flags) - 1;
109b13dfe66Smrg	}
110b13dfe66Smrg#endif
111b13dfe66Smrg	/* bad pitch */
112b13dfe66Smrg	if (accel_state->src_obj[1].pitch & pitch_align)
113b13dfe66Smrg	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
114b13dfe66Smrg
115b13dfe66Smrg	/* bad offset */
116b13dfe66Smrg	if (accel_state->src_obj[1].offset & base_align)
117b13dfe66Smrg	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[1].offset));
118ad43ddacSmrg    } else {
119ad43ddacSmrg	memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
120ad43ddacSmrg	accel_state->src_size[1] = 0;
121ad43ddacSmrg    }
122ad43ddacSmrg
123ad43ddacSmrg    if (dst) {
124ad43ddacSmrg	memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object));
125ad43ddacSmrg	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
126b13dfe66Smrg#if defined(XF86DRM_MODE)
127b13dfe66Smrg	if (info->cs) {
128b13dfe66Smrg	    ret = radeon_bo_get_tiling(accel_state->dst_obj.bo,
129b13dfe66Smrg				       &accel_state->dst_obj.tiling_flags,
130b13dfe66Smrg				       &pitch);
131b13dfe66Smrg	    if (ret)
132b13dfe66Smrg		RADEON_FALLBACK(("dst radeon_bo_get_tiling failed\n"));
133b13dfe66Smrg	    pitch_align = drmmode_get_pitch_align(pScrn,
134b13dfe66Smrg						  accel_state->dst_obj.bpp / 8,
135b13dfe66Smrg						  accel_state->dst_obj.tiling_flags) - 1;
136b13dfe66Smrg	    base_align = drmmode_get_base_align(pScrn,
137b13dfe66Smrg						accel_state->dst_obj.bpp / 8,
138b13dfe66Smrg						accel_state->dst_obj.tiling_flags) - 1;
139b13dfe66Smrg	}
140b13dfe66Smrg#endif
141b13dfe66Smrg	if (accel_state->dst_obj.pitch & pitch_align)
142b13dfe66Smrg	    RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
143b13dfe66Smrg
144b13dfe66Smrg	if (accel_state->dst_obj.offset & base_align)
145b13dfe66Smrg	    RADEON_FALLBACK(("Bad dst offset 0x%08x\n", accel_state->dst_obj.offset));
146ad43ddacSmrg    } else {
147ad43ddacSmrg	memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
148ad43ddacSmrg	accel_state->dst_size = 0;
149ad43ddacSmrg    }
150ad43ddacSmrg
151ad43ddacSmrg    accel_state->rop = rop;
152ad43ddacSmrg    accel_state->planemask = planemask;
153ad43ddacSmrg
154ad43ddacSmrg    accel_state->vs_size = 512;
155ad43ddacSmrg    accel_state->ps_size = 512;
156ad43ddacSmrg#if defined(XF86DRM_MODE)
157ad43ddacSmrg    if (info->cs) {
158ad43ddacSmrg	accel_state->vs_mc_addr = vs_offset;
159ad43ddacSmrg	accel_state->ps_mc_addr = ps_offset;
160ad43ddacSmrg
161ad43ddacSmrg	radeon_cs_space_reset_bos(info->cs);
162ad43ddacSmrg	radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
163ad43ddacSmrg					  RADEON_GEM_DOMAIN_VRAM, 0);
164ad43ddacSmrg	if (accel_state->src_obj[0].bo)
165ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
166ad43ddacSmrg					      accel_state->src_obj[0].domain, 0);
167ad43ddacSmrg	if (accel_state->src_obj[1].bo)
168ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
169ad43ddacSmrg					      accel_state->src_obj[1].domain, 0);
170ad43ddacSmrg	if (accel_state->dst_obj.bo)
171ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
172ad43ddacSmrg					      0, accel_state->dst_obj.domain);
173ad43ddacSmrg	ret = radeon_cs_space_check(info->cs);
174ad43ddacSmrg	if (ret)
175ad43ddacSmrg	    RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
176ad43ddacSmrg
177ad43ddacSmrg    } else
178ad43ddacSmrg#endif
179ad43ddacSmrg    {
180ad43ddacSmrg	accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
181ad43ddacSmrg	    vs_offset;
182ad43ddacSmrg	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
183ad43ddacSmrg	    ps_offset;
184ad43ddacSmrg    }
185ad43ddacSmrg
186ad43ddacSmrg    return TRUE;
187ad43ddacSmrg}
188ad43ddacSmrg
189b7e1c893Smrgstatic void
190b7e1c893SmrgR600DoneSolid(PixmapPtr pPix);
191b7e1c893Smrg
192b7e1c893Smrgstatic Bool
193b7e1c893SmrgR600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
194b7e1c893Smrg{
195b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
196b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
197b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
198b7e1c893Smrg    cb_config_t     cb_conf;
199b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
200b7e1c893Smrg    uint32_t a, r, g, b;
201b7e1c893Smrg    float ps_alu_consts[4];
202ad43ddacSmrg    struct r600_accel_object dst;
203b7e1c893Smrg
2040974d292Smrg    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
205ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype failed\n"));
2060974d292Smrg    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
207ad43ddacSmrg	RADEON_FALLBACK(("invalid planemask\n"));
208b7e1c893Smrg
209ad43ddacSmrg#if defined(XF86DRM_MODE)
210ad43ddacSmrg    if (info->cs) {
211ad43ddacSmrg	dst.offset = 0;
212ad43ddacSmrg	dst.bo = radeon_get_pixmap_bo(pPix);
213ad43ddacSmrg    } else
214ad43ddacSmrg#endif
215ad43ddacSmrg    {
216ad43ddacSmrg	dst.offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
217ad43ddacSmrg	dst.bo = NULL;
218ad43ddacSmrg    }
219b7e1c893Smrg
220ad43ddacSmrg    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
221ad43ddacSmrg    dst.width = pPix->drawable.width;
222ad43ddacSmrg    dst.height = pPix->drawable.height;
223ad43ddacSmrg    dst.bpp = pPix->drawable.bitsPerPixel;
224ad43ddacSmrg    dst.domain = RADEON_GEM_DOMAIN_VRAM;
2250974d292Smrg
226ad43ddacSmrg    if (!R600SetAccelState(pScrn,
227ad43ddacSmrg			   NULL,
228ad43ddacSmrg			   NULL,
229ad43ddacSmrg			   &dst,
230ad43ddacSmrg			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
231ad43ddacSmrg			   alu, pm))
232b7e1c893Smrg	return FALSE;
233b7e1c893Smrg
234b7e1c893Smrg    CLEAR (cb_conf);
235b7e1c893Smrg    CLEAR (vs_conf);
236b7e1c893Smrg    CLEAR (ps_conf);
237b7e1c893Smrg
238921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
2390974d292Smrg    radeon_cp_start(pScrn);
240b7e1c893Smrg
241921a55d8Smrg    r600_set_default_state(pScrn, accel_state->ib);
242b7e1c893Smrg
243921a55d8Smrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
244921a55d8Smrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
245921a55d8Smrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
246b7e1c893Smrg
247b7e1c893Smrg    /* Shader */
248b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
2490974d292Smrg    vs_conf.shader_size         = accel_state->vs_size;
250b7e1c893Smrg    vs_conf.num_gprs            = 2;
251b7e1c893Smrg    vs_conf.stack_size          = 0;
252ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
253921a55d8Smrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
254b7e1c893Smrg
255b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
2560974d292Smrg    ps_conf.shader_size         = accel_state->ps_size;
257b7e1c893Smrg    ps_conf.num_gprs            = 1;
258b7e1c893Smrg    ps_conf.stack_size          = 0;
259b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
260b7e1c893Smrg    ps_conf.clamp_consts        = 0;
261b7e1c893Smrg    ps_conf.export_mode         = 2;
262ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
263921a55d8Smrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
264b7e1c893Smrg
265b7e1c893Smrg    cb_conf.id = 0;
266ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
267ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
268ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
269ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
270b7e1c893Smrg
271ad43ddacSmrg    if (accel_state->dst_obj.bpp == 8) {
272b7e1c893Smrg	cb_conf.format = COLOR_8;
273b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
274ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 16) {
275b7e1c893Smrg	cb_conf.format = COLOR_5_6_5;
276b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
277b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
278b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN16;
279b13dfe66Smrg#endif
280b7e1c893Smrg    } else {
281b7e1c893Smrg	cb_conf.format = COLOR_8_8_8_8;
282b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
283b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
284b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN32;
285b13dfe66Smrg#endif
286b7e1c893Smrg    }
287b7e1c893Smrg    cb_conf.source_format = 1;
288b7e1c893Smrg    cb_conf.blend_clamp = 1;
2890974d292Smrg    /* Render setup */
2900974d292Smrg    if (accel_state->planemask & 0x000000ff)
291b13dfe66Smrg	cb_conf.pmask |= 4; /* B */
2920974d292Smrg    if (accel_state->planemask & 0x0000ff00)
293b13dfe66Smrg	cb_conf.pmask |= 2; /* G */
2940974d292Smrg    if (accel_state->planemask & 0x00ff0000)
295b13dfe66Smrg	cb_conf.pmask |= 1; /* R */
2960974d292Smrg    if (accel_state->planemask & 0xff000000)
297b13dfe66Smrg	cb_conf.pmask |= 8; /* A */
298b13dfe66Smrg    cb_conf.rop = accel_state->rop;
299b13dfe66Smrg    if (accel_state->dst_obj.tiling_flags == 0)
300b13dfe66Smrg	cb_conf.array_mode = 1;
301b13dfe66Smrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
302b13dfe66Smrg
303b13dfe66Smrg    r600_set_spi(pScrn, accel_state->ib, 0, 0);
304b7e1c893Smrg
305b7e1c893Smrg    /* PS alu constants */
306ad43ddacSmrg    if (accel_state->dst_obj.bpp == 16) {
307b7e1c893Smrg	r = (fg >> 11) & 0x1f;
308b7e1c893Smrg	g = (fg >> 5) & 0x3f;
309b7e1c893Smrg	b = (fg >> 0) & 0x1f;
310b7e1c893Smrg	ps_alu_consts[0] = (float)r / 31; /* R */
311b7e1c893Smrg	ps_alu_consts[1] = (float)g / 63; /* G */
312b7e1c893Smrg	ps_alu_consts[2] = (float)b / 31; /* B */
313b7e1c893Smrg	ps_alu_consts[3] = 1.0; /* A */
314ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 8) {
315b7e1c893Smrg	a = (fg >> 0) & 0xff;
316b7e1c893Smrg	ps_alu_consts[0] = 0.0; /* R */
317b7e1c893Smrg	ps_alu_consts[1] = 0.0; /* G */
318b7e1c893Smrg	ps_alu_consts[2] = 0.0; /* B */
319b7e1c893Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
320b7e1c893Smrg    } else {
321b7e1c893Smrg	a = (fg >> 24) & 0xff;
322b7e1c893Smrg	r = (fg >> 16) & 0xff;
323b7e1c893Smrg	g = (fg >> 8) & 0xff;
324b7e1c893Smrg	b = (fg >> 0) & 0xff;
325b7e1c893Smrg	ps_alu_consts[0] = (float)r / 255; /* R */
326b7e1c893Smrg	ps_alu_consts[1] = (float)g / 255; /* G */
327b7e1c893Smrg	ps_alu_consts[2] = (float)b / 255; /* B */
328b7e1c893Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
329b7e1c893Smrg    }
330921a55d8Smrg    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
331921a55d8Smrg			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
332b7e1c893Smrg
333ad43ddacSmrg    if (accel_state->vsync)
3340974d292Smrg	RADEONVlineHelperClear(pScrn);
335b7e1c893Smrg
336b7e1c893Smrg    return TRUE;
337b7e1c893Smrg}
338b7e1c893Smrg
339b7e1c893Smrg
340b7e1c893Smrgstatic void
341b7e1c893SmrgR600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
342b7e1c893Smrg{
343b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
344b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
345b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
346b7e1c893Smrg    float *vb;
347b7e1c893Smrg
348ad43ddacSmrg    if (accel_state->vsync)
3490974d292Smrg	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
350b7e1c893Smrg
351921a55d8Smrg    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
352b7e1c893Smrg
353b7e1c893Smrg    vb[0] = (float)x1;
354b7e1c893Smrg    vb[1] = (float)y1;
355b7e1c893Smrg
356b7e1c893Smrg    vb[2] = (float)x1;
357b7e1c893Smrg    vb[3] = (float)y2;
358b7e1c893Smrg
359b7e1c893Smrg    vb[4] = (float)x2;
360b7e1c893Smrg    vb[5] = (float)y2;
361b7e1c893Smrg
362921a55d8Smrg    radeon_vbo_commit(pScrn, &accel_state->vbo);
363b7e1c893Smrg}
364b7e1c893Smrg
365b7e1c893Smrgstatic void
366b7e1c893SmrgR600DoneSolid(PixmapPtr pPix)
367b7e1c893Smrg{
368b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
369b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
370b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
371b7e1c893Smrg
372ad43ddacSmrg    if (accel_state->vsync)
373921a55d8Smrg	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
374921a55d8Smrg				accel_state->vline_crtc,
375921a55d8Smrg				accel_state->vline_y1,
376921a55d8Smrg				accel_state->vline_y2);
377b7e1c893Smrg
378ad43ddacSmrg    r600_finish_op(pScrn, 8);
379b7e1c893Smrg}
380b7e1c893Smrg
381b7e1c893Smrgstatic void
382ad43ddacSmrgR600DoPrepareCopy(ScrnInfoPtr pScrn)
383b7e1c893Smrg{
384b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
385b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
386b7e1c893Smrg    cb_config_t     cb_conf;
387b7e1c893Smrg    tex_resource_t  tex_res;
388b7e1c893Smrg    tex_sampler_t   tex_samp;
389b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
390b7e1c893Smrg
391b7e1c893Smrg    CLEAR (cb_conf);
392b7e1c893Smrg    CLEAR (tex_res);
393b7e1c893Smrg    CLEAR (tex_samp);
394b7e1c893Smrg    CLEAR (vs_conf);
395b7e1c893Smrg    CLEAR (ps_conf);
396b7e1c893Smrg
397921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
3980974d292Smrg    radeon_cp_start(pScrn);
399b7e1c893Smrg
400921a55d8Smrg    r600_set_default_state(pScrn, accel_state->ib);
401b7e1c893Smrg
402921a55d8Smrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
403921a55d8Smrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
404921a55d8Smrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
405b7e1c893Smrg
406b7e1c893Smrg    /* Shader */
407b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
4080974d292Smrg    vs_conf.shader_size         = accel_state->vs_size;
409b7e1c893Smrg    vs_conf.num_gprs            = 2;
410b7e1c893Smrg    vs_conf.stack_size          = 0;
411ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
412921a55d8Smrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
413b7e1c893Smrg
414b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
4150974d292Smrg    ps_conf.shader_size         = accel_state->ps_size;
416b7e1c893Smrg    ps_conf.num_gprs            = 1;
417b7e1c893Smrg    ps_conf.stack_size          = 0;
418b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
419b7e1c893Smrg    ps_conf.clamp_consts        = 0;
420b7e1c893Smrg    ps_conf.export_mode         = 2;
421ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
422921a55d8Smrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
423b7e1c893Smrg
424b7e1c893Smrg    /* Texture */
425b7e1c893Smrg    tex_res.id                  = 0;
426ad43ddacSmrg    tex_res.w                   = accel_state->src_obj[0].width;
427ad43ddacSmrg    tex_res.h                   = accel_state->src_obj[0].height;
428ad43ddacSmrg    tex_res.pitch               = accel_state->src_obj[0].pitch;
429b7e1c893Smrg    tex_res.depth               = 0;
430b7e1c893Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
431ad43ddacSmrg    tex_res.base                = accel_state->src_obj[0].offset;
432ad43ddacSmrg    tex_res.mip_base            = accel_state->src_obj[0].offset;
4330974d292Smrg    tex_res.size                = accel_state->src_size[0];
434ad43ddacSmrg    tex_res.bo                  = accel_state->src_obj[0].bo;
435ad43ddacSmrg    tex_res.mip_bo              = accel_state->src_obj[0].bo;
436ad43ddacSmrg    if (accel_state->src_obj[0].bpp == 8) {
437b7e1c893Smrg	tex_res.format              = FMT_8;
438b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
439b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
440b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
441b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
442ad43ddacSmrg    } else if (accel_state->src_obj[0].bpp == 16) {
443b7e1c893Smrg	tex_res.format              = FMT_5_6_5;
444b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
445b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
446b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
447b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
448b7e1c893Smrg    } else {
449b7e1c893Smrg	tex_res.format              = FMT_8_8_8_8;
450b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
451b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
452b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
453b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
454b7e1c893Smrg    }
455b7e1c893Smrg
456b7e1c893Smrg    tex_res.request_size        = 1;
457b7e1c893Smrg    tex_res.base_level          = 0;
458b7e1c893Smrg    tex_res.last_level          = 0;
459b7e1c893Smrg    tex_res.perf_modulation     = 0;
460b13dfe66Smrg    if (accel_state->src_obj[0].tiling_flags == 0)
461b13dfe66Smrg	tex_res.tile_mode           = 1;
462921a55d8Smrg    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
463b7e1c893Smrg
464b7e1c893Smrg    tex_samp.id                 = 0;
465b7e1c893Smrg    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
466b7e1c893Smrg    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
467b7e1c893Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
468b7e1c893Smrg    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
469b7e1c893Smrg    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
470b13dfe66Smrg    tex_samp.mc_coord_truncate  = 1;
471b7e1c893Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
472b7e1c893Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
473921a55d8Smrg    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
474b7e1c893Smrg
475b7e1c893Smrg    cb_conf.id = 0;
476ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
477ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
478ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
479ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
480ad43ddacSmrg    if (accel_state->dst_obj.bpp == 8) {
481b7e1c893Smrg	cb_conf.format = COLOR_8;
482b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
483ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 16) {
484b7e1c893Smrg	cb_conf.format = COLOR_5_6_5;
485b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
486b7e1c893Smrg    } else {
487b7e1c893Smrg	cb_conf.format = COLOR_8_8_8_8;
488b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
489b7e1c893Smrg    }
490b7e1c893Smrg    cb_conf.source_format = 1;
491b7e1c893Smrg    cb_conf.blend_clamp = 1;
492b7e1c893Smrg
4930974d292Smrg    /* Render setup */
4940974d292Smrg    if (accel_state->planemask & 0x000000ff)
495b13dfe66Smrg	cb_conf.pmask |= 4; /* B */
4960974d292Smrg    if (accel_state->planemask & 0x0000ff00)
497b13dfe66Smrg	cb_conf.pmask |= 2; /* G */
4980974d292Smrg    if (accel_state->planemask & 0x00ff0000)
499b13dfe66Smrg	cb_conf.pmask |= 1; /* R */
5000974d292Smrg    if (accel_state->planemask & 0xff000000)
501b13dfe66Smrg	cb_conf.pmask |= 8; /* A */
502b13dfe66Smrg    cb_conf.rop = accel_state->rop;
503b13dfe66Smrg    if (accel_state->dst_obj.tiling_flags == 0)
504b13dfe66Smrg	cb_conf.array_mode = 1;
505b13dfe66Smrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
506b13dfe66Smrg
507b13dfe66Smrg    r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
508b7e1c893Smrg
509b7e1c893Smrg}
510b7e1c893Smrg
511b7e1c893Smrgstatic void
512b7e1c893SmrgR600DoCopy(ScrnInfoPtr pScrn)
513b7e1c893Smrg{
514ad43ddacSmrg    r600_finish_op(pScrn, 16);
515ad43ddacSmrg}
516ad43ddacSmrg
517ad43ddacSmrgstatic void
518ad43ddacSmrgR600DoCopyVline(PixmapPtr pPix)
519ad43ddacSmrg{
520ad43ddacSmrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
521b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
522b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
523b7e1c893Smrg
524ad43ddacSmrg    if (accel_state->vsync)
525921a55d8Smrg	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
526921a55d8Smrg				accel_state->vline_crtc,
527921a55d8Smrg				accel_state->vline_y1,
528921a55d8Smrg				accel_state->vline_y2);
529b7e1c893Smrg
530ad43ddacSmrg    r600_finish_op(pScrn, 16);
531b7e1c893Smrg}
532b7e1c893Smrg
533b7e1c893Smrgstatic void
534b7e1c893SmrgR600AppendCopyVertex(ScrnInfoPtr pScrn,
535b7e1c893Smrg		     int srcX, int srcY,
536b7e1c893Smrg		     int dstX, int dstY,
537b7e1c893Smrg		     int w, int h)
538b7e1c893Smrg{
539921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
540921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
541b7e1c893Smrg    float *vb;
542b7e1c893Smrg
543921a55d8Smrg    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
544b7e1c893Smrg
545b7e1c893Smrg    vb[0] = (float)dstX;
546b7e1c893Smrg    vb[1] = (float)dstY;
547b7e1c893Smrg    vb[2] = (float)srcX;
548b7e1c893Smrg    vb[3] = (float)srcY;
549b7e1c893Smrg
550b7e1c893Smrg    vb[4] = (float)dstX;
551b7e1c893Smrg    vb[5] = (float)(dstY + h);
552b7e1c893Smrg    vb[6] = (float)srcX;
553b7e1c893Smrg    vb[7] = (float)(srcY + h);
554b7e1c893Smrg
555b7e1c893Smrg    vb[8] = (float)(dstX + w);
556b7e1c893Smrg    vb[9] = (float)(dstY + h);
557b7e1c893Smrg    vb[10] = (float)(srcX + w);
558b7e1c893Smrg    vb[11] = (float)(srcY + h);
559b7e1c893Smrg
560921a55d8Smrg    radeon_vbo_commit(pScrn, &accel_state->vbo);
561b7e1c893Smrg}
562b7e1c893Smrg
563b7e1c893Smrgstatic Bool
564b7e1c893SmrgR600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
565b7e1c893Smrg		int xdir, int ydir,
566b7e1c893Smrg		int rop,
567b7e1c893Smrg		Pixel planemask)
568b7e1c893Smrg{
569b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
570b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
571b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
572ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
573ad43ddacSmrg
5740974d292Smrg    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
575ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype src failed\n"));
5760974d292Smrg    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
577ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype dst failed\n"));
5780974d292Smrg    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
579ad43ddacSmrg	RADEON_FALLBACK(("Invalid planemask\n"));
580ad43ddacSmrg
581ad43ddacSmrg    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
582ad43ddacSmrg    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
583ad43ddacSmrg
584ad43ddacSmrg    accel_state->same_surface = FALSE;
585ad43ddacSmrg
586ad43ddacSmrg#if defined(XF86DRM_MODE)
587ad43ddacSmrg    if (info->cs) {
588ad43ddacSmrg	src_obj.offset = 0;
589ad43ddacSmrg	dst_obj.offset = 0;
590ad43ddacSmrg	src_obj.bo = radeon_get_pixmap_bo(pSrc);
591ad43ddacSmrg	dst_obj.bo = radeon_get_pixmap_bo(pDst);
592ad43ddacSmrg	if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
593ad43ddacSmrg	    accel_state->same_surface = TRUE;
594ad43ddacSmrg    } else
595b7e1c893Smrg#endif
596ad43ddacSmrg    {
597ad43ddacSmrg	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
598ad43ddacSmrg	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
599ad43ddacSmrg	if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst))
600ad43ddacSmrg	    accel_state->same_surface = TRUE;
601ad43ddacSmrg	src_obj.bo = NULL;
602ad43ddacSmrg	dst_obj.bo = NULL;
603b7e1c893Smrg    }
604b7e1c893Smrg
605ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
606ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
607ad43ddacSmrg    src_obj.bpp = pSrc->drawable.bitsPerPixel;
608ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
609921a55d8Smrg
610ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
611ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
612ad43ddacSmrg    dst_obj.bpp = pDst->drawable.bitsPerPixel;
613ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
614ad43ddacSmrg
615ad43ddacSmrg    if (!R600SetAccelState(pScrn,
616ad43ddacSmrg			   &src_obj,
617ad43ddacSmrg			   NULL,
618ad43ddacSmrg			   &dst_obj,
619ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
620ad43ddacSmrg			   rop, planemask))
621b7e1c893Smrg	return FALSE;
622b7e1c893Smrg
623ad43ddacSmrg    if (accel_state->same_surface == TRUE) {
62430d12090Smrg#if defined(XF86DRM_MODE)
625b13dfe66Smrg	unsigned height = RADEON_ALIGN(pDst->drawable.height,
626b13dfe66Smrg				       drmmode_get_height_align(pScrn, accel_state->dst_obj.tiling_flags));
62730d12090Smrg#else
62830d12090Smrg	unsigned height = pDst->drawable.height;
62930d12090Smrg#endif
630b13dfe66Smrg	unsigned long size = height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
631b7e1c893Smrg
632ad43ddacSmrg#if defined(XF86DRM_MODE)
633ad43ddacSmrg	if (info->cs) {
634ad43ddacSmrg	    if (accel_state->copy_area_bo) {
635ad43ddacSmrg		radeon_bo_unref(accel_state->copy_area_bo);
636ad43ddacSmrg		accel_state->copy_area_bo = NULL;
637b7e1c893Smrg	    }
638ad43ddacSmrg	    accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
639ad43ddacSmrg						       RADEON_GEM_DOMAIN_VRAM,
640ad43ddacSmrg						       0);
641ad43ddacSmrg	    if (accel_state->copy_area_bo == NULL)
642ad43ddacSmrg		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
643ad43ddacSmrg
644ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
645ad43ddacSmrg					      RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_DOMAIN_VRAM);
646ad43ddacSmrg	    if (radeon_cs_space_check(info->cs)) {
647ad43ddacSmrg		radeon_bo_unref(accel_state->copy_area_bo);
648ad43ddacSmrg		accel_state->copy_area_bo = NULL;
649ad43ddacSmrg		return FALSE;
650ad43ddacSmrg	    }
651ad43ddacSmrg	    accel_state->copy_area = (void*)accel_state->copy_area_bo;
652ad43ddacSmrg	} else
653ad43ddacSmrg#endif
654ad43ddacSmrg	{
655ad43ddacSmrg	    if (accel_state->copy_area) {
656ad43ddacSmrg		exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
657ad43ddacSmrg		accel_state->copy_area = NULL;
658ad43ddacSmrg	    }
659ad43ddacSmrg	    accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
660ad43ddacSmrg	    if (!accel_state->copy_area)
661ad43ddacSmrg		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
662b7e1c893Smrg	}
663ad43ddacSmrg    } else
664ad43ddacSmrg	R600DoPrepareCopy(pScrn);
665b7e1c893Smrg
666ad43ddacSmrg    if (accel_state->vsync)
6670974d292Smrg	RADEONVlineHelperClear(pScrn);
668ad43ddacSmrg
669ad43ddacSmrg    return TRUE;
670b7e1c893Smrg}
671b7e1c893Smrg
672b7e1c893Smrgstatic void
673b7e1c893SmrgR600Copy(PixmapPtr pDst,
674b7e1c893Smrg	 int srcX, int srcY,
675b7e1c893Smrg	 int dstX, int dstY,
676b7e1c893Smrg	 int w, int h)
677b7e1c893Smrg{
678b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
679b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
680b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
681b7e1c893Smrg
682b7e1c893Smrg    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
683b7e1c893Smrg	return;
684b7e1c893Smrg
685ad43ddacSmrg    if (accel_state->vsync)
6860974d292Smrg	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
687b7e1c893Smrg
688ad43ddacSmrg    if (accel_state->same_surface && accel_state->copy_area) {
689ad43ddacSmrg	uint32_t orig_offset, tmp_offset;
690ad43ddacSmrg	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
691ad43ddacSmrg	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
692b13dfe66Smrg	uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags;
693b13dfe66Smrg	uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags;
694ad43ddacSmrg	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
695ad43ddacSmrg
696ad43ddacSmrg#if defined(XF86DRM_MODE)
697ad43ddacSmrg	if (info->cs) {
698ad43ddacSmrg	    tmp_offset = 0;
699ad43ddacSmrg	    orig_offset = 0;
700ad43ddacSmrg	} else
701ad43ddacSmrg#endif
702ad43ddacSmrg	{
703b7e1c893Smrg	    tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset;
704b7e1c893Smrg	    orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
705ad43ddacSmrg	}
706b7e1c893Smrg
707ad43ddacSmrg	/* src to tmp */
708ad43ddacSmrg	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
709ad43ddacSmrg	accel_state->dst_obj.bo = accel_state->copy_area_bo;
710ad43ddacSmrg	accel_state->dst_obj.offset = tmp_offset;
711b13dfe66Smrg	accel_state->dst_obj.tiling_flags = 0;
712ad43ddacSmrg	R600DoPrepareCopy(pScrn);
713b7e1c893Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
714b7e1c893Smrg	R600DoCopy(pScrn);
715ad43ddacSmrg
716ad43ddacSmrg	/* tmp to dst */
717ad43ddacSmrg	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
718ad43ddacSmrg	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
719ad43ddacSmrg	accel_state->src_obj[0].offset = tmp_offset;
720b13dfe66Smrg	accel_state->src_obj[0].tiling_flags = 0;
721ad43ddacSmrg	accel_state->dst_obj.domain = orig_dst_domain;
722ad43ddacSmrg	accel_state->dst_obj.bo = orig_bo;
723ad43ddacSmrg	accel_state->dst_obj.offset = orig_offset;
724b13dfe66Smrg	accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags;
725ad43ddacSmrg	R600DoPrepareCopy(pScrn);
726ad43ddacSmrg	R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
727ad43ddacSmrg	R600DoCopyVline(pDst);
728ad43ddacSmrg
729ad43ddacSmrg	/* restore state */
730ad43ddacSmrg	accel_state->src_obj[0].domain = orig_src_domain;
731ad43ddacSmrg	accel_state->src_obj[0].bo = orig_bo;
732ad43ddacSmrg	accel_state->src_obj[0].offset = orig_offset;
733b13dfe66Smrg	accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags;
734ad43ddacSmrg    } else
735b7e1c893Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
736b7e1c893Smrg
737b7e1c893Smrg}
738b7e1c893Smrg
739b7e1c893Smrgstatic void
740b7e1c893SmrgR600DoneCopy(PixmapPtr pDst)
741b7e1c893Smrg{
742b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
743b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
744b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
745b7e1c893Smrg
746b7e1c893Smrg    if (!accel_state->same_surface)
747ad43ddacSmrg	R600DoCopyVline(pDst);
748b7e1c893Smrg
749b7e1c893Smrg    if (accel_state->copy_area) {
750ad43ddacSmrg	if (!info->cs)
751ad43ddacSmrg	    exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
752b7e1c893Smrg	accel_state->copy_area = NULL;
753b7e1c893Smrg    }
754b7e1c893Smrg
755b7e1c893Smrg}
756b7e1c893Smrg
757b7e1c893Smrgstruct blendinfo {
758b7e1c893Smrg    Bool dst_alpha;
759b7e1c893Smrg    Bool src_alpha;
760b7e1c893Smrg    uint32_t blend_cntl;
761b7e1c893Smrg};
762b7e1c893Smrg
763b7e1c893Smrgstatic struct blendinfo R600BlendOp[] = {
764b7e1c893Smrg    /* Clear */
765b7e1c893Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
766b7e1c893Smrg    /* Src */
767b7e1c893Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
768b7e1c893Smrg    /* Dst */
769b7e1c893Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
770b7e1c893Smrg    /* Over */
771b7e1c893Smrg    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
772b7e1c893Smrg    /* OverReverse */
773b7e1c893Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
774b7e1c893Smrg    /* In */
775b7e1c893Smrg    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
776b7e1c893Smrg    /* InReverse */
777b7e1c893Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
778b7e1c893Smrg    /* Out */
779b7e1c893Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
780b7e1c893Smrg    /* OutReverse */
781b7e1c893Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
782b7e1c893Smrg    /* Atop */
783b7e1c893Smrg    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
784b7e1c893Smrg    /* AtopReverse */
785b7e1c893Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
786b7e1c893Smrg    /* Xor */
787b7e1c893Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
788b7e1c893Smrg    /* Add */
789b7e1c893Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
790b7e1c893Smrg};
791b7e1c893Smrg
792b7e1c893Smrgstruct formatinfo {
793b7e1c893Smrg    unsigned int fmt;
794b7e1c893Smrg    uint32_t card_fmt;
795b7e1c893Smrg};
796b7e1c893Smrg
797b7e1c893Smrgstatic struct formatinfo R600TexFormats[] = {
798b7e1c893Smrg    {PICT_a8r8g8b8,	FMT_8_8_8_8},
799b7e1c893Smrg    {PICT_x8r8g8b8,	FMT_8_8_8_8},
800b7e1c893Smrg    {PICT_a8b8g8r8,	FMT_8_8_8_8},
801b7e1c893Smrg    {PICT_x8b8g8r8,	FMT_8_8_8_8},
802ad43ddacSmrg#ifdef PICT_TYPE_BGRA
803ad43ddacSmrg    {PICT_b8g8r8a8,	FMT_8_8_8_8},
804ad43ddacSmrg    {PICT_b8g8r8x8,	FMT_8_8_8_8},
805ad43ddacSmrg#endif
806b7e1c893Smrg    {PICT_r5g6b5,	FMT_5_6_5},
807b7e1c893Smrg    {PICT_a1r5g5b5,	FMT_1_5_5_5},
808b7e1c893Smrg    {PICT_x1r5g5b5,     FMT_1_5_5_5},
809b7e1c893Smrg    {PICT_a8,		FMT_8},
810b7e1c893Smrg};
811b7e1c893Smrg
812b7e1c893Smrgstatic uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
813b7e1c893Smrg{
814b7e1c893Smrg    uint32_t sblend, dblend;
815b7e1c893Smrg
816b7e1c893Smrg    sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
817b7e1c893Smrg    dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
818b7e1c893Smrg
819b7e1c893Smrg    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
820b7e1c893Smrg     * it as always 1.
821b7e1c893Smrg     */
822b7e1c893Smrg    if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
823b7e1c893Smrg	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
824b7e1c893Smrg	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
825b7e1c893Smrg	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
826b7e1c893Smrg	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
827b7e1c893Smrg    }
828b7e1c893Smrg
829b7e1c893Smrg    /* If the source alpha is being used, then we should only be in a case where
830b7e1c893Smrg     * the source blend factor is 0, and the source blend value is the mask
831b7e1c893Smrg     * channels multiplied by the source picture's alpha.
832b7e1c893Smrg     */
833b7e1c893Smrg    if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
834b7e1c893Smrg	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
835b7e1c893Smrg	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
836b7e1c893Smrg	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
837b7e1c893Smrg	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
838b7e1c893Smrg	}
839b7e1c893Smrg    }
840b7e1c893Smrg
841b7e1c893Smrg    return sblend | dblend;
842b7e1c893Smrg}
843b7e1c893Smrg
844b7e1c893Smrgstatic Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
845b7e1c893Smrg{
846b7e1c893Smrg    switch (pDstPicture->format) {
847b7e1c893Smrg    case PICT_a8r8g8b8:
848b7e1c893Smrg    case PICT_x8r8g8b8:
849ad43ddacSmrg    case PICT_a8b8g8r8:
850ad43ddacSmrg    case PICT_x8b8g8r8:
851ad43ddacSmrg#ifdef PICT_TYPE_BGRA
852ad43ddacSmrg    case PICT_b8g8r8a8:
853ad43ddacSmrg    case PICT_b8g8r8x8:
854ad43ddacSmrg#endif
855b7e1c893Smrg	*dst_format = COLOR_8_8_8_8;
856b7e1c893Smrg	break;
857b7e1c893Smrg    case PICT_r5g6b5:
858b7e1c893Smrg	*dst_format = COLOR_5_6_5;
859b7e1c893Smrg	break;
860b7e1c893Smrg    case PICT_a1r5g5b5:
861b7e1c893Smrg    case PICT_x1r5g5b5:
862b7e1c893Smrg	*dst_format = COLOR_1_5_5_5;
863b7e1c893Smrg	break;
864b7e1c893Smrg    case PICT_a8:
865b7e1c893Smrg	*dst_format = COLOR_8;
866b7e1c893Smrg	break;
867b7e1c893Smrg    default:
868b7e1c893Smrg	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
869b7e1c893Smrg	       (int)pDstPicture->format));
870b7e1c893Smrg    }
871b7e1c893Smrg    return TRUE;
872b7e1c893Smrg}
873b7e1c893Smrg
874b7e1c893Smrgstatic Bool R600CheckCompositeTexture(PicturePtr pPict,
875b7e1c893Smrg				      PicturePtr pDstPict,
876b7e1c893Smrg				      int op,
877b7e1c893Smrg				      int unit)
878b7e1c893Smrg{
879b7e1c893Smrg    int w = pPict->pDrawable->width;
880b7e1c893Smrg    int h = pPict->pDrawable->height;
881ad43ddacSmrg    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
882b7e1c893Smrg    unsigned int i;
883b7e1c893Smrg    int max_tex_w, max_tex_h;
884b7e1c893Smrg
885b7e1c893Smrg    max_tex_w = 8192;
886b7e1c893Smrg    max_tex_h = 8192;
887b7e1c893Smrg
888b7e1c893Smrg    if ((w > max_tex_w) || (h > max_tex_h))
889b7e1c893Smrg	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
890b7e1c893Smrg
891b7e1c893Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
892b7e1c893Smrg	if (R600TexFormats[i].fmt == pPict->format)
893b7e1c893Smrg	    break;
894b7e1c893Smrg    }
895b7e1c893Smrg    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
896b7e1c893Smrg	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
897b7e1c893Smrg			 (int)pPict->format));
898b7e1c893Smrg
899b7e1c893Smrg    if (pPict->filter != PictFilterNearest &&
900b7e1c893Smrg	pPict->filter != PictFilterBilinear)
901b7e1c893Smrg	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
902b7e1c893Smrg
903b7e1c893Smrg    /* for REPEAT_NONE, Render semantics are that sampling outside the source
904b7e1c893Smrg     * picture results in alpha=0 pixels. We can implement this with a border color
905b7e1c893Smrg     * *if* our source texture has an alpha channel, otherwise we need to fall
906b7e1c893Smrg     * back. If we're not transformed then we hope that upper layers have clipped
907b7e1c893Smrg     * rendering to the bounds of the source drawable, in which case it doesn't
908b7e1c893Smrg     * matter. I have not, however, verified that the X server always does such
909b7e1c893Smrg     * clipping.
910b7e1c893Smrg     */
911b7e1c893Smrg    /* FIXME R6xx */
912ad43ddacSmrg    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
913b7e1c893Smrg	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
914b7e1c893Smrg	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
915b7e1c893Smrg    }
916b7e1c893Smrg
917b13dfe66Smrg    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
918921a55d8Smrg	RADEON_FALLBACK(("non-affine transforms not supported\n"));
919921a55d8Smrg
920b7e1c893Smrg    return TRUE;
921b7e1c893Smrg}
922b7e1c893Smrg
923b7e1c893Smrgstatic Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
924b7e1c893Smrg					int unit)
925b7e1c893Smrg{
926b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
927b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
928b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
929b7e1c893Smrg    int w = pPict->pDrawable->width;
930b7e1c893Smrg    int h = pPict->pDrawable->height;
931ad43ddacSmrg    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
932b7e1c893Smrg    unsigned int i;
933b7e1c893Smrg    tex_resource_t  tex_res;
934b7e1c893Smrg    tex_sampler_t   tex_samp;
935b7e1c893Smrg    int pix_r, pix_g, pix_b, pix_a;
936ad43ddacSmrg    float vs_alu_consts[8];
937b7e1c893Smrg
938b7e1c893Smrg    CLEAR (tex_res);
939b7e1c893Smrg    CLEAR (tex_samp);
940b7e1c893Smrg
941b7e1c893Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
942b7e1c893Smrg	if (R600TexFormats[i].fmt == pPict->format)
943b7e1c893Smrg	    break;
944b7e1c893Smrg    }
945b7e1c893Smrg
946b7e1c893Smrg    /* Texture */
947b7e1c893Smrg    tex_res.id                  = unit;
948b7e1c893Smrg    tex_res.w                   = w;
949b7e1c893Smrg    tex_res.h                   = h;
950ad43ddacSmrg    tex_res.pitch               = accel_state->src_obj[unit].pitch;
951b7e1c893Smrg    tex_res.depth               = 0;
952b7e1c893Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
953ad43ddacSmrg    tex_res.base                = accel_state->src_obj[unit].offset;
954ad43ddacSmrg    tex_res.mip_base            = accel_state->src_obj[unit].offset;
9550974d292Smrg    tex_res.size                = accel_state->src_size[unit];
956b7e1c893Smrg    tex_res.format              = R600TexFormats[i].card_fmt;
957ad43ddacSmrg    tex_res.bo                  = accel_state->src_obj[unit].bo;
958ad43ddacSmrg    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
959b7e1c893Smrg    tex_res.request_size        = 1;
960b7e1c893Smrg
961b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
962b13dfe66Smrg    switch (accel_state->src_obj[unit].bpp) {
963b13dfe66Smrg    case 16:
964b13dfe66Smrg	tex_res.endian = SQ_ENDIAN_8IN16;
965b13dfe66Smrg	break;
966b13dfe66Smrg    case 32:
967b13dfe66Smrg	tex_res.endian = SQ_ENDIAN_8IN32;
968b13dfe66Smrg	break;
969b13dfe66Smrg    default :
970b13dfe66Smrg	break;
971b13dfe66Smrg    }
972b13dfe66Smrg#endif
973b13dfe66Smrg
974b7e1c893Smrg    /* component swizzles */
975b7e1c893Smrg    switch (pPict->format) {
976b7e1c893Smrg    case PICT_a1r5g5b5:
977b7e1c893Smrg    case PICT_a8r8g8b8:
978b7e1c893Smrg	pix_r = SQ_SEL_Z; /* R */
979b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
980b7e1c893Smrg	pix_b = SQ_SEL_X; /* B */
981b7e1c893Smrg	pix_a = SQ_SEL_W; /* A */
982b7e1c893Smrg	break;
983b7e1c893Smrg    case PICT_a8b8g8r8:
984b7e1c893Smrg	pix_r = SQ_SEL_X; /* R */
985b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
986b7e1c893Smrg	pix_b = SQ_SEL_Z; /* B */
987b7e1c893Smrg	pix_a = SQ_SEL_W; /* A */
988b7e1c893Smrg	break;
989b7e1c893Smrg    case PICT_x8b8g8r8:
990b7e1c893Smrg	pix_r = SQ_SEL_X; /* R */
991b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
992b7e1c893Smrg	pix_b = SQ_SEL_Z; /* B */
993b7e1c893Smrg	pix_a = SQ_SEL_1; /* A */
994b7e1c893Smrg	break;
995ad43ddacSmrg#ifdef PICT_TYPE_BGRA
996ad43ddacSmrg    case PICT_b8g8r8a8:
997ad43ddacSmrg	pix_r = SQ_SEL_Y; /* R */
998ad43ddacSmrg	pix_g = SQ_SEL_Z; /* G */
999ad43ddacSmrg	pix_b = SQ_SEL_W; /* B */
1000ad43ddacSmrg	pix_a = SQ_SEL_X; /* A */
1001ad43ddacSmrg	break;
1002ad43ddacSmrg    case PICT_b8g8r8x8:
1003ad43ddacSmrg	pix_r = SQ_SEL_Y; /* R */
1004ad43ddacSmrg	pix_g = SQ_SEL_Z; /* G */
1005ad43ddacSmrg	pix_b = SQ_SEL_W; /* B */
1006ad43ddacSmrg	pix_a = SQ_SEL_1; /* A */
1007ad43ddacSmrg	break;
1008ad43ddacSmrg#endif
1009b7e1c893Smrg    case PICT_x1r5g5b5:
1010b7e1c893Smrg    case PICT_x8r8g8b8:
1011b7e1c893Smrg    case PICT_r5g6b5:
1012b7e1c893Smrg	pix_r = SQ_SEL_Z; /* R */
1013b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1014b7e1c893Smrg	pix_b = SQ_SEL_X; /* B */
1015b7e1c893Smrg	pix_a = SQ_SEL_1; /* A */
1016b7e1c893Smrg	break;
1017b7e1c893Smrg    case PICT_a8:
1018b7e1c893Smrg	pix_r = SQ_SEL_0; /* R */
1019b7e1c893Smrg	pix_g = SQ_SEL_0; /* G */
1020b7e1c893Smrg	pix_b = SQ_SEL_0; /* B */
1021b7e1c893Smrg	pix_a = SQ_SEL_X; /* A */
1022b7e1c893Smrg	break;
1023b7e1c893Smrg    default:
1024b7e1c893Smrg	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
1025b7e1c893Smrg    }
1026b7e1c893Smrg
1027b7e1c893Smrg    if (unit == 0) {
1028ad43ddacSmrg	if (!accel_state->msk_pic) {
1029b7e1c893Smrg	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
1030b7e1c893Smrg		pix_r = SQ_SEL_0;
1031b7e1c893Smrg		pix_g = SQ_SEL_0;
1032b7e1c893Smrg		pix_b = SQ_SEL_0;
1033b7e1c893Smrg	    }
1034b7e1c893Smrg
1035b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1036b7e1c893Smrg		pix_a = SQ_SEL_1;
1037b7e1c893Smrg	} else {
1038b7e1c893Smrg	    if (accel_state->component_alpha) {
1039b7e1c893Smrg		if (accel_state->src_alpha) {
1040b7e1c893Smrg		    if (PICT_FORMAT_A(pPict->format) == 0) {
1041b7e1c893Smrg			pix_r = SQ_SEL_1;
1042b7e1c893Smrg			pix_g = SQ_SEL_1;
1043b7e1c893Smrg			pix_b = SQ_SEL_1;
1044b7e1c893Smrg			pix_a = SQ_SEL_1;
1045b7e1c893Smrg		    } else {
1046b7e1c893Smrg			pix_r = pix_a;
1047b7e1c893Smrg			pix_g = pix_a;
1048b7e1c893Smrg			pix_b = pix_a;
1049b7e1c893Smrg		    }
1050b7e1c893Smrg		} else {
1051b7e1c893Smrg		    if (PICT_FORMAT_A(pPict->format) == 0)
1052b7e1c893Smrg			pix_a = SQ_SEL_1;
1053b7e1c893Smrg		}
1054b7e1c893Smrg	    } else {
1055b7e1c893Smrg		if (PICT_FORMAT_RGB(pPict->format) == 0) {
1056b7e1c893Smrg		    pix_r = SQ_SEL_0;
1057b7e1c893Smrg		    pix_g = SQ_SEL_0;
1058b7e1c893Smrg		    pix_b = SQ_SEL_0;
1059b7e1c893Smrg		}
1060b7e1c893Smrg
1061b7e1c893Smrg		if (PICT_FORMAT_A(pPict->format) == 0)
1062b7e1c893Smrg		    pix_a = SQ_SEL_1;
1063b7e1c893Smrg	    }
1064b7e1c893Smrg	}
1065b7e1c893Smrg    } else {
1066b7e1c893Smrg	if (accel_state->component_alpha) {
1067b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1068b7e1c893Smrg		pix_a = SQ_SEL_1;
1069b7e1c893Smrg	} else {
1070b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0) {
1071b7e1c893Smrg		pix_r = SQ_SEL_1;
1072b7e1c893Smrg		pix_g = SQ_SEL_1;
1073b7e1c893Smrg		pix_b = SQ_SEL_1;
1074b7e1c893Smrg		pix_a = SQ_SEL_1;
1075b7e1c893Smrg	    } else {
1076b7e1c893Smrg		pix_r = pix_a;
1077b7e1c893Smrg		pix_g = pix_a;
1078b7e1c893Smrg		pix_b = pix_a;
1079b7e1c893Smrg	    }
1080b7e1c893Smrg	}
1081b7e1c893Smrg    }
1082b7e1c893Smrg
1083b7e1c893Smrg    tex_res.dst_sel_x           = pix_r; /* R */
1084b7e1c893Smrg    tex_res.dst_sel_y           = pix_g; /* G */
1085b7e1c893Smrg    tex_res.dst_sel_z           = pix_b; /* B */
1086b7e1c893Smrg    tex_res.dst_sel_w           = pix_a; /* A */
1087b7e1c893Smrg
1088b7e1c893Smrg    tex_res.base_level          = 0;
1089b7e1c893Smrg    tex_res.last_level          = 0;
1090b7e1c893Smrg    tex_res.perf_modulation     = 0;
1091b13dfe66Smrg    if (accel_state->src_obj[unit].tiling_flags == 0)
1092b13dfe66Smrg	tex_res.tile_mode           = 1;
1093921a55d8Smrg    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
1094b7e1c893Smrg
1095b7e1c893Smrg    tex_samp.id                 = unit;
1096b7e1c893Smrg    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1097b7e1c893Smrg
1098ad43ddacSmrg    switch (repeatType) {
1099ad43ddacSmrg    case RepeatNormal:
1100ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_WRAP;
1101ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_WRAP;
1102ad43ddacSmrg	break;
1103ad43ddacSmrg    case RepeatPad:
1104ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1105ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1106ad43ddacSmrg	break;
1107ad43ddacSmrg    case RepeatReflect:
1108ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1109ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1110ad43ddacSmrg	break;
1111ad43ddacSmrg    case RepeatNone:
1112b7e1c893Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1113b7e1c893Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1114ad43ddacSmrg	break;
1115ad43ddacSmrg    default:
1116ad43ddacSmrg	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1117b7e1c893Smrg    }
1118b7e1c893Smrg
1119b7e1c893Smrg    switch (pPict->filter) {
1120b7e1c893Smrg    case PictFilterNearest:
1121b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1122b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1123b13dfe66Smrg	tex_samp.mc_coord_truncate  = 1;
1124b7e1c893Smrg	break;
1125b7e1c893Smrg    case PictFilterBilinear:
1126b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1127b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1128b7e1c893Smrg	break;
1129b7e1c893Smrg    default:
1130b7e1c893Smrg	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1131b7e1c893Smrg    }
1132b7e1c893Smrg
1133b7e1c893Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
1134b7e1c893Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1135b7e1c893Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
1136921a55d8Smrg    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
1137b7e1c893Smrg
1138b7e1c893Smrg    if (pPict->transform != 0) {
1139b7e1c893Smrg	accel_state->is_transform[unit] = TRUE;
1140b7e1c893Smrg	accel_state->transform[unit] = pPict->transform;
1141ad43ddacSmrg
1142ad43ddacSmrg	vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
1143ad43ddacSmrg	vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
1144ad43ddacSmrg	vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
1145ad43ddacSmrg	vs_alu_consts[3] = 1.0 / w;
1146ad43ddacSmrg
1147ad43ddacSmrg	vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
1148ad43ddacSmrg	vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
1149ad43ddacSmrg	vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
1150ad43ddacSmrg	vs_alu_consts[7] = 1.0 / h;
1151ad43ddacSmrg    } else {
1152b7e1c893Smrg	accel_state->is_transform[unit] = FALSE;
1153b7e1c893Smrg
1154ad43ddacSmrg	vs_alu_consts[0] = 1.0;
1155ad43ddacSmrg	vs_alu_consts[1] = 0.0;
1156ad43ddacSmrg	vs_alu_consts[2] = 0.0;
1157ad43ddacSmrg	vs_alu_consts[3] = 1.0 / w;
1158ad43ddacSmrg
1159ad43ddacSmrg	vs_alu_consts[4] = 0.0;
1160ad43ddacSmrg	vs_alu_consts[5] = 1.0;
1161ad43ddacSmrg	vs_alu_consts[6] = 0.0;
1162ad43ddacSmrg	vs_alu_consts[7] = 1.0 / h;
1163ad43ddacSmrg    }
1164ad43ddacSmrg
1165ad43ddacSmrg    /* VS alu constants */
1166921a55d8Smrg    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
1167921a55d8Smrg			sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
1168ad43ddacSmrg
1169b7e1c893Smrg    return TRUE;
1170b7e1c893Smrg}
1171b7e1c893Smrg
1172b7e1c893Smrgstatic Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1173b7e1c893Smrg			       PicturePtr pDstPicture)
1174b7e1c893Smrg{
1175b7e1c893Smrg    uint32_t tmp1;
1176b7e1c893Smrg    PixmapPtr pSrcPixmap, pDstPixmap;
1177b7e1c893Smrg    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1178b7e1c893Smrg
1179b7e1c893Smrg    /* Check for unsupported compositing operations. */
1180b7e1c893Smrg    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1181b7e1c893Smrg	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1182b7e1c893Smrg
1183ad43ddacSmrg    if (!pSrcPicture->pDrawable)
1184ad43ddacSmrg	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1185ad43ddacSmrg
1186b7e1c893Smrg    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1187b7e1c893Smrg
1188b7e1c893Smrg    max_tex_w = 8192;
1189b7e1c893Smrg    max_tex_h = 8192;
1190b7e1c893Smrg    max_dst_w = 8192;
1191b7e1c893Smrg    max_dst_h = 8192;
1192b7e1c893Smrg
1193b7e1c893Smrg    if (pSrcPixmap->drawable.width >= max_tex_w ||
1194b7e1c893Smrg	pSrcPixmap->drawable.height >= max_tex_h) {
1195b7e1c893Smrg	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1196b7e1c893Smrg			 pSrcPixmap->drawable.width,
1197b7e1c893Smrg			 pSrcPixmap->drawable.height));
1198b7e1c893Smrg    }
1199b7e1c893Smrg
1200b7e1c893Smrg    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1201b7e1c893Smrg
1202b7e1c893Smrg    if (pDstPixmap->drawable.width >= max_dst_w ||
1203b7e1c893Smrg	pDstPixmap->drawable.height >= max_dst_h) {
1204b7e1c893Smrg	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1205b7e1c893Smrg			 pDstPixmap->drawable.width,
1206b7e1c893Smrg			 pDstPixmap->drawable.height));
1207b7e1c893Smrg    }
1208b7e1c893Smrg
1209b7e1c893Smrg    if (pMaskPicture) {
1210ad43ddacSmrg	PixmapPtr pMaskPixmap;
1211ad43ddacSmrg
1212ad43ddacSmrg	if (!pMaskPicture->pDrawable)
1213ad43ddacSmrg	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1214ad43ddacSmrg
1215ad43ddacSmrg	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1216b7e1c893Smrg
1217b7e1c893Smrg	if (pMaskPixmap->drawable.width >= max_tex_w ||
1218b7e1c893Smrg	    pMaskPixmap->drawable.height >= max_tex_h) {
1219b7e1c893Smrg	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1220b7e1c893Smrg			     pMaskPixmap->drawable.width,
1221b7e1c893Smrg			     pMaskPixmap->drawable.height));
1222b7e1c893Smrg	}
1223b7e1c893Smrg
1224b7e1c893Smrg	if (pMaskPicture->componentAlpha) {
1225b7e1c893Smrg	    /* Check if it's component alpha that relies on a source alpha and
1226b7e1c893Smrg	     * on the source value.  We can only get one of those into the
1227b7e1c893Smrg	     * single source value that we get to blend with.
1228b7e1c893Smrg	     */
1229b7e1c893Smrg	    if (R600BlendOp[op].src_alpha &&
1230b7e1c893Smrg		(R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1231b7e1c893Smrg		(BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1232b7e1c893Smrg		RADEON_FALLBACK(("Component alpha not supported with source "
1233b7e1c893Smrg				 "alpha and source value blending.\n"));
1234b7e1c893Smrg	    }
1235b7e1c893Smrg	}
1236b7e1c893Smrg
1237b7e1c893Smrg	if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1238b7e1c893Smrg	    return FALSE;
1239b7e1c893Smrg    }
1240b7e1c893Smrg
1241b7e1c893Smrg    if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1242b7e1c893Smrg	return FALSE;
1243b7e1c893Smrg
1244b7e1c893Smrg    if (!R600GetDestFormat(pDstPicture, &tmp1))
1245b7e1c893Smrg	return FALSE;
1246b7e1c893Smrg
1247b7e1c893Smrg    return TRUE;
1248b7e1c893Smrg
1249b7e1c893Smrg}
1250b7e1c893Smrg
1251b7e1c893Smrgstatic Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1252b7e1c893Smrg				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1253b7e1c893Smrg				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1254b7e1c893Smrg{
1255b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1256b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1257b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1258b13dfe66Smrg    uint32_t dst_format;
1259b7e1c893Smrg    cb_config_t cb_conf;
1260b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
1261ad43ddacSmrg    struct r600_accel_object src_obj, mask_obj, dst_obj;
1262b7e1c893Smrg
1263ad43ddacSmrg    if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8)
1264ad43ddacSmrg	return FALSE;
1265ad43ddacSmrg
1266ad43ddacSmrg#if defined(XF86DRM_MODE)
1267ad43ddacSmrg    if (info->cs) {
1268ad43ddacSmrg	src_obj.offset = 0;
1269ad43ddacSmrg	dst_obj.offset = 0;
1270ad43ddacSmrg	src_obj.bo = radeon_get_pixmap_bo(pSrc);
1271ad43ddacSmrg	dst_obj.bo = radeon_get_pixmap_bo(pDst);
1272ad43ddacSmrg    } else
1273ad43ddacSmrg#endif
1274ad43ddacSmrg    {
1275ad43ddacSmrg	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1276ad43ddacSmrg	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1277ad43ddacSmrg	src_obj.bo = NULL;
1278ad43ddacSmrg	dst_obj.bo = NULL;
1279ad43ddacSmrg    }
1280ad43ddacSmrg    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1281ad43ddacSmrg    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1282ad43ddacSmrg
1283ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
1284ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
1285ad43ddacSmrg    src_obj.bpp = pSrc->drawable.bitsPerPixel;
1286ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1287ad43ddacSmrg
1288ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
1289ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
1290ad43ddacSmrg    dst_obj.bpp = pDst->drawable.bitsPerPixel;
1291ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1292b7e1c893Smrg
1293b7e1c893Smrg    if (pMask) {
1294ad43ddacSmrg#if defined(XF86DRM_MODE)
1295ad43ddacSmrg	if (info->cs) {
1296ad43ddacSmrg	    mask_obj.offset = 0;
1297ad43ddacSmrg	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
1298921a55d8Smrg	} else
1299ad43ddacSmrg#endif
1300ad43ddacSmrg	{
1301ad43ddacSmrg	    mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset;
1302ad43ddacSmrg	    mask_obj.bo = NULL;
1303ad43ddacSmrg	}
1304ad43ddacSmrg	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1305ad43ddacSmrg
1306ad43ddacSmrg	mask_obj.width = pMask->drawable.width;
1307ad43ddacSmrg	mask_obj.height = pMask->drawable.height;
1308ad43ddacSmrg	mask_obj.bpp = pMask->drawable.bitsPerPixel;
1309ad43ddacSmrg	mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1310ad43ddacSmrg
1311ad43ddacSmrg	if (!R600SetAccelState(pScrn,
1312ad43ddacSmrg			       &src_obj,
1313ad43ddacSmrg			       &mask_obj,
1314ad43ddacSmrg			       &dst_obj,
13150974d292Smrg			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1316ad43ddacSmrg			       3, 0xffffffff))
1317ad43ddacSmrg	    return FALSE;
1318ad43ddacSmrg
1319ad43ddacSmrg	accel_state->msk_pic = pMaskPicture;
1320b7e1c893Smrg	if (pMaskPicture->componentAlpha) {
1321b7e1c893Smrg	    accel_state->component_alpha = TRUE;
1322b7e1c893Smrg	    if (R600BlendOp[op].src_alpha)
1323b7e1c893Smrg		accel_state->src_alpha = TRUE;
1324b7e1c893Smrg	    else
1325b7e1c893Smrg		accel_state->src_alpha = FALSE;
1326b7e1c893Smrg	} else {
1327b7e1c893Smrg	    accel_state->component_alpha = FALSE;
1328b7e1c893Smrg	    accel_state->src_alpha = FALSE;
1329b7e1c893Smrg	}
1330b7e1c893Smrg    } else {
1331ad43ddacSmrg	if (!R600SetAccelState(pScrn,
1332ad43ddacSmrg			       &src_obj,
1333ad43ddacSmrg			       NULL,
1334ad43ddacSmrg			       &dst_obj,
1335ad43ddacSmrg			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1336ad43ddacSmrg			       3, 0xffffffff))
1337ad43ddacSmrg	    return FALSE;
1338ad43ddacSmrg
1339ad43ddacSmrg	accel_state->msk_pic = NULL;
1340b7e1c893Smrg	accel_state->component_alpha = FALSE;
1341b7e1c893Smrg	accel_state->src_alpha = FALSE;
1342b7e1c893Smrg    }
1343b7e1c893Smrg
1344b7e1c893Smrg    if (!R600GetDestFormat(pDstPicture, &dst_format))
1345b7e1c893Smrg	return FALSE;
1346b7e1c893Smrg
1347b7e1c893Smrg    CLEAR (cb_conf);
1348b7e1c893Smrg    CLEAR (vs_conf);
1349b7e1c893Smrg    CLEAR (ps_conf);
1350b7e1c893Smrg
1351ad43ddacSmrg    if (pMask)
1352921a55d8Smrg        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1353ad43ddacSmrg    else
1354921a55d8Smrg        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1355b7e1c893Smrg
13560974d292Smrg    radeon_cp_start(pScrn);
1357b7e1c893Smrg
1358921a55d8Smrg    r600_set_default_state(pScrn, accel_state->ib);
1359b7e1c893Smrg
1360921a55d8Smrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1361921a55d8Smrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1362921a55d8Smrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1363b7e1c893Smrg
1364b7e1c893Smrg    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
1365ad43ddacSmrg        R600IBDiscard(pScrn, accel_state->ib);
1366ad43ddacSmrg        return FALSE;
1367b7e1c893Smrg    }
1368b7e1c893Smrg
1369b7e1c893Smrg    if (pMask) {
1370ad43ddacSmrg        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
1371ad43ddacSmrg            R600IBDiscard(pScrn, accel_state->ib);
1372ad43ddacSmrg            return FALSE;
1373ad43ddacSmrg        }
1374b7e1c893Smrg    } else
1375ad43ddacSmrg        accel_state->is_transform[1] = FALSE;
1376b7e1c893Smrg
13770974d292Smrg    if (pMask) {
1378921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
1379921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
13800974d292Smrg    } else {
1381921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
1382921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
13830974d292Smrg    }
1384b7e1c893Smrg
1385b7e1c893Smrg    /* Shader */
1386b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
13870974d292Smrg    vs_conf.shader_size         = accel_state->vs_size;
1388921a55d8Smrg    vs_conf.num_gprs            = 5;
1389b7e1c893Smrg    vs_conf.stack_size          = 1;
1390ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
1391921a55d8Smrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1392b7e1c893Smrg
1393b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
13940974d292Smrg    ps_conf.shader_size         = accel_state->ps_size;
1395b7e1c893Smrg    ps_conf.num_gprs            = 3;
13960974d292Smrg    ps_conf.stack_size          = 1;
1397b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
1398b7e1c893Smrg    ps_conf.clamp_consts        = 0;
1399b7e1c893Smrg    ps_conf.export_mode         = 2;
1400ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
1401921a55d8Smrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1402b7e1c893Smrg
1403b7e1c893Smrg    cb_conf.id = 0;
1404ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
1405ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
1406ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
1407b7e1c893Smrg    cb_conf.format = dst_format;
1408ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
1409b7e1c893Smrg
1410b7e1c893Smrg    switch (pDstPicture->format) {
1411b7e1c893Smrg    case PICT_a8r8g8b8:
1412b7e1c893Smrg    case PICT_x8r8g8b8:
1413b7e1c893Smrg    case PICT_a1r5g5b5:
1414b7e1c893Smrg    case PICT_x1r5g5b5:
1415b7e1c893Smrg    default:
1416b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
1417b7e1c893Smrg	break;
1418ad43ddacSmrg    case PICT_a8b8g8r8:
1419ad43ddacSmrg    case PICT_x8b8g8r8:
1420ad43ddacSmrg	cb_conf.comp_swap = 0; /* ABGR */
1421ad43ddacSmrg	break;
1422ad43ddacSmrg#ifdef PICT_TYPE_BGRA
1423ad43ddacSmrg    case PICT_b8g8r8a8:
1424ad43ddacSmrg    case PICT_b8g8r8x8:
1425ad43ddacSmrg	cb_conf.comp_swap = 3; /* BGRA */
1426ad43ddacSmrg	break;
1427ad43ddacSmrg#endif
1428b7e1c893Smrg    case PICT_r5g6b5:
1429b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
1430b7e1c893Smrg	break;
1431b7e1c893Smrg    case PICT_a8:
1432b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
1433b7e1c893Smrg	break;
1434b7e1c893Smrg    }
1435b7e1c893Smrg    cb_conf.source_format = 1;
1436b7e1c893Smrg    cb_conf.blend_clamp = 1;
1437b13dfe66Smrg    cb_conf.blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1438b13dfe66Smrg    cb_conf.blend_enable = 1;
1439b13dfe66Smrg    cb_conf.pmask = 0xf;
1440b13dfe66Smrg    cb_conf.rop = 3;
1441b13dfe66Smrg    if (accel_state->dst_obj.tiling_flags == 0)
1442b13dfe66Smrg	cb_conf.array_mode = 1;
1443b13dfe66Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1444b13dfe66Smrg    switch (dst_obj.bpp) {
1445b13dfe66Smrg    case 16:
1446b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN16;
1447b13dfe66Smrg	break;
1448b13dfe66Smrg    case 32:
1449b13dfe66Smrg	cb_conf.endian = ENDIAN_8IN32;
1450b13dfe66Smrg	break;
1451b13dfe66Smrg    default:
1452b13dfe66Smrg	break;
1453b7e1c893Smrg    }
1454b13dfe66Smrg#endif
1455b13dfe66Smrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
14560974d292Smrg
1457b13dfe66Smrg    if (pMask)
1458b13dfe66Smrg	r600_set_spi(pScrn, accel_state->ib, (2 - 1), 2);
1459b13dfe66Smrg    else
1460b13dfe66Smrg	r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
1461b7e1c893Smrg
1462ad43ddacSmrg    if (accel_state->vsync)
14630974d292Smrg	RADEONVlineHelperClear(pScrn);
1464b7e1c893Smrg
1465b7e1c893Smrg    return TRUE;
1466b7e1c893Smrg}
1467b7e1c893Smrg
1468b7e1c893Smrgstatic void R600Composite(PixmapPtr pDst,
1469b7e1c893Smrg			  int srcX, int srcY,
1470b7e1c893Smrg			  int maskX, int maskY,
1471b7e1c893Smrg			  int dstX, int dstY,
1472b7e1c893Smrg			  int w, int h)
1473b7e1c893Smrg{
1474b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1475b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1476b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1477b7e1c893Smrg    float *vb;
1478b7e1c893Smrg
1479b7e1c893Smrg    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1480b7e1c893Smrg       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1481b7e1c893Smrg
1482ad43ddacSmrg    if (accel_state->vsync)
14830974d292Smrg	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1484b7e1c893Smrg
1485ad43ddacSmrg    if (accel_state->msk_pic) {
1486b7e1c893Smrg
1487921a55d8Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
1488b7e1c893Smrg
1489b7e1c893Smrg	vb[0] = (float)dstX;
1490b7e1c893Smrg	vb[1] = (float)dstY;
1491ad43ddacSmrg	vb[2] = (float)srcX;
1492ad43ddacSmrg	vb[3] = (float)srcY;
1493ad43ddacSmrg	vb[4] = (float)maskX;
1494ad43ddacSmrg	vb[5] = (float)maskY;
1495b7e1c893Smrg
1496b7e1c893Smrg	vb[6] = (float)dstX;
1497b7e1c893Smrg	vb[7] = (float)(dstY + h);
1498ad43ddacSmrg	vb[8] = (float)srcX;
1499ad43ddacSmrg	vb[9] = (float)(srcY + h);
1500ad43ddacSmrg	vb[10] = (float)maskX;
1501ad43ddacSmrg	vb[11] = (float)(maskY + h);
1502b7e1c893Smrg
1503b7e1c893Smrg	vb[12] = (float)(dstX + w);
1504b7e1c893Smrg	vb[13] = (float)(dstY + h);
1505ad43ddacSmrg	vb[14] = (float)(srcX + w);
1506ad43ddacSmrg	vb[15] = (float)(srcY + h);
1507ad43ddacSmrg	vb[16] = (float)(maskX + w);
1508ad43ddacSmrg	vb[17] = (float)(maskY + h);
1509ad43ddacSmrg
1510921a55d8Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
1511b7e1c893Smrg
1512b7e1c893Smrg    } else {
1513b7e1c893Smrg
1514921a55d8Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
1515b7e1c893Smrg
1516b7e1c893Smrg	vb[0] = (float)dstX;
1517b7e1c893Smrg	vb[1] = (float)dstY;
1518ad43ddacSmrg	vb[2] = (float)srcX;
1519ad43ddacSmrg	vb[3] = (float)srcY;
1520b7e1c893Smrg
1521b7e1c893Smrg	vb[4] = (float)dstX;
1522b7e1c893Smrg	vb[5] = (float)(dstY + h);
1523ad43ddacSmrg	vb[6] = (float)srcX;
1524ad43ddacSmrg	vb[7] = (float)(srcY + h);
1525b7e1c893Smrg
1526b7e1c893Smrg	vb[8] = (float)(dstX + w);
1527b7e1c893Smrg	vb[9] = (float)(dstY + h);
1528ad43ddacSmrg	vb[10] = (float)(srcX + w);
1529ad43ddacSmrg	vb[11] = (float)(srcY + h);
1530ad43ddacSmrg
1531921a55d8Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
1532b7e1c893Smrg    }
1533b7e1c893Smrg
1534b7e1c893Smrg
1535b7e1c893Smrg}
1536b7e1c893Smrg
1537b7e1c893Smrgstatic void R600DoneComposite(PixmapPtr pDst)
1538b7e1c893Smrg{
1539b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1540b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1541b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1542ad43ddacSmrg    int vtx_size;
1543b7e1c893Smrg
1544ad43ddacSmrg    if (accel_state->vsync)
1545921a55d8Smrg       r600_cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
1546921a55d8Smrg			       accel_state->vline_crtc,
1547921a55d8Smrg			       accel_state->vline_y1,
1548921a55d8Smrg			       accel_state->vline_y2);
1549b7e1c893Smrg
1550ad43ddacSmrg    vtx_size = accel_state->msk_pic ? 24 : 16;
1551b7e1c893Smrg
1552ad43ddacSmrg    r600_finish_op(pScrn, vtx_size);
1553b7e1c893Smrg}
1554b7e1c893Smrg
1555b7e1c893SmrgBool
1556b7e1c893SmrgR600CopyToVRAM(ScrnInfoPtr pScrn,
1557b7e1c893Smrg	       char *src, int src_pitch,
1558ad43ddacSmrg	       uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_width, uint32_t dst_height, int bpp,
1559b7e1c893Smrg	       int x, int y, int w, int h)
1560b7e1c893Smrg{
1561b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1562ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1563b7e1c893Smrg    uint32_t scratch_mc_addr;
1564b7e1c893Smrg    int wpass = w * (bpp/8);
1565ad43ddacSmrg    int scratch_pitch_bytes = RADEON_ALIGN(wpass, 256);
1566b7e1c893Smrg    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1567b7e1c893Smrg    int scratch_offset = 0, hpass, temph;
1568b7e1c893Smrg    char *dst;
1569b7e1c893Smrg    drmBufPtr scratch;
1570ad43ddacSmrg    struct r600_accel_object scratch_obj, dst_obj;
1571b7e1c893Smrg
1572b7e1c893Smrg    if (dst_pitch & 7)
1573b7e1c893Smrg	return FALSE;
1574b7e1c893Smrg
1575b7e1c893Smrg    if (dst_mc_addr & 0xff)
1576b7e1c893Smrg	return FALSE;
1577b7e1c893Smrg
1578b7e1c893Smrg    scratch = RADEONCPGetBuffer(pScrn);
1579b7e1c893Smrg    if (scratch == NULL)
1580b7e1c893Smrg	return FALSE;
1581b7e1c893Smrg
1582b7e1c893Smrg    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1583b7e1c893Smrg    temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1584b7e1c893Smrg    dst = (char *)scratch->address;
1585b7e1c893Smrg
1586ad43ddacSmrg    scratch_obj.pitch = scratch_pitch;
1587ad43ddacSmrg    scratch_obj.width = w;
1588ad43ddacSmrg    scratch_obj.height = hpass;
1589ad43ddacSmrg    scratch_obj.offset = scratch_mc_addr;
1590ad43ddacSmrg    scratch_obj.bpp = bpp;
1591ad43ddacSmrg    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1592ad43ddacSmrg    scratch_obj.bo = NULL;
1593ad43ddacSmrg
1594ad43ddacSmrg    dst_obj.pitch = dst_pitch;
1595ad43ddacSmrg    dst_obj.width = dst_width;
1596ad43ddacSmrg    dst_obj.height = dst_height;
1597ad43ddacSmrg    dst_obj.offset = dst_mc_addr;
1598ad43ddacSmrg    dst_obj.bo = NULL;
1599ad43ddacSmrg    dst_obj.bpp = bpp;
1600ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1601ad43ddacSmrg
1602ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1603ad43ddacSmrg			   &scratch_obj,
1604ad43ddacSmrg			   NULL,
1605ad43ddacSmrg			   &dst_obj,
1606ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1607ad43ddacSmrg			   3, 0xffffffff))
1608ad43ddacSmrg	return FALSE;
1609ad43ddacSmrg
1610b7e1c893Smrg    /* memcopy from sys to scratch */
1611b7e1c893Smrg    while (temph--) {
1612b7e1c893Smrg	memcpy (dst, src, wpass);
1613b7e1c893Smrg	src += src_pitch;
1614b7e1c893Smrg	dst += scratch_pitch_bytes;
1615b7e1c893Smrg    }
1616b7e1c893Smrg
1617b7e1c893Smrg    while (h) {
1618b7e1c893Smrg	uint32_t offset = scratch_mc_addr + scratch_offset;
1619b7e1c893Smrg	int oldhpass = hpass;
1620b7e1c893Smrg	h -= oldhpass;
1621b7e1c893Smrg	temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1622b7e1c893Smrg
1623b7e1c893Smrg	if (hpass) {
1624b7e1c893Smrg	    scratch_offset = scratch->total/2 - scratch_offset;
1625b7e1c893Smrg	    dst = (char *)scratch->address + scratch_offset;
1626b7e1c893Smrg	    /* wait for the engine to be idle */
1627b7e1c893Smrg	    RADEONWaitForIdleCP(pScrn);
1628b7e1c893Smrg	    //memcopy from sys to scratch
1629b7e1c893Smrg	    while (temph--) {
1630b7e1c893Smrg		memcpy (dst, src, wpass);
1631b7e1c893Smrg		src += src_pitch;
1632b7e1c893Smrg		dst += scratch_pitch_bytes;
1633b7e1c893Smrg	    }
1634b7e1c893Smrg	}
1635b7e1c893Smrg	/* blit from scratch to vram */
1636ad43ddacSmrg	info->accel_state->src_obj[0].height = oldhpass;
1637ad43ddacSmrg	info->accel_state->src_obj[0].offset = offset;
1638ad43ddacSmrg	R600DoPrepareCopy(pScrn);
1639b7e1c893Smrg	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
1640b7e1c893Smrg	R600DoCopy(pScrn);
1641b7e1c893Smrg	y += oldhpass;
1642b7e1c893Smrg    }
1643b7e1c893Smrg
1644b7e1c893Smrg    R600IBDiscard(pScrn, scratch);
1645b7e1c893Smrg
1646b7e1c893Smrg    return TRUE;
1647b7e1c893Smrg}
1648b7e1c893Smrg
1649b7e1c893Smrgstatic Bool
1650b7e1c893SmrgR600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1651b7e1c893Smrg		   char *src, int src_pitch)
1652b7e1c893Smrg{
1653b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1654b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1655b7e1c893Smrg    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1656b7e1c893Smrg    uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1657b7e1c893Smrg    int bpp = pDst->drawable.bitsPerPixel;
1658b7e1c893Smrg
1659b7e1c893Smrg    return R600CopyToVRAM(pScrn,
1660b7e1c893Smrg			  src, src_pitch,
1661ad43ddacSmrg			  dst_pitch, dst_mc_addr, pDst->drawable.width, pDst->drawable.height, bpp,
1662b7e1c893Smrg			  x, y, w, h);
1663b7e1c893Smrg}
1664b7e1c893Smrg
1665b7e1c893Smrgstatic Bool
1666b7e1c893SmrgR600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
1667b7e1c893Smrg		       char *dst, int dst_pitch)
1668b7e1c893Smrg{
1669b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1670b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1671ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1672b7e1c893Smrg    uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1673b7e1c893Smrg    uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1674b7e1c893Smrg    uint32_t src_width = pSrc->drawable.width;
1675b7e1c893Smrg    uint32_t src_height = pSrc->drawable.height;
1676b7e1c893Smrg    int bpp = pSrc->drawable.bitsPerPixel;
1677b7e1c893Smrg    uint32_t scratch_mc_addr;
1678ad43ddacSmrg    int scratch_pitch_bytes = RADEON_ALIGN(dst_pitch, 256);
1679b7e1c893Smrg    int scratch_offset = 0, hpass;
1680b7e1c893Smrg    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1681b7e1c893Smrg    int wpass = w * (bpp/8);
1682b7e1c893Smrg    drmBufPtr scratch;
1683ad43ddacSmrg    struct r600_accel_object scratch_obj, src_obj;
1684b7e1c893Smrg
1685ad43ddacSmrg    /* bad pipe setup in drm prior to 1.32 */
1686ad43ddacSmrg    if (info->dri->pKernelDRMVersion->version_minor < 32) {
1687ad43ddacSmrg	    if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32))
1688ad43ddacSmrg		    return FALSE;
1689ad43ddacSmrg    }
1690c503f109Smrg
1691b7e1c893Smrg    if (src_pitch & 7)
1692b7e1c893Smrg	return FALSE;
1693b7e1c893Smrg
1694b7e1c893Smrg    scratch = RADEONCPGetBuffer(pScrn);
1695b7e1c893Smrg    if (scratch == NULL)
1696b7e1c893Smrg	return FALSE;
1697b7e1c893Smrg
1698b7e1c893Smrg    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1699b7e1c893Smrg    hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1700b7e1c893Smrg
1701ad43ddacSmrg    src_obj.pitch = src_pitch;
1702ad43ddacSmrg    src_obj.width = src_width;
1703ad43ddacSmrg    src_obj.height = src_height;
1704ad43ddacSmrg    src_obj.offset = src_mc_addr;
1705ad43ddacSmrg    src_obj.bo = NULL;
1706ad43ddacSmrg    src_obj.bpp = bpp;
1707ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1708ad43ddacSmrg
1709ad43ddacSmrg    scratch_obj.pitch = scratch_pitch;
1710ad43ddacSmrg    scratch_obj.width = src_width;
1711ad43ddacSmrg    scratch_obj.height = hpass;
1712ad43ddacSmrg    scratch_obj.offset = scratch_mc_addr;
1713ad43ddacSmrg    scratch_obj.bpp = bpp;
1714ad43ddacSmrg    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1715ad43ddacSmrg    scratch_obj.bo = NULL;
1716ad43ddacSmrg
1717ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1718ad43ddacSmrg			   &src_obj,
1719ad43ddacSmrg			   NULL,
1720ad43ddacSmrg			   &scratch_obj,
1721ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1722ad43ddacSmrg			   3, 0xffffffff))
1723ad43ddacSmrg	return FALSE;
1724ad43ddacSmrg
1725b7e1c893Smrg    /* blit from vram to scratch */
1726ad43ddacSmrg    R600DoPrepareCopy(pScrn);
1727b7e1c893Smrg    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1728b7e1c893Smrg    R600DoCopy(pScrn);
1729b7e1c893Smrg
1730b7e1c893Smrg    while (h) {
1731b7e1c893Smrg	char *src = (char *)scratch->address + scratch_offset;
1732b7e1c893Smrg	int oldhpass = hpass;
1733b7e1c893Smrg	h -= oldhpass;
1734b7e1c893Smrg	y += oldhpass;
1735b7e1c893Smrg	hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1736b7e1c893Smrg
1737b7e1c893Smrg	if (hpass) {
1738b7e1c893Smrg	    scratch_offset = scratch->total/2 - scratch_offset;
1739b7e1c893Smrg	    /* blit from vram to scratch */
1740ad43ddacSmrg	    info->accel_state->dst_obj.height = hpass;
1741ad43ddacSmrg	    info->accel_state->dst_obj.offset = scratch_mc_addr + scratch_offset;
1742ad43ddacSmrg	    R600DoPrepareCopy(pScrn);
1743b7e1c893Smrg	    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1744b7e1c893Smrg	    R600DoCopy(pScrn);
1745b7e1c893Smrg	}
1746b7e1c893Smrg
1747b7e1c893Smrg	/* wait for the engine to be idle */
1748b7e1c893Smrg	RADEONWaitForIdleCP(pScrn);
1749b7e1c893Smrg	/* memcopy from scratch to sys */
1750b7e1c893Smrg	while (oldhpass--) {
1751b7e1c893Smrg	    memcpy (dst, src, wpass);
1752b7e1c893Smrg	    dst += dst_pitch;
1753b7e1c893Smrg	    src += scratch_pitch_bytes;
1754b7e1c893Smrg	}
1755b7e1c893Smrg    }
1756b7e1c893Smrg
1757b7e1c893Smrg    R600IBDiscard(pScrn, scratch);
1758b7e1c893Smrg
1759b7e1c893Smrg    return TRUE;
1760b7e1c893Smrg
1761b7e1c893Smrg}
1762b7e1c893Smrg
1763ad43ddacSmrg#if defined(XF86DRM_MODE)
1764ad43ddacSmrg
1765ad43ddacSmrgstatic Bool
1766ad43ddacSmrgR600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
1767ad43ddacSmrg		     char *src, int src_pitch)
1768ad43ddacSmrg{
1769ad43ddacSmrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1770ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1771ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1772ad43ddacSmrg    struct radeon_exa_pixmap_priv *driver_priv;
17730974d292Smrg    struct radeon_bo *scratch = NULL;
17740974d292Smrg    struct radeon_bo *copy_dst;
17750974d292Smrg    unsigned char *dst;
1776ad43ddacSmrg    unsigned size;
1777ad43ddacSmrg    uint32_t dst_domain;
1778ad43ddacSmrg    int bpp = pDst->drawable.bitsPerPixel;
1779b13dfe66Smrg    uint32_t scratch_pitch;
17800974d292Smrg    uint32_t copy_pitch;
1781ad43ddacSmrg    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
17820974d292Smrg    int ret;
17830974d292Smrg    Bool flush = TRUE;
1784ad43ddacSmrg    Bool r;
1785ad43ddacSmrg    int i;
1786ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
1787b13dfe66Smrg    uint32_t tiling_flags = 0, pitch = 0, height, base_align;
1788ad43ddacSmrg
1789ad43ddacSmrg    if (bpp < 8)
1790ad43ddacSmrg	return FALSE;
1791ad43ddacSmrg
1792ad43ddacSmrg    driver_priv = exaGetPixmapDriverPrivate(pDst);
1793921a55d8Smrg    if (!driver_priv || !driver_priv->bo)
1794921a55d8Smrg	return FALSE;
1795921a55d8Smrg
1796921a55d8Smrg    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
1797921a55d8Smrg    if (ret)
1798921a55d8Smrg	ErrorF("radeon_bo_get_tiling failed\n");
1799ad43ddacSmrg
18000974d292Smrg    /* If we know the BO won't be busy, don't bother with a scratch */
18010974d292Smrg    copy_dst = driver_priv->bo;
18020974d292Smrg    copy_pitch = pDst->devKind;
1803921a55d8Smrg    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1804921a55d8Smrg	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1805921a55d8Smrg	    flush = FALSE;
1806921a55d8Smrg	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
1807921a55d8Smrg		goto copy;
1808921a55d8Smrg	}
18090974d292Smrg    }
1810ad43ddacSmrg
1811b13dfe66Smrg    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1812b13dfe66Smrg    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1813b13dfe66Smrg    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1814b13dfe66Smrg    size = scratch_pitch * height * (bpp / 8);
1815b13dfe66Smrg    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1816ad43ddacSmrg    if (scratch == NULL) {
18170974d292Smrg	goto copy;
1818ad43ddacSmrg    }
1819ad43ddacSmrg
1820b13dfe66Smrg    src_obj.pitch = scratch_pitch;
1821ad43ddacSmrg    src_obj.width = w;
1822ad43ddacSmrg    src_obj.height = h;
1823ad43ddacSmrg    src_obj.offset = 0;
1824ad43ddacSmrg    src_obj.bpp = bpp;
1825ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1826ad43ddacSmrg    src_obj.bo = scratch;
1827ad43ddacSmrg
1828ad43ddacSmrg    dst_obj.pitch = dst_pitch_hw;
1829ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
1830ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
1831ad43ddacSmrg    dst_obj.offset = 0;
1832ad43ddacSmrg    dst_obj.bpp = bpp;
1833ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1834ad43ddacSmrg    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1835ad43ddacSmrg
1836ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1837ad43ddacSmrg			   &src_obj,
1838ad43ddacSmrg			   NULL,
1839ad43ddacSmrg			   &dst_obj,
1840ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1841ad43ddacSmrg			   3, 0xffffffff)) {
18420974d292Smrg        goto copy;
1843ad43ddacSmrg    }
18440974d292Smrg    copy_dst = scratch;
1845b13dfe66Smrg    copy_pitch = scratch_pitch * (bpp / 8);
18460974d292Smrg    flush = FALSE;
18470974d292Smrg
18480974d292Smrgcopy:
18490974d292Smrg    if (flush)
18500974d292Smrg	radeon_cs_flush_indirect(pScrn);
1851ad43ddacSmrg
18520974d292Smrg    ret = radeon_bo_map(copy_dst, 0);
18530974d292Smrg    if (ret) {
1854ad43ddacSmrg        r = FALSE;
1855ad43ddacSmrg        goto out;
1856ad43ddacSmrg    }
1857ad43ddacSmrg    r = TRUE;
1858ad43ddacSmrg    size = w * bpp / 8;
18590974d292Smrg    dst = copy_dst->ptr;
18600974d292Smrg    if (copy_dst == driver_priv->bo)
18610974d292Smrg	dst += y * copy_pitch + x * bpp / 8;
1862ad43ddacSmrg    for (i = 0; i < h; i++) {
18630974d292Smrg        memcpy(dst + i * copy_pitch, src, size);
1864ad43ddacSmrg        src += src_pitch;
1865ad43ddacSmrg    }
18660974d292Smrg    radeon_bo_unmap(copy_dst);
1867ad43ddacSmrg
18680974d292Smrg    if (copy_dst == scratch) {
18690974d292Smrg	if (info->accel_state->vsync)
18700974d292Smrg	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1871ad43ddacSmrg
18720974d292Smrg	/* blit from gart to vram */
18730974d292Smrg	R600DoPrepareCopy(pScrn);
18740974d292Smrg	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
18750974d292Smrg	R600DoCopyVline(pDst);
18760974d292Smrg    }
1877ad43ddacSmrg
1878ad43ddacSmrgout:
18790974d292Smrg    if (scratch)
18800974d292Smrg	radeon_bo_unref(scratch);
1881ad43ddacSmrg    return r;
1882ad43ddacSmrg}
1883ad43ddacSmrg
1884ad43ddacSmrgstatic Bool
1885ad43ddacSmrgR600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
1886ad43ddacSmrg			 int h, char *dst, int dst_pitch)
1887ad43ddacSmrg{
1888ad43ddacSmrg    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1889ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1890ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1891ad43ddacSmrg    struct radeon_exa_pixmap_priv *driver_priv;
18920974d292Smrg    struct radeon_bo *scratch = NULL;
18930974d292Smrg    struct radeon_bo *copy_src;
1894ad43ddacSmrg    unsigned size;
1895ad43ddacSmrg    uint32_t src_domain = 0;
1896ad43ddacSmrg    int bpp = pSrc->drawable.bitsPerPixel;
1897b13dfe66Smrg    uint32_t scratch_pitch;
18980974d292Smrg    uint32_t copy_pitch;
1899ad43ddacSmrg    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
19000974d292Smrg    int ret;
19010974d292Smrg    Bool flush = FALSE;
1902ad43ddacSmrg    Bool r;
1903ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
1904b13dfe66Smrg    uint32_t tiling_flags = 0, pitch = 0, height, base_align;
1905ad43ddacSmrg
1906ad43ddacSmrg    if (bpp < 8)
1907ad43ddacSmrg	return FALSE;
1908ad43ddacSmrg
1909ad43ddacSmrg    driver_priv = exaGetPixmapDriverPrivate(pSrc);
1910921a55d8Smrg    if (!driver_priv || !driver_priv->bo)
1911921a55d8Smrg	return FALSE;
1912921a55d8Smrg
1913921a55d8Smrg    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
1914921a55d8Smrg    if (ret)
1915921a55d8Smrg	ErrorF("radeon_bo_get_tiling failed\n");
1916ad43ddacSmrg
19170974d292Smrg    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
19180974d292Smrg    copy_src = driver_priv->bo;
19190974d292Smrg    copy_pitch = pSrc->devKind;
1920921a55d8Smrg    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1921921a55d8Smrg	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1922921a55d8Smrg	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
1923921a55d8Smrg	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
1924921a55d8Smrg		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
1925921a55d8Smrg		src_domain = 0;
1926921a55d8Smrg	    else /* A write may be scheduled */
1927921a55d8Smrg		flush = TRUE;
1928921a55d8Smrg	}
1929ad43ddacSmrg
1930921a55d8Smrg	if (!src_domain)
1931921a55d8Smrg	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
1932ad43ddacSmrg
1933921a55d8Smrg	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
1934921a55d8Smrg	    goto copy;
1935921a55d8Smrg    }
1936ad43ddacSmrg
1937b13dfe66Smrg    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1938b13dfe66Smrg    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1939b13dfe66Smrg    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1940b13dfe66Smrg    size = scratch_pitch * height * (bpp / 8);
1941b13dfe66Smrg    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1942ad43ddacSmrg    if (scratch == NULL) {
19430974d292Smrg	goto copy;
1944ad43ddacSmrg    }
1945ad43ddacSmrg    radeon_cs_space_reset_bos(info->cs);
1946ad43ddacSmrg    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
1947ad43ddacSmrg				      RADEON_GEM_DOMAIN_VRAM, 0);
1948ad43ddacSmrg    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
1949ad43ddacSmrg    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
1950ad43ddacSmrg    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1951ad43ddacSmrg    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
19520974d292Smrg    ret = radeon_cs_space_check(info->cs);
19530974d292Smrg    if (ret) {
19540974d292Smrg        goto copy;
1955ad43ddacSmrg    }
1956ad43ddacSmrg
1957ad43ddacSmrg    src_obj.pitch = src_pitch_hw;
1958ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
1959ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
1960ad43ddacSmrg    src_obj.offset = 0;
1961ad43ddacSmrg    src_obj.bpp = bpp;
1962ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1963ad43ddacSmrg    src_obj.bo = radeon_get_pixmap_bo(pSrc);
1964921a55d8Smrg
1965b13dfe66Smrg    dst_obj.pitch = scratch_pitch;
1966ad43ddacSmrg    dst_obj.width = w;
1967ad43ddacSmrg    dst_obj.height = h;
1968ad43ddacSmrg    dst_obj.offset = 0;
1969ad43ddacSmrg    dst_obj.bo = scratch;
1970ad43ddacSmrg    dst_obj.bpp = bpp;
1971ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1972ad43ddacSmrg
1973ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1974ad43ddacSmrg			   &src_obj,
1975ad43ddacSmrg			   NULL,
1976ad43ddacSmrg			   &dst_obj,
1977ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1978ad43ddacSmrg			   3, 0xffffffff)) {
19790974d292Smrg        goto copy;
1980ad43ddacSmrg    }
1981ad43ddacSmrg
1982ad43ddacSmrg    /* blit from vram to gart */
1983ad43ddacSmrg    R600DoPrepareCopy(pScrn);
1984ad43ddacSmrg    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
1985ad43ddacSmrg    R600DoCopy(pScrn);
19860974d292Smrg    copy_src = scratch;
1987b13dfe66Smrg    copy_pitch = scratch_pitch * (bpp / 8);
19880974d292Smrg    flush = TRUE;
1989ad43ddacSmrg
19900974d292Smrgcopy:
19910974d292Smrg    if (flush && info->cs)
1992ad43ddacSmrg	radeon_cs_flush_indirect(pScrn);
1993ad43ddacSmrg
19940974d292Smrg    ret = radeon_bo_map(copy_src, 0);
19950974d292Smrg    if (ret) {
19960974d292Smrg	ErrorF("failed to map pixmap: %d\n", ret);
1997ad43ddacSmrg        r = FALSE;
1998ad43ddacSmrg        goto out;
1999ad43ddacSmrg    }
2000ad43ddacSmrg    r = TRUE;
2001ad43ddacSmrg    w *= bpp / 8;
20020974d292Smrg    if (copy_src == driver_priv->bo)
20030974d292Smrg	size = y * copy_pitch + x * bpp / 8;
20040974d292Smrg    else
20050974d292Smrg	size = 0;
2006ad43ddacSmrg    while (h--) {
20070974d292Smrg        memcpy(dst, copy_src->ptr + size, w);
20080974d292Smrg        size += copy_pitch;
2009ad43ddacSmrg        dst += dst_pitch;
2010ad43ddacSmrg    }
20110974d292Smrg    radeon_bo_unmap(copy_src);
2012ad43ddacSmrgout:
20130974d292Smrg    if (scratch)
20140974d292Smrg	radeon_bo_unref(scratch);
2015ad43ddacSmrg    return r;
2016ad43ddacSmrg}
2017ad43ddacSmrg#endif
2018ad43ddacSmrg
2019b7e1c893Smrgstatic int
2020b7e1c893SmrgR600MarkSync(ScreenPtr pScreen)
2021b7e1c893Smrg{
2022b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
2023b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2024b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2025b7e1c893Smrg
2026b7e1c893Smrg    return ++accel_state->exaSyncMarker;
2027b7e1c893Smrg
2028b7e1c893Smrg}
2029b7e1c893Smrg
2030b7e1c893Smrgstatic void
2031b7e1c893SmrgR600Sync(ScreenPtr pScreen, int marker)
2032b7e1c893Smrg{
2033b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
2034b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2035b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2036b7e1c893Smrg
2037b7e1c893Smrg    if (accel_state->exaMarkerSynced != marker) {
2038ad43ddacSmrg#ifdef XF86DRM_MODE
2039ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2040ad43ddacSmrg	if (!info->cs)
2041ad43ddacSmrg#endif
2042ad43ddacSmrg#endif
2043ad43ddacSmrg	    RADEONWaitForIdleCP(pScrn);
2044b7e1c893Smrg	accel_state->exaMarkerSynced = marker;
2045b7e1c893Smrg    }
2046b7e1c893Smrg
2047b7e1c893Smrg}
2048b7e1c893Smrg
2049b7e1c893Smrgstatic Bool
2050b7e1c893SmrgR600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
2051b7e1c893Smrg{
2052b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2053b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2054b7e1c893Smrg
2055b7e1c893Smrg    /* 512 bytes per shader for now */
2056b7e1c893Smrg    int size = 512 * 9;
2057b7e1c893Smrg
2058b7e1c893Smrg    accel_state->shaders = NULL;
2059b7e1c893Smrg
2060ad43ddacSmrg#ifdef XF86DRM_MODE
2061ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2062ad43ddacSmrg    if (info->cs) {
2063ad43ddacSmrg	accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
2064ad43ddacSmrg						 RADEON_GEM_DOMAIN_VRAM, 0);
2065ad43ddacSmrg	if (accel_state->shaders_bo == NULL) {
2066ad43ddacSmrg	    ErrorF("Allocating shader failed\n");
2067ad43ddacSmrg	    return FALSE;
2068ad43ddacSmrg	}
2069ad43ddacSmrg	return TRUE;
2070ad43ddacSmrg    } else
2071ad43ddacSmrg#endif
2072ad43ddacSmrg#endif
2073ad43ddacSmrg    {
2074ad43ddacSmrg	accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
2075ad43ddacSmrg						 TRUE, NULL, NULL);
2076ad43ddacSmrg
2077ad43ddacSmrg	if (accel_state->shaders == NULL)
2078ad43ddacSmrg	    return FALSE;
2079ad43ddacSmrg    }
2080b7e1c893Smrg
2081b7e1c893Smrg    return TRUE;
2082b7e1c893Smrg}
2083b7e1c893Smrg
2084b7e1c893SmrgBool
2085b7e1c893SmrgR600LoadShaders(ScrnInfoPtr pScrn)
2086b7e1c893Smrg{
2087b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2088b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2089b7e1c893Smrg    RADEONChipFamily ChipSet = info->ChipFamily;
2090b7e1c893Smrg    uint32_t *shader;
2091ad43ddacSmrg#ifdef XF86DRM_MODE
2092ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2093ad43ddacSmrg    int ret;
2094ad43ddacSmrg
2095ad43ddacSmrg    if (info->cs) {
2096ad43ddacSmrg	ret = radeon_bo_map(accel_state->shaders_bo, 1);
2097ad43ddacSmrg	if (ret) {
2098ad43ddacSmrg	    FatalError("failed to map shader %d\n", ret);
2099ad43ddacSmrg	    return FALSE;
2100ad43ddacSmrg	}
2101ad43ddacSmrg	shader = accel_state->shaders_bo->ptr;
2102ad43ddacSmrg    } else
2103ad43ddacSmrg#endif
2104ad43ddacSmrg#endif
2105ad43ddacSmrg	shader = (pointer)((char *)info->FB + accel_state->shaders->offset);
2106b7e1c893Smrg
2107b7e1c893Smrg    /*  solid vs --------------------------------------- */
2108b7e1c893Smrg    accel_state->solid_vs_offset = 0;
2109b7e1c893Smrg    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
2110b7e1c893Smrg
2111b7e1c893Smrg    /*  solid ps --------------------------------------- */
2112b7e1c893Smrg    accel_state->solid_ps_offset = 512;
2113b7e1c893Smrg    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
2114b7e1c893Smrg
2115b7e1c893Smrg    /*  copy vs --------------------------------------- */
2116b7e1c893Smrg    accel_state->copy_vs_offset = 1024;
2117b7e1c893Smrg    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
2118b7e1c893Smrg
2119b7e1c893Smrg    /*  copy ps --------------------------------------- */
2120b7e1c893Smrg    accel_state->copy_ps_offset = 1536;
2121b7e1c893Smrg    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2122b7e1c893Smrg
2123b7e1c893Smrg    /*  comp vs --------------------------------------- */
2124b7e1c893Smrg    accel_state->comp_vs_offset = 2048;
2125b7e1c893Smrg    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2126b7e1c893Smrg
2127b7e1c893Smrg    /*  comp ps --------------------------------------- */
2128b7e1c893Smrg    accel_state->comp_ps_offset = 2560;
2129b7e1c893Smrg    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2130b7e1c893Smrg
2131b7e1c893Smrg    /*  xv vs --------------------------------------- */
21320974d292Smrg    accel_state->xv_vs_offset = 3072;
2133b7e1c893Smrg    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2134b7e1c893Smrg
2135b7e1c893Smrg    /*  xv ps --------------------------------------- */
21360974d292Smrg    accel_state->xv_ps_offset = 3584;
2137b7e1c893Smrg    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2138b7e1c893Smrg
2139ad43ddacSmrg#ifdef XF86DRM_MODE
2140ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2141ad43ddacSmrg    if (info->cs) {
2142ad43ddacSmrg	radeon_bo_unmap(accel_state->shaders_bo);
2143ad43ddacSmrg    }
2144ad43ddacSmrg#endif
2145ad43ddacSmrg#endif
2146ad43ddacSmrg
2147b7e1c893Smrg    return TRUE;
2148b7e1c893Smrg}
2149b7e1c893Smrg
2150b7e1c893Smrgstatic Bool
2151b7e1c893SmrgR600PrepareAccess(PixmapPtr pPix, int index)
2152b7e1c893Smrg{
2153b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2154b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2155b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
2156b7e1c893Smrg
2157b7e1c893Smrg    /* flush HDP read/write caches */
2158b7e1c893Smrg    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2159b7e1c893Smrg
2160b7e1c893Smrg    return TRUE;
2161b7e1c893Smrg}
2162b7e1c893Smrg
2163b7e1c893Smrgstatic void
2164b7e1c893SmrgR600FinishAccess(PixmapPtr pPix, int index)
2165b7e1c893Smrg{
2166b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2167b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2168b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
2169b7e1c893Smrg
2170b7e1c893Smrg    /* flush HDP read/write caches */
2171b7e1c893Smrg    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2172b7e1c893Smrg
2173b7e1c893Smrg}
2174b7e1c893Smrg
2175b7e1c893SmrgBool
2176b7e1c893SmrgR600DrawInit(ScreenPtr pScreen)
2177b7e1c893Smrg{
2178b7e1c893Smrg    ScrnInfoPtr pScrn =  xf86Screens[pScreen->myNum];
2179b7e1c893Smrg    RADEONInfoPtr info   = RADEONPTR(pScrn);
2180b7e1c893Smrg
2181b7e1c893Smrg    if (info->accel_state->exa == NULL) {
2182b7e1c893Smrg	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2183b7e1c893Smrg	return FALSE;
2184b7e1c893Smrg    }
2185b7e1c893Smrg
2186b7e1c893Smrg    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2187b7e1c893Smrg    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2188b7e1c893Smrg
2189b7e1c893Smrg    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2190b7e1c893Smrg    info->accel_state->exa->Solid = R600Solid;
2191b7e1c893Smrg    info->accel_state->exa->DoneSolid = R600DoneSolid;
2192b7e1c893Smrg
2193b7e1c893Smrg    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2194b7e1c893Smrg    info->accel_state->exa->Copy = R600Copy;
2195b7e1c893Smrg    info->accel_state->exa->DoneCopy = R600DoneCopy;
2196b7e1c893Smrg
2197b7e1c893Smrg    info->accel_state->exa->MarkSync = R600MarkSync;
2198b7e1c893Smrg    info->accel_state->exa->WaitMarker = R600Sync;
2199b7e1c893Smrg
2200ad43ddacSmrg#ifdef XF86DRM_MODE
2201ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2202ad43ddacSmrg    if (info->cs) {
2203ad43ddacSmrg	info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
2204ad43ddacSmrg	info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
2205ad43ddacSmrg	info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
2206ad43ddacSmrg	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
2207ad43ddacSmrg	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
2208ad43ddacSmrg	info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
2209ad43ddacSmrg	info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
22100974d292Smrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5)
22110974d292Smrg        info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
22120974d292Smrg#endif
2213ad43ddacSmrg    } else
2214ad43ddacSmrg#endif
2215ad43ddacSmrg#endif
2216ad43ddacSmrg    {
2217ad43ddacSmrg	info->accel_state->exa->PrepareAccess = R600PrepareAccess;
2218ad43ddacSmrg	info->accel_state->exa->FinishAccess = R600FinishAccess;
2219ad43ddacSmrg
2220ad43ddacSmrg	/* AGP seems to have problems with gart transfers */
2221ad43ddacSmrg	if (info->accelDFS) {
2222ad43ddacSmrg	    info->accel_state->exa->UploadToScreen = R600UploadToScreen;
2223ad43ddacSmrg	    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen;
2224ad43ddacSmrg	}
2225b7e1c893Smrg    }
2226b7e1c893Smrg
2227b7e1c893Smrg    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
2228b7e1c893Smrg#ifdef EXA_SUPPORTS_PREPARE_AUX
2229b7e1c893Smrg    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
2230ad43ddacSmrg#endif
2231ad43ddacSmrg
2232ad43ddacSmrg#ifdef XF86DRM_MODE
2233ad43ddacSmrg#ifdef EXA_HANDLES_PIXMAPS
2234ad43ddacSmrg    if (info->cs) {
2235ad43ddacSmrg	info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
2236ad43ddacSmrg#ifdef EXA_MIXED_PIXMAPS
2237ad43ddacSmrg	info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
2238ad43ddacSmrg#endif
2239ad43ddacSmrg    }
2240ad43ddacSmrg#endif
2241b7e1c893Smrg#endif
2242b7e1c893Smrg    info->accel_state->exa->pixmapOffsetAlign = 256;
2243b7e1c893Smrg    info->accel_state->exa->pixmapPitchAlign = 256;
2244b7e1c893Smrg
2245b7e1c893Smrg    info->accel_state->exa->CheckComposite = R600CheckComposite;
2246b7e1c893Smrg    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2247b7e1c893Smrg    info->accel_state->exa->Composite = R600Composite;
2248b7e1c893Smrg    info->accel_state->exa->DoneComposite = R600DoneComposite;
2249b7e1c893Smrg
2250b7e1c893Smrg#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
2251b7e1c893Smrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
2252b7e1c893Smrg
2253b7e1c893Smrg    info->accel_state->exa->maxPitchBytes = 32768;
2254b7e1c893Smrg    info->accel_state->exa->maxX = 8192;
2255b7e1c893Smrg#else
2256b7e1c893Smrg    info->accel_state->exa->maxX = 8192;
2257b7e1c893Smrg#endif
2258b7e1c893Smrg    info->accel_state->exa->maxY = 8192;
2259b7e1c893Smrg
2260b7e1c893Smrg    /* not supported yet */
2261ad43ddacSmrg    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2262ad43ddacSmrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2263ad43ddacSmrg	info->accel_state->vsync = TRUE;
2264ad43ddacSmrg    } else
2265ad43ddacSmrg	info->accel_state->vsync = FALSE;
2266b7e1c893Smrg
2267b7e1c893Smrg    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
22682f39173dSmrg	free(info->accel_state->exa);
2269b7e1c893Smrg	return FALSE;
2270b7e1c893Smrg    }
2271b7e1c893Smrg
2272ad43ddacSmrg#ifdef XF86DRM_MODE
2273ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2274ad43ddacSmrg    if (!info->cs)
2275ad43ddacSmrg#endif
2276ad43ddacSmrg#endif
2277ad43ddacSmrg	if (!info->gartLocation)
2278ad43ddacSmrg	    return FALSE;
2279b7e1c893Smrg
2280b7e1c893Smrg    info->accel_state->XInited3D = FALSE;
2281b7e1c893Smrg    info->accel_state->copy_area = NULL;
2282ad43ddacSmrg    info->accel_state->src_obj[0].bo = NULL;
2283ad43ddacSmrg    info->accel_state->src_obj[1].bo = NULL;
2284ad43ddacSmrg    info->accel_state->dst_obj.bo = NULL;
2285ad43ddacSmrg    info->accel_state->copy_area_bo = NULL;
2286921a55d8Smrg    info->accel_state->vbo.vb_start_op = -1;
22870974d292Smrg    info->accel_state->finish_op = r600_finish_op;
2288921a55d8Smrg    info->accel_state->vbo.verts_per_op = 3;
22890974d292Smrg    RADEONVlineHelperClear(pScrn);
2290ad43ddacSmrg
2291ad43ddacSmrg#ifdef XF86DRM_MODE
2292ad43ddacSmrg    radeon_vbo_init_lists(pScrn);
2293ad43ddacSmrg#endif
2294b7e1c893Smrg
2295b7e1c893Smrg    if (!R600AllocShaders(pScrn, pScreen))
2296b7e1c893Smrg	return FALSE;
2297b7e1c893Smrg
2298b7e1c893Smrg    if (!R600LoadShaders(pScrn))
2299b7e1c893Smrg	return FALSE;
2300b7e1c893Smrg
2301b7e1c893Smrg    exaMarkSync(pScreen);
2302b7e1c893Smrg
2303b7e1c893Smrg    return TRUE;
2304b7e1c893Smrg
2305b7e1c893Smrg}
2306b7e1c893Smrg
2307