r600_exa.c revision 7821949a
1de2362d3Smrg/*
2de2362d3Smrg * Copyright 2008 Advanced Micro Devices, Inc.
3de2362d3Smrg *
4de2362d3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5de2362d3Smrg * copy of this software and associated documentation files (the "Software"),
6de2362d3Smrg * to deal in the Software without restriction, including without limitation
7de2362d3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8de2362d3Smrg * and/or sell copies of the Software, and to permit persons to whom the
9de2362d3Smrg * Software is furnished to do so, subject to the following conditions:
10de2362d3Smrg *
11de2362d3Smrg * The above copyright notice and this permission notice (including the next
12de2362d3Smrg * paragraph) shall be included in all copies or substantial portions of the
13de2362d3Smrg * Software.
14de2362d3Smrg *
15de2362d3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16de2362d3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17de2362d3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18de2362d3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19de2362d3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20de2362d3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21de2362d3Smrg * SOFTWARE.
22de2362d3Smrg *
23de2362d3Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24de2362d3Smrg *
25de2362d3Smrg */
26de2362d3Smrg
27de2362d3Smrg#ifdef HAVE_CONFIG_H
28de2362d3Smrg#include "config.h"
29de2362d3Smrg#endif
30de2362d3Smrg
31de2362d3Smrg#include "xf86.h"
32de2362d3Smrg
33de2362d3Smrg#include "exa.h"
34de2362d3Smrg
35de2362d3Smrg#include "radeon.h"
367821949aSmrg#include "radeon_macros.h"
37de2362d3Smrg#include "radeon_reg.h"
38de2362d3Smrg#include "r600_shader.h"
39de2362d3Smrg#include "r600_reg.h"
40de2362d3Smrg#include "r600_state.h"
41de2362d3Smrg#include "radeon_exa_shared.h"
42de2362d3Smrg#include "radeon_vbo.h"
43de2362d3Smrg
44de2362d3Smrg/* #define SHOW_VERTEXES */
45de2362d3Smrg
46de2362d3SmrgBool
47de2362d3SmrgR600SetAccelState(ScrnInfoPtr pScrn,
48de2362d3Smrg		  struct r600_accel_object *src0,
49de2362d3Smrg		  struct r600_accel_object *src1,
50de2362d3Smrg		  struct r600_accel_object *dst,
51de2362d3Smrg		  uint32_t vs_offset, uint32_t ps_offset,
52de2362d3Smrg		  int rop, Pixel planemask)
53de2362d3Smrg{
54de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
55de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
567821949aSmrg    uint32_t pitch_align = 0x7, base_align = 0xff;
577821949aSmrg#if defined(XF86DRM_MODE)
58de2362d3Smrg    int ret;
597821949aSmrg#endif
60de2362d3Smrg
61de2362d3Smrg    if (src0) {
62de2362d3Smrg	memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object));
63de2362d3Smrg	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
647821949aSmrg#if defined(XF86DRM_MODE)
657821949aSmrg	if (info->cs && src0->surface) {
66de2362d3Smrg		accel_state->src_size[0] = src0->surface->bo_size;
677821949aSmrg	}
687821949aSmrg#endif
69de2362d3Smrg
70de2362d3Smrg	/* bad pitch */
71de2362d3Smrg	if (accel_state->src_obj[0].pitch & pitch_align)
72de2362d3Smrg	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
73de2362d3Smrg
747821949aSmrg	/* bad offset */
757821949aSmrg	if (accel_state->src_obj[0].offset & base_align)
767821949aSmrg	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[0].offset));
777821949aSmrg
78de2362d3Smrg    } else {
79de2362d3Smrg	memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
80de2362d3Smrg	accel_state->src_size[0] = 0;
81de2362d3Smrg    }
82de2362d3Smrg
83de2362d3Smrg    if (src1) {
84de2362d3Smrg	memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object));
85de2362d3Smrg	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
867821949aSmrg#if defined(XF86DRM_MODE)
877821949aSmrg	if (info->cs && src1->surface) {
88de2362d3Smrg		accel_state->src_size[1] = src1->surface->bo_size;
89de2362d3Smrg	}
907821949aSmrg#endif
91de2362d3Smrg
92de2362d3Smrg	/* bad pitch */
93de2362d3Smrg	if (accel_state->src_obj[1].pitch & pitch_align)
94de2362d3Smrg	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
95de2362d3Smrg
967821949aSmrg	/* bad offset */
977821949aSmrg	if (accel_state->src_obj[1].offset & base_align)
987821949aSmrg	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[1].offset));
99de2362d3Smrg    } else {
100de2362d3Smrg	memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
101de2362d3Smrg	accel_state->src_size[1] = 0;
102de2362d3Smrg    }
103de2362d3Smrg
104de2362d3Smrg    if (dst) {
105de2362d3Smrg	memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object));
106de2362d3Smrg	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
1077821949aSmrg#if defined(XF86DRM_MODE)
1087821949aSmrg	if (info->cs && dst->surface) {
109de2362d3Smrg		accel_state->dst_size = dst->surface->bo_size;
110de2362d3Smrg	} else
1117821949aSmrg#endif
112de2362d3Smrg	{
113de2362d3Smrg		accel_state->dst_obj.tiling_flags = 0;
114de2362d3Smrg	}
115de2362d3Smrg	if (accel_state->dst_obj.pitch & pitch_align)
116de2362d3Smrg	    RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
117de2362d3Smrg
1187821949aSmrg	if (accel_state->dst_obj.offset & base_align)
1197821949aSmrg	    RADEON_FALLBACK(("Bad dst offset 0x%08x\n", accel_state->dst_obj.offset));
120de2362d3Smrg    } else {
121de2362d3Smrg	memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
122de2362d3Smrg	accel_state->dst_size = 0;
123de2362d3Smrg    }
124de2362d3Smrg
1257821949aSmrg#ifdef XF86DRM_MODE
1267821949aSmrg    if (info->cs && CS_FULL(info->cs))
127de2362d3Smrg	radeon_cs_flush_indirect(pScrn);
1287821949aSmrg#endif
129de2362d3Smrg
130de2362d3Smrg    accel_state->rop = rop;
131de2362d3Smrg    accel_state->planemask = planemask;
132de2362d3Smrg
133de2362d3Smrg    accel_state->vs_size = 512;
134de2362d3Smrg    accel_state->ps_size = 512;
1357821949aSmrg#if defined(XF86DRM_MODE)
1367821949aSmrg    if (info->cs) {
1377821949aSmrg	accel_state->vs_mc_addr = vs_offset;
1387821949aSmrg	accel_state->ps_mc_addr = ps_offset;
1397821949aSmrg
1407821949aSmrg	radeon_cs_space_reset_bos(info->cs);
1417821949aSmrg	radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
1427821949aSmrg					  RADEON_GEM_DOMAIN_VRAM, 0);
1437821949aSmrg	if (accel_state->src_obj[0].bo)
1447821949aSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
1457821949aSmrg					      accel_state->src_obj[0].domain, 0);
1467821949aSmrg	if (accel_state->src_obj[1].bo)
1477821949aSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
1487821949aSmrg					      accel_state->src_obj[1].domain, 0);
1497821949aSmrg	if (accel_state->dst_obj.bo)
1507821949aSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
1517821949aSmrg					      0, accel_state->dst_obj.domain);
1527821949aSmrg	ret = radeon_cs_space_check(info->cs);
1537821949aSmrg	if (ret)
1547821949aSmrg	    RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
155de2362d3Smrg
1567821949aSmrg    } else
1577821949aSmrg#endif
1587821949aSmrg    {
1597821949aSmrg	accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
1607821949aSmrg	    vs_offset;
1617821949aSmrg	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
1627821949aSmrg	    ps_offset;
1637821949aSmrg    }
164de2362d3Smrg
165de2362d3Smrg    return TRUE;
166de2362d3Smrg}
167de2362d3Smrg
168de2362d3Smrgstatic Bool
169de2362d3SmrgR600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
170de2362d3Smrg{
171de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
172de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
173de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
174de2362d3Smrg    cb_config_t     cb_conf;
175de2362d3Smrg    shader_config_t vs_conf, ps_conf;
176de2362d3Smrg    uint32_t a, r, g, b;
177de2362d3Smrg    float ps_alu_consts[4];
178de2362d3Smrg    struct r600_accel_object dst;
179de2362d3Smrg
180de2362d3Smrg    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
181de2362d3Smrg	RADEON_FALLBACK(("R600CheckDatatype failed\n"));
182de2362d3Smrg    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
183de2362d3Smrg	RADEON_FALLBACK(("invalid planemask\n"));
184de2362d3Smrg
1857821949aSmrg#if defined(XF86DRM_MODE)
1867821949aSmrg    if (info->cs) {
1877821949aSmrg	dst.offset = 0;
1887821949aSmrg	dst.bo = radeon_get_pixmap_bo(pPix);
1897821949aSmrg	dst.tiling_flags = radeon_get_pixmap_tiling(pPix);
1907821949aSmrg	dst.surface = radeon_get_pixmap_surface(pPix);
1917821949aSmrg    } else
1927821949aSmrg#endif
1937821949aSmrg    {
1947821949aSmrg	dst.offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
1957821949aSmrg	dst.bo = NULL;
1967821949aSmrg    }
197de2362d3Smrg
198de2362d3Smrg    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
199de2362d3Smrg    dst.width = pPix->drawable.width;
200de2362d3Smrg    dst.height = pPix->drawable.height;
201de2362d3Smrg    dst.bpp = pPix->drawable.bitsPerPixel;
202de2362d3Smrg    dst.domain = RADEON_GEM_DOMAIN_VRAM;
203de2362d3Smrg
204de2362d3Smrg    if (!R600SetAccelState(pScrn,
205de2362d3Smrg			   NULL,
206de2362d3Smrg			   NULL,
207de2362d3Smrg			   &dst,
208de2362d3Smrg			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
209de2362d3Smrg			   alu, pm))
210de2362d3Smrg	return FALSE;
211de2362d3Smrg
212de2362d3Smrg    CLEAR (cb_conf);
213de2362d3Smrg    CLEAR (vs_conf);
214de2362d3Smrg    CLEAR (ps_conf);
215de2362d3Smrg
216de2362d3Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
217de2362d3Smrg    radeon_cp_start(pScrn);
218de2362d3Smrg
2197821949aSmrg    r600_set_default_state(pScrn, accel_state->ib);
220de2362d3Smrg
2217821949aSmrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
2227821949aSmrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
2237821949aSmrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
224de2362d3Smrg
225de2362d3Smrg    /* Shader */
226de2362d3Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
227de2362d3Smrg    vs_conf.shader_size         = accel_state->vs_size;
228de2362d3Smrg    vs_conf.num_gprs            = 2;
229de2362d3Smrg    vs_conf.stack_size          = 0;
230de2362d3Smrg    vs_conf.bo                  = accel_state->shaders_bo;
2317821949aSmrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
232de2362d3Smrg
233de2362d3Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
234de2362d3Smrg    ps_conf.shader_size         = accel_state->ps_size;
235de2362d3Smrg    ps_conf.num_gprs            = 1;
236de2362d3Smrg    ps_conf.stack_size          = 0;
237de2362d3Smrg    ps_conf.uncached_first_inst = 1;
238de2362d3Smrg    ps_conf.clamp_consts        = 0;
239de2362d3Smrg    ps_conf.export_mode         = 2;
240de2362d3Smrg    ps_conf.bo                  = accel_state->shaders_bo;
2417821949aSmrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
242de2362d3Smrg
243de2362d3Smrg    cb_conf.id = 0;
244de2362d3Smrg    cb_conf.w = accel_state->dst_obj.pitch;
245de2362d3Smrg    cb_conf.h = accel_state->dst_obj.height;
2467821949aSmrg    cb_conf.base = accel_state->dst_obj.offset;
247de2362d3Smrg    cb_conf.bo = accel_state->dst_obj.bo;
2487821949aSmrg#ifdef XF86DRM_MODE
249de2362d3Smrg    cb_conf.surface = accel_state->dst_obj.surface;
2507821949aSmrg#endif
251de2362d3Smrg
252de2362d3Smrg    if (accel_state->dst_obj.bpp == 8) {
253de2362d3Smrg	cb_conf.format = COLOR_8;
254de2362d3Smrg	cb_conf.comp_swap = 3; /* A */
255de2362d3Smrg    } else if (accel_state->dst_obj.bpp == 16) {
256de2362d3Smrg	cb_conf.format = COLOR_5_6_5;
257de2362d3Smrg	cb_conf.comp_swap = 2; /* RGB */
258de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
259de2362d3Smrg	cb_conf.endian = ENDIAN_8IN16;
260de2362d3Smrg#endif
261de2362d3Smrg    } else {
262de2362d3Smrg	cb_conf.format = COLOR_8_8_8_8;
263de2362d3Smrg	cb_conf.comp_swap = 1; /* ARGB */
264de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
265de2362d3Smrg	cb_conf.endian = ENDIAN_8IN32;
266de2362d3Smrg#endif
267de2362d3Smrg    }
268de2362d3Smrg    cb_conf.source_format = 1;
269de2362d3Smrg    cb_conf.blend_clamp = 1;
270de2362d3Smrg    /* Render setup */
271de2362d3Smrg    if (accel_state->planemask & 0x000000ff)
272de2362d3Smrg	cb_conf.pmask |= 4; /* B */
273de2362d3Smrg    if (accel_state->planemask & 0x0000ff00)
274de2362d3Smrg	cb_conf.pmask |= 2; /* G */
275de2362d3Smrg    if (accel_state->planemask & 0x00ff0000)
276de2362d3Smrg	cb_conf.pmask |= 1; /* R */
277de2362d3Smrg    if (accel_state->planemask & 0xff000000)
278de2362d3Smrg	cb_conf.pmask |= 8; /* A */
279de2362d3Smrg    cb_conf.rop = accel_state->rop;
280de2362d3Smrg    if (accel_state->dst_obj.tiling_flags == 0)
281de2362d3Smrg	cb_conf.array_mode = 0;
2827821949aSmrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
283de2362d3Smrg
2847821949aSmrg    r600_set_spi(pScrn, accel_state->ib, 0, 0);
285de2362d3Smrg
286de2362d3Smrg    /* PS alu constants */
287de2362d3Smrg    if (accel_state->dst_obj.bpp == 16) {
288de2362d3Smrg	r = (fg >> 11) & 0x1f;
289de2362d3Smrg	g = (fg >> 5) & 0x3f;
290de2362d3Smrg	b = (fg >> 0) & 0x1f;
291de2362d3Smrg	ps_alu_consts[0] = (float)r / 31; /* R */
292de2362d3Smrg	ps_alu_consts[1] = (float)g / 63; /* G */
293de2362d3Smrg	ps_alu_consts[2] = (float)b / 31; /* B */
294de2362d3Smrg	ps_alu_consts[3] = 1.0; /* A */
295de2362d3Smrg    } else if (accel_state->dst_obj.bpp == 8) {
296de2362d3Smrg	a = (fg >> 0) & 0xff;
297de2362d3Smrg	ps_alu_consts[0] = 0.0; /* R */
298de2362d3Smrg	ps_alu_consts[1] = 0.0; /* G */
299de2362d3Smrg	ps_alu_consts[2] = 0.0; /* B */
300de2362d3Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
301de2362d3Smrg    } else {
302de2362d3Smrg	a = (fg >> 24) & 0xff;
303de2362d3Smrg	r = (fg >> 16) & 0xff;
304de2362d3Smrg	g = (fg >> 8) & 0xff;
305de2362d3Smrg	b = (fg >> 0) & 0xff;
306de2362d3Smrg	ps_alu_consts[0] = (float)r / 255; /* R */
307de2362d3Smrg	ps_alu_consts[1] = (float)g / 255; /* G */
308de2362d3Smrg	ps_alu_consts[2] = (float)b / 255; /* B */
309de2362d3Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
310de2362d3Smrg    }
3117821949aSmrg    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
312de2362d3Smrg			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
313de2362d3Smrg
314de2362d3Smrg    if (accel_state->vsync)
315de2362d3Smrg	RADEONVlineHelperClear(pScrn);
316de2362d3Smrg
317de2362d3Smrg    accel_state->dst_pix = pPix;
318de2362d3Smrg    accel_state->fg = fg;
319de2362d3Smrg
320de2362d3Smrg    return TRUE;
321de2362d3Smrg}
322de2362d3Smrg
323de2362d3Smrgstatic void
324de2362d3SmrgR600DoneSolid(PixmapPtr pPix)
325de2362d3Smrg{
326de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
327de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
328de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
329de2362d3Smrg
330de2362d3Smrg    if (accel_state->vsync)
3317821949aSmrg	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
332de2362d3Smrg				accel_state->vline_crtc,
333de2362d3Smrg				accel_state->vline_y1,
334de2362d3Smrg				accel_state->vline_y2);
335de2362d3Smrg
336de2362d3Smrg    r600_finish_op(pScrn, 8);
337de2362d3Smrg}
338de2362d3Smrg
339de2362d3Smrgstatic void
340de2362d3SmrgR600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
341de2362d3Smrg{
342de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
343de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
344de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
345de2362d3Smrg    float *vb;
346de2362d3Smrg
3477821949aSmrg#ifdef XF86DRM_MODE
3487821949aSmrg    if (info->cs && CS_FULL(info->cs)) {
349de2362d3Smrg	R600DoneSolid(info->accel_state->dst_pix);
350de2362d3Smrg	radeon_cs_flush_indirect(pScrn);
351de2362d3Smrg	R600PrepareSolid(accel_state->dst_pix,
352de2362d3Smrg			 accel_state->rop,
353de2362d3Smrg			 accel_state->planemask,
354de2362d3Smrg			 accel_state->fg);
355de2362d3Smrg    }
3567821949aSmrg#endif
357de2362d3Smrg
358de2362d3Smrg    if (accel_state->vsync)
359de2362d3Smrg	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
360de2362d3Smrg
361de2362d3Smrg    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
362de2362d3Smrg
363de2362d3Smrg    vb[0] = (float)x1;
364de2362d3Smrg    vb[1] = (float)y1;
365de2362d3Smrg
366de2362d3Smrg    vb[2] = (float)x1;
367de2362d3Smrg    vb[3] = (float)y2;
368de2362d3Smrg
369de2362d3Smrg    vb[4] = (float)x2;
370de2362d3Smrg    vb[5] = (float)y2;
371de2362d3Smrg
372de2362d3Smrg    radeon_vbo_commit(pScrn, &accel_state->vbo);
373de2362d3Smrg}
374de2362d3Smrg
375de2362d3Smrgstatic void
376de2362d3SmrgR600DoPrepareCopy(ScrnInfoPtr pScrn)
377de2362d3Smrg{
378de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
379de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
380de2362d3Smrg    cb_config_t     cb_conf;
381de2362d3Smrg    tex_resource_t  tex_res;
382de2362d3Smrg    tex_sampler_t   tex_samp;
383de2362d3Smrg    shader_config_t vs_conf, ps_conf;
384de2362d3Smrg
385de2362d3Smrg    CLEAR (cb_conf);
386de2362d3Smrg    CLEAR (tex_res);
387de2362d3Smrg    CLEAR (tex_samp);
388de2362d3Smrg    CLEAR (vs_conf);
389de2362d3Smrg    CLEAR (ps_conf);
390de2362d3Smrg
391de2362d3Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
392de2362d3Smrg    radeon_cp_start(pScrn);
393de2362d3Smrg
3947821949aSmrg    r600_set_default_state(pScrn, accel_state->ib);
395de2362d3Smrg
3967821949aSmrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
3977821949aSmrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
3987821949aSmrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
399de2362d3Smrg
400de2362d3Smrg    /* Shader */
401de2362d3Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
402de2362d3Smrg    vs_conf.shader_size         = accel_state->vs_size;
403de2362d3Smrg    vs_conf.num_gprs            = 2;
404de2362d3Smrg    vs_conf.stack_size          = 0;
405de2362d3Smrg    vs_conf.bo                  = accel_state->shaders_bo;
4067821949aSmrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
407de2362d3Smrg
408de2362d3Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
409de2362d3Smrg    ps_conf.shader_size         = accel_state->ps_size;
410de2362d3Smrg    ps_conf.num_gprs            = 1;
411de2362d3Smrg    ps_conf.stack_size          = 0;
412de2362d3Smrg    ps_conf.uncached_first_inst = 1;
413de2362d3Smrg    ps_conf.clamp_consts        = 0;
414de2362d3Smrg    ps_conf.export_mode         = 2;
415de2362d3Smrg    ps_conf.bo                  = accel_state->shaders_bo;
4167821949aSmrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
417de2362d3Smrg
418de2362d3Smrg    /* Texture */
419de2362d3Smrg    tex_res.id                  = 0;
420de2362d3Smrg    tex_res.w                   = accel_state->src_obj[0].width;
421de2362d3Smrg    tex_res.h                   = accel_state->src_obj[0].height;
422de2362d3Smrg    tex_res.pitch               = accel_state->src_obj[0].pitch;
423de2362d3Smrg    tex_res.depth               = 0;
424de2362d3Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
4257821949aSmrg    tex_res.base                = accel_state->src_obj[0].offset;
4267821949aSmrg    tex_res.mip_base            = accel_state->src_obj[0].offset;
427de2362d3Smrg    tex_res.size                = accel_state->src_size[0];
428de2362d3Smrg    tex_res.bo                  = accel_state->src_obj[0].bo;
429de2362d3Smrg    tex_res.mip_bo              = accel_state->src_obj[0].bo;
4307821949aSmrg#ifdef XF86DRM_MODE
431de2362d3Smrg    tex_res.surface             = accel_state->src_obj[0].surface;
4327821949aSmrg#endif
433de2362d3Smrg    if (accel_state->src_obj[0].bpp == 8) {
434de2362d3Smrg	tex_res.format              = FMT_8;
435de2362d3Smrg	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
436de2362d3Smrg	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
437de2362d3Smrg	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
438de2362d3Smrg	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
439de2362d3Smrg    } else if (accel_state->src_obj[0].bpp == 16) {
440de2362d3Smrg	tex_res.format              = FMT_5_6_5;
441de2362d3Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
442de2362d3Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
443de2362d3Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
444de2362d3Smrg	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
445de2362d3Smrg    } else {
446de2362d3Smrg	tex_res.format              = FMT_8_8_8_8;
447de2362d3Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
448de2362d3Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
449de2362d3Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
450de2362d3Smrg	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
451de2362d3Smrg    }
452de2362d3Smrg
453de2362d3Smrg    tex_res.request_size        = 1;
454de2362d3Smrg    tex_res.base_level          = 0;
455de2362d3Smrg    tex_res.last_level          = 0;
456de2362d3Smrg    tex_res.perf_modulation     = 0;
457de2362d3Smrg    if (accel_state->src_obj[0].tiling_flags == 0)
458de2362d3Smrg	tex_res.tile_mode           = 1;
4597821949aSmrg    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
460de2362d3Smrg
461de2362d3Smrg    tex_samp.id                 = 0;
462de2362d3Smrg    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
463de2362d3Smrg    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
464de2362d3Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
465de2362d3Smrg    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
466de2362d3Smrg    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
467de2362d3Smrg    tex_samp.mc_coord_truncate  = 1;
468de2362d3Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
469de2362d3Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
4707821949aSmrg    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
471de2362d3Smrg
472de2362d3Smrg    cb_conf.id = 0;
473de2362d3Smrg    cb_conf.w = accel_state->dst_obj.pitch;
474de2362d3Smrg    cb_conf.h = accel_state->dst_obj.height;
4757821949aSmrg    cb_conf.base = accel_state->dst_obj.offset;
476de2362d3Smrg    cb_conf.bo = accel_state->dst_obj.bo;
4777821949aSmrg#ifdef XF86DRM_MODE
478de2362d3Smrg    cb_conf.surface = accel_state->dst_obj.surface;
4797821949aSmrg#endif
480de2362d3Smrg    if (accel_state->dst_obj.bpp == 8) {
481de2362d3Smrg	cb_conf.format = COLOR_8;
482de2362d3Smrg	cb_conf.comp_swap = 3; /* A */
483de2362d3Smrg    } else if (accel_state->dst_obj.bpp == 16) {
484de2362d3Smrg	cb_conf.format = COLOR_5_6_5;
485de2362d3Smrg	cb_conf.comp_swap = 2; /* RGB */
486de2362d3Smrg    } else {
487de2362d3Smrg	cb_conf.format = COLOR_8_8_8_8;
488de2362d3Smrg	cb_conf.comp_swap = 1; /* ARGB */
489de2362d3Smrg    }
490de2362d3Smrg    cb_conf.source_format = 1;
491de2362d3Smrg    cb_conf.blend_clamp = 1;
492de2362d3Smrg
493de2362d3Smrg    /* Render setup */
494de2362d3Smrg    if (accel_state->planemask & 0x000000ff)
495de2362d3Smrg	cb_conf.pmask |= 4; /* B */
496de2362d3Smrg    if (accel_state->planemask & 0x0000ff00)
497de2362d3Smrg	cb_conf.pmask |= 2; /* G */
498de2362d3Smrg    if (accel_state->planemask & 0x00ff0000)
499de2362d3Smrg	cb_conf.pmask |= 1; /* R */
500de2362d3Smrg    if (accel_state->planemask & 0xff000000)
501de2362d3Smrg	cb_conf.pmask |= 8; /* A */
502de2362d3Smrg    cb_conf.rop = accel_state->rop;
503de2362d3Smrg    if (accel_state->dst_obj.tiling_flags == 0)
504de2362d3Smrg	cb_conf.array_mode = 0;
5057821949aSmrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
506de2362d3Smrg
5077821949aSmrg    r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
508de2362d3Smrg
509de2362d3Smrg}
510de2362d3Smrg
511de2362d3Smrgstatic void
512de2362d3SmrgR600DoCopy(ScrnInfoPtr pScrn)
513de2362d3Smrg{
514de2362d3Smrg    r600_finish_op(pScrn, 16);
515de2362d3Smrg}
516de2362d3Smrg
517de2362d3Smrgstatic void
518de2362d3SmrgR600DoCopyVline(PixmapPtr pPix)
519de2362d3Smrg{
520de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
521de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
522de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
523de2362d3Smrg
524de2362d3Smrg    if (accel_state->vsync)
5257821949aSmrg	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
526de2362d3Smrg				accel_state->vline_crtc,
527de2362d3Smrg				accel_state->vline_y1,
528de2362d3Smrg				accel_state->vline_y2);
529de2362d3Smrg
530de2362d3Smrg    r600_finish_op(pScrn, 16);
531de2362d3Smrg}
532de2362d3Smrg
533de2362d3Smrgstatic void
534de2362d3SmrgR600AppendCopyVertex(ScrnInfoPtr pScrn,
535de2362d3Smrg		     int srcX, int srcY,
536de2362d3Smrg		     int dstX, int dstY,
537de2362d3Smrg		     int w, int h)
538de2362d3Smrg{
539de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
540de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
541de2362d3Smrg    float *vb;
542de2362d3Smrg
543de2362d3Smrg    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
544de2362d3Smrg
545de2362d3Smrg    vb[0] = (float)dstX;
546de2362d3Smrg    vb[1] = (float)dstY;
547de2362d3Smrg    vb[2] = (float)srcX;
548de2362d3Smrg    vb[3] = (float)srcY;
549de2362d3Smrg
550de2362d3Smrg    vb[4] = (float)dstX;
551de2362d3Smrg    vb[5] = (float)(dstY + h);
552de2362d3Smrg    vb[6] = (float)srcX;
553de2362d3Smrg    vb[7] = (float)(srcY + h);
554de2362d3Smrg
555de2362d3Smrg    vb[8] = (float)(dstX + w);
556de2362d3Smrg    vb[9] = (float)(dstY + h);
557de2362d3Smrg    vb[10] = (float)(srcX + w);
558de2362d3Smrg    vb[11] = (float)(srcY + h);
559de2362d3Smrg
560de2362d3Smrg    radeon_vbo_commit(pScrn, &accel_state->vbo);
561de2362d3Smrg}
562de2362d3Smrg
563de2362d3Smrgstatic Bool
564de2362d3SmrgR600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
565de2362d3Smrg		int xdir, int ydir,
566de2362d3Smrg		int rop,
567de2362d3Smrg		Pixel planemask)
568de2362d3Smrg{
569de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
570de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
571de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
572de2362d3Smrg    struct r600_accel_object src_obj, dst_obj;
573de2362d3Smrg
574de2362d3Smrg    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
575de2362d3Smrg	RADEON_FALLBACK(("R600CheckDatatype src failed\n"));
576de2362d3Smrg    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
577de2362d3Smrg	RADEON_FALLBACK(("R600CheckDatatype dst failed\n"));
578de2362d3Smrg    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
579de2362d3Smrg	RADEON_FALLBACK(("Invalid planemask\n"));
580de2362d3Smrg
581de2362d3Smrg    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
582de2362d3Smrg    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
583de2362d3Smrg
584de2362d3Smrg    accel_state->same_surface = FALSE;
585de2362d3Smrg
5867821949aSmrg#if defined(XF86DRM_MODE)
5877821949aSmrg    if (info->cs) {
5887821949aSmrg	src_obj.offset = 0;
5897821949aSmrg	dst_obj.offset = 0;
5907821949aSmrg	src_obj.bo = radeon_get_pixmap_bo(pSrc);
5917821949aSmrg	dst_obj.bo = radeon_get_pixmap_bo(pDst);
5927821949aSmrg	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
5937821949aSmrg	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
5947821949aSmrg	src_obj.surface = radeon_get_pixmap_surface(pSrc);
5957821949aSmrg	dst_obj.surface = radeon_get_pixmap_surface(pDst);
5967821949aSmrg	if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
5977821949aSmrg	    accel_state->same_surface = TRUE;
5987821949aSmrg    } else
5997821949aSmrg#endif
6007821949aSmrg    {
6017821949aSmrg	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
6027821949aSmrg	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
6037821949aSmrg	if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst))
6047821949aSmrg	    accel_state->same_surface = TRUE;
6057821949aSmrg	src_obj.bo = NULL;
6067821949aSmrg	dst_obj.bo = NULL;
6077821949aSmrg    }
608de2362d3Smrg
609de2362d3Smrg    src_obj.width = pSrc->drawable.width;
610de2362d3Smrg    src_obj.height = pSrc->drawable.height;
611de2362d3Smrg    src_obj.bpp = pSrc->drawable.bitsPerPixel;
612de2362d3Smrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
613de2362d3Smrg
614de2362d3Smrg    dst_obj.width = pDst->drawable.width;
615de2362d3Smrg    dst_obj.height = pDst->drawable.height;
616de2362d3Smrg    dst_obj.bpp = pDst->drawable.bitsPerPixel;
6177821949aSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
618de2362d3Smrg
619de2362d3Smrg    if (!R600SetAccelState(pScrn,
620de2362d3Smrg			   &src_obj,
621de2362d3Smrg			   NULL,
622de2362d3Smrg			   &dst_obj,
623de2362d3Smrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
624de2362d3Smrg			   rop, planemask))
625de2362d3Smrg	return FALSE;
626de2362d3Smrg
627de2362d3Smrg    if (accel_state->same_surface == TRUE) {
6287821949aSmrg#if defined(XF86DRM_MODE)
629de2362d3Smrg	unsigned long size = accel_state->dst_obj.surface->bo_size;
630de2362d3Smrg	unsigned long align = accel_state->dst_obj.surface->bo_alignment;
6317821949aSmrg#else
6327821949aSmrg	unsigned height = pDst->drawable.height;
6337821949aSmrg	unsigned long size = height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
6347821949aSmrg#endif
635de2362d3Smrg
6367821949aSmrg#if defined(XF86DRM_MODE)
6377821949aSmrg	if (info->cs) {
6387821949aSmrg	    if (accel_state->copy_area_bo) {
6397821949aSmrg		radeon_bo_unref(accel_state->copy_area_bo);
6407821949aSmrg		accel_state->copy_area_bo = NULL;
6417821949aSmrg	    }
6427821949aSmrg	    accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, align,
6437821949aSmrg						       RADEON_GEM_DOMAIN_VRAM,
6447821949aSmrg						       0);
6457821949aSmrg	    if (accel_state->copy_area_bo == NULL)
6467821949aSmrg		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
6477821949aSmrg
6487821949aSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
6497821949aSmrg					      0, RADEON_GEM_DOMAIN_VRAM);
6507821949aSmrg	    if (radeon_cs_space_check(info->cs)) {
6517821949aSmrg		radeon_bo_unref(accel_state->copy_area_bo);
6527821949aSmrg		accel_state->copy_area_bo = NULL;
6537821949aSmrg		return FALSE;
6547821949aSmrg	    }
6557821949aSmrg	    accel_state->copy_area = (void*)accel_state->copy_area_bo;
6567821949aSmrg	} else
6577821949aSmrg#endif
6587821949aSmrg	{
6597821949aSmrg	    if (accel_state->copy_area) {
6607821949aSmrg		exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
6617821949aSmrg		accel_state->copy_area = NULL;
6627821949aSmrg	    }
6637821949aSmrg	    accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
6647821949aSmrg	    if (!accel_state->copy_area)
6657821949aSmrg		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
666de2362d3Smrg	}
667de2362d3Smrg    } else
668de2362d3Smrg	R600DoPrepareCopy(pScrn);
669de2362d3Smrg
670de2362d3Smrg    if (accel_state->vsync)
671de2362d3Smrg	RADEONVlineHelperClear(pScrn);
672de2362d3Smrg
673de2362d3Smrg    accel_state->dst_pix = pDst;
674de2362d3Smrg    accel_state->src_pix = pSrc;
675de2362d3Smrg    accel_state->xdir = xdir;
676de2362d3Smrg    accel_state->ydir = ydir;
677de2362d3Smrg
678de2362d3Smrg    return TRUE;
679de2362d3Smrg}
680de2362d3Smrg
681de2362d3Smrgstatic void
682de2362d3SmrgR600DoneCopy(PixmapPtr pDst)
683de2362d3Smrg{
684de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
685de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
686de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
687de2362d3Smrg
688de2362d3Smrg    if (!accel_state->same_surface)
689de2362d3Smrg	R600DoCopyVline(pDst);
690de2362d3Smrg
691de2362d3Smrg    if (accel_state->copy_area) {
6927821949aSmrg	if (!info->cs)
6937821949aSmrg	    exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
694de2362d3Smrg	accel_state->copy_area = NULL;
695de2362d3Smrg    }
696de2362d3Smrg
697de2362d3Smrg}
698de2362d3Smrg
699de2362d3Smrgstatic void
700de2362d3SmrgR600Copy(PixmapPtr pDst,
701de2362d3Smrg	 int srcX, int srcY,
702de2362d3Smrg	 int dstX, int dstY,
703de2362d3Smrg	 int w, int h)
704de2362d3Smrg{
705de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
706de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
707de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
708de2362d3Smrg
709de2362d3Smrg    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
710de2362d3Smrg	return;
711de2362d3Smrg
7127821949aSmrg#ifdef XF86DRM_MODE
7137821949aSmrg    if (info->cs && CS_FULL(info->cs)) {
714de2362d3Smrg	R600DoneCopy(info->accel_state->dst_pix);
715de2362d3Smrg	radeon_cs_flush_indirect(pScrn);
716de2362d3Smrg	R600PrepareCopy(accel_state->src_pix,
717de2362d3Smrg			accel_state->dst_pix,
718de2362d3Smrg			accel_state->xdir,
719de2362d3Smrg			accel_state->ydir,
720de2362d3Smrg			accel_state->rop,
721de2362d3Smrg			accel_state->planemask);
722de2362d3Smrg    }
7237821949aSmrg#endif
724de2362d3Smrg
725de2362d3Smrg    if (accel_state->vsync)
726de2362d3Smrg	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
727de2362d3Smrg
7287821949aSmrg    if (accel_state->same_surface && accel_state->copy_area) {
7297821949aSmrg	uint32_t orig_offset, tmp_offset;
730de2362d3Smrg	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
731de2362d3Smrg	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
732de2362d3Smrg	uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags;
733de2362d3Smrg	uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags;
734de2362d3Smrg	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
735de2362d3Smrg	int orig_rop = accel_state->rop;
736de2362d3Smrg
7377821949aSmrg#if defined(XF86DRM_MODE)
7387821949aSmrg	if (info->cs) {
7397821949aSmrg	    tmp_offset = 0;
7407821949aSmrg	    orig_offset = 0;
7417821949aSmrg	} else
7427821949aSmrg#endif
7437821949aSmrg	{
7447821949aSmrg	    tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset;
7457821949aSmrg	    orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
7467821949aSmrg	}
7477821949aSmrg
748de2362d3Smrg	/* src to tmp */
749de2362d3Smrg	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
750de2362d3Smrg	accel_state->dst_obj.bo = accel_state->copy_area_bo;
7517821949aSmrg	accel_state->dst_obj.offset = tmp_offset;
752de2362d3Smrg	accel_state->dst_obj.tiling_flags = 0;
753de2362d3Smrg	accel_state->rop = 3;
754de2362d3Smrg	R600DoPrepareCopy(pScrn);
755de2362d3Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
756de2362d3Smrg	R600DoCopy(pScrn);
757de2362d3Smrg
758de2362d3Smrg	/* tmp to dst */
759de2362d3Smrg	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
760de2362d3Smrg	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
7617821949aSmrg	accel_state->src_obj[0].offset = tmp_offset;
762de2362d3Smrg	accel_state->src_obj[0].tiling_flags = 0;
763de2362d3Smrg	accel_state->dst_obj.domain = orig_dst_domain;
764de2362d3Smrg	accel_state->dst_obj.bo = orig_bo;
7657821949aSmrg	accel_state->dst_obj.offset = orig_offset;
766de2362d3Smrg	accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags;
767de2362d3Smrg	accel_state->rop = orig_rop;
768de2362d3Smrg	R600DoPrepareCopy(pScrn);
769de2362d3Smrg	R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
770de2362d3Smrg	R600DoCopyVline(pDst);
771de2362d3Smrg
772de2362d3Smrg	/* restore state */
773de2362d3Smrg	accel_state->src_obj[0].domain = orig_src_domain;
774de2362d3Smrg	accel_state->src_obj[0].bo = orig_bo;
7757821949aSmrg	accel_state->src_obj[0].offset = orig_offset;
776de2362d3Smrg	accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags;
777de2362d3Smrg    } else
778de2362d3Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
779de2362d3Smrg
780de2362d3Smrg}
781de2362d3Smrg
782de2362d3Smrgstruct blendinfo {
783de2362d3Smrg    Bool dst_alpha;
784de2362d3Smrg    Bool src_alpha;
785de2362d3Smrg    uint32_t blend_cntl;
786de2362d3Smrg};
787de2362d3Smrg
788de2362d3Smrgstatic struct blendinfo R600BlendOp[] = {
789de2362d3Smrg    /* Clear */
790de2362d3Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
791de2362d3Smrg    /* Src */
792de2362d3Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
793de2362d3Smrg    /* Dst */
794de2362d3Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
795de2362d3Smrg    /* Over */
796de2362d3Smrg    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
797de2362d3Smrg    /* OverReverse */
798de2362d3Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
799de2362d3Smrg    /* In */
800de2362d3Smrg    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
801de2362d3Smrg    /* InReverse */
802de2362d3Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
803de2362d3Smrg    /* Out */
804de2362d3Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
805de2362d3Smrg    /* OutReverse */
806de2362d3Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
807de2362d3Smrg    /* Atop */
808de2362d3Smrg    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
809de2362d3Smrg    /* AtopReverse */
810de2362d3Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
811de2362d3Smrg    /* Xor */
812de2362d3Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
813de2362d3Smrg    /* Add */
814de2362d3Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
815de2362d3Smrg};
816de2362d3Smrg
817de2362d3Smrgstruct formatinfo {
818de2362d3Smrg    unsigned int fmt;
819de2362d3Smrg    uint32_t card_fmt;
820de2362d3Smrg};
821de2362d3Smrg
822de2362d3Smrgstatic struct formatinfo R600TexFormats[] = {
823de2362d3Smrg    {PICT_a8r8g8b8,	FMT_8_8_8_8},
824de2362d3Smrg    {PICT_x8r8g8b8,	FMT_8_8_8_8},
825de2362d3Smrg    {PICT_a8b8g8r8,	FMT_8_8_8_8},
826de2362d3Smrg    {PICT_x8b8g8r8,	FMT_8_8_8_8},
8277821949aSmrg#ifdef PICT_TYPE_BGRA
828de2362d3Smrg    {PICT_b8g8r8a8,	FMT_8_8_8_8},
829de2362d3Smrg    {PICT_b8g8r8x8,	FMT_8_8_8_8},
8307821949aSmrg#endif
831de2362d3Smrg    {PICT_r5g6b5,	FMT_5_6_5},
832de2362d3Smrg    {PICT_a1r5g5b5,	FMT_1_5_5_5},
833de2362d3Smrg    {PICT_x1r5g5b5,     FMT_1_5_5_5},
834de2362d3Smrg    {PICT_a8,		FMT_8},
835de2362d3Smrg};
836de2362d3Smrg
837de2362d3Smrgstatic uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
838de2362d3Smrg{
839de2362d3Smrg    uint32_t sblend, dblend;
840de2362d3Smrg
841de2362d3Smrg    sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
842de2362d3Smrg    dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
843de2362d3Smrg
844de2362d3Smrg    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
845de2362d3Smrg     * it as always 1.
846de2362d3Smrg     */
847de2362d3Smrg    if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
848de2362d3Smrg	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
849de2362d3Smrg	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
850de2362d3Smrg	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
851de2362d3Smrg	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
852de2362d3Smrg    }
853de2362d3Smrg
854de2362d3Smrg    /* If the source alpha is being used, then we should only be in a case where
855de2362d3Smrg     * the source blend factor is 0, and the source blend value is the mask
856de2362d3Smrg     * channels multiplied by the source picture's alpha.
857de2362d3Smrg     */
858de2362d3Smrg    if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
859de2362d3Smrg	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
860de2362d3Smrg	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
861de2362d3Smrg	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
862de2362d3Smrg	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
863de2362d3Smrg	}
864de2362d3Smrg    }
865de2362d3Smrg
866de2362d3Smrg    return sblend | dblend;
867de2362d3Smrg}
868de2362d3Smrg
869de2362d3Smrgstatic Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
870de2362d3Smrg{
871de2362d3Smrg    switch (pDstPicture->format) {
872de2362d3Smrg    case PICT_a8r8g8b8:
873de2362d3Smrg    case PICT_x8r8g8b8:
874de2362d3Smrg    case PICT_a8b8g8r8:
875de2362d3Smrg    case PICT_x8b8g8r8:
8767821949aSmrg#ifdef PICT_TYPE_BGRA
877de2362d3Smrg    case PICT_b8g8r8a8:
878de2362d3Smrg    case PICT_b8g8r8x8:
8797821949aSmrg#endif
880de2362d3Smrg	*dst_format = COLOR_8_8_8_8;
881de2362d3Smrg	break;
882de2362d3Smrg    case PICT_r5g6b5:
883de2362d3Smrg	*dst_format = COLOR_5_6_5;
884de2362d3Smrg	break;
885de2362d3Smrg    case PICT_a1r5g5b5:
886de2362d3Smrg    case PICT_x1r5g5b5:
887de2362d3Smrg	*dst_format = COLOR_1_5_5_5;
888de2362d3Smrg	break;
889de2362d3Smrg    case PICT_a8:
890de2362d3Smrg	*dst_format = COLOR_8;
891de2362d3Smrg	break;
892de2362d3Smrg    default:
893de2362d3Smrg	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
894de2362d3Smrg	       (int)pDstPicture->format));
895de2362d3Smrg    }
896de2362d3Smrg    return TRUE;
897de2362d3Smrg}
898de2362d3Smrg
899de2362d3Smrgstatic Bool R600CheckCompositeTexture(PicturePtr pPict,
900de2362d3Smrg				      PicturePtr pDstPict,
901de2362d3Smrg				      int op,
902de2362d3Smrg				      int unit)
903de2362d3Smrg{
904de2362d3Smrg    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
905de2362d3Smrg    unsigned int i;
906de2362d3Smrg
907de2362d3Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
908de2362d3Smrg	if (R600TexFormats[i].fmt == pPict->format)
909de2362d3Smrg	    break;
910de2362d3Smrg    }
911de2362d3Smrg    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
912de2362d3Smrg	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
913de2362d3Smrg			 (int)pPict->format));
914de2362d3Smrg
915de2362d3Smrg    if (pPict->filter != PictFilterNearest &&
916de2362d3Smrg	pPict->filter != PictFilterBilinear)
917de2362d3Smrg	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
918de2362d3Smrg
919de2362d3Smrg    /* for REPEAT_NONE, Render semantics are that sampling outside the source
920de2362d3Smrg     * picture results in alpha=0 pixels. We can implement this with a border color
921de2362d3Smrg     * *if* our source texture has an alpha channel, otherwise we need to fall
922de2362d3Smrg     * back. If we're not transformed then we hope that upper layers have clipped
923de2362d3Smrg     * rendering to the bounds of the source drawable, in which case it doesn't
924de2362d3Smrg     * matter. I have not, however, verified that the X server always does such
925de2362d3Smrg     * clipping.
926de2362d3Smrg     */
927de2362d3Smrg    /* FIXME R6xx */
928de2362d3Smrg    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
929de2362d3Smrg	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
930de2362d3Smrg	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
931de2362d3Smrg    }
932de2362d3Smrg
933de2362d3Smrg    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
934de2362d3Smrg	RADEON_FALLBACK(("non-affine transforms not supported\n"));
935de2362d3Smrg
936de2362d3Smrg    return TRUE;
937de2362d3Smrg}
938de2362d3Smrg
939de2362d3Smrgstatic Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
940de2362d3Smrg					int unit)
941de2362d3Smrg{
942de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
943de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
944de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
945de2362d3Smrg    unsigned int repeatType;
946de2362d3Smrg    unsigned int i;
947de2362d3Smrg    tex_resource_t  tex_res;
948de2362d3Smrg    tex_sampler_t   tex_samp;
949de2362d3Smrg    int pix_r, pix_g, pix_b, pix_a;
950de2362d3Smrg    float vs_alu_consts[8];
951de2362d3Smrg
952de2362d3Smrg    CLEAR (tex_res);
953de2362d3Smrg    CLEAR (tex_samp);
954de2362d3Smrg
955de2362d3Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
956de2362d3Smrg	if (R600TexFormats[i].fmt == pPict->format)
957de2362d3Smrg	    break;
958de2362d3Smrg    }
959de2362d3Smrg
960de2362d3Smrg    /* Texture */
961de2362d3Smrg    if (pPict->pDrawable) {
962de2362d3Smrg	tex_res.w               = pPict->pDrawable->width;
963de2362d3Smrg	tex_res.h               = pPict->pDrawable->height;
964de2362d3Smrg	repeatType              = pPict->repeat ? pPict->repeatType : RepeatNone;
965de2362d3Smrg    } else {
966de2362d3Smrg	tex_res.w               = 1;
967de2362d3Smrg	tex_res.h               = 1;
968de2362d3Smrg	repeatType              = RepeatNormal;
969de2362d3Smrg    }
970de2362d3Smrg    tex_res.id                  = unit;
971de2362d3Smrg    tex_res.pitch               = accel_state->src_obj[unit].pitch;
972de2362d3Smrg    tex_res.depth               = 0;
973de2362d3Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
9747821949aSmrg    tex_res.base                = accel_state->src_obj[unit].offset;
9757821949aSmrg    tex_res.mip_base            = accel_state->src_obj[unit].offset;
976de2362d3Smrg    tex_res.size                = accel_state->src_size[unit];
977de2362d3Smrg    tex_res.format              = R600TexFormats[i].card_fmt;
978de2362d3Smrg    tex_res.bo                  = accel_state->src_obj[unit].bo;
979de2362d3Smrg    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
9807821949aSmrg#ifdef XF86DRM_MODE
981de2362d3Smrg    tex_res.surface             = accel_state->src_obj[unit].surface;
9827821949aSmrg#endif
983de2362d3Smrg    tex_res.request_size        = 1;
984de2362d3Smrg
985de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
986de2362d3Smrg    switch (accel_state->src_obj[unit].bpp) {
987de2362d3Smrg    case 16:
988de2362d3Smrg	tex_res.endian = SQ_ENDIAN_8IN16;
989de2362d3Smrg	break;
990de2362d3Smrg    case 32:
991de2362d3Smrg	tex_res.endian = SQ_ENDIAN_8IN32;
992de2362d3Smrg	break;
993de2362d3Smrg    default :
994de2362d3Smrg	break;
995de2362d3Smrg    }
996de2362d3Smrg#endif
997de2362d3Smrg
998de2362d3Smrg    /* component swizzles */
999de2362d3Smrg    switch (pPict->format) {
1000de2362d3Smrg    case PICT_a1r5g5b5:
1001de2362d3Smrg    case PICT_a8r8g8b8:
1002de2362d3Smrg	pix_r = SQ_SEL_Z; /* R */
1003de2362d3Smrg	pix_g = SQ_SEL_Y; /* G */
1004de2362d3Smrg	pix_b = SQ_SEL_X; /* B */
1005de2362d3Smrg	pix_a = SQ_SEL_W; /* A */
1006de2362d3Smrg	break;
1007de2362d3Smrg    case PICT_a8b8g8r8:
1008de2362d3Smrg	pix_r = SQ_SEL_X; /* R */
1009de2362d3Smrg	pix_g = SQ_SEL_Y; /* G */
1010de2362d3Smrg	pix_b = SQ_SEL_Z; /* B */
1011de2362d3Smrg	pix_a = SQ_SEL_W; /* A */
1012de2362d3Smrg	break;
1013de2362d3Smrg    case PICT_x8b8g8r8:
1014de2362d3Smrg	pix_r = SQ_SEL_X; /* R */
1015de2362d3Smrg	pix_g = SQ_SEL_Y; /* G */
1016de2362d3Smrg	pix_b = SQ_SEL_Z; /* B */
1017de2362d3Smrg	pix_a = SQ_SEL_1; /* A */
1018de2362d3Smrg	break;
10197821949aSmrg#ifdef PICT_TYPE_BGRA
1020de2362d3Smrg    case PICT_b8g8r8a8:
1021de2362d3Smrg	pix_r = SQ_SEL_Y; /* R */
1022de2362d3Smrg	pix_g = SQ_SEL_Z; /* G */
1023de2362d3Smrg	pix_b = SQ_SEL_W; /* B */
1024de2362d3Smrg	pix_a = SQ_SEL_X; /* A */
1025de2362d3Smrg	break;
1026de2362d3Smrg    case PICT_b8g8r8x8:
1027de2362d3Smrg	pix_r = SQ_SEL_Y; /* R */
1028de2362d3Smrg	pix_g = SQ_SEL_Z; /* G */
1029de2362d3Smrg	pix_b = SQ_SEL_W; /* B */
1030de2362d3Smrg	pix_a = SQ_SEL_1; /* A */
1031de2362d3Smrg	break;
10327821949aSmrg#endif
1033de2362d3Smrg    case PICT_x1r5g5b5:
1034de2362d3Smrg    case PICT_x8r8g8b8:
1035de2362d3Smrg    case PICT_r5g6b5:
1036de2362d3Smrg	pix_r = SQ_SEL_Z; /* R */
1037de2362d3Smrg	pix_g = SQ_SEL_Y; /* G */
1038de2362d3Smrg	pix_b = SQ_SEL_X; /* B */
1039de2362d3Smrg	pix_a = SQ_SEL_1; /* A */
1040de2362d3Smrg	break;
1041de2362d3Smrg    case PICT_a8:
1042de2362d3Smrg	pix_r = SQ_SEL_0; /* R */
1043de2362d3Smrg	pix_g = SQ_SEL_0; /* G */
1044de2362d3Smrg	pix_b = SQ_SEL_0; /* B */
1045de2362d3Smrg	pix_a = SQ_SEL_X; /* A */
1046de2362d3Smrg	break;
1047de2362d3Smrg    default:
1048de2362d3Smrg	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
1049de2362d3Smrg    }
1050de2362d3Smrg
1051de2362d3Smrg    if (unit == 0) {
1052de2362d3Smrg	if (!accel_state->msk_pic) {
1053de2362d3Smrg	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
1054de2362d3Smrg		pix_r = SQ_SEL_0;
1055de2362d3Smrg		pix_g = SQ_SEL_0;
1056de2362d3Smrg		pix_b = SQ_SEL_0;
1057de2362d3Smrg	    }
1058de2362d3Smrg
1059de2362d3Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1060de2362d3Smrg		pix_a = SQ_SEL_1;
1061de2362d3Smrg	} else {
1062de2362d3Smrg	    if (accel_state->component_alpha) {
1063de2362d3Smrg		if (accel_state->src_alpha) {
1064de2362d3Smrg		    if (PICT_FORMAT_A(pPict->format) == 0) {
1065de2362d3Smrg			pix_r = SQ_SEL_1;
1066de2362d3Smrg			pix_g = SQ_SEL_1;
1067de2362d3Smrg			pix_b = SQ_SEL_1;
1068de2362d3Smrg			pix_a = SQ_SEL_1;
1069de2362d3Smrg		    } else {
1070de2362d3Smrg			pix_r = pix_a;
1071de2362d3Smrg			pix_g = pix_a;
1072de2362d3Smrg			pix_b = pix_a;
1073de2362d3Smrg		    }
1074de2362d3Smrg		} else {
1075de2362d3Smrg		    if (PICT_FORMAT_A(pPict->format) == 0)
1076de2362d3Smrg			pix_a = SQ_SEL_1;
1077de2362d3Smrg		}
1078de2362d3Smrg	    } else {
1079de2362d3Smrg		if (PICT_FORMAT_RGB(pPict->format) == 0) {
1080de2362d3Smrg		    pix_r = SQ_SEL_0;
1081de2362d3Smrg		    pix_g = SQ_SEL_0;
1082de2362d3Smrg		    pix_b = SQ_SEL_0;
1083de2362d3Smrg		}
1084de2362d3Smrg
1085de2362d3Smrg		if (PICT_FORMAT_A(pPict->format) == 0)
1086de2362d3Smrg		    pix_a = SQ_SEL_1;
1087de2362d3Smrg	    }
1088de2362d3Smrg	}
1089de2362d3Smrg    } else {
1090de2362d3Smrg	if (accel_state->component_alpha) {
1091de2362d3Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1092de2362d3Smrg		pix_a = SQ_SEL_1;
1093de2362d3Smrg	} else {
1094de2362d3Smrg	    if (PICT_FORMAT_A(pPict->format) == 0) {
1095de2362d3Smrg		pix_r = SQ_SEL_1;
1096de2362d3Smrg		pix_g = SQ_SEL_1;
1097de2362d3Smrg		pix_b = SQ_SEL_1;
1098de2362d3Smrg		pix_a = SQ_SEL_1;
1099de2362d3Smrg	    } else {
1100de2362d3Smrg		pix_r = pix_a;
1101de2362d3Smrg		pix_g = pix_a;
1102de2362d3Smrg		pix_b = pix_a;
1103de2362d3Smrg	    }
1104de2362d3Smrg	}
1105de2362d3Smrg    }
1106de2362d3Smrg
1107de2362d3Smrg    tex_res.dst_sel_x           = pix_r; /* R */
1108de2362d3Smrg    tex_res.dst_sel_y           = pix_g; /* G */
1109de2362d3Smrg    tex_res.dst_sel_z           = pix_b; /* B */
1110de2362d3Smrg    tex_res.dst_sel_w           = pix_a; /* A */
1111de2362d3Smrg
1112de2362d3Smrg    tex_res.base_level          = 0;
1113de2362d3Smrg    tex_res.last_level          = 0;
1114de2362d3Smrg    tex_res.perf_modulation     = 0;
1115de2362d3Smrg    if (accel_state->src_obj[unit].tiling_flags == 0)
1116de2362d3Smrg	tex_res.tile_mode           = 1;
11177821949aSmrg    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
1118de2362d3Smrg
1119de2362d3Smrg    tex_samp.id                 = unit;
1120de2362d3Smrg    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1121de2362d3Smrg
1122de2362d3Smrg    switch (repeatType) {
1123de2362d3Smrg    case RepeatNormal:
1124de2362d3Smrg	tex_samp.clamp_x            = SQ_TEX_WRAP;
1125de2362d3Smrg	tex_samp.clamp_y            = SQ_TEX_WRAP;
1126de2362d3Smrg	break;
1127de2362d3Smrg    case RepeatPad:
1128de2362d3Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1129de2362d3Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1130de2362d3Smrg	break;
1131de2362d3Smrg    case RepeatReflect:
1132de2362d3Smrg	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1133de2362d3Smrg	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1134de2362d3Smrg	break;
1135de2362d3Smrg    case RepeatNone:
1136de2362d3Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1137de2362d3Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1138de2362d3Smrg	break;
1139de2362d3Smrg    default:
1140de2362d3Smrg	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1141de2362d3Smrg    }
1142de2362d3Smrg
1143de2362d3Smrg    switch (pPict->filter) {
1144de2362d3Smrg    case PictFilterNearest:
1145de2362d3Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1146de2362d3Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1147de2362d3Smrg	tex_samp.mc_coord_truncate  = 1;
1148de2362d3Smrg	break;
1149de2362d3Smrg    case PictFilterBilinear:
1150de2362d3Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1151de2362d3Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1152de2362d3Smrg	break;
1153de2362d3Smrg    default:
1154de2362d3Smrg	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1155de2362d3Smrg    }
1156de2362d3Smrg
1157de2362d3Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
1158de2362d3Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1159de2362d3Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
11607821949aSmrg    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
1161de2362d3Smrg
1162de2362d3Smrg    if (pPict->transform != 0) {
1163de2362d3Smrg	accel_state->is_transform[unit] = TRUE;
1164de2362d3Smrg	accel_state->transform[unit] = pPict->transform;
1165de2362d3Smrg
1166de2362d3Smrg	vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
1167de2362d3Smrg	vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
1168de2362d3Smrg	vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
1169de2362d3Smrg	vs_alu_consts[3] = 1.0 / tex_res.w;
1170de2362d3Smrg
1171de2362d3Smrg	vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
1172de2362d3Smrg	vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
1173de2362d3Smrg	vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
1174de2362d3Smrg	vs_alu_consts[7] = 1.0 / tex_res.h;
1175de2362d3Smrg    } else {
1176de2362d3Smrg	accel_state->is_transform[unit] = FALSE;
1177de2362d3Smrg
1178de2362d3Smrg	vs_alu_consts[0] = 1.0;
1179de2362d3Smrg	vs_alu_consts[1] = 0.0;
1180de2362d3Smrg	vs_alu_consts[2] = 0.0;
1181de2362d3Smrg	vs_alu_consts[3] = 1.0 / tex_res.w;
1182de2362d3Smrg
1183de2362d3Smrg	vs_alu_consts[4] = 0.0;
1184de2362d3Smrg	vs_alu_consts[5] = 1.0;
1185de2362d3Smrg	vs_alu_consts[6] = 0.0;
1186de2362d3Smrg	vs_alu_consts[7] = 1.0 / tex_res.h;
1187de2362d3Smrg    }
1188de2362d3Smrg
1189de2362d3Smrg    /* VS alu constants */
11907821949aSmrg    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
1191de2362d3Smrg			sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
1192de2362d3Smrg
1193de2362d3Smrg    return TRUE;
1194de2362d3Smrg}
1195de2362d3Smrg
1196de2362d3Smrgstatic Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1197de2362d3Smrg			       PicturePtr pDstPicture)
1198de2362d3Smrg{
1199de2362d3Smrg    uint32_t tmp1;
1200de2362d3Smrg    PixmapPtr pSrcPixmap, pDstPixmap;
1201de2362d3Smrg
1202de2362d3Smrg    /* Check for unsupported compositing operations. */
1203de2362d3Smrg    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1204de2362d3Smrg	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1205de2362d3Smrg
1206de2362d3Smrg    if (pSrcPicture->pDrawable) {
1207de2362d3Smrg	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1208de2362d3Smrg
1209de2362d3Smrg	if (pSrcPixmap->drawable.width >= 8192 ||
1210de2362d3Smrg	    pSrcPixmap->drawable.height >= 8192) {
1211de2362d3Smrg	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1212de2362d3Smrg			     pSrcPixmap->drawable.width,
1213de2362d3Smrg			     pSrcPixmap->drawable.height));
1214de2362d3Smrg	}
1215de2362d3Smrg
1216de2362d3Smrg	if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1217de2362d3Smrg	    return FALSE;
1218de2362d3Smrg    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
1219de2362d3Smrg	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1220de2362d3Smrg
1221de2362d3Smrg    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1222de2362d3Smrg
1223de2362d3Smrg    if (pDstPixmap->drawable.width >= 8192 ||
1224de2362d3Smrg	pDstPixmap->drawable.height >= 8192) {
1225de2362d3Smrg	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1226de2362d3Smrg			 pDstPixmap->drawable.width,
1227de2362d3Smrg			 pDstPixmap->drawable.height));
1228de2362d3Smrg    }
1229de2362d3Smrg
1230de2362d3Smrg    if (pMaskPicture) {
1231de2362d3Smrg	PixmapPtr pMaskPixmap;
1232de2362d3Smrg
1233de2362d3Smrg	if (pMaskPicture->pDrawable) {
1234de2362d3Smrg	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1235de2362d3Smrg
1236de2362d3Smrg	    if (pMaskPixmap->drawable.width >= 8192 ||
1237de2362d3Smrg		pMaskPixmap->drawable.height >= 8192) {
1238de2362d3Smrg	      RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1239de2362d3Smrg			       pMaskPixmap->drawable.width,
1240de2362d3Smrg			       pMaskPixmap->drawable.height));
1241de2362d3Smrg	    }
1242de2362d3Smrg
1243de2362d3Smrg	    if (pMaskPicture->componentAlpha) {
1244de2362d3Smrg		/* Check if it's component alpha that relies on a source alpha and
1245de2362d3Smrg		 * on the source value.  We can only get one of those into the
1246de2362d3Smrg		 * single source value that we get to blend with.
1247de2362d3Smrg		 */
1248de2362d3Smrg		if (R600BlendOp[op].src_alpha &&
1249de2362d3Smrg		    (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1250de2362d3Smrg		    (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
12517821949aSmrg		    RADEON_FALLBACK(("Component alpha not supported with source "
12527821949aSmrg				     "alpha and source value blending.\n"));
1253de2362d3Smrg		}
1254de2362d3Smrg	    }
1255de2362d3Smrg
1256de2362d3Smrg	    if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1257de2362d3Smrg		return FALSE;
1258de2362d3Smrg	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
1259de2362d3Smrg	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1260de2362d3Smrg    }
1261de2362d3Smrg
1262de2362d3Smrg    if (!R600GetDestFormat(pDstPicture, &tmp1))
1263de2362d3Smrg	return FALSE;
1264de2362d3Smrg
1265de2362d3Smrg    return TRUE;
1266de2362d3Smrg
1267de2362d3Smrg}
1268de2362d3Smrg
1269de2362d3Smrgstatic Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1270de2362d3Smrg				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1271de2362d3Smrg				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1272de2362d3Smrg{
1273de2362d3Smrg    ScreenPtr pScreen = pDst->drawable.pScreen;
1274de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1275de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1276de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1277de2362d3Smrg    uint32_t dst_format;
1278de2362d3Smrg    cb_config_t cb_conf;
1279de2362d3Smrg    shader_config_t vs_conf, ps_conf;
1280de2362d3Smrg    struct r600_accel_object src_obj, mask_obj, dst_obj;
1281de2362d3Smrg
1282de2362d3Smrg    if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8))
1283de2362d3Smrg	return FALSE;
1284de2362d3Smrg
12857821949aSmrg    if (!pSrc) {
12867821949aSmrg	pSrc = RADEONSolidPixmap(pScreen, pSrcPicture->pSourcePict->solidFill.color);
12877821949aSmrg	if (!pSrc)
12887821949aSmrg	    RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
12897821949aSmrg    }
12907821949aSmrg
12917821949aSmrg#if defined(XF86DRM_MODE)
12927821949aSmrg    if (info->cs) {
12937821949aSmrg	src_obj.offset = 0;
12947821949aSmrg	dst_obj.offset = 0;
12957821949aSmrg	dst_obj.bo = radeon_get_pixmap_bo(pDst);
12967314432eSmrg	src_obj.bo = radeon_get_pixmap_bo(pSrc);
12977821949aSmrg	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
12987314432eSmrg	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
12997821949aSmrg	dst_obj.surface = radeon_get_pixmap_surface(pDst);
13007314432eSmrg	src_obj.surface = radeon_get_pixmap_surface(pSrc);
13017821949aSmrg    } else
13027821949aSmrg#endif
13037821949aSmrg    {
13047821949aSmrg	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
13057821949aSmrg	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
13067821949aSmrg	src_obj.bo = NULL;
13077821949aSmrg	dst_obj.bo = NULL;
1308de2362d3Smrg    }
13097821949aSmrg    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1310de2362d3Smrg    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
13117821949aSmrg
13127821949aSmrg    src_obj.width = pSrc->drawable.width;
13137821949aSmrg    src_obj.height = pSrc->drawable.height;
13147821949aSmrg    src_obj.bpp = pSrc->drawable.bitsPerPixel;
13157821949aSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
13167821949aSmrg
1317de2362d3Smrg    dst_obj.width = pDst->drawable.width;
1318de2362d3Smrg    dst_obj.height = pDst->drawable.height;
1319de2362d3Smrg    dst_obj.bpp = pDst->drawable.bitsPerPixel;
13207821949aSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1321de2362d3Smrg
1322de2362d3Smrg    if (pMaskPicture) {
13237821949aSmrg	if (!pMask) {
13247821949aSmrg	    pMask = RADEONSolidPixmap(pScreen, pMaskPicture->pSourcePict->solidFill.color);
13257821949aSmrg	    if (!pMask) {
13267821949aSmrg		if (!pSrcPicture->pDrawable)
13277821949aSmrg		    pScreen->DestroyPixmap(pSrc);
13287821949aSmrg		RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
13297821949aSmrg	    }
13307821949aSmrg	}
13317821949aSmrg
13327821949aSmrg#if defined(XF86DRM_MODE)
13337821949aSmrg	if (info->cs) {
13347821949aSmrg	    mask_obj.offset = 0;
13357314432eSmrg	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
13367314432eSmrg	    mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
13377314432eSmrg	    mask_obj.surface = radeon_get_pixmap_surface(pMask);
13387821949aSmrg	} else
13397821949aSmrg#endif
13407821949aSmrg	{
13417821949aSmrg	    mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset;
13427821949aSmrg	    mask_obj.bo = NULL;
1343de2362d3Smrg	}
13447821949aSmrg	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
13457821949aSmrg
13467821949aSmrg	mask_obj.width = pMask->drawable.width;
13477821949aSmrg	mask_obj.height = pMask->drawable.height;
13487821949aSmrg	mask_obj.bpp = pMask->drawable.bitsPerPixel;
13497821949aSmrg	mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
13507821949aSmrg
13517821949aSmrg	if (!R600SetAccelState(pScrn,
13527821949aSmrg			       &src_obj,
13537821949aSmrg			       &mask_obj,
13547821949aSmrg			       &dst_obj,
13557821949aSmrg			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
13567821949aSmrg			       3, 0xffffffff))
13577821949aSmrg	    return FALSE;
1358de2362d3Smrg
1359de2362d3Smrg	accel_state->msk_pic = pMaskPicture;
1360de2362d3Smrg	if (pMaskPicture->componentAlpha) {
1361de2362d3Smrg	    accel_state->component_alpha = TRUE;
1362de2362d3Smrg	    if (R600BlendOp[op].src_alpha)
1363de2362d3Smrg		accel_state->src_alpha = TRUE;
1364de2362d3Smrg	    else
1365de2362d3Smrg		accel_state->src_alpha = FALSE;
1366de2362d3Smrg	} else {
1367de2362d3Smrg	    accel_state->component_alpha = FALSE;
1368de2362d3Smrg	    accel_state->src_alpha = FALSE;
1369de2362d3Smrg	}
1370de2362d3Smrg    } else {
13717821949aSmrg	if (!R600SetAccelState(pScrn,
13727821949aSmrg			       &src_obj,
13737821949aSmrg			       NULL,
13747821949aSmrg			       &dst_obj,
13757821949aSmrg			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
13767821949aSmrg			       3, 0xffffffff))
13777821949aSmrg	    return FALSE;
13787821949aSmrg
1379de2362d3Smrg	accel_state->msk_pic = NULL;
1380de2362d3Smrg	accel_state->component_alpha = FALSE;
1381de2362d3Smrg	accel_state->src_alpha = FALSE;
1382de2362d3Smrg    }
1383de2362d3Smrg
1384de2362d3Smrg    if (!R600GetDestFormat(pDstPicture, &dst_format))
1385de2362d3Smrg	return FALSE;
1386de2362d3Smrg
1387de2362d3Smrg    CLEAR (cb_conf);
1388de2362d3Smrg    CLEAR (vs_conf);
1389de2362d3Smrg    CLEAR (ps_conf);
1390de2362d3Smrg
1391de2362d3Smrg    if (pMask)
1392de2362d3Smrg        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1393de2362d3Smrg    else
1394de2362d3Smrg        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1395de2362d3Smrg
1396de2362d3Smrg    radeon_cp_start(pScrn);
1397de2362d3Smrg
13987821949aSmrg    r600_set_default_state(pScrn, accel_state->ib);
1399de2362d3Smrg
14007821949aSmrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
14017821949aSmrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
14027821949aSmrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1403de2362d3Smrg
14047821949aSmrg    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
14057821949aSmrg        R600IBDiscard(pScrn, accel_state->ib);
14067821949aSmrg        return FALSE;
14077821949aSmrg    }
1408de2362d3Smrg
1409de2362d3Smrg    if (pMask) {
1410de2362d3Smrg        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
14117821949aSmrg            R600IBDiscard(pScrn, accel_state->ib);
1412de2362d3Smrg            return FALSE;
1413de2362d3Smrg        }
1414de2362d3Smrg    } else
1415de2362d3Smrg        accel_state->is_transform[1] = FALSE;
1416de2362d3Smrg
1417de2362d3Smrg    if (pMask) {
14187821949aSmrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
14197821949aSmrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
1420de2362d3Smrg    } else {
14217821949aSmrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
14227821949aSmrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
1423de2362d3Smrg    }
1424de2362d3Smrg
1425de2362d3Smrg    /* Shader */
1426de2362d3Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
1427de2362d3Smrg    vs_conf.shader_size         = accel_state->vs_size;
1428de2362d3Smrg    vs_conf.num_gprs            = 5;
1429de2362d3Smrg    vs_conf.stack_size          = 1;
1430de2362d3Smrg    vs_conf.bo                  = accel_state->shaders_bo;
14317821949aSmrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1432de2362d3Smrg
1433de2362d3Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
1434de2362d3Smrg    ps_conf.shader_size         = accel_state->ps_size;
14357821949aSmrg    ps_conf.num_gprs            = 3;
1436de2362d3Smrg    ps_conf.stack_size          = 1;
1437de2362d3Smrg    ps_conf.uncached_first_inst = 1;
1438de2362d3Smrg    ps_conf.clamp_consts        = 0;
1439de2362d3Smrg    ps_conf.export_mode         = 2;
1440de2362d3Smrg    ps_conf.bo                  = accel_state->shaders_bo;
14417821949aSmrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1442de2362d3Smrg
1443de2362d3Smrg    cb_conf.id = 0;
1444de2362d3Smrg    cb_conf.w = accel_state->dst_obj.pitch;
1445de2362d3Smrg    cb_conf.h = accel_state->dst_obj.height;
14467821949aSmrg    cb_conf.base = accel_state->dst_obj.offset;
1447de2362d3Smrg    cb_conf.format = dst_format;
1448de2362d3Smrg    cb_conf.bo = accel_state->dst_obj.bo;
14497821949aSmrg#ifdef XF86DRM_MODE
1450de2362d3Smrg    cb_conf.surface = accel_state->dst_obj.surface;
14517821949aSmrg#endif
1452de2362d3Smrg
1453de2362d3Smrg    switch (pDstPicture->format) {
1454de2362d3Smrg    case PICT_a8r8g8b8:
1455de2362d3Smrg    case PICT_x8r8g8b8:
1456de2362d3Smrg    case PICT_a1r5g5b5:
1457de2362d3Smrg    case PICT_x1r5g5b5:
1458de2362d3Smrg    default:
1459de2362d3Smrg	cb_conf.comp_swap = 1; /* ARGB */
1460de2362d3Smrg	break;
1461de2362d3Smrg    case PICT_a8b8g8r8:
1462de2362d3Smrg    case PICT_x8b8g8r8:
1463de2362d3Smrg	cb_conf.comp_swap = 0; /* ABGR */
1464de2362d3Smrg	break;
14657821949aSmrg#ifdef PICT_TYPE_BGRA
1466de2362d3Smrg    case PICT_b8g8r8a8:
1467de2362d3Smrg    case PICT_b8g8r8x8:
1468de2362d3Smrg	cb_conf.comp_swap = 3; /* BGRA */
1469de2362d3Smrg	break;
14707821949aSmrg#endif
1471de2362d3Smrg    case PICT_r5g6b5:
1472de2362d3Smrg	cb_conf.comp_swap = 2; /* RGB */
1473de2362d3Smrg	break;
1474de2362d3Smrg    case PICT_a8:
1475de2362d3Smrg	cb_conf.comp_swap = 3; /* A */
1476de2362d3Smrg	break;
1477de2362d3Smrg    }
1478de2362d3Smrg    cb_conf.source_format = 1;
1479de2362d3Smrg    cb_conf.blend_clamp = 1;
1480de2362d3Smrg    cb_conf.blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1481de2362d3Smrg    cb_conf.blend_enable = 1;
1482de2362d3Smrg    cb_conf.pmask = 0xf;
1483de2362d3Smrg    cb_conf.rop = 3;
1484de2362d3Smrg    if (accel_state->dst_obj.tiling_flags == 0)
1485de2362d3Smrg	cb_conf.array_mode = 0;
1486de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1487de2362d3Smrg    switch (dst_obj.bpp) {
1488de2362d3Smrg    case 16:
1489de2362d3Smrg	cb_conf.endian = ENDIAN_8IN16;
1490de2362d3Smrg	break;
1491de2362d3Smrg    case 32:
1492de2362d3Smrg	cb_conf.endian = ENDIAN_8IN32;
1493de2362d3Smrg	break;
1494de2362d3Smrg    default:
1495de2362d3Smrg	break;
1496de2362d3Smrg    }
1497de2362d3Smrg#endif
14987821949aSmrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
1499de2362d3Smrg
1500de2362d3Smrg    if (pMask)
15017821949aSmrg	r600_set_spi(pScrn, accel_state->ib, (2 - 1), 2);
1502de2362d3Smrg    else
15037821949aSmrg	r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
15047314432eSmrg
1505de2362d3Smrg    if (accel_state->vsync)
1506de2362d3Smrg	RADEONVlineHelperClear(pScrn);
1507de2362d3Smrg
1508de2362d3Smrg    accel_state->composite_op = op;
1509de2362d3Smrg    accel_state->dst_pic = pDstPicture;
1510de2362d3Smrg    accel_state->src_pic = pSrcPicture;
1511de2362d3Smrg    accel_state->dst_pix = pDst;
1512de2362d3Smrg    accel_state->msk_pix = pMask;
1513de2362d3Smrg    accel_state->src_pix = pSrc;
1514de2362d3Smrg
1515de2362d3Smrg    return TRUE;
1516de2362d3Smrg}
1517de2362d3Smrg
1518de2362d3Smrgstatic void R600FinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst,
1519de2362d3Smrg				struct radeon_accel_state *accel_state)
1520de2362d3Smrg{
1521de2362d3Smrg    int vtx_size;
1522de2362d3Smrg
1523de2362d3Smrg    if (accel_state->vsync)
15247821949aSmrg       r600_cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
1525de2362d3Smrg			       accel_state->vline_crtc,
1526de2362d3Smrg			       accel_state->vline_y1,
1527de2362d3Smrg			       accel_state->vline_y2);
1528de2362d3Smrg
15297821949aSmrg    vtx_size = accel_state->msk_pic ? 24 : 16;
1530de2362d3Smrg
1531de2362d3Smrg    r600_finish_op(pScrn, vtx_size);
1532de2362d3Smrg}
1533de2362d3Smrg
1534de2362d3Smrgstatic void R600DoneComposite(PixmapPtr pDst)
1535de2362d3Smrg{
1536de2362d3Smrg    ScreenPtr pScreen = pDst->drawable.pScreen;
1537de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1538de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1539de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1540de2362d3Smrg
1541de2362d3Smrg    R600FinishComposite(pScrn, pDst, accel_state);
15427821949aSmrg
15437821949aSmrg    if (!accel_state->src_pic->pDrawable)
15447821949aSmrg	pScreen->DestroyPixmap(accel_state->src_pix);
15457821949aSmrg
15467821949aSmrg    if (accel_state->msk_pic && !accel_state->msk_pic->pDrawable)
15477821949aSmrg	pScreen->DestroyPixmap(accel_state->msk_pix);
1548de2362d3Smrg}
1549de2362d3Smrg
1550de2362d3Smrgstatic void R600Composite(PixmapPtr pDst,
1551de2362d3Smrg			  int srcX, int srcY,
1552de2362d3Smrg			  int maskX, int maskY,
1553de2362d3Smrg			  int dstX, int dstY,
1554de2362d3Smrg			  int w, int h)
1555de2362d3Smrg{
1556de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1557de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1558de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1559de2362d3Smrg    float *vb;
1560de2362d3Smrg
1561de2362d3Smrg    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1562de2362d3Smrg       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1563de2362d3Smrg
15647821949aSmrg#ifdef XF86DRM_MODE
15657821949aSmrg    if (info->cs && CS_FULL(info->cs)) {
1566de2362d3Smrg	R600FinishComposite(pScrn, pDst, info->accel_state);
1567de2362d3Smrg	radeon_cs_flush_indirect(pScrn);
1568de2362d3Smrg	R600PrepareComposite(info->accel_state->composite_op,
1569de2362d3Smrg			     info->accel_state->src_pic,
1570de2362d3Smrg			     info->accel_state->msk_pic,
1571de2362d3Smrg			     info->accel_state->dst_pic,
1572de2362d3Smrg			     info->accel_state->src_pix,
1573de2362d3Smrg			     info->accel_state->msk_pix,
1574de2362d3Smrg			     info->accel_state->dst_pix);
1575de2362d3Smrg    }
15767821949aSmrg#endif
1577de2362d3Smrg
1578de2362d3Smrg    if (accel_state->vsync)
1579de2362d3Smrg	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1580de2362d3Smrg
15817821949aSmrg    if (accel_state->msk_pic) {
1582de2362d3Smrg
1583de2362d3Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
1584de2362d3Smrg
1585de2362d3Smrg	vb[0] = (float)dstX;
1586de2362d3Smrg	vb[1] = (float)dstY;
1587de2362d3Smrg	vb[2] = (float)srcX;
1588de2362d3Smrg	vb[3] = (float)srcY;
1589de2362d3Smrg	vb[4] = (float)maskX;
1590de2362d3Smrg	vb[5] = (float)maskY;
1591de2362d3Smrg
1592de2362d3Smrg	vb[6] = (float)dstX;
1593de2362d3Smrg	vb[7] = (float)(dstY + h);
1594de2362d3Smrg	vb[8] = (float)srcX;
1595de2362d3Smrg	vb[9] = (float)(srcY + h);
1596de2362d3Smrg	vb[10] = (float)maskX;
1597de2362d3Smrg	vb[11] = (float)(maskY + h);
1598de2362d3Smrg
1599de2362d3Smrg	vb[12] = (float)(dstX + w);
1600de2362d3Smrg	vb[13] = (float)(dstY + h);
1601de2362d3Smrg	vb[14] = (float)(srcX + w);
1602de2362d3Smrg	vb[15] = (float)(srcY + h);
1603de2362d3Smrg	vb[16] = (float)(maskX + w);
1604de2362d3Smrg	vb[17] = (float)(maskY + h);
1605de2362d3Smrg
1606de2362d3Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
1607de2362d3Smrg
1608de2362d3Smrg    } else {
1609de2362d3Smrg
1610de2362d3Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
1611de2362d3Smrg
1612de2362d3Smrg	vb[0] = (float)dstX;
1613de2362d3Smrg	vb[1] = (float)dstY;
1614de2362d3Smrg	vb[2] = (float)srcX;
1615de2362d3Smrg	vb[3] = (float)srcY;
1616de2362d3Smrg
1617de2362d3Smrg	vb[4] = (float)dstX;
1618de2362d3Smrg	vb[5] = (float)(dstY + h);
1619de2362d3Smrg	vb[6] = (float)srcX;
1620de2362d3Smrg	vb[7] = (float)(srcY + h);
1621de2362d3Smrg
1622de2362d3Smrg	vb[8] = (float)(dstX + w);
1623de2362d3Smrg	vb[9] = (float)(dstY + h);
1624de2362d3Smrg	vb[10] = (float)(srcX + w);
1625de2362d3Smrg	vb[11] = (float)(srcY + h);
1626de2362d3Smrg
1627de2362d3Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
1628de2362d3Smrg    }
1629de2362d3Smrg
1630de2362d3Smrg
1631de2362d3Smrg}
1632de2362d3Smrg
16337821949aSmrgBool
16347821949aSmrgR600CopyToVRAM(ScrnInfoPtr pScrn,
16357821949aSmrg	       char *src, int src_pitch,
16367821949aSmrg	       uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_width, uint32_t dst_height, int bpp,
16377821949aSmrg	       int x, int y, int w, int h)
16387821949aSmrg{
16397821949aSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
16407821949aSmrg    struct radeon_accel_state *accel_state = info->accel_state;
16417821949aSmrg    uint32_t scratch_mc_addr;
16427821949aSmrg    int wpass = w * (bpp/8);
16437821949aSmrg    int scratch_pitch_bytes = RADEON_ALIGN(wpass, 256);
16447821949aSmrg    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
16457821949aSmrg    int scratch_offset = 0, hpass, temph;
16467821949aSmrg    char *dst;
16477821949aSmrg    drmBufPtr scratch;
16487821949aSmrg    struct r600_accel_object scratch_obj, dst_obj;
16497821949aSmrg
16507821949aSmrg    if (dst_pitch & 7)
16517821949aSmrg	return FALSE;
16527821949aSmrg
16537821949aSmrg    if (dst_mc_addr & 0xff)
16547821949aSmrg	return FALSE;
16557821949aSmrg
16567821949aSmrg    scratch = RADEONCPGetBuffer(pScrn);
16577821949aSmrg    if (scratch == NULL)
16587821949aSmrg	return FALSE;
16597821949aSmrg
16607821949aSmrg    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
16617821949aSmrg    temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
16627821949aSmrg    dst = (char *)scratch->address;
16637821949aSmrg
16647821949aSmrg    scratch_obj.pitch = scratch_pitch;
16657821949aSmrg    scratch_obj.width = w;
16667821949aSmrg    scratch_obj.height = hpass;
16677821949aSmrg    scratch_obj.offset = scratch_mc_addr;
16687821949aSmrg    scratch_obj.bpp = bpp;
16697821949aSmrg    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
16707821949aSmrg    scratch_obj.bo = NULL;
16717821949aSmrg
16727821949aSmrg    dst_obj.pitch = dst_pitch;
16737821949aSmrg    dst_obj.width = dst_width;
16747821949aSmrg    dst_obj.height = dst_height;
16757821949aSmrg    dst_obj.offset = dst_mc_addr;
16767821949aSmrg    dst_obj.bo = NULL;
16777821949aSmrg    dst_obj.bpp = bpp;
16787821949aSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
16797821949aSmrg
16807821949aSmrg    if (!R600SetAccelState(pScrn,
16817821949aSmrg			   &scratch_obj,
16827821949aSmrg			   NULL,
16837821949aSmrg			   &dst_obj,
16847821949aSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
16857821949aSmrg			   3, 0xffffffff))
16867821949aSmrg	return FALSE;
16877821949aSmrg
16887821949aSmrg    /* memcopy from sys to scratch */
16897821949aSmrg    while (temph--) {
16907821949aSmrg	memcpy (dst, src, wpass);
16917821949aSmrg	src += src_pitch;
16927821949aSmrg	dst += scratch_pitch_bytes;
16937821949aSmrg    }
16947821949aSmrg
16957821949aSmrg    while (h) {
16967821949aSmrg	uint32_t offset = scratch_mc_addr + scratch_offset;
16977821949aSmrg	int oldhpass = hpass;
16987821949aSmrg	h -= oldhpass;
16997821949aSmrg	temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
17007821949aSmrg
17017821949aSmrg	if (hpass) {
17027821949aSmrg	    scratch_offset = scratch->total/2 - scratch_offset;
17037821949aSmrg	    dst = (char *)scratch->address + scratch_offset;
17047821949aSmrg	    /* wait for the engine to be idle */
17057821949aSmrg	    RADEONWaitForIdleCP(pScrn);
17067821949aSmrg	    //memcopy from sys to scratch
17077821949aSmrg	    while (temph--) {
17087821949aSmrg		memcpy (dst, src, wpass);
17097821949aSmrg		src += src_pitch;
17107821949aSmrg		dst += scratch_pitch_bytes;
17117821949aSmrg	    }
17127821949aSmrg	}
17137821949aSmrg	/* blit from scratch to vram */
17147821949aSmrg	info->accel_state->src_obj[0].height = oldhpass;
17157821949aSmrg	info->accel_state->src_obj[0].offset = offset;
17167821949aSmrg	R600DoPrepareCopy(pScrn);
17177821949aSmrg	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
17187821949aSmrg	R600DoCopy(pScrn);
17197821949aSmrg	y += oldhpass;
17207821949aSmrg    }
17217821949aSmrg
17227821949aSmrg    R600IBDiscard(pScrn, scratch);
17237821949aSmrg
17247821949aSmrg    return TRUE;
17257821949aSmrg}
17267821949aSmrg
17277821949aSmrgstatic Bool
17287821949aSmrgR600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
17297821949aSmrg		   char *src, int src_pitch)
17307821949aSmrg{
17317821949aSmrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
17327821949aSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
17337821949aSmrg    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
17347821949aSmrg    uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
17357821949aSmrg    int bpp = pDst->drawable.bitsPerPixel;
17367821949aSmrg
17377821949aSmrg    return R600CopyToVRAM(pScrn,
17387821949aSmrg			  src, src_pitch,
17397821949aSmrg			  dst_pitch, dst_mc_addr, pDst->drawable.width, pDst->drawable.height, bpp,
17407821949aSmrg			  x, y, w, h);
17417821949aSmrg}
17427821949aSmrg
17437821949aSmrgstatic Bool
17447821949aSmrgR600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
17457821949aSmrg		       char *dst, int dst_pitch)
17467821949aSmrg{
17477821949aSmrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
17487821949aSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
17497821949aSmrg    struct radeon_accel_state *accel_state = info->accel_state;
17507821949aSmrg    uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
17517821949aSmrg    uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
17527821949aSmrg    uint32_t src_width = pSrc->drawable.width;
17537821949aSmrg    uint32_t src_height = pSrc->drawable.height;
17547821949aSmrg    int bpp = pSrc->drawable.bitsPerPixel;
17557821949aSmrg    uint32_t scratch_mc_addr;
17567821949aSmrg    int scratch_pitch_bytes = RADEON_ALIGN(dst_pitch, 256);
17577821949aSmrg    int scratch_offset = 0, hpass;
17587821949aSmrg    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
17597821949aSmrg    int wpass = w * (bpp/8);
17607821949aSmrg    drmBufPtr scratch;
17617821949aSmrg    struct r600_accel_object scratch_obj, src_obj;
17627821949aSmrg
17637821949aSmrg    /* bad pipe setup in drm prior to 1.32 */
17647821949aSmrg    if (info->dri->pKernelDRMVersion->version_minor < 32) {
17657821949aSmrg	    if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32))
17667821949aSmrg		    return FALSE;
17677821949aSmrg    }
17687821949aSmrg
17697821949aSmrg    if (src_pitch & 7)
17707821949aSmrg	return FALSE;
17717821949aSmrg
17727821949aSmrg    scratch = RADEONCPGetBuffer(pScrn);
17737821949aSmrg    if (scratch == NULL)
17747821949aSmrg	return FALSE;
17757821949aSmrg
17767821949aSmrg    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
17777821949aSmrg    hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
17787821949aSmrg
17797821949aSmrg    src_obj.pitch = src_pitch;
17807821949aSmrg    src_obj.width = src_width;
17817821949aSmrg    src_obj.height = src_height;
17827821949aSmrg    src_obj.offset = src_mc_addr;
17837821949aSmrg    src_obj.bo = NULL;
17847821949aSmrg    src_obj.bpp = bpp;
17857821949aSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM;
17867821949aSmrg
17877821949aSmrg    scratch_obj.pitch = scratch_pitch;
17887821949aSmrg    scratch_obj.width = src_width;
17897821949aSmrg    scratch_obj.height = hpass;
17907821949aSmrg    scratch_obj.offset = scratch_mc_addr;
17917821949aSmrg    scratch_obj.bpp = bpp;
17927821949aSmrg    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
17937821949aSmrg    scratch_obj.bo = NULL;
17947821949aSmrg
17957821949aSmrg    if (!R600SetAccelState(pScrn,
17967821949aSmrg			   &src_obj,
17977821949aSmrg			   NULL,
17987821949aSmrg			   &scratch_obj,
17997821949aSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
18007821949aSmrg			   3, 0xffffffff))
18017821949aSmrg	return FALSE;
18027821949aSmrg
18037821949aSmrg    /* blit from vram to scratch */
18047821949aSmrg    R600DoPrepareCopy(pScrn);
18057821949aSmrg    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
18067821949aSmrg    R600DoCopy(pScrn);
18077821949aSmrg
18087821949aSmrg    while (h) {
18097821949aSmrg	char *src = (char *)scratch->address + scratch_offset;
18107821949aSmrg	int oldhpass = hpass;
18117821949aSmrg	h -= oldhpass;
18127821949aSmrg	y += oldhpass;
18137821949aSmrg	hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
18147821949aSmrg
18157821949aSmrg	if (hpass) {
18167821949aSmrg	    scratch_offset = scratch->total/2 - scratch_offset;
18177821949aSmrg	    /* blit from vram to scratch */
18187821949aSmrg	    info->accel_state->dst_obj.height = hpass;
18197821949aSmrg	    info->accel_state->dst_obj.offset = scratch_mc_addr + scratch_offset;
18207821949aSmrg	    R600DoPrepareCopy(pScrn);
18217821949aSmrg	    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
18227821949aSmrg	    R600DoCopy(pScrn);
18237821949aSmrg	}
18247821949aSmrg
18257821949aSmrg	/* wait for the engine to be idle */
18267821949aSmrg	RADEONWaitForIdleCP(pScrn);
18277821949aSmrg	/* memcopy from scratch to sys */
18287821949aSmrg	while (oldhpass--) {
18297821949aSmrg	    memcpy (dst, src, wpass);
18307821949aSmrg	    dst += dst_pitch;
18317821949aSmrg	    src += scratch_pitch_bytes;
18327821949aSmrg	}
18337821949aSmrg    }
18347821949aSmrg
18357821949aSmrg    R600IBDiscard(pScrn, scratch);
18367821949aSmrg
18377821949aSmrg    return TRUE;
18387821949aSmrg
18397821949aSmrg}
18407821949aSmrg
18417821949aSmrg#if defined(XF86DRM_MODE)
18427821949aSmrg
1843de2362d3Smrgstatic Bool
1844de2362d3SmrgR600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
1845de2362d3Smrg		     char *src, int src_pitch)
1846de2362d3Smrg{
1847de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1848de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1849de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1850de2362d3Smrg    struct radeon_exa_pixmap_priv *driver_priv;
1851de2362d3Smrg    struct radeon_bo *scratch = NULL;
1852de2362d3Smrg    struct radeon_bo *copy_dst;
1853de2362d3Smrg    unsigned char *dst;
1854de2362d3Smrg    unsigned size;
1855de2362d3Smrg    uint32_t dst_domain;
1856de2362d3Smrg    int bpp = pDst->drawable.bitsPerPixel;
1857de2362d3Smrg    uint32_t scratch_pitch;
1858de2362d3Smrg    uint32_t copy_pitch;
1859de2362d3Smrg    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
1860de2362d3Smrg    int ret;
1861de2362d3Smrg    Bool flush = TRUE;
1862de2362d3Smrg    Bool r;
1863de2362d3Smrg    int i;
1864de2362d3Smrg    struct r600_accel_object src_obj, dst_obj;
1865de2362d3Smrg    uint32_t height, base_align;
1866de2362d3Smrg
1867de2362d3Smrg    if (bpp < 8)
1868de2362d3Smrg	return FALSE;
1869de2362d3Smrg
1870de2362d3Smrg    driver_priv = exaGetPixmapDriverPrivate(pDst);
1871de2362d3Smrg    if (!driver_priv || !driver_priv->bo)
1872de2362d3Smrg	return FALSE;
1873de2362d3Smrg
18747821949aSmrg    /* If we know the BO won't be busy, don't bother with a scratch */
1875de2362d3Smrg    copy_dst = driver_priv->bo;
1876de2362d3Smrg    copy_pitch = pDst->devKind;
1877de2362d3Smrg    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1878de2362d3Smrg	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1879de2362d3Smrg	    flush = FALSE;
18807821949aSmrg	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
1881de2362d3Smrg		goto copy;
1882de2362d3Smrg	}
1883de2362d3Smrg    }
1884de2362d3Smrg
1885de2362d3Smrg    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1886de2362d3Smrg    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1887de2362d3Smrg    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1888de2362d3Smrg    size = scratch_pitch * height * (bpp / 8);
1889de2362d3Smrg    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1890de2362d3Smrg    if (scratch == NULL) {
1891de2362d3Smrg	goto copy;
1892de2362d3Smrg    }
1893de2362d3Smrg
1894de2362d3Smrg    src_obj.pitch = scratch_pitch;
1895de2362d3Smrg    src_obj.width = w;
1896de2362d3Smrg    src_obj.height = h;
18977821949aSmrg    src_obj.offset = 0;
1898de2362d3Smrg    src_obj.bpp = bpp;
1899de2362d3Smrg    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1900de2362d3Smrg    src_obj.bo = scratch;
1901de2362d3Smrg    src_obj.tiling_flags = 0;
19027821949aSmrg#ifdef XF86DRM_MODE
1903de2362d3Smrg    src_obj.surface = NULL;
19047821949aSmrg#endif
1905de2362d3Smrg
1906de2362d3Smrg    dst_obj.pitch = dst_pitch_hw;
1907de2362d3Smrg    dst_obj.width = pDst->drawable.width;
1908de2362d3Smrg    dst_obj.height = pDst->drawable.height;
19097821949aSmrg    dst_obj.offset = 0;
1910de2362d3Smrg    dst_obj.bpp = bpp;
1911de2362d3Smrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1912de2362d3Smrg    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1913de2362d3Smrg    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
19147821949aSmrg#ifdef XF86DRM_MODE
1915de2362d3Smrg    dst_obj.surface = radeon_get_pixmap_surface(pDst);
19167821949aSmrg#endif
1917de2362d3Smrg
1918de2362d3Smrg    if (!R600SetAccelState(pScrn,
1919de2362d3Smrg			   &src_obj,
1920de2362d3Smrg			   NULL,
1921de2362d3Smrg			   &dst_obj,
1922de2362d3Smrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1923de2362d3Smrg			   3, 0xffffffff)) {
1924de2362d3Smrg        goto copy;
1925de2362d3Smrg    }
1926de2362d3Smrg    copy_dst = scratch;
1927de2362d3Smrg    copy_pitch = scratch_pitch * (bpp / 8);
1928de2362d3Smrg    flush = FALSE;
1929de2362d3Smrg
1930de2362d3Smrgcopy:
1931de2362d3Smrg    if (flush)
1932de2362d3Smrg	radeon_cs_flush_indirect(pScrn);
1933de2362d3Smrg
1934de2362d3Smrg    ret = radeon_bo_map(copy_dst, 0);
1935de2362d3Smrg    if (ret) {
1936de2362d3Smrg        r = FALSE;
1937de2362d3Smrg        goto out;
1938de2362d3Smrg    }
1939de2362d3Smrg    r = TRUE;
1940de2362d3Smrg    size = w * bpp / 8;
1941de2362d3Smrg    dst = copy_dst->ptr;
1942de2362d3Smrg    if (copy_dst == driver_priv->bo)
1943de2362d3Smrg	dst += y * copy_pitch + x * bpp / 8;
1944de2362d3Smrg    for (i = 0; i < h; i++) {
1945de2362d3Smrg        memcpy(dst + i * copy_pitch, src, size);
1946de2362d3Smrg        src += src_pitch;
1947de2362d3Smrg    }
1948de2362d3Smrg    radeon_bo_unmap(copy_dst);
1949de2362d3Smrg
1950de2362d3Smrg    if (copy_dst == scratch) {
1951de2362d3Smrg	if (info->accel_state->vsync)
1952de2362d3Smrg	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1953de2362d3Smrg
1954de2362d3Smrg	/* blit from gart to vram */
1955de2362d3Smrg	R600DoPrepareCopy(pScrn);
1956de2362d3Smrg	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
1957de2362d3Smrg	R600DoCopyVline(pDst);
1958de2362d3Smrg    }
1959de2362d3Smrg
1960de2362d3Smrgout:
1961de2362d3Smrg    if (scratch)
1962de2362d3Smrg	radeon_bo_unref(scratch);
1963de2362d3Smrg    return r;
1964de2362d3Smrg}
1965de2362d3Smrg
1966de2362d3Smrgstatic Bool
1967de2362d3SmrgR600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
1968de2362d3Smrg			 int h, char *dst, int dst_pitch)
1969de2362d3Smrg{
1970de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
1971de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1972de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1973de2362d3Smrg    struct radeon_exa_pixmap_priv *driver_priv;
1974de2362d3Smrg    struct radeon_bo *scratch = NULL;
1975de2362d3Smrg    struct radeon_bo *copy_src;
1976de2362d3Smrg    unsigned size;
1977de2362d3Smrg    uint32_t src_domain = 0;
1978de2362d3Smrg    int bpp = pSrc->drawable.bitsPerPixel;
1979de2362d3Smrg    uint32_t scratch_pitch;
1980de2362d3Smrg    uint32_t copy_pitch;
1981de2362d3Smrg    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
1982de2362d3Smrg    int ret;
1983de2362d3Smrg    Bool flush = FALSE;
1984de2362d3Smrg    Bool r;
1985de2362d3Smrg    struct r600_accel_object src_obj, dst_obj;
1986de2362d3Smrg    uint32_t height, base_align;
1987de2362d3Smrg
1988de2362d3Smrg    if (bpp < 8)
1989de2362d3Smrg	return FALSE;
1990de2362d3Smrg
1991de2362d3Smrg    driver_priv = exaGetPixmapDriverPrivate(pSrc);
1992de2362d3Smrg    if (!driver_priv || !driver_priv->bo)
1993de2362d3Smrg	return FALSE;
1994de2362d3Smrg
1995de2362d3Smrg    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
1996de2362d3Smrg    copy_src = driver_priv->bo;
1997de2362d3Smrg    copy_pitch = pSrc->devKind;
1998de2362d3Smrg    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1999de2362d3Smrg	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
2000de2362d3Smrg	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
2001de2362d3Smrg	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
2002de2362d3Smrg		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
2003de2362d3Smrg		src_domain = 0;
2004de2362d3Smrg	    else /* A write may be scheduled */
2005de2362d3Smrg		flush = TRUE;
2006de2362d3Smrg	}
2007de2362d3Smrg
2008de2362d3Smrg	if (!src_domain)
2009de2362d3Smrg	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
2010de2362d3Smrg
2011de2362d3Smrg	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
2012de2362d3Smrg	    goto copy;
2013de2362d3Smrg    }
2014de2362d3Smrg
2015de2362d3Smrg    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
2016de2362d3Smrg    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
2017de2362d3Smrg    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
2018de2362d3Smrg    size = scratch_pitch * height * (bpp / 8);
2019de2362d3Smrg    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
2020de2362d3Smrg    if (scratch == NULL) {
2021de2362d3Smrg	goto copy;
2022de2362d3Smrg    }
2023de2362d3Smrg    radeon_cs_space_reset_bos(info->cs);
2024de2362d3Smrg    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
2025de2362d3Smrg				      RADEON_GEM_DOMAIN_VRAM, 0);
2026de2362d3Smrg    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
2027de2362d3Smrg    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
2028de2362d3Smrg    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
2029de2362d3Smrg    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
2030de2362d3Smrg    ret = radeon_cs_space_check(info->cs);
2031de2362d3Smrg    if (ret) {
2032de2362d3Smrg        goto copy;
2033de2362d3Smrg    }
2034de2362d3Smrg
2035de2362d3Smrg    src_obj.pitch = src_pitch_hw;
2036de2362d3Smrg    src_obj.width = pSrc->drawable.width;
2037de2362d3Smrg    src_obj.height = pSrc->drawable.height;
20387821949aSmrg    src_obj.offset = 0;
2039de2362d3Smrg    src_obj.bpp = bpp;
2040de2362d3Smrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
2041de2362d3Smrg    src_obj.bo = radeon_get_pixmap_bo(pSrc);
2042de2362d3Smrg    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
20437821949aSmrg#ifdef XF86DRM_MODE
2044de2362d3Smrg    src_obj.surface = radeon_get_pixmap_surface(pSrc);
20457821949aSmrg#endif
2046de2362d3Smrg
2047de2362d3Smrg    dst_obj.pitch = scratch_pitch;
2048de2362d3Smrg    dst_obj.width = w;
2049de2362d3Smrg    dst_obj.height = h;
20507821949aSmrg    dst_obj.offset = 0;
2051de2362d3Smrg    dst_obj.bo = scratch;
2052de2362d3Smrg    dst_obj.bpp = bpp;
2053de2362d3Smrg    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
2054de2362d3Smrg    dst_obj.tiling_flags = 0;
20557821949aSmrg#ifdef XF86DRM_MODE
2056de2362d3Smrg    dst_obj.surface = NULL;
20577821949aSmrg#endif
2058de2362d3Smrg
2059de2362d3Smrg    if (!R600SetAccelState(pScrn,
2060de2362d3Smrg			   &src_obj,
2061de2362d3Smrg			   NULL,
2062de2362d3Smrg			   &dst_obj,
2063de2362d3Smrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
2064de2362d3Smrg			   3, 0xffffffff)) {
2065de2362d3Smrg        goto copy;
2066de2362d3Smrg    }
2067de2362d3Smrg
2068de2362d3Smrg    /* blit from vram to gart */
2069de2362d3Smrg    R600DoPrepareCopy(pScrn);
2070de2362d3Smrg    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
2071de2362d3Smrg    R600DoCopy(pScrn);
2072de2362d3Smrg    copy_src = scratch;
2073de2362d3Smrg    copy_pitch = scratch_pitch * (bpp / 8);
2074de2362d3Smrg    flush = TRUE;
2075de2362d3Smrg
2076de2362d3Smrgcopy:
20777821949aSmrg    if (flush && info->cs)
2078de2362d3Smrg	radeon_cs_flush_indirect(pScrn);
2079de2362d3Smrg
2080de2362d3Smrg    ret = radeon_bo_map(copy_src, 0);
2081de2362d3Smrg    if (ret) {
2082de2362d3Smrg	ErrorF("failed to map pixmap: %d\n", ret);
2083de2362d3Smrg        r = FALSE;
2084de2362d3Smrg        goto out;
2085de2362d3Smrg    }
2086de2362d3Smrg    r = TRUE;
2087de2362d3Smrg    w *= bpp / 8;
2088de2362d3Smrg    if (copy_src == driver_priv->bo)
2089de2362d3Smrg	size = y * copy_pitch + x * bpp / 8;
2090de2362d3Smrg    else
2091de2362d3Smrg	size = 0;
2092de2362d3Smrg    while (h--) {
2093de2362d3Smrg        memcpy(dst, copy_src->ptr + size, w);
2094de2362d3Smrg        size += copy_pitch;
2095de2362d3Smrg        dst += dst_pitch;
2096de2362d3Smrg    }
2097de2362d3Smrg    radeon_bo_unmap(copy_src);
2098de2362d3Smrgout:
2099de2362d3Smrg    if (scratch)
2100de2362d3Smrg	radeon_bo_unref(scratch);
2101de2362d3Smrg    return r;
2102de2362d3Smrg}
21037821949aSmrg#endif
2104de2362d3Smrg
2105de2362d3Smrgstatic int
2106de2362d3SmrgR600MarkSync(ScreenPtr pScreen)
2107de2362d3Smrg{
2108de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2109de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2110de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2111de2362d3Smrg
2112de2362d3Smrg    return ++accel_state->exaSyncMarker;
2113de2362d3Smrg
2114de2362d3Smrg}
2115de2362d3Smrg
2116de2362d3Smrgstatic void
2117de2362d3SmrgR600Sync(ScreenPtr pScreen, int marker)
2118de2362d3Smrg{
2119de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2120de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2121de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2122de2362d3Smrg
2123de2362d3Smrg    if (accel_state->exaMarkerSynced != marker) {
21247821949aSmrg#ifdef XF86DRM_MODE
21257821949aSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
21267821949aSmrg	if (!info->cs)
21277821949aSmrg#endif
21287821949aSmrg#endif
21297821949aSmrg	    RADEONWaitForIdleCP(pScrn);
2130de2362d3Smrg	accel_state->exaMarkerSynced = marker;
2131de2362d3Smrg    }
2132de2362d3Smrg
2133de2362d3Smrg}
2134de2362d3Smrg
2135de2362d3Smrgstatic Bool
2136de2362d3SmrgR600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
2137de2362d3Smrg{
2138de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2139de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2140de2362d3Smrg
2141de2362d3Smrg    /* 512 bytes per shader for now */
2142de2362d3Smrg    int size = 512 * 9;
2143de2362d3Smrg
21447821949aSmrg    accel_state->shaders = NULL;
21457821949aSmrg
21467821949aSmrg#ifdef XF86DRM_MODE
21477821949aSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
21487821949aSmrg    if (info->cs) {
21497821949aSmrg	accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
21507821949aSmrg						 RADEON_GEM_DOMAIN_VRAM, 0);
21517821949aSmrg	if (accel_state->shaders_bo == NULL) {
21527821949aSmrg	    ErrorF("Allocating shader failed\n");
21537821949aSmrg	    return FALSE;
21547821949aSmrg	}
21557821949aSmrg	return TRUE;
21567821949aSmrg    } else
21577821949aSmrg#endif
21587821949aSmrg#endif
21597821949aSmrg    {
21607821949aSmrg	accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
21617821949aSmrg						 TRUE, NULL, NULL);
21627821949aSmrg
21637821949aSmrg	if (accel_state->shaders == NULL)
21647821949aSmrg	    return FALSE;
2165de2362d3Smrg    }
21667821949aSmrg
2167de2362d3Smrg    return TRUE;
2168de2362d3Smrg}
2169de2362d3Smrg
2170de2362d3SmrgBool
2171de2362d3SmrgR600LoadShaders(ScrnInfoPtr pScrn)
2172de2362d3Smrg{
2173de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2174de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2175de2362d3Smrg    RADEONChipFamily ChipSet = info->ChipFamily;
2176de2362d3Smrg    uint32_t *shader;
21777821949aSmrg#ifdef XF86DRM_MODE
21787821949aSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2179de2362d3Smrg    int ret;
2180de2362d3Smrg
21817821949aSmrg    if (info->cs) {
21827821949aSmrg	ret = radeon_bo_map(accel_state->shaders_bo, 1);
21837821949aSmrg	if (ret) {
21847821949aSmrg	    FatalError("failed to map shader %d\n", ret);
21857821949aSmrg	    return FALSE;
21867821949aSmrg	}
21877821949aSmrg	shader = accel_state->shaders_bo->ptr;
21887821949aSmrg    } else
21897821949aSmrg#endif
21907821949aSmrg#endif
21917821949aSmrg	shader = (pointer)((char *)info->FB + accel_state->shaders->offset);
2192de2362d3Smrg
2193de2362d3Smrg    /*  solid vs --------------------------------------- */
2194de2362d3Smrg    accel_state->solid_vs_offset = 0;
2195de2362d3Smrg    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
2196de2362d3Smrg
2197de2362d3Smrg    /*  solid ps --------------------------------------- */
2198de2362d3Smrg    accel_state->solid_ps_offset = 512;
2199de2362d3Smrg    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
2200de2362d3Smrg
2201de2362d3Smrg    /*  copy vs --------------------------------------- */
2202de2362d3Smrg    accel_state->copy_vs_offset = 1024;
2203de2362d3Smrg    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
2204de2362d3Smrg
2205de2362d3Smrg    /*  copy ps --------------------------------------- */
2206de2362d3Smrg    accel_state->copy_ps_offset = 1536;
2207de2362d3Smrg    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2208de2362d3Smrg
2209de2362d3Smrg    /*  comp vs --------------------------------------- */
2210de2362d3Smrg    accel_state->comp_vs_offset = 2048;
2211de2362d3Smrg    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2212de2362d3Smrg
2213de2362d3Smrg    /*  comp ps --------------------------------------- */
2214de2362d3Smrg    accel_state->comp_ps_offset = 2560;
2215de2362d3Smrg    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2216de2362d3Smrg
2217de2362d3Smrg    /*  xv vs --------------------------------------- */
2218de2362d3Smrg    accel_state->xv_vs_offset = 3072;
2219de2362d3Smrg    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2220de2362d3Smrg
2221de2362d3Smrg    /*  xv ps --------------------------------------- */
2222de2362d3Smrg    accel_state->xv_ps_offset = 3584;
2223de2362d3Smrg    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2224de2362d3Smrg
22257821949aSmrg#ifdef XF86DRM_MODE
22267821949aSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
22277821949aSmrg    if (info->cs) {
22287821949aSmrg	radeon_bo_unmap(accel_state->shaders_bo);
22297821949aSmrg    }
22307821949aSmrg#endif
22317821949aSmrg#endif
22327821949aSmrg
2233de2362d3Smrg    return TRUE;
2234de2362d3Smrg}
2235de2362d3Smrg
22367821949aSmrgstatic Bool
22377821949aSmrgR600PrepareAccess(PixmapPtr pPix, int index)
22387821949aSmrg{
22397821949aSmrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
22407821949aSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
22417821949aSmrg    unsigned char *RADEONMMIO = info->MMIO;
22427821949aSmrg
22437821949aSmrg    /* flush HDP read/write caches */
22447821949aSmrg    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
22457821949aSmrg
22467821949aSmrg    return TRUE;
22477821949aSmrg}
22487821949aSmrg
22497821949aSmrgstatic void
22507821949aSmrgR600FinishAccess(PixmapPtr pPix, int index)
22517821949aSmrg{
22527821949aSmrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
22537821949aSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
22547821949aSmrg    unsigned char *RADEONMMIO = info->MMIO;
22557821949aSmrg
22567821949aSmrg    /* flush HDP read/write caches */
22577821949aSmrg    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
22587821949aSmrg
22597821949aSmrg}
22607821949aSmrg
2261de2362d3SmrgBool
2262de2362d3SmrgR600DrawInit(ScreenPtr pScreen)
2263de2362d3Smrg{
2264de2362d3Smrg    ScrnInfoPtr pScrn =  xf86ScreenToScrn(pScreen);
2265de2362d3Smrg    RADEONInfoPtr info   = RADEONPTR(pScrn);
2266de2362d3Smrg
2267de2362d3Smrg    if (info->accel_state->exa == NULL) {
2268de2362d3Smrg	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2269de2362d3Smrg	return FALSE;
2270de2362d3Smrg    }
2271de2362d3Smrg
2272de2362d3Smrg    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2273de2362d3Smrg    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2274de2362d3Smrg
2275de2362d3Smrg    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2276de2362d3Smrg    info->accel_state->exa->Solid = R600Solid;
2277de2362d3Smrg    info->accel_state->exa->DoneSolid = R600DoneSolid;
2278de2362d3Smrg
2279de2362d3Smrg    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2280de2362d3Smrg    info->accel_state->exa->Copy = R600Copy;
2281de2362d3Smrg    info->accel_state->exa->DoneCopy = R600DoneCopy;
2282de2362d3Smrg
2283de2362d3Smrg    info->accel_state->exa->MarkSync = R600MarkSync;
2284de2362d3Smrg    info->accel_state->exa->WaitMarker = R600Sync;
2285de2362d3Smrg
22867821949aSmrg#ifdef XF86DRM_MODE
22877821949aSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
22887821949aSmrg    if (info->cs) {
22897821949aSmrg	info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
22907821949aSmrg	info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
22917821949aSmrg	info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
22927821949aSmrg	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
22937821949aSmrg	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
22947821949aSmrg	info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
22957821949aSmrg	info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
22967821949aSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5)
22977821949aSmrg        info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
22987821949aSmrg#endif
22997821949aSmrg    } else
23007821949aSmrg#endif
23017821949aSmrg#endif
23027821949aSmrg    {
23037821949aSmrg	info->accel_state->exa->PrepareAccess = R600PrepareAccess;
23047821949aSmrg	info->accel_state->exa->FinishAccess = R600FinishAccess;
23057821949aSmrg
23067821949aSmrg	/* AGP seems to have problems with gart transfers */
23077821949aSmrg	if (info->accelDFS) {
23087821949aSmrg	    info->accel_state->exa->UploadToScreen = R600UploadToScreen;
23097821949aSmrg	    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen;
23107821949aSmrg	}
23117821949aSmrg    }
23127821949aSmrg
23137821949aSmrg    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
23147821949aSmrg#ifdef EXA_SUPPORTS_PREPARE_AUX
23157821949aSmrg    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
23167821949aSmrg#endif
23177821949aSmrg
23187821949aSmrg#ifdef XF86DRM_MODE
23197821949aSmrg#ifdef EXA_HANDLES_PIXMAPS
23207821949aSmrg    if (info->cs) {
23217821949aSmrg	info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
23227821949aSmrg#ifdef EXA_MIXED_PIXMAPS
23237821949aSmrg	info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
23247821949aSmrg#endif
23257821949aSmrg    }
23267821949aSmrg#endif
2327de2362d3Smrg#endif
2328de2362d3Smrg    info->accel_state->exa->pixmapOffsetAlign = 256;
2329de2362d3Smrg    info->accel_state->exa->pixmapPitchAlign = 256;
2330de2362d3Smrg
2331de2362d3Smrg    info->accel_state->exa->CheckComposite = R600CheckComposite;
2332de2362d3Smrg    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2333de2362d3Smrg    info->accel_state->exa->Composite = R600Composite;
2334de2362d3Smrg    info->accel_state->exa->DoneComposite = R600DoneComposite;
2335de2362d3Smrg
23367821949aSmrg#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
23377821949aSmrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
23387821949aSmrg
2339de2362d3Smrg    info->accel_state->exa->maxPitchBytes = 32768;
2340de2362d3Smrg    info->accel_state->exa->maxX = 8192;
23417821949aSmrg#else
23427821949aSmrg    info->accel_state->exa->maxX = 8192;
23437821949aSmrg#endif
2344de2362d3Smrg    info->accel_state->exa->maxY = 8192;
2345de2362d3Smrg
2346de2362d3Smrg    /* not supported yet */
2347de2362d3Smrg    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2348de2362d3Smrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2349de2362d3Smrg	info->accel_state->vsync = TRUE;
2350de2362d3Smrg    } else
2351de2362d3Smrg	info->accel_state->vsync = FALSE;
2352de2362d3Smrg
2353de2362d3Smrg    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
2354de2362d3Smrg	free(info->accel_state->exa);
2355de2362d3Smrg	return FALSE;
2356de2362d3Smrg    }
2357de2362d3Smrg
23587821949aSmrg#ifdef XF86DRM_MODE
23597821949aSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
23607821949aSmrg    if (!info->cs)
23617821949aSmrg#endif
23627821949aSmrg#endif
23637821949aSmrg	if (!info->gartLocation)
23647821949aSmrg	    return FALSE;
23657821949aSmrg
2366de2362d3Smrg    info->accel_state->XInited3D = FALSE;
23677821949aSmrg    info->accel_state->copy_area = NULL;
2368de2362d3Smrg    info->accel_state->src_obj[0].bo = NULL;
2369de2362d3Smrg    info->accel_state->src_obj[1].bo = NULL;
2370de2362d3Smrg    info->accel_state->dst_obj.bo = NULL;
2371de2362d3Smrg    info->accel_state->copy_area_bo = NULL;
2372de2362d3Smrg    info->accel_state->vbo.vb_start_op = -1;
2373de2362d3Smrg    info->accel_state->finish_op = r600_finish_op;
2374de2362d3Smrg    info->accel_state->vbo.verts_per_op = 3;
2375de2362d3Smrg    RADEONVlineHelperClear(pScrn);
2376de2362d3Smrg
23777821949aSmrg#ifdef XF86DRM_MODE
2378de2362d3Smrg    radeon_vbo_init_lists(pScrn);
23797821949aSmrg#endif
2380de2362d3Smrg
2381de2362d3Smrg    if (!R600AllocShaders(pScrn, pScreen))
2382de2362d3Smrg	return FALSE;
2383de2362d3Smrg
2384de2362d3Smrg    if (!R600LoadShaders(pScrn))
2385de2362d3Smrg	return FALSE;
2386de2362d3Smrg
2387de2362d3Smrg    exaMarkSync(pScreen);
2388de2362d3Smrg
2389de2362d3Smrg    return TRUE;
2390de2362d3Smrg
2391de2362d3Smrg}
2392de2362d3Smrg
2393