/* r600_exa.c revision 921a55d8 */
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Author: Alex Deucher <alexander.deucher@amd.com>
 *
 */
26b7e1c893Smrg
27b7e1c893Smrg#ifdef HAVE_CONFIG_H
28b7e1c893Smrg#include "config.h"
29b7e1c893Smrg#endif
30b7e1c893Smrg
31b7e1c893Smrg#include "xf86.h"
32b7e1c893Smrg
33b7e1c893Smrg#include "exa.h"
34b7e1c893Smrg
35b7e1c893Smrg#include "radeon.h"
36b7e1c893Smrg#include "radeon_macros.h"
37b7e1c893Smrg#include "radeon_reg.h"
38b7e1c893Smrg#include "r600_shader.h"
39b7e1c893Smrg#include "r600_reg.h"
40b7e1c893Smrg#include "r600_state.h"
410974d292Smrg#include "radeon_exa_shared.h"
42ad43ddacSmrg#include "radeon_vbo.h"
43ad43ddacSmrg
44b7e1c893Smrg/* #define SHOW_VERTEXES */
45b7e1c893Smrg
46921a55d8Smrguint32_t R600_ROP[16] = {
47b7e1c893Smrg    RADEON_ROP3_ZERO, /* GXclear        */
48b7e1c893Smrg    RADEON_ROP3_DSa,  /* Gxand          */
49b7e1c893Smrg    RADEON_ROP3_SDna, /* GXandReverse   */
50b7e1c893Smrg    RADEON_ROP3_S,    /* GXcopy         */
51b7e1c893Smrg    RADEON_ROP3_DSna, /* GXandInverted  */
52b7e1c893Smrg    RADEON_ROP3_D,    /* GXnoop         */
53b7e1c893Smrg    RADEON_ROP3_DSx,  /* GXxor          */
54b7e1c893Smrg    RADEON_ROP3_DSo,  /* GXor           */
55b7e1c893Smrg    RADEON_ROP3_DSon, /* GXnor          */
56b7e1c893Smrg    RADEON_ROP3_DSxn, /* GXequiv        */
57b7e1c893Smrg    RADEON_ROP3_Dn,   /* GXinvert       */
58b7e1c893Smrg    RADEON_ROP3_SDno, /* GXorReverse    */
59b7e1c893Smrg    RADEON_ROP3_Sn,   /* GXcopyInverted */
60b7e1c893Smrg    RADEON_ROP3_DSno, /* GXorInverted   */
61b7e1c893Smrg    RADEON_ROP3_DSan, /* GXnand         */
62b7e1c893Smrg    RADEON_ROP3_ONE,  /* GXset          */
63b7e1c893Smrg};
64b7e1c893Smrg
65ad43ddacSmrgBool
66ad43ddacSmrgR600SetAccelState(ScrnInfoPtr pScrn,
67ad43ddacSmrg		  struct r600_accel_object *src0,
68ad43ddacSmrg		  struct r600_accel_object *src1,
69ad43ddacSmrg		  struct r600_accel_object *dst,
70ad43ddacSmrg		  uint32_t vs_offset, uint32_t ps_offset,
71ad43ddacSmrg		  int rop, Pixel planemask)
72ad43ddacSmrg{
73ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
74ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
75ad43ddacSmrg
76ad43ddacSmrg    if (src0) {
77ad43ddacSmrg	memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object));
78ad43ddacSmrg	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
79ad43ddacSmrg    } else {
80ad43ddacSmrg	memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
81ad43ddacSmrg	accel_state->src_size[0] = 0;
82ad43ddacSmrg    }
83ad43ddacSmrg
84ad43ddacSmrg    if (src1) {
85ad43ddacSmrg	memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object));
86ad43ddacSmrg	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
87ad43ddacSmrg    } else {
88ad43ddacSmrg	memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
89ad43ddacSmrg	accel_state->src_size[1] = 0;
90ad43ddacSmrg    }
91ad43ddacSmrg
92ad43ddacSmrg    if (dst) {
93ad43ddacSmrg	memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object));
94ad43ddacSmrg	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
95ad43ddacSmrg    } else {
96ad43ddacSmrg	memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
97ad43ddacSmrg	accel_state->dst_size = 0;
98ad43ddacSmrg    }
99ad43ddacSmrg
100ad43ddacSmrg    accel_state->rop = rop;
101ad43ddacSmrg    accel_state->planemask = planemask;
102ad43ddacSmrg
103ad43ddacSmrg    /* bad pitch */
104ad43ddacSmrg    if (accel_state->src_obj[0].pitch & 7)
105ad43ddacSmrg	RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
106ad43ddacSmrg
107ad43ddacSmrg    /* bad offset */
108ad43ddacSmrg    if (accel_state->src_obj[0].offset & 0xff)
109ad43ddacSmrg	RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[0].offset));
110ad43ddacSmrg
111ad43ddacSmrg    /* bad pitch */
112ad43ddacSmrg    if (accel_state->src_obj[1].pitch & 7)
113ad43ddacSmrg	RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
114ad43ddacSmrg
115ad43ddacSmrg    /* bad offset */
116ad43ddacSmrg    if (accel_state->src_obj[1].offset & 0xff)
117ad43ddacSmrg	RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[1].offset));
118ad43ddacSmrg
119ad43ddacSmrg    if (accel_state->dst_obj.pitch & 7)
120ad43ddacSmrg	RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
121ad43ddacSmrg
122ad43ddacSmrg    if (accel_state->dst_obj.offset & 0xff)
123ad43ddacSmrg	RADEON_FALLBACK(("Bad dst offset 0x%08x\n", accel_state->dst_obj.offset));
124ad43ddacSmrg
125ad43ddacSmrg    accel_state->vs_size = 512;
126ad43ddacSmrg    accel_state->ps_size = 512;
127ad43ddacSmrg#if defined(XF86DRM_MODE)
128ad43ddacSmrg    if (info->cs) {
1290974d292Smrg	int ret;
130ad43ddacSmrg	accel_state->vs_mc_addr = vs_offset;
131ad43ddacSmrg	accel_state->ps_mc_addr = ps_offset;
132ad43ddacSmrg
133ad43ddacSmrg	radeon_cs_space_reset_bos(info->cs);
134ad43ddacSmrg	radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
135ad43ddacSmrg					  RADEON_GEM_DOMAIN_VRAM, 0);
136ad43ddacSmrg	if (accel_state->src_obj[0].bo)
137ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
138ad43ddacSmrg					      accel_state->src_obj[0].domain, 0);
139ad43ddacSmrg	if (accel_state->src_obj[1].bo)
140ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
141ad43ddacSmrg					      accel_state->src_obj[1].domain, 0);
142ad43ddacSmrg	if (accel_state->dst_obj.bo)
143ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
144ad43ddacSmrg					      0, accel_state->dst_obj.domain);
145ad43ddacSmrg	ret = radeon_cs_space_check(info->cs);
146ad43ddacSmrg	if (ret)
147ad43ddacSmrg	    RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
148ad43ddacSmrg
149ad43ddacSmrg    } else
150ad43ddacSmrg#endif
151ad43ddacSmrg    {
152ad43ddacSmrg	accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
153ad43ddacSmrg	    vs_offset;
154ad43ddacSmrg	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
155ad43ddacSmrg	    ps_offset;
156ad43ddacSmrg    }
157ad43ddacSmrg
158ad43ddacSmrg    return TRUE;
159ad43ddacSmrg}
160ad43ddacSmrg
161b7e1c893Smrgstatic void
162b7e1c893SmrgR600DoneSolid(PixmapPtr pPix);
163b7e1c893Smrg
164b7e1c893Smrgstatic Bool
165b7e1c893SmrgR600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
166b7e1c893Smrg{
167b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
168b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
169b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
170b7e1c893Smrg    cb_config_t     cb_conf;
171b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
172b7e1c893Smrg    int pmask = 0;
173b7e1c893Smrg    uint32_t a, r, g, b;
174b7e1c893Smrg    float ps_alu_consts[4];
175ad43ddacSmrg    struct r600_accel_object dst;
176b7e1c893Smrg
1770974d292Smrg    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
178ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype failed\n"));
1790974d292Smrg    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
180ad43ddacSmrg	RADEON_FALLBACK(("invalid planemask\n"));
181b7e1c893Smrg
182ad43ddacSmrg#if defined(XF86DRM_MODE)
183ad43ddacSmrg    if (info->cs) {
184ad43ddacSmrg	dst.offset = 0;
185ad43ddacSmrg	dst.bo = radeon_get_pixmap_bo(pPix);
186ad43ddacSmrg    } else
187ad43ddacSmrg#endif
188ad43ddacSmrg    {
189ad43ddacSmrg	dst.offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
190ad43ddacSmrg	dst.bo = NULL;
191ad43ddacSmrg    }
192b7e1c893Smrg
193ad43ddacSmrg    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
194ad43ddacSmrg    dst.width = pPix->drawable.width;
195ad43ddacSmrg    dst.height = pPix->drawable.height;
196ad43ddacSmrg    dst.bpp = pPix->drawable.bitsPerPixel;
197ad43ddacSmrg    dst.domain = RADEON_GEM_DOMAIN_VRAM;
1980974d292Smrg
199ad43ddacSmrg    if (!R600SetAccelState(pScrn,
200ad43ddacSmrg			   NULL,
201ad43ddacSmrg			   NULL,
202ad43ddacSmrg			   &dst,
203ad43ddacSmrg			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
204ad43ddacSmrg			   alu, pm))
205b7e1c893Smrg	return FALSE;
206b7e1c893Smrg
207b7e1c893Smrg    CLEAR (cb_conf);
208b7e1c893Smrg    CLEAR (vs_conf);
209b7e1c893Smrg    CLEAR (ps_conf);
210b7e1c893Smrg
211921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
2120974d292Smrg    radeon_cp_start(pScrn);
213b7e1c893Smrg
214921a55d8Smrg    r600_set_default_state(pScrn, accel_state->ib);
215b7e1c893Smrg
216921a55d8Smrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
217921a55d8Smrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
218921a55d8Smrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
219b7e1c893Smrg
220b7e1c893Smrg    /* Shader */
221b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
2220974d292Smrg    vs_conf.shader_size         = accel_state->vs_size;
223b7e1c893Smrg    vs_conf.num_gprs            = 2;
224b7e1c893Smrg    vs_conf.stack_size          = 0;
225ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
226921a55d8Smrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
227b7e1c893Smrg
228b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
2290974d292Smrg    ps_conf.shader_size         = accel_state->ps_size;
230b7e1c893Smrg    ps_conf.num_gprs            = 1;
231b7e1c893Smrg    ps_conf.stack_size          = 0;
232b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
233b7e1c893Smrg    ps_conf.clamp_consts        = 0;
234b7e1c893Smrg    ps_conf.export_mode         = 2;
235ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
236921a55d8Smrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
237b7e1c893Smrg
238b7e1c893Smrg    cb_conf.id = 0;
239ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
240ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
241ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
242ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
243b7e1c893Smrg
244ad43ddacSmrg    if (accel_state->dst_obj.bpp == 8) {
245b7e1c893Smrg	cb_conf.format = COLOR_8;
246b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
247ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 16) {
248b7e1c893Smrg	cb_conf.format = COLOR_5_6_5;
249b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
250b7e1c893Smrg    } else {
251b7e1c893Smrg	cb_conf.format = COLOR_8_8_8_8;
252b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
253b7e1c893Smrg    }
254b7e1c893Smrg    cb_conf.source_format = 1;
255b7e1c893Smrg    cb_conf.blend_clamp = 1;
256921a55d8Smrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
257b7e1c893Smrg
2580974d292Smrg    /* Render setup */
2590974d292Smrg    if (accel_state->planemask & 0x000000ff)
2600974d292Smrg	pmask |= 4; /* B */
2610974d292Smrg    if (accel_state->planemask & 0x0000ff00)
2620974d292Smrg	pmask |= 2; /* G */
2630974d292Smrg    if (accel_state->planemask & 0x00ff0000)
2640974d292Smrg	pmask |= 1; /* R */
2650974d292Smrg    if (accel_state->planemask & 0xff000000)
2660974d292Smrg	pmask |= 8; /* A */
2670974d292Smrg    BEGIN_BATCH(20);
2680974d292Smrg    EREG(accel_state->ib, CB_TARGET_MASK,                      (pmask << TARGET0_ENABLE_shift));
269921a55d8Smrg    EREG(accel_state->ib, CB_COLOR_CONTROL,                    R600_ROP[accel_state->rop]);
2700974d292Smrg
271b7e1c893Smrg    /* Interpolator setup */
272b7e1c893Smrg    /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */
273b7e1c893Smrg    EREG(accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift));
274b7e1c893Smrg    EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
275b7e1c893Smrg    /* color semantic id 0 -> GPR[0] */
2760974d292Smrg    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 << 2),       ((0    << SEMANTIC_shift)	|
277b7e1c893Smrg								  (0x03 << DEFAULT_VAL_shift)	|
278b7e1c893Smrg								  FLAT_SHADE_bit		|
279b7e1c893Smrg								  SEL_CENTROID_bit));
2800974d292Smrg
2810974d292Smrg    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
2820974d292Smrg     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
2830974d292Smrg    /* no VS exports as PS input (NUM_INTERP is not zero based, no minus one) */
2840974d292Smrg    PACK0(accel_state->ib, SPI_PS_IN_CONTROL_0, 3);
2850974d292Smrg    E32(accel_state->ib, (0 << NUM_INTERP_shift));
2860974d292Smrg    E32(accel_state->ib, 0);
2870974d292Smrg    E32(accel_state->ib, FLAT_SHADE_ENA_bit);
288ad43ddacSmrg    END_BATCH();
289b7e1c893Smrg
290b7e1c893Smrg    /* PS alu constants */
291ad43ddacSmrg    if (accel_state->dst_obj.bpp == 16) {
292b7e1c893Smrg	r = (fg >> 11) & 0x1f;
293b7e1c893Smrg	g = (fg >> 5) & 0x3f;
294b7e1c893Smrg	b = (fg >> 0) & 0x1f;
295b7e1c893Smrg	ps_alu_consts[0] = (float)r / 31; /* R */
296b7e1c893Smrg	ps_alu_consts[1] = (float)g / 63; /* G */
297b7e1c893Smrg	ps_alu_consts[2] = (float)b / 31; /* B */
298b7e1c893Smrg	ps_alu_consts[3] = 1.0; /* A */
299ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 8) {
300b7e1c893Smrg	a = (fg >> 0) & 0xff;
301b7e1c893Smrg	ps_alu_consts[0] = 0.0; /* R */
302b7e1c893Smrg	ps_alu_consts[1] = 0.0; /* G */
303b7e1c893Smrg	ps_alu_consts[2] = 0.0; /* B */
304b7e1c893Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
305b7e1c893Smrg    } else {
306b7e1c893Smrg	a = (fg >> 24) & 0xff;
307b7e1c893Smrg	r = (fg >> 16) & 0xff;
308b7e1c893Smrg	g = (fg >> 8) & 0xff;
309b7e1c893Smrg	b = (fg >> 0) & 0xff;
310b7e1c893Smrg	ps_alu_consts[0] = (float)r / 255; /* R */
311b7e1c893Smrg	ps_alu_consts[1] = (float)g / 255; /* G */
312b7e1c893Smrg	ps_alu_consts[2] = (float)b / 255; /* B */
313b7e1c893Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
314b7e1c893Smrg    }
315921a55d8Smrg    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
316921a55d8Smrg			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
317b7e1c893Smrg
318ad43ddacSmrg    if (accel_state->vsync)
3190974d292Smrg	RADEONVlineHelperClear(pScrn);
320b7e1c893Smrg
321b7e1c893Smrg    return TRUE;
322b7e1c893Smrg}
323b7e1c893Smrg
324b7e1c893Smrg
325b7e1c893Smrgstatic void
326b7e1c893SmrgR600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
327b7e1c893Smrg{
328b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
329b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
330b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
331b7e1c893Smrg    float *vb;
332b7e1c893Smrg
333ad43ddacSmrg    if (accel_state->vsync)
3340974d292Smrg	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
335b7e1c893Smrg
336921a55d8Smrg    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
337b7e1c893Smrg
338b7e1c893Smrg    vb[0] = (float)x1;
339b7e1c893Smrg    vb[1] = (float)y1;
340b7e1c893Smrg
341b7e1c893Smrg    vb[2] = (float)x1;
342b7e1c893Smrg    vb[3] = (float)y2;
343b7e1c893Smrg
344b7e1c893Smrg    vb[4] = (float)x2;
345b7e1c893Smrg    vb[5] = (float)y2;
346b7e1c893Smrg
347921a55d8Smrg    radeon_vbo_commit(pScrn, &accel_state->vbo);
348b7e1c893Smrg}
349b7e1c893Smrg
350b7e1c893Smrgstatic void
351b7e1c893SmrgR600DoneSolid(PixmapPtr pPix)
352b7e1c893Smrg{
353b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
354b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
355b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
356b7e1c893Smrg
357ad43ddacSmrg    if (accel_state->vsync)
358921a55d8Smrg	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
359921a55d8Smrg				accel_state->vline_crtc,
360921a55d8Smrg				accel_state->vline_y1,
361921a55d8Smrg				accel_state->vline_y2);
362b7e1c893Smrg
363ad43ddacSmrg    r600_finish_op(pScrn, 8);
364b7e1c893Smrg}
365b7e1c893Smrg
366b7e1c893Smrgstatic void
367ad43ddacSmrgR600DoPrepareCopy(ScrnInfoPtr pScrn)
368b7e1c893Smrg{
369b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
370b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
371b7e1c893Smrg    int pmask = 0;
372b7e1c893Smrg    cb_config_t     cb_conf;
373b7e1c893Smrg    tex_resource_t  tex_res;
374b7e1c893Smrg    tex_sampler_t   tex_samp;
375b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
376b7e1c893Smrg
377b7e1c893Smrg    CLEAR (cb_conf);
378b7e1c893Smrg    CLEAR (tex_res);
379b7e1c893Smrg    CLEAR (tex_samp);
380b7e1c893Smrg    CLEAR (vs_conf);
381b7e1c893Smrg    CLEAR (ps_conf);
382b7e1c893Smrg
383921a55d8Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
3840974d292Smrg    radeon_cp_start(pScrn);
385b7e1c893Smrg
386921a55d8Smrg    r600_set_default_state(pScrn, accel_state->ib);
387b7e1c893Smrg
388921a55d8Smrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
389921a55d8Smrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
390921a55d8Smrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
391b7e1c893Smrg
392b7e1c893Smrg    /* Shader */
393b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
3940974d292Smrg    vs_conf.shader_size         = accel_state->vs_size;
395b7e1c893Smrg    vs_conf.num_gprs            = 2;
396b7e1c893Smrg    vs_conf.stack_size          = 0;
397ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
398921a55d8Smrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
399b7e1c893Smrg
400b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
4010974d292Smrg    ps_conf.shader_size         = accel_state->ps_size;
402b7e1c893Smrg    ps_conf.num_gprs            = 1;
403b7e1c893Smrg    ps_conf.stack_size          = 0;
404b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
405b7e1c893Smrg    ps_conf.clamp_consts        = 0;
406b7e1c893Smrg    ps_conf.export_mode         = 2;
407ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
408921a55d8Smrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
409b7e1c893Smrg
410b7e1c893Smrg    /* Texture */
411b7e1c893Smrg    tex_res.id                  = 0;
412ad43ddacSmrg    tex_res.w                   = accel_state->src_obj[0].width;
413ad43ddacSmrg    tex_res.h                   = accel_state->src_obj[0].height;
414ad43ddacSmrg    tex_res.pitch               = accel_state->src_obj[0].pitch;
415b7e1c893Smrg    tex_res.depth               = 0;
416b7e1c893Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
417ad43ddacSmrg    tex_res.base                = accel_state->src_obj[0].offset;
418ad43ddacSmrg    tex_res.mip_base            = accel_state->src_obj[0].offset;
4190974d292Smrg    tex_res.size                = accel_state->src_size[0];
420ad43ddacSmrg    tex_res.bo                  = accel_state->src_obj[0].bo;
421ad43ddacSmrg    tex_res.mip_bo              = accel_state->src_obj[0].bo;
422ad43ddacSmrg    if (accel_state->src_obj[0].bpp == 8) {
423b7e1c893Smrg	tex_res.format              = FMT_8;
424b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
425b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
426b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
427b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
428ad43ddacSmrg    } else if (accel_state->src_obj[0].bpp == 16) {
429b7e1c893Smrg	tex_res.format              = FMT_5_6_5;
430b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
431b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
432b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
433b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
434b7e1c893Smrg    } else {
435b7e1c893Smrg	tex_res.format              = FMT_8_8_8_8;
436b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
437b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
438b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
439b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
440b7e1c893Smrg    }
441b7e1c893Smrg
442b7e1c893Smrg    tex_res.request_size        = 1;
443b7e1c893Smrg    tex_res.base_level          = 0;
444b7e1c893Smrg    tex_res.last_level          = 0;
445b7e1c893Smrg    tex_res.perf_modulation     = 0;
446921a55d8Smrg    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
447b7e1c893Smrg
448b7e1c893Smrg    tex_samp.id                 = 0;
449b7e1c893Smrg    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
450b7e1c893Smrg    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
451b7e1c893Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
452b7e1c893Smrg    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
453b7e1c893Smrg    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
454b7e1c893Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
455b7e1c893Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
456921a55d8Smrg    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
457b7e1c893Smrg
458b7e1c893Smrg    cb_conf.id = 0;
459ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
460ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
461ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
462ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
463ad43ddacSmrg    if (accel_state->dst_obj.bpp == 8) {
464b7e1c893Smrg	cb_conf.format = COLOR_8;
465b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
466ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 16) {
467b7e1c893Smrg	cb_conf.format = COLOR_5_6_5;
468b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
469b7e1c893Smrg    } else {
470b7e1c893Smrg	cb_conf.format = COLOR_8_8_8_8;
471b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
472b7e1c893Smrg    }
473b7e1c893Smrg    cb_conf.source_format = 1;
474b7e1c893Smrg    cb_conf.blend_clamp = 1;
475921a55d8Smrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
476b7e1c893Smrg
4770974d292Smrg    /* Render setup */
4780974d292Smrg    if (accel_state->planemask & 0x000000ff)
4790974d292Smrg	pmask |= 4; /* B */
4800974d292Smrg    if (accel_state->planemask & 0x0000ff00)
4810974d292Smrg	pmask |= 2; /* G */
4820974d292Smrg    if (accel_state->planemask & 0x00ff0000)
4830974d292Smrg	pmask |= 1; /* R */
4840974d292Smrg    if (accel_state->planemask & 0xff000000)
4850974d292Smrg	pmask |= 8; /* A */
4860974d292Smrg    BEGIN_BATCH(20);
4870974d292Smrg    EREG(accel_state->ib, CB_TARGET_MASK,                      (pmask << TARGET0_ENABLE_shift));
488921a55d8Smrg    EREG(accel_state->ib, CB_COLOR_CONTROL,                    R600_ROP[accel_state->rop]);
4890974d292Smrg
490b7e1c893Smrg    /* Interpolator setup */
491b7e1c893Smrg    /* export tex coord from VS */
492b7e1c893Smrg    EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
493b7e1c893Smrg    EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
4940974d292Smrg    /* color semantic id 0 -> GPR[0] */
4950974d292Smrg    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 << 2),       ((0    << SEMANTIC_shift)	|
4960974d292Smrg								(0x01 << DEFAULT_VAL_shift)	|
4970974d292Smrg								SEL_CENTROID_bit));
498b7e1c893Smrg
499b7e1c893Smrg    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
500b7e1c893Smrg     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
501b7e1c893Smrg    /* input tex coord from VS */
5020974d292Smrg    PACK0(accel_state->ib, SPI_PS_IN_CONTROL_0, 3);
5030974d292Smrg    E32(accel_state->ib, ((1 << NUM_INTERP_shift)));
5040974d292Smrg    E32(accel_state->ib, 0);
5050974d292Smrg    E32(accel_state->ib, 0);
506ad43ddacSmrg    END_BATCH();
507b7e1c893Smrg
508b7e1c893Smrg}
509b7e1c893Smrg
510b7e1c893Smrgstatic void
511b7e1c893SmrgR600DoCopy(ScrnInfoPtr pScrn)
512b7e1c893Smrg{
513ad43ddacSmrg    r600_finish_op(pScrn, 16);
514ad43ddacSmrg}
515ad43ddacSmrg
516ad43ddacSmrgstatic void
517ad43ddacSmrgR600DoCopyVline(PixmapPtr pPix)
518ad43ddacSmrg{
519ad43ddacSmrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
520b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
521b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
522b7e1c893Smrg
523ad43ddacSmrg    if (accel_state->vsync)
524921a55d8Smrg	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
525921a55d8Smrg				accel_state->vline_crtc,
526921a55d8Smrg				accel_state->vline_y1,
527921a55d8Smrg				accel_state->vline_y2);
528b7e1c893Smrg
529ad43ddacSmrg    r600_finish_op(pScrn, 16);
530b7e1c893Smrg}
531b7e1c893Smrg
532b7e1c893Smrgstatic void
533b7e1c893SmrgR600AppendCopyVertex(ScrnInfoPtr pScrn,
534b7e1c893Smrg		     int srcX, int srcY,
535b7e1c893Smrg		     int dstX, int dstY,
536b7e1c893Smrg		     int w, int h)
537b7e1c893Smrg{
538921a55d8Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
539921a55d8Smrg    struct radeon_accel_state *accel_state = info->accel_state;
540b7e1c893Smrg    float *vb;
541b7e1c893Smrg
542921a55d8Smrg    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
543b7e1c893Smrg
544b7e1c893Smrg    vb[0] = (float)dstX;
545b7e1c893Smrg    vb[1] = (float)dstY;
546b7e1c893Smrg    vb[2] = (float)srcX;
547b7e1c893Smrg    vb[3] = (float)srcY;
548b7e1c893Smrg
549b7e1c893Smrg    vb[4] = (float)dstX;
550b7e1c893Smrg    vb[5] = (float)(dstY + h);
551b7e1c893Smrg    vb[6] = (float)srcX;
552b7e1c893Smrg    vb[7] = (float)(srcY + h);
553b7e1c893Smrg
554b7e1c893Smrg    vb[8] = (float)(dstX + w);
555b7e1c893Smrg    vb[9] = (float)(dstY + h);
556b7e1c893Smrg    vb[10] = (float)(srcX + w);
557b7e1c893Smrg    vb[11] = (float)(srcY + h);
558b7e1c893Smrg
559921a55d8Smrg    radeon_vbo_commit(pScrn, &accel_state->vbo);
560b7e1c893Smrg}
561b7e1c893Smrg
562b7e1c893Smrgstatic Bool
563b7e1c893SmrgR600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
564b7e1c893Smrg		int xdir, int ydir,
565b7e1c893Smrg		int rop,
566b7e1c893Smrg		Pixel planemask)
567b7e1c893Smrg{
568b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
569b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
570b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
571ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
572ad43ddacSmrg
5730974d292Smrg    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
574ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype src failed\n"));
5750974d292Smrg    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
576ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype dst failed\n"));
5770974d292Smrg    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
578ad43ddacSmrg	RADEON_FALLBACK(("Invalid planemask\n"));
579ad43ddacSmrg
580ad43ddacSmrg    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
581ad43ddacSmrg    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
582ad43ddacSmrg
583ad43ddacSmrg    accel_state->same_surface = FALSE;
584ad43ddacSmrg
585ad43ddacSmrg#if defined(XF86DRM_MODE)
586ad43ddacSmrg    if (info->cs) {
587ad43ddacSmrg	src_obj.offset = 0;
588ad43ddacSmrg	dst_obj.offset = 0;
589ad43ddacSmrg	src_obj.bo = radeon_get_pixmap_bo(pSrc);
590ad43ddacSmrg	dst_obj.bo = radeon_get_pixmap_bo(pDst);
591ad43ddacSmrg	if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
592ad43ddacSmrg	    accel_state->same_surface = TRUE;
593ad43ddacSmrg    } else
594b7e1c893Smrg#endif
595ad43ddacSmrg    {
596ad43ddacSmrg	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
597ad43ddacSmrg	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
598ad43ddacSmrg	if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst))
599ad43ddacSmrg	    accel_state->same_surface = TRUE;
600ad43ddacSmrg	src_obj.bo = NULL;
601ad43ddacSmrg	dst_obj.bo = NULL;
602b7e1c893Smrg    }
603b7e1c893Smrg
604ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
605ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
606ad43ddacSmrg    src_obj.bpp = pSrc->drawable.bitsPerPixel;
607ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
608921a55d8Smrg
609ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
610ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
611ad43ddacSmrg    dst_obj.bpp = pDst->drawable.bitsPerPixel;
612ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
613ad43ddacSmrg
614ad43ddacSmrg    if (!R600SetAccelState(pScrn,
615ad43ddacSmrg			   &src_obj,
616ad43ddacSmrg			   NULL,
617ad43ddacSmrg			   &dst_obj,
618ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
619ad43ddacSmrg			   rop, planemask))
620b7e1c893Smrg	return FALSE;
621b7e1c893Smrg
622ad43ddacSmrg    if (accel_state->same_surface == TRUE) {
623ad43ddacSmrg	unsigned long size = pDst->drawable.height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
624b7e1c893Smrg
625ad43ddacSmrg#if defined(XF86DRM_MODE)
626ad43ddacSmrg	if (info->cs) {
627ad43ddacSmrg	    if (accel_state->copy_area_bo) {
628ad43ddacSmrg		radeon_bo_unref(accel_state->copy_area_bo);
629ad43ddacSmrg		accel_state->copy_area_bo = NULL;
630b7e1c893Smrg	    }
631ad43ddacSmrg	    accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
632ad43ddacSmrg						       RADEON_GEM_DOMAIN_VRAM,
633ad43ddacSmrg						       0);
634ad43ddacSmrg	    if (accel_state->copy_area_bo == NULL)
635ad43ddacSmrg		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
636ad43ddacSmrg
637ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
638ad43ddacSmrg					      RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_DOMAIN_VRAM);
639ad43ddacSmrg	    if (radeon_cs_space_check(info->cs)) {
640ad43ddacSmrg		radeon_bo_unref(accel_state->copy_area_bo);
641ad43ddacSmrg		accel_state->copy_area_bo = NULL;
642ad43ddacSmrg		return FALSE;
643ad43ddacSmrg	    }
644ad43ddacSmrg	    accel_state->copy_area = (void*)accel_state->copy_area_bo;
645ad43ddacSmrg	} else
646ad43ddacSmrg#endif
647ad43ddacSmrg	{
648ad43ddacSmrg	    if (accel_state->copy_area) {
649ad43ddacSmrg		exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
650ad43ddacSmrg		accel_state->copy_area = NULL;
651ad43ddacSmrg	    }
652ad43ddacSmrg	    accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
653ad43ddacSmrg	    if (!accel_state->copy_area)
654ad43ddacSmrg		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
655b7e1c893Smrg	}
656ad43ddacSmrg    } else
657ad43ddacSmrg	R600DoPrepareCopy(pScrn);
658b7e1c893Smrg
659ad43ddacSmrg    if (accel_state->vsync)
6600974d292Smrg	RADEONVlineHelperClear(pScrn);
661ad43ddacSmrg
662ad43ddacSmrg    return TRUE;
663b7e1c893Smrg}
664b7e1c893Smrg
665b7e1c893Smrgstatic void
666b7e1c893SmrgR600Copy(PixmapPtr pDst,
667b7e1c893Smrg	 int srcX, int srcY,
668b7e1c893Smrg	 int dstX, int dstY,
669b7e1c893Smrg	 int w, int h)
670b7e1c893Smrg{
671b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
672b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
673b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
674b7e1c893Smrg
675b7e1c893Smrg    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
676b7e1c893Smrg	return;
677b7e1c893Smrg
678ad43ddacSmrg    if (accel_state->vsync)
6790974d292Smrg	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
680b7e1c893Smrg
681ad43ddacSmrg    if (accel_state->same_surface && accel_state->copy_area) {
682ad43ddacSmrg	uint32_t orig_offset, tmp_offset;
683ad43ddacSmrg	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
684ad43ddacSmrg	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
685ad43ddacSmrg	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
686ad43ddacSmrg
687ad43ddacSmrg#if defined(XF86DRM_MODE)
688ad43ddacSmrg	if (info->cs) {
689ad43ddacSmrg	    tmp_offset = 0;
690ad43ddacSmrg	    orig_offset = 0;
691ad43ddacSmrg	} else
692ad43ddacSmrg#endif
693ad43ddacSmrg	{
694b7e1c893Smrg	    tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset;
695b7e1c893Smrg	    orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
696ad43ddacSmrg	}
697b7e1c893Smrg
698ad43ddacSmrg	/* src to tmp */
699ad43ddacSmrg	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
700ad43ddacSmrg	accel_state->dst_obj.bo = accel_state->copy_area_bo;
701ad43ddacSmrg	accel_state->dst_obj.offset = tmp_offset;
702ad43ddacSmrg	R600DoPrepareCopy(pScrn);
703b7e1c893Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
704b7e1c893Smrg	R600DoCopy(pScrn);
705ad43ddacSmrg
706ad43ddacSmrg	/* tmp to dst */
707ad43ddacSmrg	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
708ad43ddacSmrg	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
709ad43ddacSmrg	accel_state->src_obj[0].offset = tmp_offset;
710ad43ddacSmrg	accel_state->dst_obj.domain = orig_dst_domain;
711ad43ddacSmrg	accel_state->dst_obj.bo = orig_bo;
712ad43ddacSmrg	accel_state->dst_obj.offset = orig_offset;
713ad43ddacSmrg	R600DoPrepareCopy(pScrn);
714ad43ddacSmrg	R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
715ad43ddacSmrg	R600DoCopyVline(pDst);
716ad43ddacSmrg
717ad43ddacSmrg	/* restore state */
718ad43ddacSmrg	accel_state->src_obj[0].domain = orig_src_domain;
719ad43ddacSmrg	accel_state->src_obj[0].bo = orig_bo;
720ad43ddacSmrg	accel_state->src_obj[0].offset = orig_offset;
721ad43ddacSmrg    } else
722b7e1c893Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
723b7e1c893Smrg
724b7e1c893Smrg}
725b7e1c893Smrg
726b7e1c893Smrgstatic void
727b7e1c893SmrgR600DoneCopy(PixmapPtr pDst)
728b7e1c893Smrg{
729b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
730b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
731b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
732b7e1c893Smrg
733b7e1c893Smrg    if (!accel_state->same_surface)
734ad43ddacSmrg	R600DoCopyVline(pDst);
735b7e1c893Smrg
736b7e1c893Smrg    if (accel_state->copy_area) {
737ad43ddacSmrg	if (!info->cs)
738ad43ddacSmrg	    exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
739b7e1c893Smrg	accel_state->copy_area = NULL;
740b7e1c893Smrg    }
741b7e1c893Smrg
742b7e1c893Smrg}
743b7e1c893Smrg
/*
 * Blend description for one Render compositing operator; R600BlendOp[]
 * holds one entry per operator.  Field order matters: the table uses
 * positional initializers.
 */
744b7e1c893Smrgstruct blendinfo {
    /* nonzero when the source blend factor reads destination alpha;
     * R600GetBlendCntl() rewrites that factor for alpha-less destinations */
745b7e1c893Smrg    Bool dst_alpha;
    /* nonzero when the destination blend factor reads source alpha;
     * consulted for component-alpha masks */
746b7e1c893Smrg    Bool src_alpha;
    /* source and destination blend factors, already shifted into their
     * COLOR_SRCBLEND / COLOR_DESTBLEND fields */
747b7e1c893Smrg    uint32_t blend_cntl;
748b7e1c893Smrg};
749b7e1c893Smrg
750b7e1c893Smrgstatic struct blendinfo R600BlendOp[] = {
751b7e1c893Smrg    /* Clear */
752b7e1c893Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
753b7e1c893Smrg    /* Src */
754b7e1c893Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
755b7e1c893Smrg    /* Dst */
756b7e1c893Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
757b7e1c893Smrg    /* Over */
758b7e1c893Smrg    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
759b7e1c893Smrg    /* OverReverse */
760b7e1c893Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
761b7e1c893Smrg    /* In */
762b7e1c893Smrg    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
763b7e1c893Smrg    /* InReverse */
764b7e1c893Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
765b7e1c893Smrg    /* Out */
766b7e1c893Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
767b7e1c893Smrg    /* OutReverse */
768b7e1c893Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
769b7e1c893Smrg    /* Atop */
770b7e1c893Smrg    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
771b7e1c893Smrg    /* AtopReverse */
772b7e1c893Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
773b7e1c893Smrg    /* Xor */
774b7e1c893Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
775b7e1c893Smrg    /* Add */
776b7e1c893Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
777b7e1c893Smrg};
778b7e1c893Smrg
/*
 * Pairs a Render picture format with the hardware texture format used to
 * sample it; see R600TexFormats[].
 */
779b7e1c893Smrgstruct formatinfo {
    /* Render picture format (PICT_*) */
780b7e1c893Smrg    unsigned int fmt;
    /* hardware texture format (FMT_*) stored into tex_res.format */
781b7e1c893Smrg    uint32_t card_fmt;
782b7e1c893Smrg};
783b7e1c893Smrg
/*
 * Picture formats accepted as texture sources.  R600CheckCompositeTexture()
 * rejects any format missing from this table and R600TextureSetup() takes
 * the hardware format from the matching entry.  Several picture formats
 * share one hardware format; their channel order is sorted out by the
 * per-format swizzles chosen in R600TextureSetup().
 */
784b7e1c893Smrgstatic struct formatinfo R600TexFormats[] = {
785b7e1c893Smrg    {PICT_a8r8g8b8,	FMT_8_8_8_8},
786b7e1c893Smrg    {PICT_x8r8g8b8,	FMT_8_8_8_8},
787b7e1c893Smrg    {PICT_a8b8g8r8,	FMT_8_8_8_8},
788b7e1c893Smrg    {PICT_x8b8g8r8,	FMT_8_8_8_8},
/* BGRA entries exist only when the headers define PICT_TYPE_BGRA */
789ad43ddacSmrg#ifdef PICT_TYPE_BGRA
790ad43ddacSmrg    {PICT_b8g8r8a8,	FMT_8_8_8_8},
791ad43ddacSmrg    {PICT_b8g8r8x8,	FMT_8_8_8_8},
792ad43ddacSmrg#endif
793b7e1c893Smrg    {PICT_r5g6b5,	FMT_5_6_5},
794b7e1c893Smrg    {PICT_a1r5g5b5,	FMT_1_5_5_5},
795b7e1c893Smrg    {PICT_x1r5g5b5,     FMT_1_5_5_5},
796b7e1c893Smrg    {PICT_a8,		FMT_8},
797b7e1c893Smrg};
798b7e1c893Smrg
799b7e1c893Smrgstatic uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
800b7e1c893Smrg{
801b7e1c893Smrg    uint32_t sblend, dblend;
802b7e1c893Smrg
803b7e1c893Smrg    sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
804b7e1c893Smrg    dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
805b7e1c893Smrg
806b7e1c893Smrg    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
807b7e1c893Smrg     * it as always 1.
808b7e1c893Smrg     */
809b7e1c893Smrg    if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
810b7e1c893Smrg	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
811b7e1c893Smrg	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
812b7e1c893Smrg	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
813b7e1c893Smrg	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
814b7e1c893Smrg    }
815b7e1c893Smrg
816b7e1c893Smrg    /* If the source alpha is being used, then we should only be in a case where
817b7e1c893Smrg     * the source blend factor is 0, and the source blend value is the mask
818b7e1c893Smrg     * channels multiplied by the source picture's alpha.
819b7e1c893Smrg     */
820b7e1c893Smrg    if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
821b7e1c893Smrg	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
822b7e1c893Smrg	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
823b7e1c893Smrg	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
824b7e1c893Smrg	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
825b7e1c893Smrg	}
826b7e1c893Smrg    }
827b7e1c893Smrg
828b7e1c893Smrg    return sblend | dblend;
829b7e1c893Smrg}
830b7e1c893Smrg
831b7e1c893Smrgstatic Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
832b7e1c893Smrg{
833b7e1c893Smrg    switch (pDstPicture->format) {
834b7e1c893Smrg    case PICT_a8r8g8b8:
835b7e1c893Smrg    case PICT_x8r8g8b8:
836ad43ddacSmrg    case PICT_a8b8g8r8:
837ad43ddacSmrg    case PICT_x8b8g8r8:
838ad43ddacSmrg#ifdef PICT_TYPE_BGRA
839ad43ddacSmrg    case PICT_b8g8r8a8:
840ad43ddacSmrg    case PICT_b8g8r8x8:
841ad43ddacSmrg#endif
842b7e1c893Smrg	*dst_format = COLOR_8_8_8_8;
843b7e1c893Smrg	break;
844b7e1c893Smrg    case PICT_r5g6b5:
845b7e1c893Smrg	*dst_format = COLOR_5_6_5;
846b7e1c893Smrg	break;
847b7e1c893Smrg    case PICT_a1r5g5b5:
848b7e1c893Smrg    case PICT_x1r5g5b5:
849b7e1c893Smrg	*dst_format = COLOR_1_5_5_5;
850b7e1c893Smrg	break;
851b7e1c893Smrg    case PICT_a8:
852b7e1c893Smrg	*dst_format = COLOR_8;
853b7e1c893Smrg	break;
854b7e1c893Smrg    default:
855b7e1c893Smrg	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
856b7e1c893Smrg	       (int)pDstPicture->format));
857b7e1c893Smrg    }
858b7e1c893Smrg    return TRUE;
859b7e1c893Smrg}
860b7e1c893Smrg
861b7e1c893Smrgstatic Bool R600CheckCompositeTexture(PicturePtr pPict,
862b7e1c893Smrg				      PicturePtr pDstPict,
863b7e1c893Smrg				      int op,
864b7e1c893Smrg				      int unit)
865b7e1c893Smrg{
866b7e1c893Smrg    int w = pPict->pDrawable->width;
867b7e1c893Smrg    int h = pPict->pDrawable->height;
868ad43ddacSmrg    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
869b7e1c893Smrg    unsigned int i;
870b7e1c893Smrg    int max_tex_w, max_tex_h;
871b7e1c893Smrg
872b7e1c893Smrg    max_tex_w = 8192;
873b7e1c893Smrg    max_tex_h = 8192;
874b7e1c893Smrg
875b7e1c893Smrg    if ((w > max_tex_w) || (h > max_tex_h))
876b7e1c893Smrg	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
877b7e1c893Smrg
878b7e1c893Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
879b7e1c893Smrg	if (R600TexFormats[i].fmt == pPict->format)
880b7e1c893Smrg	    break;
881b7e1c893Smrg    }
882b7e1c893Smrg    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
883b7e1c893Smrg	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
884b7e1c893Smrg			 (int)pPict->format));
885b7e1c893Smrg
886b7e1c893Smrg    if (pPict->filter != PictFilterNearest &&
887b7e1c893Smrg	pPict->filter != PictFilterBilinear)
888b7e1c893Smrg	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
889b7e1c893Smrg
890b7e1c893Smrg    /* for REPEAT_NONE, Render semantics are that sampling outside the source
891b7e1c893Smrg     * picture results in alpha=0 pixels. We can implement this with a border color
892b7e1c893Smrg     * *if* our source texture has an alpha channel, otherwise we need to fall
893b7e1c893Smrg     * back. If we're not transformed then we hope that upper layers have clipped
894b7e1c893Smrg     * rendering to the bounds of the source drawable, in which case it doesn't
895b7e1c893Smrg     * matter. I have not, however, verified that the X server always does such
896b7e1c893Smrg     * clipping.
897b7e1c893Smrg     */
898b7e1c893Smrg    /* FIXME R6xx */
899ad43ddacSmrg    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
900b7e1c893Smrg	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
901b7e1c893Smrg	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
902b7e1c893Smrg    }
903b7e1c893Smrg
904921a55d8Smrg    if (!radeon_transform_is_affine(pPict->transform))
905921a55d8Smrg	RADEON_FALLBACK(("non-affine transforms not supported\n"));
906921a55d8Smrg
907b7e1c893Smrg    return TRUE;
908b7e1c893Smrg}
909b7e1c893Smrg
910b7e1c893Smrgstatic Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
911b7e1c893Smrg					int unit)
912b7e1c893Smrg{
913b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
914b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
915b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
916b7e1c893Smrg    int w = pPict->pDrawable->width;
917b7e1c893Smrg    int h = pPict->pDrawable->height;
918ad43ddacSmrg    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
919b7e1c893Smrg    unsigned int i;
920b7e1c893Smrg    tex_resource_t  tex_res;
921b7e1c893Smrg    tex_sampler_t   tex_samp;
922b7e1c893Smrg    int pix_r, pix_g, pix_b, pix_a;
923ad43ddacSmrg    float vs_alu_consts[8];
924b7e1c893Smrg
925b7e1c893Smrg    CLEAR (tex_res);
926b7e1c893Smrg    CLEAR (tex_samp);
927b7e1c893Smrg
928b7e1c893Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
929b7e1c893Smrg	if (R600TexFormats[i].fmt == pPict->format)
930b7e1c893Smrg	    break;
931b7e1c893Smrg    }
932b7e1c893Smrg
933b7e1c893Smrg    /* Texture */
934b7e1c893Smrg    tex_res.id                  = unit;
935b7e1c893Smrg    tex_res.w                   = w;
936b7e1c893Smrg    tex_res.h                   = h;
937ad43ddacSmrg    tex_res.pitch               = accel_state->src_obj[unit].pitch;
938b7e1c893Smrg    tex_res.depth               = 0;
939b7e1c893Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
940ad43ddacSmrg    tex_res.base                = accel_state->src_obj[unit].offset;
941ad43ddacSmrg    tex_res.mip_base            = accel_state->src_obj[unit].offset;
9420974d292Smrg    tex_res.size                = accel_state->src_size[unit];
943b7e1c893Smrg    tex_res.format              = R600TexFormats[i].card_fmt;
944ad43ddacSmrg    tex_res.bo                  = accel_state->src_obj[unit].bo;
945ad43ddacSmrg    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
946b7e1c893Smrg    tex_res.request_size        = 1;
947b7e1c893Smrg
948b7e1c893Smrg    /* component swizzles */
949b7e1c893Smrg    switch (pPict->format) {
950b7e1c893Smrg    case PICT_a1r5g5b5:
951b7e1c893Smrg    case PICT_a8r8g8b8:
952b7e1c893Smrg	pix_r = SQ_SEL_Z; /* R */
953b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
954b7e1c893Smrg	pix_b = SQ_SEL_X; /* B */
955b7e1c893Smrg	pix_a = SQ_SEL_W; /* A */
956b7e1c893Smrg	break;
957b7e1c893Smrg    case PICT_a8b8g8r8:
958b7e1c893Smrg	pix_r = SQ_SEL_X; /* R */
959b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
960b7e1c893Smrg	pix_b = SQ_SEL_Z; /* B */
961b7e1c893Smrg	pix_a = SQ_SEL_W; /* A */
962b7e1c893Smrg	break;
963b7e1c893Smrg    case PICT_x8b8g8r8:
964b7e1c893Smrg	pix_r = SQ_SEL_X; /* R */
965b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
966b7e1c893Smrg	pix_b = SQ_SEL_Z; /* B */
967b7e1c893Smrg	pix_a = SQ_SEL_1; /* A */
968b7e1c893Smrg	break;
969ad43ddacSmrg#ifdef PICT_TYPE_BGRA
970ad43ddacSmrg    case PICT_b8g8r8a8:
971ad43ddacSmrg	pix_r = SQ_SEL_Y; /* R */
972ad43ddacSmrg	pix_g = SQ_SEL_Z; /* G */
973ad43ddacSmrg	pix_b = SQ_SEL_W; /* B */
974ad43ddacSmrg	pix_a = SQ_SEL_X; /* A */
975ad43ddacSmrg	break;
976ad43ddacSmrg    case PICT_b8g8r8x8:
977ad43ddacSmrg	pix_r = SQ_SEL_Y; /* R */
978ad43ddacSmrg	pix_g = SQ_SEL_Z; /* G */
979ad43ddacSmrg	pix_b = SQ_SEL_W; /* B */
980ad43ddacSmrg	pix_a = SQ_SEL_1; /* A */
981ad43ddacSmrg	break;
982ad43ddacSmrg#endif
983b7e1c893Smrg    case PICT_x1r5g5b5:
984b7e1c893Smrg    case PICT_x8r8g8b8:
985b7e1c893Smrg    case PICT_r5g6b5:
986b7e1c893Smrg	pix_r = SQ_SEL_Z; /* R */
987b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
988b7e1c893Smrg	pix_b = SQ_SEL_X; /* B */
989b7e1c893Smrg	pix_a = SQ_SEL_1; /* A */
990b7e1c893Smrg	break;
991b7e1c893Smrg    case PICT_a8:
992b7e1c893Smrg	pix_r = SQ_SEL_0; /* R */
993b7e1c893Smrg	pix_g = SQ_SEL_0; /* G */
994b7e1c893Smrg	pix_b = SQ_SEL_0; /* B */
995b7e1c893Smrg	pix_a = SQ_SEL_X; /* A */
996b7e1c893Smrg	break;
997b7e1c893Smrg    default:
998b7e1c893Smrg	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
999b7e1c893Smrg    }
1000b7e1c893Smrg
1001b7e1c893Smrg    if (unit == 0) {
1002ad43ddacSmrg	if (!accel_state->msk_pic) {
1003b7e1c893Smrg	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
1004b7e1c893Smrg		pix_r = SQ_SEL_0;
1005b7e1c893Smrg		pix_g = SQ_SEL_0;
1006b7e1c893Smrg		pix_b = SQ_SEL_0;
1007b7e1c893Smrg	    }
1008b7e1c893Smrg
1009b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1010b7e1c893Smrg		pix_a = SQ_SEL_1;
1011b7e1c893Smrg	} else {
1012b7e1c893Smrg	    if (accel_state->component_alpha) {
1013b7e1c893Smrg		if (accel_state->src_alpha) {
1014b7e1c893Smrg		    if (PICT_FORMAT_A(pPict->format) == 0) {
1015b7e1c893Smrg			pix_r = SQ_SEL_1;
1016b7e1c893Smrg			pix_g = SQ_SEL_1;
1017b7e1c893Smrg			pix_b = SQ_SEL_1;
1018b7e1c893Smrg			pix_a = SQ_SEL_1;
1019b7e1c893Smrg		    } else {
1020b7e1c893Smrg			pix_r = pix_a;
1021b7e1c893Smrg			pix_g = pix_a;
1022b7e1c893Smrg			pix_b = pix_a;
1023b7e1c893Smrg		    }
1024b7e1c893Smrg		} else {
1025b7e1c893Smrg		    if (PICT_FORMAT_A(pPict->format) == 0)
1026b7e1c893Smrg			pix_a = SQ_SEL_1;
1027b7e1c893Smrg		}
1028b7e1c893Smrg	    } else {
1029b7e1c893Smrg		if (PICT_FORMAT_RGB(pPict->format) == 0) {
1030b7e1c893Smrg		    pix_r = SQ_SEL_0;
1031b7e1c893Smrg		    pix_g = SQ_SEL_0;
1032b7e1c893Smrg		    pix_b = SQ_SEL_0;
1033b7e1c893Smrg		}
1034b7e1c893Smrg
1035b7e1c893Smrg		if (PICT_FORMAT_A(pPict->format) == 0)
1036b7e1c893Smrg		    pix_a = SQ_SEL_1;
1037b7e1c893Smrg	    }
1038b7e1c893Smrg	}
1039b7e1c893Smrg    } else {
1040b7e1c893Smrg	if (accel_state->component_alpha) {
1041b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1042b7e1c893Smrg		pix_a = SQ_SEL_1;
1043b7e1c893Smrg	} else {
1044b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0) {
1045b7e1c893Smrg		pix_r = SQ_SEL_1;
1046b7e1c893Smrg		pix_g = SQ_SEL_1;
1047b7e1c893Smrg		pix_b = SQ_SEL_1;
1048b7e1c893Smrg		pix_a = SQ_SEL_1;
1049b7e1c893Smrg	    } else {
1050b7e1c893Smrg		pix_r = pix_a;
1051b7e1c893Smrg		pix_g = pix_a;
1052b7e1c893Smrg		pix_b = pix_a;
1053b7e1c893Smrg	    }
1054b7e1c893Smrg	}
1055b7e1c893Smrg    }
1056b7e1c893Smrg
1057b7e1c893Smrg    tex_res.dst_sel_x           = pix_r; /* R */
1058b7e1c893Smrg    tex_res.dst_sel_y           = pix_g; /* G */
1059b7e1c893Smrg    tex_res.dst_sel_z           = pix_b; /* B */
1060b7e1c893Smrg    tex_res.dst_sel_w           = pix_a; /* A */
1061b7e1c893Smrg
1062b7e1c893Smrg    tex_res.base_level          = 0;
1063b7e1c893Smrg    tex_res.last_level          = 0;
1064b7e1c893Smrg    tex_res.perf_modulation     = 0;
1065921a55d8Smrg    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
1066b7e1c893Smrg
1067b7e1c893Smrg    tex_samp.id                 = unit;
1068b7e1c893Smrg    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1069b7e1c893Smrg
1070ad43ddacSmrg    switch (repeatType) {
1071ad43ddacSmrg    case RepeatNormal:
1072ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_WRAP;
1073ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_WRAP;
1074ad43ddacSmrg	break;
1075ad43ddacSmrg    case RepeatPad:
1076ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1077ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1078ad43ddacSmrg	break;
1079ad43ddacSmrg    case RepeatReflect:
1080ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1081ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1082ad43ddacSmrg	break;
1083ad43ddacSmrg    case RepeatNone:
1084b7e1c893Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1085b7e1c893Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1086ad43ddacSmrg	break;
1087ad43ddacSmrg    default:
1088ad43ddacSmrg	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1089b7e1c893Smrg    }
1090b7e1c893Smrg
1091b7e1c893Smrg    switch (pPict->filter) {
1092b7e1c893Smrg    case PictFilterNearest:
1093b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1094b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1095b7e1c893Smrg	break;
1096b7e1c893Smrg    case PictFilterBilinear:
1097b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1098b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1099b7e1c893Smrg	break;
1100b7e1c893Smrg    default:
1101b7e1c893Smrg	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1102b7e1c893Smrg    }
1103b7e1c893Smrg
1104b7e1c893Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
1105b7e1c893Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1106b7e1c893Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
1107921a55d8Smrg    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
1108b7e1c893Smrg
1109b7e1c893Smrg    if (pPict->transform != 0) {
1110b7e1c893Smrg	accel_state->is_transform[unit] = TRUE;
1111b7e1c893Smrg	accel_state->transform[unit] = pPict->transform;
1112ad43ddacSmrg
1113ad43ddacSmrg	vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
1114ad43ddacSmrg	vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
1115ad43ddacSmrg	vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
1116ad43ddacSmrg	vs_alu_consts[3] = 1.0 / w;
1117ad43ddacSmrg
1118ad43ddacSmrg	vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
1119ad43ddacSmrg	vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
1120ad43ddacSmrg	vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
1121ad43ddacSmrg	vs_alu_consts[7] = 1.0 / h;
1122ad43ddacSmrg    } else {
1123b7e1c893Smrg	accel_state->is_transform[unit] = FALSE;
1124b7e1c893Smrg
1125ad43ddacSmrg	vs_alu_consts[0] = 1.0;
1126ad43ddacSmrg	vs_alu_consts[1] = 0.0;
1127ad43ddacSmrg	vs_alu_consts[2] = 0.0;
1128ad43ddacSmrg	vs_alu_consts[3] = 1.0 / w;
1129ad43ddacSmrg
1130ad43ddacSmrg	vs_alu_consts[4] = 0.0;
1131ad43ddacSmrg	vs_alu_consts[5] = 1.0;
1132ad43ddacSmrg	vs_alu_consts[6] = 0.0;
1133ad43ddacSmrg	vs_alu_consts[7] = 1.0 / h;
1134ad43ddacSmrg    }
1135ad43ddacSmrg
1136ad43ddacSmrg    /* VS alu constants */
1137921a55d8Smrg    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
1138921a55d8Smrg			sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
1139ad43ddacSmrg
1140b7e1c893Smrg    return TRUE;
1141b7e1c893Smrg}
1142b7e1c893Smrg
1143b7e1c893Smrgstatic Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1144b7e1c893Smrg			       PicturePtr pDstPicture)
1145b7e1c893Smrg{
1146b7e1c893Smrg    uint32_t tmp1;
1147b7e1c893Smrg    PixmapPtr pSrcPixmap, pDstPixmap;
1148b7e1c893Smrg    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1149b7e1c893Smrg
1150b7e1c893Smrg    /* Check for unsupported compositing operations. */
1151b7e1c893Smrg    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1152b7e1c893Smrg	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1153b7e1c893Smrg
1154ad43ddacSmrg    if (!pSrcPicture->pDrawable)
1155ad43ddacSmrg	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1156ad43ddacSmrg
1157b7e1c893Smrg    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1158b7e1c893Smrg
1159b7e1c893Smrg    max_tex_w = 8192;
1160b7e1c893Smrg    max_tex_h = 8192;
1161b7e1c893Smrg    max_dst_w = 8192;
1162b7e1c893Smrg    max_dst_h = 8192;
1163b7e1c893Smrg
1164b7e1c893Smrg    if (pSrcPixmap->drawable.width >= max_tex_w ||
1165b7e1c893Smrg	pSrcPixmap->drawable.height >= max_tex_h) {
1166b7e1c893Smrg	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1167b7e1c893Smrg			 pSrcPixmap->drawable.width,
1168b7e1c893Smrg			 pSrcPixmap->drawable.height));
1169b7e1c893Smrg    }
1170b7e1c893Smrg
1171b7e1c893Smrg    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1172b7e1c893Smrg
1173b7e1c893Smrg    if (pDstPixmap->drawable.width >= max_dst_w ||
1174b7e1c893Smrg	pDstPixmap->drawable.height >= max_dst_h) {
1175b7e1c893Smrg	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1176b7e1c893Smrg			 pDstPixmap->drawable.width,
1177b7e1c893Smrg			 pDstPixmap->drawable.height));
1178b7e1c893Smrg    }
1179b7e1c893Smrg
1180b7e1c893Smrg    if (pMaskPicture) {
1181ad43ddacSmrg	PixmapPtr pMaskPixmap;
1182ad43ddacSmrg
1183ad43ddacSmrg	if (!pMaskPicture->pDrawable)
1184ad43ddacSmrg	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1185ad43ddacSmrg
1186ad43ddacSmrg	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1187b7e1c893Smrg
1188b7e1c893Smrg	if (pMaskPixmap->drawable.width >= max_tex_w ||
1189b7e1c893Smrg	    pMaskPixmap->drawable.height >= max_tex_h) {
1190b7e1c893Smrg	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1191b7e1c893Smrg			     pMaskPixmap->drawable.width,
1192b7e1c893Smrg			     pMaskPixmap->drawable.height));
1193b7e1c893Smrg	}
1194b7e1c893Smrg
1195b7e1c893Smrg	if (pMaskPicture->componentAlpha) {
1196b7e1c893Smrg	    /* Check if it's component alpha that relies on a source alpha and
1197b7e1c893Smrg	     * on the source value.  We can only get one of those into the
1198b7e1c893Smrg	     * single source value that we get to blend with.
1199b7e1c893Smrg	     */
1200b7e1c893Smrg	    if (R600BlendOp[op].src_alpha &&
1201b7e1c893Smrg		(R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1202b7e1c893Smrg		(BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1203b7e1c893Smrg		RADEON_FALLBACK(("Component alpha not supported with source "
1204b7e1c893Smrg				 "alpha and source value blending.\n"));
1205b7e1c893Smrg	    }
1206b7e1c893Smrg	}
1207b7e1c893Smrg
1208b7e1c893Smrg	if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1209b7e1c893Smrg	    return FALSE;
1210b7e1c893Smrg    }
1211b7e1c893Smrg
1212b7e1c893Smrg    if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1213b7e1c893Smrg	return FALSE;
1214b7e1c893Smrg
1215b7e1c893Smrg    if (!R600GetDestFormat(pDstPicture, &tmp1))
1216b7e1c893Smrg	return FALSE;
1217b7e1c893Smrg
1218b7e1c893Smrg    return TRUE;
1219b7e1c893Smrg
1220b7e1c893Smrg}
1221b7e1c893Smrg
1222b7e1c893Smrgstatic Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1223b7e1c893Smrg				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1224b7e1c893Smrg				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1225b7e1c893Smrg{
1226b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1227b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1228b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1229b7e1c893Smrg    uint32_t blendcntl, dst_format;
1230b7e1c893Smrg    cb_config_t cb_conf;
1231b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
1232ad43ddacSmrg    struct r600_accel_object src_obj, mask_obj, dst_obj;
1233b7e1c893Smrg
1234ad43ddacSmrg    if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8)
1235ad43ddacSmrg	return FALSE;
1236ad43ddacSmrg
1237ad43ddacSmrg#if defined(XF86DRM_MODE)
1238ad43ddacSmrg    if (info->cs) {
1239ad43ddacSmrg	src_obj.offset = 0;
1240ad43ddacSmrg	dst_obj.offset = 0;
1241ad43ddacSmrg	src_obj.bo = radeon_get_pixmap_bo(pSrc);
1242ad43ddacSmrg	dst_obj.bo = radeon_get_pixmap_bo(pDst);
1243ad43ddacSmrg    } else
1244ad43ddacSmrg#endif
1245ad43ddacSmrg    {
1246ad43ddacSmrg	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1247ad43ddacSmrg	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1248ad43ddacSmrg	src_obj.bo = NULL;
1249ad43ddacSmrg	dst_obj.bo = NULL;
1250ad43ddacSmrg    }
1251ad43ddacSmrg    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1252ad43ddacSmrg    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1253ad43ddacSmrg
1254ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
1255ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
1256ad43ddacSmrg    src_obj.bpp = pSrc->drawable.bitsPerPixel;
1257ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1258ad43ddacSmrg
1259ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
1260ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
1261ad43ddacSmrg    dst_obj.bpp = pDst->drawable.bitsPerPixel;
1262ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1263b7e1c893Smrg
1264b7e1c893Smrg    if (pMask) {
1265ad43ddacSmrg#if defined(XF86DRM_MODE)
1266ad43ddacSmrg	if (info->cs) {
1267ad43ddacSmrg	    mask_obj.offset = 0;
1268ad43ddacSmrg	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
1269921a55d8Smrg	} else
1270ad43ddacSmrg#endif
1271ad43ddacSmrg	{
1272ad43ddacSmrg	    mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset;
1273ad43ddacSmrg	    mask_obj.bo = NULL;
1274ad43ddacSmrg	}
1275ad43ddacSmrg	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1276ad43ddacSmrg
1277ad43ddacSmrg	mask_obj.width = pMask->drawable.width;
1278ad43ddacSmrg	mask_obj.height = pMask->drawable.height;
1279ad43ddacSmrg	mask_obj.bpp = pMask->drawable.bitsPerPixel;
1280ad43ddacSmrg	mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1281ad43ddacSmrg
1282ad43ddacSmrg	if (!R600SetAccelState(pScrn,
1283ad43ddacSmrg			       &src_obj,
1284ad43ddacSmrg			       &mask_obj,
1285ad43ddacSmrg			       &dst_obj,
12860974d292Smrg			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1287ad43ddacSmrg			       3, 0xffffffff))
1288ad43ddacSmrg	    return FALSE;
1289ad43ddacSmrg
1290ad43ddacSmrg	accel_state->msk_pic = pMaskPicture;
1291b7e1c893Smrg	if (pMaskPicture->componentAlpha) {
1292b7e1c893Smrg	    accel_state->component_alpha = TRUE;
1293b7e1c893Smrg	    if (R600BlendOp[op].src_alpha)
1294b7e1c893Smrg		accel_state->src_alpha = TRUE;
1295b7e1c893Smrg	    else
1296b7e1c893Smrg		accel_state->src_alpha = FALSE;
1297b7e1c893Smrg	} else {
1298b7e1c893Smrg	    accel_state->component_alpha = FALSE;
1299b7e1c893Smrg	    accel_state->src_alpha = FALSE;
1300b7e1c893Smrg	}
1301b7e1c893Smrg    } else {
1302ad43ddacSmrg	if (!R600SetAccelState(pScrn,
1303ad43ddacSmrg			       &src_obj,
1304ad43ddacSmrg			       NULL,
1305ad43ddacSmrg			       &dst_obj,
1306ad43ddacSmrg			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1307ad43ddacSmrg			       3, 0xffffffff))
1308ad43ddacSmrg	    return FALSE;
1309ad43ddacSmrg
1310ad43ddacSmrg	accel_state->msk_pic = NULL;
1311b7e1c893Smrg	accel_state->component_alpha = FALSE;
1312b7e1c893Smrg	accel_state->src_alpha = FALSE;
1313b7e1c893Smrg    }
1314b7e1c893Smrg
1315b7e1c893Smrg    if (!R600GetDestFormat(pDstPicture, &dst_format))
1316b7e1c893Smrg	return FALSE;
1317b7e1c893Smrg
1318b7e1c893Smrg    CLEAR (cb_conf);
1319b7e1c893Smrg    CLEAR (vs_conf);
1320b7e1c893Smrg    CLEAR (ps_conf);
1321b7e1c893Smrg
1322ad43ddacSmrg    if (pMask)
1323921a55d8Smrg        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1324ad43ddacSmrg    else
1325921a55d8Smrg        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1326b7e1c893Smrg
13270974d292Smrg    radeon_cp_start(pScrn);
1328b7e1c893Smrg
1329921a55d8Smrg    r600_set_default_state(pScrn, accel_state->ib);
1330b7e1c893Smrg
1331921a55d8Smrg    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1332921a55d8Smrg    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1333921a55d8Smrg    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1334b7e1c893Smrg
1335b7e1c893Smrg    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
1336ad43ddacSmrg        R600IBDiscard(pScrn, accel_state->ib);
1337ad43ddacSmrg        return FALSE;
1338b7e1c893Smrg    }
1339b7e1c893Smrg
1340b7e1c893Smrg    if (pMask) {
1341ad43ddacSmrg        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
1342ad43ddacSmrg            R600IBDiscard(pScrn, accel_state->ib);
1343ad43ddacSmrg            return FALSE;
1344ad43ddacSmrg        }
1345b7e1c893Smrg    } else
1346ad43ddacSmrg        accel_state->is_transform[1] = FALSE;
1347b7e1c893Smrg
13480974d292Smrg    if (pMask) {
1349921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
1350921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
13510974d292Smrg    } else {
1352921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
1353921a55d8Smrg	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
13540974d292Smrg    }
1355b7e1c893Smrg
1356b7e1c893Smrg    /* Shader */
1357b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
13580974d292Smrg    vs_conf.shader_size         = accel_state->vs_size;
1359921a55d8Smrg    vs_conf.num_gprs            = 5;
1360b7e1c893Smrg    vs_conf.stack_size          = 1;
1361ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
1362921a55d8Smrg    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1363b7e1c893Smrg
1364b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
13650974d292Smrg    ps_conf.shader_size         = accel_state->ps_size;
1366b7e1c893Smrg    ps_conf.num_gprs            = 3;
13670974d292Smrg    ps_conf.stack_size          = 1;
1368b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
1369b7e1c893Smrg    ps_conf.clamp_consts        = 0;
1370b7e1c893Smrg    ps_conf.export_mode         = 2;
1371ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
1372921a55d8Smrg    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1373b7e1c893Smrg
1374b7e1c893Smrg    cb_conf.id = 0;
1375ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
1376ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
1377ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
1378b7e1c893Smrg    cb_conf.format = dst_format;
1379ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
1380b7e1c893Smrg
1381b7e1c893Smrg    switch (pDstPicture->format) {
1382b7e1c893Smrg    case PICT_a8r8g8b8:
1383b7e1c893Smrg    case PICT_x8r8g8b8:
1384b7e1c893Smrg    case PICT_a1r5g5b5:
1385b7e1c893Smrg    case PICT_x1r5g5b5:
1386b7e1c893Smrg    default:
1387b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
1388b7e1c893Smrg	break;
1389ad43ddacSmrg    case PICT_a8b8g8r8:
1390ad43ddacSmrg    case PICT_x8b8g8r8:
1391ad43ddacSmrg	cb_conf.comp_swap = 0; /* ABGR */
1392ad43ddacSmrg	break;
1393ad43ddacSmrg#ifdef PICT_TYPE_BGRA
1394ad43ddacSmrg    case PICT_b8g8r8a8:
1395ad43ddacSmrg    case PICT_b8g8r8x8:
1396ad43ddacSmrg	cb_conf.comp_swap = 3; /* BGRA */
1397ad43ddacSmrg	break;
1398ad43ddacSmrg#endif
1399b7e1c893Smrg    case PICT_r5g6b5:
1400b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
1401b7e1c893Smrg	break;
1402b7e1c893Smrg    case PICT_a8:
1403b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
1404b7e1c893Smrg	break;
1405b7e1c893Smrg    }
1406b7e1c893Smrg    cb_conf.source_format = 1;
1407b7e1c893Smrg    cb_conf.blend_clamp = 1;
1408921a55d8Smrg    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
1409b7e1c893Smrg
14100974d292Smrg    BEGIN_BATCH(24);
14110974d292Smrg    EREG(accel_state->ib, CB_TARGET_MASK,                      (0xf << TARGET0_ENABLE_shift));
14120974d292Smrg
14130974d292Smrg    blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
14140974d292Smrg
14150974d292Smrg    if (info->ChipFamily == CHIP_FAMILY_R600) {
14160974d292Smrg	/* no per-MRT blend on R600 */
1417921a55d8Smrg	EREG(accel_state->ib, CB_COLOR_CONTROL,                    R600_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift));
14180974d292Smrg	EREG(accel_state->ib, CB_BLEND_CONTROL,                    blendcntl);
14190974d292Smrg    } else {
1420921a55d8Smrg	EREG(accel_state->ib, CB_COLOR_CONTROL,                    (R600_ROP[3] |
14210974d292Smrg								    (1 << TARGET_BLEND_ENABLE_shift) |
14220974d292Smrg								    PER_MRT_BLEND_bit));
14230974d292Smrg	EREG(accel_state->ib, CB_BLEND0_CONTROL,                   blendcntl);
14240974d292Smrg    }
14250974d292Smrg
1426b7e1c893Smrg    /* Interpolator setup */
1427b7e1c893Smrg    if (pMask) {
1428b7e1c893Smrg	/* export 2 tex coords from VS */
1429b7e1c893Smrg	EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift));
1430b7e1c893Smrg	/* src = semantic id 0; mask = semantic id 1 */
1431b7e1c893Smrg	EREG(accel_state->ib, SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1432b7e1c893Smrg						  (1 << SEMANTIC_1_shift)));
1433b7e1c893Smrg    } else {
1434b7e1c893Smrg	/* export 1 tex coords from VS */
1435b7e1c893Smrg	EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
1436b7e1c893Smrg	/* src = semantic id 0 */
1437b7e1c893Smrg	EREG(accel_state->ib, SPI_VS_OUT_ID_0,   (0 << SEMANTIC_0_shift));
1438b7e1c893Smrg    }
14390974d292Smrg
14400974d292Smrg    PACK0(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
1441b7e1c893Smrg    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
14420974d292Smrg    E32(accel_state->ib, ((0    << SEMANTIC_shift)	|
14430974d292Smrg			  (0x01 << DEFAULT_VAL_shift)	|
14440974d292Smrg			  SEL_CENTROID_bit));
1445b7e1c893Smrg    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
14460974d292Smrg    E32(accel_state->ib, ((1    << SEMANTIC_shift)	|
14470974d292Smrg			  (0x01 << DEFAULT_VAL_shift)	|
14480974d292Smrg			  SEL_CENTROID_bit));
14490974d292Smrg
14500974d292Smrg    PACK0(accel_state->ib, SPI_PS_IN_CONTROL_0, 3);
14510974d292Smrg    if (pMask) {
14520974d292Smrg	/* input 2 tex coords from VS */
14530974d292Smrg	E32(accel_state->ib, (2 << NUM_INTERP_shift));
14540974d292Smrg    } else {
14550974d292Smrg	/* input 1 tex coords from VS */
14560974d292Smrg	E32(accel_state->ib, (1 << NUM_INTERP_shift));
14570974d292Smrg    }
14580974d292Smrg    E32(accel_state->ib, 0);
14590974d292Smrg    E32(accel_state->ib, 0);
1460ad43ddacSmrg    END_BATCH();
1461b7e1c893Smrg
1462ad43ddacSmrg    if (accel_state->vsync)
14630974d292Smrg	RADEONVlineHelperClear(pScrn);
1464b7e1c893Smrg
1465b7e1c893Smrg    return TRUE;
1466b7e1c893Smrg}
1467b7e1c893Smrg
1468b7e1c893Smrgstatic void R600Composite(PixmapPtr pDst,
1469b7e1c893Smrg			  int srcX, int srcY,
1470b7e1c893Smrg			  int maskX, int maskY,
1471b7e1c893Smrg			  int dstX, int dstY,
1472b7e1c893Smrg			  int w, int h)
1473b7e1c893Smrg{
1474b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1475b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1476b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1477b7e1c893Smrg    float *vb;
1478b7e1c893Smrg
1479b7e1c893Smrg    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1480b7e1c893Smrg       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1481b7e1c893Smrg
1482ad43ddacSmrg    if (accel_state->vsync)
14830974d292Smrg	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1484b7e1c893Smrg
1485ad43ddacSmrg    if (accel_state->msk_pic) {
1486b7e1c893Smrg
1487921a55d8Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
1488b7e1c893Smrg
1489b7e1c893Smrg	vb[0] = (float)dstX;
1490b7e1c893Smrg	vb[1] = (float)dstY;
1491ad43ddacSmrg	vb[2] = (float)srcX;
1492ad43ddacSmrg	vb[3] = (float)srcY;
1493ad43ddacSmrg	vb[4] = (float)maskX;
1494ad43ddacSmrg	vb[5] = (float)maskY;
1495b7e1c893Smrg
1496b7e1c893Smrg	vb[6] = (float)dstX;
1497b7e1c893Smrg	vb[7] = (float)(dstY + h);
1498ad43ddacSmrg	vb[8] = (float)srcX;
1499ad43ddacSmrg	vb[9] = (float)(srcY + h);
1500ad43ddacSmrg	vb[10] = (float)maskX;
1501ad43ddacSmrg	vb[11] = (float)(maskY + h);
1502b7e1c893Smrg
1503b7e1c893Smrg	vb[12] = (float)(dstX + w);
1504b7e1c893Smrg	vb[13] = (float)(dstY + h);
1505ad43ddacSmrg	vb[14] = (float)(srcX + w);
1506ad43ddacSmrg	vb[15] = (float)(srcY + h);
1507ad43ddacSmrg	vb[16] = (float)(maskX + w);
1508ad43ddacSmrg	vb[17] = (float)(maskY + h);
1509ad43ddacSmrg
1510921a55d8Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
1511b7e1c893Smrg
1512b7e1c893Smrg    } else {
1513b7e1c893Smrg
1514921a55d8Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
1515b7e1c893Smrg
1516b7e1c893Smrg	vb[0] = (float)dstX;
1517b7e1c893Smrg	vb[1] = (float)dstY;
1518ad43ddacSmrg	vb[2] = (float)srcX;
1519ad43ddacSmrg	vb[3] = (float)srcY;
1520b7e1c893Smrg
1521b7e1c893Smrg	vb[4] = (float)dstX;
1522b7e1c893Smrg	vb[5] = (float)(dstY + h);
1523ad43ddacSmrg	vb[6] = (float)srcX;
1524ad43ddacSmrg	vb[7] = (float)(srcY + h);
1525b7e1c893Smrg
1526b7e1c893Smrg	vb[8] = (float)(dstX + w);
1527b7e1c893Smrg	vb[9] = (float)(dstY + h);
1528ad43ddacSmrg	vb[10] = (float)(srcX + w);
1529ad43ddacSmrg	vb[11] = (float)(srcY + h);
1530ad43ddacSmrg
1531921a55d8Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
1532b7e1c893Smrg    }
1533b7e1c893Smrg
1534b7e1c893Smrg
1535b7e1c893Smrg}
1536b7e1c893Smrg
1537b7e1c893Smrgstatic void R600DoneComposite(PixmapPtr pDst)
1538b7e1c893Smrg{
1539b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1540b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1541b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1542ad43ddacSmrg    int vtx_size;
1543b7e1c893Smrg
1544ad43ddacSmrg    if (accel_state->vsync)
1545921a55d8Smrg       r600_cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
1546921a55d8Smrg			       accel_state->vline_crtc,
1547921a55d8Smrg			       accel_state->vline_y1,
1548921a55d8Smrg			       accel_state->vline_y2);
1549b7e1c893Smrg
1550ad43ddacSmrg    vtx_size = accel_state->msk_pic ? 24 : 16;
1551b7e1c893Smrg
1552ad43ddacSmrg    r600_finish_op(pScrn, vtx_size);
1553b7e1c893Smrg}
1554b7e1c893Smrg
1555b7e1c893SmrgBool
1556b7e1c893SmrgR600CopyToVRAM(ScrnInfoPtr pScrn,
1557b7e1c893Smrg	       char *src, int src_pitch,
1558ad43ddacSmrg	       uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_width, uint32_t dst_height, int bpp,
1559b7e1c893Smrg	       int x, int y, int w, int h)
1560b7e1c893Smrg{
1561b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1562ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1563b7e1c893Smrg    uint32_t scratch_mc_addr;
1564b7e1c893Smrg    int wpass = w * (bpp/8);
1565ad43ddacSmrg    int scratch_pitch_bytes = RADEON_ALIGN(wpass, 256);
1566b7e1c893Smrg    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1567b7e1c893Smrg    int scratch_offset = 0, hpass, temph;
1568b7e1c893Smrg    char *dst;
1569b7e1c893Smrg    drmBufPtr scratch;
1570ad43ddacSmrg    struct r600_accel_object scratch_obj, dst_obj;
1571b7e1c893Smrg
1572b7e1c893Smrg    if (dst_pitch & 7)
1573b7e1c893Smrg	return FALSE;
1574b7e1c893Smrg
1575b7e1c893Smrg    if (dst_mc_addr & 0xff)
1576b7e1c893Smrg	return FALSE;
1577b7e1c893Smrg
1578b7e1c893Smrg    scratch = RADEONCPGetBuffer(pScrn);
1579b7e1c893Smrg    if (scratch == NULL)
1580b7e1c893Smrg	return FALSE;
1581b7e1c893Smrg
1582b7e1c893Smrg    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1583b7e1c893Smrg    temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1584b7e1c893Smrg    dst = (char *)scratch->address;
1585b7e1c893Smrg
1586ad43ddacSmrg    scratch_obj.pitch = scratch_pitch;
1587ad43ddacSmrg    scratch_obj.width = w;
1588ad43ddacSmrg    scratch_obj.height = hpass;
1589ad43ddacSmrg    scratch_obj.offset = scratch_mc_addr;
1590ad43ddacSmrg    scratch_obj.bpp = bpp;
1591ad43ddacSmrg    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1592ad43ddacSmrg    scratch_obj.bo = NULL;
1593ad43ddacSmrg
1594ad43ddacSmrg    dst_obj.pitch = dst_pitch;
1595ad43ddacSmrg    dst_obj.width = dst_width;
1596ad43ddacSmrg    dst_obj.height = dst_height;
1597ad43ddacSmrg    dst_obj.offset = dst_mc_addr;
1598ad43ddacSmrg    dst_obj.bo = NULL;
1599ad43ddacSmrg    dst_obj.bpp = bpp;
1600ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1601ad43ddacSmrg
1602ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1603ad43ddacSmrg			   &scratch_obj,
1604ad43ddacSmrg			   NULL,
1605ad43ddacSmrg			   &dst_obj,
1606ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1607ad43ddacSmrg			   3, 0xffffffff))
1608ad43ddacSmrg	return FALSE;
1609ad43ddacSmrg
1610b7e1c893Smrg    /* memcopy from sys to scratch */
1611b7e1c893Smrg    while (temph--) {
1612b7e1c893Smrg	memcpy (dst, src, wpass);
1613b7e1c893Smrg	src += src_pitch;
1614b7e1c893Smrg	dst += scratch_pitch_bytes;
1615b7e1c893Smrg    }
1616b7e1c893Smrg
1617b7e1c893Smrg    while (h) {
1618b7e1c893Smrg	uint32_t offset = scratch_mc_addr + scratch_offset;
1619b7e1c893Smrg	int oldhpass = hpass;
1620b7e1c893Smrg	h -= oldhpass;
1621b7e1c893Smrg	temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1622b7e1c893Smrg
1623b7e1c893Smrg	if (hpass) {
1624b7e1c893Smrg	    scratch_offset = scratch->total/2 - scratch_offset;
1625b7e1c893Smrg	    dst = (char *)scratch->address + scratch_offset;
1626b7e1c893Smrg	    /* wait for the engine to be idle */
1627b7e1c893Smrg	    RADEONWaitForIdleCP(pScrn);
1628b7e1c893Smrg	    //memcopy from sys to scratch
1629b7e1c893Smrg	    while (temph--) {
1630b7e1c893Smrg		memcpy (dst, src, wpass);
1631b7e1c893Smrg		src += src_pitch;
1632b7e1c893Smrg		dst += scratch_pitch_bytes;
1633b7e1c893Smrg	    }
1634b7e1c893Smrg	}
1635b7e1c893Smrg	/* blit from scratch to vram */
1636ad43ddacSmrg	info->accel_state->src_obj[0].height = oldhpass;
1637ad43ddacSmrg	info->accel_state->src_obj[0].offset = offset;
1638ad43ddacSmrg	R600DoPrepareCopy(pScrn);
1639b7e1c893Smrg	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
1640b7e1c893Smrg	R600DoCopy(pScrn);
1641b7e1c893Smrg	y += oldhpass;
1642b7e1c893Smrg    }
1643b7e1c893Smrg
1644b7e1c893Smrg    R600IBDiscard(pScrn, scratch);
1645b7e1c893Smrg
1646b7e1c893Smrg    return TRUE;
1647b7e1c893Smrg}
1648b7e1c893Smrg
1649b7e1c893Smrgstatic Bool
1650b7e1c893SmrgR600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1651b7e1c893Smrg		   char *src, int src_pitch)
1652b7e1c893Smrg{
1653b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1654b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1655b7e1c893Smrg    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1656b7e1c893Smrg    uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1657b7e1c893Smrg    int bpp = pDst->drawable.bitsPerPixel;
1658b7e1c893Smrg
1659b7e1c893Smrg    return R600CopyToVRAM(pScrn,
1660b7e1c893Smrg			  src, src_pitch,
1661ad43ddacSmrg			  dst_pitch, dst_mc_addr, pDst->drawable.width, pDst->drawable.height, bpp,
1662b7e1c893Smrg			  x, y, w, h);
1663b7e1c893Smrg}
1664b7e1c893Smrg
1665b7e1c893Smrgstatic Bool
1666b7e1c893SmrgR600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
1667b7e1c893Smrg		       char *dst, int dst_pitch)
1668b7e1c893Smrg{
1669b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1670b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1671ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1672b7e1c893Smrg    uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1673b7e1c893Smrg    uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1674b7e1c893Smrg    uint32_t src_width = pSrc->drawable.width;
1675b7e1c893Smrg    uint32_t src_height = pSrc->drawable.height;
1676b7e1c893Smrg    int bpp = pSrc->drawable.bitsPerPixel;
1677b7e1c893Smrg    uint32_t scratch_mc_addr;
1678ad43ddacSmrg    int scratch_pitch_bytes = RADEON_ALIGN(dst_pitch, 256);
1679b7e1c893Smrg    int scratch_offset = 0, hpass;
1680b7e1c893Smrg    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1681b7e1c893Smrg    int wpass = w * (bpp/8);
1682b7e1c893Smrg    drmBufPtr scratch;
1683ad43ddacSmrg    struct r600_accel_object scratch_obj, src_obj;
1684b7e1c893Smrg
1685ad43ddacSmrg    /* bad pipe setup in drm prior to 1.32 */
1686ad43ddacSmrg    if (info->dri->pKernelDRMVersion->version_minor < 32) {
1687ad43ddacSmrg	    if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32))
1688ad43ddacSmrg		    return FALSE;
1689ad43ddacSmrg    }
1690c503f109Smrg
1691b7e1c893Smrg    if (src_pitch & 7)
1692b7e1c893Smrg	return FALSE;
1693b7e1c893Smrg
1694b7e1c893Smrg    scratch = RADEONCPGetBuffer(pScrn);
1695b7e1c893Smrg    if (scratch == NULL)
1696b7e1c893Smrg	return FALSE;
1697b7e1c893Smrg
1698b7e1c893Smrg    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1699b7e1c893Smrg    hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1700b7e1c893Smrg
1701ad43ddacSmrg    src_obj.pitch = src_pitch;
1702ad43ddacSmrg    src_obj.width = src_width;
1703ad43ddacSmrg    src_obj.height = src_height;
1704ad43ddacSmrg    src_obj.offset = src_mc_addr;
1705ad43ddacSmrg    src_obj.bo = NULL;
1706ad43ddacSmrg    src_obj.bpp = bpp;
1707ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1708ad43ddacSmrg
1709ad43ddacSmrg    scratch_obj.pitch = scratch_pitch;
1710ad43ddacSmrg    scratch_obj.width = src_width;
1711ad43ddacSmrg    scratch_obj.height = hpass;
1712ad43ddacSmrg    scratch_obj.offset = scratch_mc_addr;
1713ad43ddacSmrg    scratch_obj.bpp = bpp;
1714ad43ddacSmrg    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1715ad43ddacSmrg    scratch_obj.bo = NULL;
1716ad43ddacSmrg
1717ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1718ad43ddacSmrg			   &src_obj,
1719ad43ddacSmrg			   NULL,
1720ad43ddacSmrg			   &scratch_obj,
1721ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1722ad43ddacSmrg			   3, 0xffffffff))
1723ad43ddacSmrg	return FALSE;
1724ad43ddacSmrg
1725b7e1c893Smrg    /* blit from vram to scratch */
1726ad43ddacSmrg    R600DoPrepareCopy(pScrn);
1727b7e1c893Smrg    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1728b7e1c893Smrg    R600DoCopy(pScrn);
1729b7e1c893Smrg
1730b7e1c893Smrg    while (h) {
1731b7e1c893Smrg	char *src = (char *)scratch->address + scratch_offset;
1732b7e1c893Smrg	int oldhpass = hpass;
1733b7e1c893Smrg	h -= oldhpass;
1734b7e1c893Smrg	y += oldhpass;
1735b7e1c893Smrg	hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1736b7e1c893Smrg
1737b7e1c893Smrg	if (hpass) {
1738b7e1c893Smrg	    scratch_offset = scratch->total/2 - scratch_offset;
1739b7e1c893Smrg	    /* blit from vram to scratch */
1740ad43ddacSmrg	    info->accel_state->dst_obj.height = hpass;
1741ad43ddacSmrg	    info->accel_state->dst_obj.offset = scratch_mc_addr + scratch_offset;
1742ad43ddacSmrg	    R600DoPrepareCopy(pScrn);
1743b7e1c893Smrg	    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1744b7e1c893Smrg	    R600DoCopy(pScrn);
1745b7e1c893Smrg	}
1746b7e1c893Smrg
1747b7e1c893Smrg	/* wait for the engine to be idle */
1748b7e1c893Smrg	RADEONWaitForIdleCP(pScrn);
1749b7e1c893Smrg	/* memcopy from scratch to sys */
1750b7e1c893Smrg	while (oldhpass--) {
1751b7e1c893Smrg	    memcpy (dst, src, wpass);
1752b7e1c893Smrg	    dst += dst_pitch;
1753b7e1c893Smrg	    src += scratch_pitch_bytes;
1754b7e1c893Smrg	}
1755b7e1c893Smrg    }
1756b7e1c893Smrg
1757b7e1c893Smrg    R600IBDiscard(pScrn, scratch);
1758b7e1c893Smrg
1759b7e1c893Smrg    return TRUE;
1760b7e1c893Smrg
1761b7e1c893Smrg}
1762b7e1c893Smrg
1763ad43ddacSmrg#if defined(XF86DRM_MODE)
1764ad43ddacSmrg
1765ad43ddacSmrgstatic Bool
1766ad43ddacSmrgR600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
1767ad43ddacSmrg		     char *src, int src_pitch)
1768ad43ddacSmrg{
1769ad43ddacSmrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1770ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1771ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1772ad43ddacSmrg    struct radeon_exa_pixmap_priv *driver_priv;
17730974d292Smrg    struct radeon_bo *scratch = NULL;
17740974d292Smrg    struct radeon_bo *copy_dst;
17750974d292Smrg    unsigned char *dst;
1776ad43ddacSmrg    unsigned size;
1777ad43ddacSmrg    uint32_t dst_domain;
1778ad43ddacSmrg    int bpp = pDst->drawable.bitsPerPixel;
1779ad43ddacSmrg    uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
17800974d292Smrg    uint32_t copy_pitch;
1781ad43ddacSmrg    uint32_t src_pitch_hw = scratch_pitch / (bpp / 8);
1782ad43ddacSmrg    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
17830974d292Smrg    int ret;
17840974d292Smrg    Bool flush = TRUE;
1785ad43ddacSmrg    Bool r;
1786ad43ddacSmrg    int i;
1787ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
1788921a55d8Smrg    uint32_t tiling_flags = 0, pitch = 0;
1789ad43ddacSmrg
1790ad43ddacSmrg    if (bpp < 8)
1791ad43ddacSmrg	return FALSE;
1792ad43ddacSmrg
1793ad43ddacSmrg    driver_priv = exaGetPixmapDriverPrivate(pDst);
1794921a55d8Smrg    if (!driver_priv || !driver_priv->bo)
1795921a55d8Smrg	return FALSE;
1796921a55d8Smrg
1797921a55d8Smrg    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
1798921a55d8Smrg    if (ret)
1799921a55d8Smrg	ErrorF("radeon_bo_get_tiling failed\n");
1800ad43ddacSmrg
18010974d292Smrg    /* If we know the BO won't be busy, don't bother with a scratch */
18020974d292Smrg    copy_dst = driver_priv->bo;
18030974d292Smrg    copy_pitch = pDst->devKind;
1804921a55d8Smrg    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1805921a55d8Smrg	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1806921a55d8Smrg	    flush = FALSE;
1807921a55d8Smrg	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
1808921a55d8Smrg		goto copy;
1809921a55d8Smrg	}
18100974d292Smrg    }
1811ad43ddacSmrg
1812ad43ddacSmrg    size = scratch_pitch * h;
1813ad43ddacSmrg    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
1814ad43ddacSmrg    if (scratch == NULL) {
18150974d292Smrg	goto copy;
1816ad43ddacSmrg    }
1817ad43ddacSmrg
1818ad43ddacSmrg    src_obj.pitch = src_pitch_hw;
1819ad43ddacSmrg    src_obj.width = w;
1820ad43ddacSmrg    src_obj.height = h;
1821ad43ddacSmrg    src_obj.offset = 0;
1822ad43ddacSmrg    src_obj.bpp = bpp;
1823ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1824ad43ddacSmrg    src_obj.bo = scratch;
1825ad43ddacSmrg
1826ad43ddacSmrg    dst_obj.pitch = dst_pitch_hw;
1827ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
1828ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
1829ad43ddacSmrg    dst_obj.offset = 0;
1830ad43ddacSmrg    dst_obj.bpp = bpp;
1831ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1832ad43ddacSmrg    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1833ad43ddacSmrg
1834ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1835ad43ddacSmrg			   &src_obj,
1836ad43ddacSmrg			   NULL,
1837ad43ddacSmrg			   &dst_obj,
1838ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1839ad43ddacSmrg			   3, 0xffffffff)) {
18400974d292Smrg        goto copy;
1841ad43ddacSmrg    }
18420974d292Smrg    copy_dst = scratch;
18430974d292Smrg    copy_pitch = scratch_pitch;
18440974d292Smrg    flush = FALSE;
18450974d292Smrg
18460974d292Smrgcopy:
18470974d292Smrg    if (flush)
18480974d292Smrg	radeon_cs_flush_indirect(pScrn);
1849ad43ddacSmrg
18500974d292Smrg    ret = radeon_bo_map(copy_dst, 0);
18510974d292Smrg    if (ret) {
1852ad43ddacSmrg        r = FALSE;
1853ad43ddacSmrg        goto out;
1854ad43ddacSmrg    }
1855ad43ddacSmrg    r = TRUE;
1856ad43ddacSmrg    size = w * bpp / 8;
18570974d292Smrg    dst = copy_dst->ptr;
18580974d292Smrg    if (copy_dst == driver_priv->bo)
18590974d292Smrg	dst += y * copy_pitch + x * bpp / 8;
1860ad43ddacSmrg    for (i = 0; i < h; i++) {
18610974d292Smrg        memcpy(dst + i * copy_pitch, src, size);
1862ad43ddacSmrg        src += src_pitch;
1863ad43ddacSmrg    }
18640974d292Smrg    radeon_bo_unmap(copy_dst);
1865ad43ddacSmrg
18660974d292Smrg    if (copy_dst == scratch) {
18670974d292Smrg	if (info->accel_state->vsync)
18680974d292Smrg	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1869ad43ddacSmrg
18700974d292Smrg	/* blit from gart to vram */
18710974d292Smrg	R600DoPrepareCopy(pScrn);
18720974d292Smrg	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
18730974d292Smrg	R600DoCopyVline(pDst);
18740974d292Smrg    }
1875ad43ddacSmrg
1876ad43ddacSmrgout:
18770974d292Smrg    if (scratch)
18780974d292Smrg	radeon_bo_unref(scratch);
1879ad43ddacSmrg    return r;
1880ad43ddacSmrg}
1881ad43ddacSmrg
1882ad43ddacSmrgstatic Bool
1883ad43ddacSmrgR600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
1884ad43ddacSmrg			 int h, char *dst, int dst_pitch)
1885ad43ddacSmrg{
1886ad43ddacSmrg    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1887ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1888ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1889ad43ddacSmrg    struct radeon_exa_pixmap_priv *driver_priv;
18900974d292Smrg    struct radeon_bo *scratch = NULL;
18910974d292Smrg    struct radeon_bo *copy_src;
1892ad43ddacSmrg    unsigned size;
1893ad43ddacSmrg    uint32_t src_domain = 0;
1894ad43ddacSmrg    int bpp = pSrc->drawable.bitsPerPixel;
1895ad43ddacSmrg    uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
18960974d292Smrg    uint32_t copy_pitch;
1897ad43ddacSmrg    uint32_t dst_pitch_hw = scratch_pitch / (bpp / 8);
1898ad43ddacSmrg    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
18990974d292Smrg    int ret;
19000974d292Smrg    Bool flush = FALSE;
1901ad43ddacSmrg    Bool r;
1902ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
1903921a55d8Smrg    uint32_t tiling_flags = 0, pitch = 0;
1904ad43ddacSmrg
1905ad43ddacSmrg    if (bpp < 8)
1906ad43ddacSmrg	return FALSE;
1907ad43ddacSmrg
1908ad43ddacSmrg    driver_priv = exaGetPixmapDriverPrivate(pSrc);
1909921a55d8Smrg    if (!driver_priv || !driver_priv->bo)
1910921a55d8Smrg	return FALSE;
1911921a55d8Smrg
1912921a55d8Smrg    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
1913921a55d8Smrg    if (ret)
1914921a55d8Smrg	ErrorF("radeon_bo_get_tiling failed\n");
1915ad43ddacSmrg
19160974d292Smrg    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
19170974d292Smrg    copy_src = driver_priv->bo;
19180974d292Smrg    copy_pitch = pSrc->devKind;
1919921a55d8Smrg    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1920921a55d8Smrg	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1921921a55d8Smrg	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
1922921a55d8Smrg	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
1923921a55d8Smrg		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
1924921a55d8Smrg		src_domain = 0;
1925921a55d8Smrg	    else /* A write may be scheduled */
1926921a55d8Smrg		flush = TRUE;
1927921a55d8Smrg	}
1928ad43ddacSmrg
1929921a55d8Smrg	if (!src_domain)
1930921a55d8Smrg	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
1931ad43ddacSmrg
1932921a55d8Smrg	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
1933921a55d8Smrg	    goto copy;
1934921a55d8Smrg    }
1935ad43ddacSmrg
1936ad43ddacSmrg    size = scratch_pitch * h;
1937ad43ddacSmrg    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
1938ad43ddacSmrg    if (scratch == NULL) {
19390974d292Smrg	goto copy;
1940ad43ddacSmrg    }
1941ad43ddacSmrg    radeon_cs_space_reset_bos(info->cs);
1942ad43ddacSmrg    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
1943ad43ddacSmrg				      RADEON_GEM_DOMAIN_VRAM, 0);
1944ad43ddacSmrg    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
1945ad43ddacSmrg    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
1946ad43ddacSmrg    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1947ad43ddacSmrg    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
19480974d292Smrg    ret = radeon_cs_space_check(info->cs);
19490974d292Smrg    if (ret) {
19500974d292Smrg        goto copy;
1951ad43ddacSmrg    }
1952ad43ddacSmrg
1953ad43ddacSmrg    src_obj.pitch = src_pitch_hw;
1954ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
1955ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
1956ad43ddacSmrg    src_obj.offset = 0;
1957ad43ddacSmrg    src_obj.bpp = bpp;
1958ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1959ad43ddacSmrg    src_obj.bo = radeon_get_pixmap_bo(pSrc);
1960921a55d8Smrg
1961ad43ddacSmrg    dst_obj.pitch = dst_pitch_hw;
1962ad43ddacSmrg    dst_obj.width = w;
1963ad43ddacSmrg    dst_obj.height = h;
1964ad43ddacSmrg    dst_obj.offset = 0;
1965ad43ddacSmrg    dst_obj.bo = scratch;
1966ad43ddacSmrg    dst_obj.bpp = bpp;
1967ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1968ad43ddacSmrg
1969ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1970ad43ddacSmrg			   &src_obj,
1971ad43ddacSmrg			   NULL,
1972ad43ddacSmrg			   &dst_obj,
1973ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1974ad43ddacSmrg			   3, 0xffffffff)) {
19750974d292Smrg        goto copy;
1976ad43ddacSmrg    }
1977ad43ddacSmrg
1978ad43ddacSmrg    /* blit from vram to gart */
1979ad43ddacSmrg    R600DoPrepareCopy(pScrn);
1980ad43ddacSmrg    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
1981ad43ddacSmrg    R600DoCopy(pScrn);
19820974d292Smrg    copy_src = scratch;
19830974d292Smrg    copy_pitch = scratch_pitch;
19840974d292Smrg    flush = TRUE;
1985ad43ddacSmrg
19860974d292Smrgcopy:
19870974d292Smrg    if (flush && info->cs)
1988ad43ddacSmrg	radeon_cs_flush_indirect(pScrn);
1989ad43ddacSmrg
19900974d292Smrg    ret = radeon_bo_map(copy_src, 0);
19910974d292Smrg    if (ret) {
19920974d292Smrg	ErrorF("failed to map pixmap: %d\n", ret);
1993ad43ddacSmrg        r = FALSE;
1994ad43ddacSmrg        goto out;
1995ad43ddacSmrg    }
1996ad43ddacSmrg    r = TRUE;
1997ad43ddacSmrg    w *= bpp / 8;
19980974d292Smrg    if (copy_src == driver_priv->bo)
19990974d292Smrg	size = y * copy_pitch + x * bpp / 8;
20000974d292Smrg    else
20010974d292Smrg	size = 0;
2002ad43ddacSmrg    while (h--) {
20030974d292Smrg        memcpy(dst, copy_src->ptr + size, w);
20040974d292Smrg        size += copy_pitch;
2005ad43ddacSmrg        dst += dst_pitch;
2006ad43ddacSmrg    }
20070974d292Smrg    radeon_bo_unmap(copy_src);
2008ad43ddacSmrgout:
20090974d292Smrg    if (scratch)
20100974d292Smrg	radeon_bo_unref(scratch);
2011ad43ddacSmrg    return r;
2012ad43ddacSmrg}
2013ad43ddacSmrg#endif
2014ad43ddacSmrg
2015b7e1c893Smrgstatic int
2016b7e1c893SmrgR600MarkSync(ScreenPtr pScreen)
2017b7e1c893Smrg{
2018b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
2019b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2020b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2021b7e1c893Smrg
2022b7e1c893Smrg    return ++accel_state->exaSyncMarker;
2023b7e1c893Smrg
2024b7e1c893Smrg}
2025b7e1c893Smrg
2026b7e1c893Smrgstatic void
2027b7e1c893SmrgR600Sync(ScreenPtr pScreen, int marker)
2028b7e1c893Smrg{
2029b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
2030b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2031b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2032b7e1c893Smrg
2033b7e1c893Smrg    if (accel_state->exaMarkerSynced != marker) {
2034ad43ddacSmrg#ifdef XF86DRM_MODE
2035ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2036ad43ddacSmrg	if (!info->cs)
2037ad43ddacSmrg#endif
2038ad43ddacSmrg#endif
2039ad43ddacSmrg	    RADEONWaitForIdleCP(pScrn);
2040b7e1c893Smrg	accel_state->exaMarkerSynced = marker;
2041b7e1c893Smrg    }
2042b7e1c893Smrg
2043b7e1c893Smrg}
2044b7e1c893Smrg
2045b7e1c893Smrgstatic Bool
2046b7e1c893SmrgR600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
2047b7e1c893Smrg{
2048b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2049b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2050b7e1c893Smrg
2051b7e1c893Smrg    /* 512 bytes per shader for now */
2052b7e1c893Smrg    int size = 512 * 9;
2053b7e1c893Smrg
2054b7e1c893Smrg    accel_state->shaders = NULL;
2055b7e1c893Smrg
2056ad43ddacSmrg#ifdef XF86DRM_MODE
2057ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2058ad43ddacSmrg    if (info->cs) {
2059ad43ddacSmrg	accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
2060ad43ddacSmrg						 RADEON_GEM_DOMAIN_VRAM, 0);
2061ad43ddacSmrg	if (accel_state->shaders_bo == NULL) {
2062ad43ddacSmrg	    ErrorF("Allocating shader failed\n");
2063ad43ddacSmrg	    return FALSE;
2064ad43ddacSmrg	}
2065ad43ddacSmrg	return TRUE;
2066ad43ddacSmrg    } else
2067ad43ddacSmrg#endif
2068ad43ddacSmrg#endif
2069ad43ddacSmrg    {
2070ad43ddacSmrg	accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
2071ad43ddacSmrg						 TRUE, NULL, NULL);
2072ad43ddacSmrg
2073ad43ddacSmrg	if (accel_state->shaders == NULL)
2074ad43ddacSmrg	    return FALSE;
2075ad43ddacSmrg    }
2076b7e1c893Smrg
2077b7e1c893Smrg    return TRUE;
2078b7e1c893Smrg}
2079b7e1c893Smrg
2080b7e1c893SmrgBool
2081b7e1c893SmrgR600LoadShaders(ScrnInfoPtr pScrn)
2082b7e1c893Smrg{
2083b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2084b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2085b7e1c893Smrg    RADEONChipFamily ChipSet = info->ChipFamily;
2086b7e1c893Smrg    uint32_t *shader;
2087ad43ddacSmrg#ifdef XF86DRM_MODE
2088ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2089ad43ddacSmrg    int ret;
2090ad43ddacSmrg
2091ad43ddacSmrg    if (info->cs) {
2092ad43ddacSmrg	ret = radeon_bo_map(accel_state->shaders_bo, 1);
2093ad43ddacSmrg	if (ret) {
2094ad43ddacSmrg	    FatalError("failed to map shader %d\n", ret);
2095ad43ddacSmrg	    return FALSE;
2096ad43ddacSmrg	}
2097ad43ddacSmrg	shader = accel_state->shaders_bo->ptr;
2098ad43ddacSmrg    } else
2099ad43ddacSmrg#endif
2100ad43ddacSmrg#endif
2101ad43ddacSmrg	shader = (pointer)((char *)info->FB + accel_state->shaders->offset);
2102b7e1c893Smrg
2103b7e1c893Smrg    /*  solid vs --------------------------------------- */
2104b7e1c893Smrg    accel_state->solid_vs_offset = 0;
2105b7e1c893Smrg    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
2106b7e1c893Smrg
2107b7e1c893Smrg    /*  solid ps --------------------------------------- */
2108b7e1c893Smrg    accel_state->solid_ps_offset = 512;
2109b7e1c893Smrg    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
2110b7e1c893Smrg
2111b7e1c893Smrg    /*  copy vs --------------------------------------- */
2112b7e1c893Smrg    accel_state->copy_vs_offset = 1024;
2113b7e1c893Smrg    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
2114b7e1c893Smrg
2115b7e1c893Smrg    /*  copy ps --------------------------------------- */
2116b7e1c893Smrg    accel_state->copy_ps_offset = 1536;
2117b7e1c893Smrg    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2118b7e1c893Smrg
2119b7e1c893Smrg    /*  comp vs --------------------------------------- */
2120b7e1c893Smrg    accel_state->comp_vs_offset = 2048;
2121b7e1c893Smrg    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2122b7e1c893Smrg
2123b7e1c893Smrg    /*  comp ps --------------------------------------- */
2124b7e1c893Smrg    accel_state->comp_ps_offset = 2560;
2125b7e1c893Smrg    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2126b7e1c893Smrg
2127b7e1c893Smrg    /*  xv vs --------------------------------------- */
21280974d292Smrg    accel_state->xv_vs_offset = 3072;
2129b7e1c893Smrg    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2130b7e1c893Smrg
2131b7e1c893Smrg    /*  xv ps --------------------------------------- */
21320974d292Smrg    accel_state->xv_ps_offset = 3584;
2133b7e1c893Smrg    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2134b7e1c893Smrg
2135ad43ddacSmrg#ifdef XF86DRM_MODE
2136ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2137ad43ddacSmrg    if (info->cs) {
2138ad43ddacSmrg	radeon_bo_unmap(accel_state->shaders_bo);
2139ad43ddacSmrg    }
2140ad43ddacSmrg#endif
2141ad43ddacSmrg#endif
2142ad43ddacSmrg
2143b7e1c893Smrg    return TRUE;
2144b7e1c893Smrg}
2145b7e1c893Smrg
2146b7e1c893Smrgstatic Bool
2147b7e1c893SmrgR600PrepareAccess(PixmapPtr pPix, int index)
2148b7e1c893Smrg{
2149b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2150b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2151b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
2152b7e1c893Smrg
2153b7e1c893Smrg    /* flush HDP read/write caches */
2154b7e1c893Smrg    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2155b7e1c893Smrg
2156b7e1c893Smrg    return TRUE;
2157b7e1c893Smrg}
2158b7e1c893Smrg
2159b7e1c893Smrgstatic void
2160b7e1c893SmrgR600FinishAccess(PixmapPtr pPix, int index)
2161b7e1c893Smrg{
2162b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2163b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2164b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
2165b7e1c893Smrg
2166b7e1c893Smrg    /* flush HDP read/write caches */
2167b7e1c893Smrg    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2168b7e1c893Smrg
2169b7e1c893Smrg}
2170b7e1c893Smrg
2171b7e1c893SmrgBool
2172b7e1c893SmrgR600DrawInit(ScreenPtr pScreen)
2173b7e1c893Smrg{
2174b7e1c893Smrg    ScrnInfoPtr pScrn =  xf86Screens[pScreen->myNum];
2175b7e1c893Smrg    RADEONInfoPtr info   = RADEONPTR(pScrn);
2176b7e1c893Smrg
2177b7e1c893Smrg    if (info->accel_state->exa == NULL) {
2178b7e1c893Smrg	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2179b7e1c893Smrg	return FALSE;
2180b7e1c893Smrg    }
2181b7e1c893Smrg
2182b7e1c893Smrg    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2183b7e1c893Smrg    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2184b7e1c893Smrg
2185b7e1c893Smrg    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2186b7e1c893Smrg    info->accel_state->exa->Solid = R600Solid;
2187b7e1c893Smrg    info->accel_state->exa->DoneSolid = R600DoneSolid;
2188b7e1c893Smrg
2189b7e1c893Smrg    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2190b7e1c893Smrg    info->accel_state->exa->Copy = R600Copy;
2191b7e1c893Smrg    info->accel_state->exa->DoneCopy = R600DoneCopy;
2192b7e1c893Smrg
2193b7e1c893Smrg    info->accel_state->exa->MarkSync = R600MarkSync;
2194b7e1c893Smrg    info->accel_state->exa->WaitMarker = R600Sync;
2195b7e1c893Smrg
2196ad43ddacSmrg#ifdef XF86DRM_MODE
2197ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2198ad43ddacSmrg    if (info->cs) {
2199ad43ddacSmrg	info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
2200ad43ddacSmrg	info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
2201ad43ddacSmrg	info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
2202ad43ddacSmrg	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
2203ad43ddacSmrg	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
2204ad43ddacSmrg	info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
2205ad43ddacSmrg	info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
22060974d292Smrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5)
22070974d292Smrg        info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
22080974d292Smrg#endif
2209ad43ddacSmrg    } else
2210ad43ddacSmrg#endif
2211ad43ddacSmrg#endif
2212ad43ddacSmrg    {
2213ad43ddacSmrg	info->accel_state->exa->PrepareAccess = R600PrepareAccess;
2214ad43ddacSmrg	info->accel_state->exa->FinishAccess = R600FinishAccess;
2215ad43ddacSmrg
2216ad43ddacSmrg	/* AGP seems to have problems with gart transfers */
2217ad43ddacSmrg	if (info->accelDFS) {
2218ad43ddacSmrg	    info->accel_state->exa->UploadToScreen = R600UploadToScreen;
2219ad43ddacSmrg	    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen;
2220ad43ddacSmrg	}
2221b7e1c893Smrg    }
2222b7e1c893Smrg
2223b7e1c893Smrg    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
2224b7e1c893Smrg#ifdef EXA_SUPPORTS_PREPARE_AUX
2225b7e1c893Smrg    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
2226ad43ddacSmrg#endif
2227ad43ddacSmrg
2228ad43ddacSmrg#ifdef XF86DRM_MODE
2229ad43ddacSmrg#ifdef EXA_HANDLES_PIXMAPS
2230ad43ddacSmrg    if (info->cs) {
2231ad43ddacSmrg	info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
2232ad43ddacSmrg#ifdef EXA_MIXED_PIXMAPS
2233ad43ddacSmrg	info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
2234ad43ddacSmrg#endif
2235ad43ddacSmrg    }
2236ad43ddacSmrg#endif
2237b7e1c893Smrg#endif
2238b7e1c893Smrg    info->accel_state->exa->pixmapOffsetAlign = 256;
2239b7e1c893Smrg    info->accel_state->exa->pixmapPitchAlign = 256;
2240b7e1c893Smrg
2241b7e1c893Smrg    info->accel_state->exa->CheckComposite = R600CheckComposite;
2242b7e1c893Smrg    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2243b7e1c893Smrg    info->accel_state->exa->Composite = R600Composite;
2244b7e1c893Smrg    info->accel_state->exa->DoneComposite = R600DoneComposite;
2245b7e1c893Smrg
2246b7e1c893Smrg#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
2247b7e1c893Smrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
2248b7e1c893Smrg
2249b7e1c893Smrg    info->accel_state->exa->maxPitchBytes = 32768;
2250b7e1c893Smrg    info->accel_state->exa->maxX = 8192;
2251b7e1c893Smrg#else
2252b7e1c893Smrg    info->accel_state->exa->maxX = 8192;
2253b7e1c893Smrg#endif
2254b7e1c893Smrg    info->accel_state->exa->maxY = 8192;
2255b7e1c893Smrg
2256b7e1c893Smrg    /* not supported yet */
2257ad43ddacSmrg    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2258ad43ddacSmrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2259ad43ddacSmrg	info->accel_state->vsync = TRUE;
2260ad43ddacSmrg    } else
2261ad43ddacSmrg	info->accel_state->vsync = FALSE;
2262b7e1c893Smrg
2263b7e1c893Smrg    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
22642f39173dSmrg	free(info->accel_state->exa);
2265b7e1c893Smrg	return FALSE;
2266b7e1c893Smrg    }
2267b7e1c893Smrg
2268ad43ddacSmrg#ifdef XF86DRM_MODE
2269ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2270ad43ddacSmrg    if (!info->cs)
2271ad43ddacSmrg#endif
2272ad43ddacSmrg#endif
2273ad43ddacSmrg	if (!info->gartLocation)
2274ad43ddacSmrg	    return FALSE;
2275b7e1c893Smrg
2276b7e1c893Smrg    info->accel_state->XInited3D = FALSE;
2277b7e1c893Smrg    info->accel_state->copy_area = NULL;
2278ad43ddacSmrg    info->accel_state->src_obj[0].bo = NULL;
2279ad43ddacSmrg    info->accel_state->src_obj[1].bo = NULL;
2280ad43ddacSmrg    info->accel_state->dst_obj.bo = NULL;
2281ad43ddacSmrg    info->accel_state->copy_area_bo = NULL;
2282921a55d8Smrg    info->accel_state->vbo.vb_start_op = -1;
22830974d292Smrg    info->accel_state->finish_op = r600_finish_op;
2284921a55d8Smrg    info->accel_state->vbo.verts_per_op = 3;
22850974d292Smrg    RADEONVlineHelperClear(pScrn);
2286ad43ddacSmrg
2287ad43ddacSmrg#ifdef XF86DRM_MODE
2288ad43ddacSmrg    radeon_vbo_init_lists(pScrn);
2289ad43ddacSmrg#endif
2290b7e1c893Smrg
2291b7e1c893Smrg    if (!R600AllocShaders(pScrn, pScreen))
2292b7e1c893Smrg	return FALSE;
2293b7e1c893Smrg
2294b7e1c893Smrg    if (!R600LoadShaders(pScrn))
2295b7e1c893Smrg	return FALSE;
2296b7e1c893Smrg
2297b7e1c893Smrg    exaMarkSync(pScreen);
2298b7e1c893Smrg
2299b7e1c893Smrg    return TRUE;
2300b7e1c893Smrg
2301b7e1c893Smrg}
2302b7e1c893Smrg
2303