r600_exa.c revision c503f109
1b7e1c893Smrg/*
2b7e1c893Smrg * Copyright 2008 Advanced Micro Devices, Inc.
3b7e1c893Smrg *
4b7e1c893Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b7e1c893Smrg * copy of this software and associated documentation files (the "Software"),
6b7e1c893Smrg * to deal in the Software without restriction, including without limitation
7b7e1c893Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b7e1c893Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b7e1c893Smrg * Software is furnished to do so, subject to the following conditions:
10b7e1c893Smrg *
11b7e1c893Smrg * The above copyright notice and this permission notice (including the next
12b7e1c893Smrg * paragraph) shall be included in all copies or substantial portions of the
13b7e1c893Smrg * Software.
14b7e1c893Smrg *
15b7e1c893Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b7e1c893Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b7e1c893Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b7e1c893Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b7e1c893Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b7e1c893Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b7e1c893Smrg * SOFTWARE.
22b7e1c893Smrg *
23b7e1c893Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24b7e1c893Smrg *
25b7e1c893Smrg */
26b7e1c893Smrg
27b7e1c893Smrg#ifdef HAVE_CONFIG_H
28b7e1c893Smrg#include "config.h"
29b7e1c893Smrg#endif
30b7e1c893Smrg
31b7e1c893Smrg#include "xf86.h"
32b7e1c893Smrg
33b7e1c893Smrg#include "exa.h"
34b7e1c893Smrg
35b7e1c893Smrg#include "radeon.h"
36b7e1c893Smrg#include "radeon_macros.h"
37b7e1c893Smrg#include "radeon_reg.h"
38b7e1c893Smrg#include "r600_shader.h"
39b7e1c893Smrg#include "r600_reg.h"
40b7e1c893Smrg#include "r600_state.h"
41b7e1c893Smrg
/* Declaration only; the definition is not in this part of the file
 * (presumably it lives in another driver source file -- TODO confirm). */
42b7e1c893Smrgextern PixmapPtr
43b7e1c893SmrgRADEONGetDrawablePixmap(DrawablePtr pDrawable);
44b7e1c893Smrg
45b7e1c893Smrg/* #define SHOW_VERTEXES */
46b7e1c893Smrg
/*
 * Raster-operation codes used to fill the RADEON_ROP[] table below.
 * Each value carries an 8-bit code in bits 16-23.  The suffixes appear to
 * follow the usual ROP3 naming (S = source, D = destination, a/o/x = and/or/xor,
 * n = not) -- inferred from the names and the GX* comments on the table.
 */
47b7e1c893Smrg#       define RADEON_ROP3_ZERO             0x00000000
48b7e1c893Smrg#       define RADEON_ROP3_DSa              0x00880000
49b7e1c893Smrg#       define RADEON_ROP3_SDna             0x00440000
50b7e1c893Smrg#       define RADEON_ROP3_S                0x00cc0000
51b7e1c893Smrg#       define RADEON_ROP3_DSna             0x00220000
52b7e1c893Smrg#       define RADEON_ROP3_D                0x00aa0000
53b7e1c893Smrg#       define RADEON_ROP3_DSx              0x00660000
54b7e1c893Smrg#       define RADEON_ROP3_DSo              0x00ee0000
55b7e1c893Smrg#       define RADEON_ROP3_DSon             0x00110000
56b7e1c893Smrg#       define RADEON_ROP3_DSxn             0x00990000
57b7e1c893Smrg#       define RADEON_ROP3_Dn               0x00550000
58b7e1c893Smrg#       define RADEON_ROP3_SDno             0x00dd0000
59b7e1c893Smrg#       define RADEON_ROP3_Sn               0x00330000
60b7e1c893Smrg#       define RADEON_ROP3_DSno             0x00bb0000
61b7e1c893Smrg#       define RADEON_ROP3_DSan             0x00770000
62b7e1c893Smrg#       define RADEON_ROP3_ONE              0x00ff0000
63b7e1c893Smrg
/*
 * Lookup table from an X11 GC function (GXclear .. GXset, 0..15) to the
 * value this file writes into CB_COLOR_CONTROL.  The table is indexed
 * directly by the alu/rop argument of the Prepare* routines, with no range
 * check at the point of use.
 */
64b7e1c893Smrguint32_t RADEON_ROP[16] = {
65b7e1c893Smrg    RADEON_ROP3_ZERO, /* GXclear        */
66b7e1c893Smrg    RADEON_ROP3_DSa,  /* GXand          */
67b7e1c893Smrg    RADEON_ROP3_SDna, /* GXandReverse   */
68b7e1c893Smrg    RADEON_ROP3_S,    /* GXcopy         */
69b7e1c893Smrg    RADEON_ROP3_DSna, /* GXandInverted  */
70b7e1c893Smrg    RADEON_ROP3_D,    /* GXnoop         */
71b7e1c893Smrg    RADEON_ROP3_DSx,  /* GXxor          */
72b7e1c893Smrg    RADEON_ROP3_DSo,  /* GXor           */
73b7e1c893Smrg    RADEON_ROP3_DSon, /* GXnor          */
74b7e1c893Smrg    RADEON_ROP3_DSxn, /* GXequiv        */
75b7e1c893Smrg    RADEON_ROP3_Dn,   /* GXinvert       */
76b7e1c893Smrg    RADEON_ROP3_SDno, /* GXorReverse    */
77b7e1c893Smrg    RADEON_ROP3_Sn,   /* GXcopyInverted */
78b7e1c893Smrg    RADEON_ROP3_DSno, /* GXorInverted   */
79b7e1c893Smrg    RADEON_ROP3_DSan, /* GXnand         */
80b7e1c893Smrg    RADEON_ROP3_ONE,  /* GXset          */
81b7e1c893Smrg};
82b7e1c893Smrg
/* Forward declaration: R600Solid() calls R600DoneSolid() to submit a full
 * vertex buffer before R600DoneSolid() is defined. */
83b7e1c893Smrgstatic void
84b7e1c893SmrgR600DoneSolid(PixmapPtr pPix);
85b7e1c893Smrg
/* Forward declaration; the definition and callers are further down the file
 * (not visible in this section). */
86b7e1c893Smrgstatic void
87b7e1c893SmrgR600DoneComposite(PixmapPtr pDst);
88b7e1c893Smrg
89b7e1c893Smrg
90b7e1c893Smrgstatic Bool
91b7e1c893SmrgR600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
92b7e1c893Smrg{
93b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
94b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
95b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
96b7e1c893Smrg    cb_config_t     cb_conf;
97b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
98b7e1c893Smrg    int pmask = 0;
99b7e1c893Smrg    uint32_t a, r, g, b;
100b7e1c893Smrg    float ps_alu_consts[4];
101b7e1c893Smrg
102b7e1c893Smrg    accel_state->dst_mc_addr = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
103b7e1c893Smrg    accel_state->dst_size = exaGetPixmapPitch(pPix) * pPix->drawable.height;
104b7e1c893Smrg    accel_state->dst_pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
105b7e1c893Smrg
106b7e1c893Smrg    /* bad pitch */
107b7e1c893Smrg    if (accel_state->dst_pitch & 7)
108b7e1c893Smrg	return FALSE;
109b7e1c893Smrg
110b7e1c893Smrg    /* bad offset */
111b7e1c893Smrg    if (accel_state->dst_mc_addr & 0xff)
112b7e1c893Smrg	return FALSE;
113b7e1c893Smrg
114b7e1c893Smrg    if (pPix->drawable.bitsPerPixel == 24)
115b7e1c893Smrg	return FALSE;
116b7e1c893Smrg
117b7e1c893Smrg    CLEAR (cb_conf);
118b7e1c893Smrg    CLEAR (vs_conf);
119b7e1c893Smrg    CLEAR (ps_conf);
120b7e1c893Smrg
121b7e1c893Smrg    /* return FALSE; */
122b7e1c893Smrg
123b7e1c893Smrg#ifdef SHOW_VERTEXES
124b7e1c893Smrg    ErrorF("%dx%d @ %dbpp, 0x%08x\n", pPix->drawable.width, pPix->drawable.height,
125b7e1c893Smrg	   pPix->drawable.bitsPerPixel, exaGetPixmapPitch(pPix));
126b7e1c893Smrg#endif
127b7e1c893Smrg
128b7e1c893Smrg    accel_state->ib = RADEONCPGetBuffer(pScrn);
129b7e1c893Smrg
130b7e1c893Smrg    /* Init */
131b7e1c893Smrg    start_3d(pScrn, accel_state->ib);
132b7e1c893Smrg
133b7e1c893Smrg    set_default_state(pScrn, accel_state->ib);
134b7e1c893Smrg
135b7e1c893Smrg    /* Scissor / viewport */
136b7e1c893Smrg    EREG(accel_state->ib, PA_CL_VTE_CNTL,                      VTX_XY_FMT_bit);
137b7e1c893Smrg    EREG(accel_state->ib, PA_CL_CLIP_CNTL,                     CLIP_DISABLE_bit);
138b7e1c893Smrg
139b7e1c893Smrg    accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
140b7e1c893Smrg	accel_state->solid_vs_offset;
141b7e1c893Smrg    accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
142b7e1c893Smrg	accel_state->solid_ps_offset;
143b7e1c893Smrg    accel_state->vs_size = 512;
144b7e1c893Smrg    accel_state->ps_size = 512;
145b7e1c893Smrg
146b7e1c893Smrg    /* Shader */
147b7e1c893Smrg
148b7e1c893Smrg    /* flush SQ cache */
149b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
150b7e1c893Smrg			accel_state->vs_size, accel_state->vs_mc_addr);
151b7e1c893Smrg
152b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
153b7e1c893Smrg    vs_conf.num_gprs            = 2;
154b7e1c893Smrg    vs_conf.stack_size          = 0;
155b7e1c893Smrg    vs_setup                    (pScrn, accel_state->ib, &vs_conf);
156b7e1c893Smrg
157b7e1c893Smrg    /* flush SQ cache */
158b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
159b7e1c893Smrg			accel_state->ps_size, accel_state->ps_mc_addr);
160b7e1c893Smrg
161b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
162b7e1c893Smrg    ps_conf.num_gprs            = 1;
163b7e1c893Smrg    ps_conf.stack_size          = 0;
164b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
165b7e1c893Smrg    ps_conf.clamp_consts        = 0;
166b7e1c893Smrg    ps_conf.export_mode         = 2;
167b7e1c893Smrg    ps_setup                    (pScrn, accel_state->ib, &ps_conf);
168b7e1c893Smrg
169b7e1c893Smrg    /* Render setup */
170b7e1c893Smrg    if (pm & 0x000000ff)
171b7e1c893Smrg	pmask |= 4; /* B */
172b7e1c893Smrg    if (pm & 0x0000ff00)
173b7e1c893Smrg	pmask |= 2; /* G */
174b7e1c893Smrg    if (pm & 0x00ff0000)
175b7e1c893Smrg	pmask |= 1; /* R */
176b7e1c893Smrg    if (pm & 0xff000000)
177b7e1c893Smrg	pmask |= 8; /* A */
178b7e1c893Smrg    EREG(accel_state->ib, CB_SHADER_MASK,                      (pmask << OUTPUT0_ENABLE_shift));
179b7e1c893Smrg    EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));
180b7e1c893Smrg    EREG(accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[alu]);
181b7e1c893Smrg
182b7e1c893Smrg    cb_conf.id = 0;
183b7e1c893Smrg    cb_conf.w = accel_state->dst_pitch;
184b7e1c893Smrg    cb_conf.h = pPix->drawable.height;
185b7e1c893Smrg    cb_conf.base = accel_state->dst_mc_addr;
186b7e1c893Smrg
187b7e1c893Smrg    if (pPix->drawable.bitsPerPixel == 8) {
188b7e1c893Smrg	cb_conf.format = COLOR_8;
189b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
190b7e1c893Smrg    } else if (pPix->drawable.bitsPerPixel == 16) {
191b7e1c893Smrg	cb_conf.format = COLOR_5_6_5;
192b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
193b7e1c893Smrg    } else {
194b7e1c893Smrg	cb_conf.format = COLOR_8_8_8_8;
195b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
196b7e1c893Smrg    }
197b7e1c893Smrg    cb_conf.source_format = 1;
198b7e1c893Smrg    cb_conf.blend_clamp = 1;
199b7e1c893Smrg    set_render_target(pScrn, accel_state->ib, &cb_conf);
200b7e1c893Smrg
201b7e1c893Smrg    EREG(accel_state->ib, PA_SU_SC_MODE_CNTL,                  (FACE_bit			|
202b7e1c893Smrg								(POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift)	|
203b7e1c893Smrg								(POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
204b7e1c893Smrg    EREG(accel_state->ib, DB_SHADER_CONTROL,                   ((1 << Z_ORDER_shift)		| /* EARLY_Z_THEN_LATE_Z */
205b7e1c893Smrg								DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
206b7e1c893Smrg
207b7e1c893Smrg    /* Interpolator setup */
208b7e1c893Smrg    /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */
209b7e1c893Smrg    EREG(accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift));
210b7e1c893Smrg    EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
211b7e1c893Smrg
212b7e1c893Smrg    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
213b7e1c893Smrg     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
214b7e1c893Smrg    /* no VS exports as PS input (NUM_INTERP is not zero based, no minus one) */
215b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_IN_CONTROL_0,                 (0 << NUM_INTERP_shift));
216b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
217b7e1c893Smrg    /* color semantic id 0 -> GPR[0] */
218b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)	|
219b7e1c893Smrg								  (0x03 << DEFAULT_VAL_shift)	|
220b7e1c893Smrg								  FLAT_SHADE_bit		|
221b7e1c893Smrg								  SEL_CENTROID_bit));
222b7e1c893Smrg    EREG(accel_state->ib, SPI_INTERP_CONTROL_0,                FLAT_SHADE_ENA_bit | 0);
223b7e1c893Smrg
224b7e1c893Smrg    /* PS alu constants */
225b7e1c893Smrg    if (pPix->drawable.bitsPerPixel == 16) {
226b7e1c893Smrg	r = (fg >> 11) & 0x1f;
227b7e1c893Smrg	g = (fg >> 5) & 0x3f;
228b7e1c893Smrg	b = (fg >> 0) & 0x1f;
229b7e1c893Smrg	ps_alu_consts[0] = (float)r / 31; /* R */
230b7e1c893Smrg	ps_alu_consts[1] = (float)g / 63; /* G */
231b7e1c893Smrg	ps_alu_consts[2] = (float)b / 31; /* B */
232b7e1c893Smrg	ps_alu_consts[3] = 1.0; /* A */
233b7e1c893Smrg    } else if (pPix->drawable.bitsPerPixel == 8) {
234b7e1c893Smrg	a = (fg >> 0) & 0xff;
235b7e1c893Smrg	ps_alu_consts[0] = 0.0; /* R */
236b7e1c893Smrg	ps_alu_consts[1] = 0.0; /* G */
237b7e1c893Smrg	ps_alu_consts[2] = 0.0; /* B */
238b7e1c893Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
239b7e1c893Smrg    } else {
240b7e1c893Smrg	a = (fg >> 24) & 0xff;
241b7e1c893Smrg	r = (fg >> 16) & 0xff;
242b7e1c893Smrg	g = (fg >> 8) & 0xff;
243b7e1c893Smrg	b = (fg >> 0) & 0xff;
244b7e1c893Smrg	ps_alu_consts[0] = (float)r / 255; /* R */
245b7e1c893Smrg	ps_alu_consts[1] = (float)g / 255; /* G */
246b7e1c893Smrg	ps_alu_consts[2] = (float)b / 255; /* B */
247b7e1c893Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
248b7e1c893Smrg    }
249b7e1c893Smrg    set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
250b7e1c893Smrg		   sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
251b7e1c893Smrg
252b7e1c893Smrg    accel_state->vb_index = 0;
253b7e1c893Smrg
254b7e1c893Smrg#ifdef SHOW_VERTEXES
255b7e1c893Smrg    ErrorF("PM: 0x%08x\n", pm);
256b7e1c893Smrg#endif
257b7e1c893Smrg
258b7e1c893Smrg    return TRUE;
259b7e1c893Smrg}
260b7e1c893Smrg
261b7e1c893Smrg
262b7e1c893Smrgstatic void
263b7e1c893SmrgR600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
264b7e1c893Smrg{
265b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
266b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
267b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
268b7e1c893Smrg    float *vb;
269b7e1c893Smrg
270b7e1c893Smrg    if (((accel_state->vb_index + 3) * 8) > (accel_state->ib->total / 2)) {
271b7e1c893Smrg	R600DoneSolid(pPix);
272b7e1c893Smrg	accel_state->vb_index = 0;
273b7e1c893Smrg	accel_state->ib = RADEONCPGetBuffer(pScrn);
274b7e1c893Smrg    }
275b7e1c893Smrg
276b7e1c893Smrg    vb = (pointer)((char*)accel_state->ib->address +
277b7e1c893Smrg		   (accel_state->ib->total / 2) +
278b7e1c893Smrg		   accel_state->vb_index * 8);
279b7e1c893Smrg
280b7e1c893Smrg    vb[0] = (float)x1;
281b7e1c893Smrg    vb[1] = (float)y1;
282b7e1c893Smrg
283b7e1c893Smrg    vb[2] = (float)x1;
284b7e1c893Smrg    vb[3] = (float)y2;
285b7e1c893Smrg
286b7e1c893Smrg    vb[4] = (float)x2;
287b7e1c893Smrg    vb[5] = (float)y2;
288b7e1c893Smrg
289b7e1c893Smrg    accel_state->vb_index += 3;
290b7e1c893Smrg
291b7e1c893Smrg}
292b7e1c893Smrg
293b7e1c893Smrgstatic void
294b7e1c893SmrgR600DoneSolid(PixmapPtr pPix)
295b7e1c893Smrg{
296b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
297b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
298b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
299b7e1c893Smrg    draw_config_t   draw_conf;
300b7e1c893Smrg    vtx_resource_t  vtx_res;
301b7e1c893Smrg
302b7e1c893Smrg    CLEAR (draw_conf);
303b7e1c893Smrg    CLEAR (vtx_res);
304b7e1c893Smrg
305b7e1c893Smrg    if (accel_state->vb_index == 0) {
306b7e1c893Smrg	R600IBDiscard(pScrn, accel_state->ib);
307b7e1c893Smrg	return;
308b7e1c893Smrg    }
309b7e1c893Smrg
310b7e1c893Smrg    accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart +
311b7e1c893Smrg	(accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2);
312b7e1c893Smrg    accel_state->vb_size = accel_state->vb_index * 8;
313b7e1c893Smrg
314b7e1c893Smrg    /* flush vertex cache */
315b7e1c893Smrg    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
316b7e1c893Smrg	(info->ChipFamily == CHIP_FAMILY_RV620) ||
317b7e1c893Smrg	(info->ChipFamily == CHIP_FAMILY_RS780) ||
318c503f109Smrg	(info->ChipFamily == CHIP_FAMILY_RS880) ||
319b7e1c893Smrg	(info->ChipFamily == CHIP_FAMILY_RV710))
320b7e1c893Smrg	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
321b7e1c893Smrg			    accel_state->vb_size, accel_state->vb_mc_addr);
322b7e1c893Smrg    else
323b7e1c893Smrg	cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit,
324b7e1c893Smrg			    accel_state->vb_size, accel_state->vb_mc_addr);
325b7e1c893Smrg
326b7e1c893Smrg    /* Vertex buffer setup */
327b7e1c893Smrg    vtx_res.id              = SQ_VTX_RESOURCE_vs;
328b7e1c893Smrg    vtx_res.vtx_size_dw     = 8 / 4;
329b7e1c893Smrg    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
330b7e1c893Smrg    vtx_res.mem_req_size    = 1;
331b7e1c893Smrg    vtx_res.vb_addr         = accel_state->vb_mc_addr;
332b7e1c893Smrg    set_vtx_resource        (pScrn, accel_state->ib, &vtx_res);
333b7e1c893Smrg
334b7e1c893Smrg    /* Draw */
335b7e1c893Smrg    draw_conf.prim_type          = DI_PT_RECTLIST;
336b7e1c893Smrg    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
337b7e1c893Smrg    draw_conf.num_instances      = 1;
338b7e1c893Smrg    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
339b7e1c893Smrg    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
340b7e1c893Smrg
341b7e1c893Smrg    draw_auto(pScrn, accel_state->ib, &draw_conf);
342b7e1c893Smrg
343b7e1c893Smrg    wait_3d_idle_clean(pScrn, accel_state->ib);
344b7e1c893Smrg
345b7e1c893Smrg    /* sync dst surface */
346b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
347b7e1c893Smrg			accel_state->dst_size, accel_state->dst_mc_addr);
348b7e1c893Smrg
349b7e1c893Smrg    R600CPFlushIndirect(pScrn, accel_state->ib);
350b7e1c893Smrg}
351b7e1c893Smrg
352b7e1c893Smrgstatic void
353b7e1c893SmrgR600DoPrepareCopy(ScrnInfoPtr pScrn,
354b7e1c893Smrg		  int src_pitch, int src_width, int src_height, uint32_t src_offset, int src_bpp,
355b7e1c893Smrg		  int dst_pitch, int dst_height, uint32_t dst_offset, int dst_bpp,
356b7e1c893Smrg		  int rop, Pixel planemask)
357b7e1c893Smrg{
358b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
359b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
360b7e1c893Smrg    int pmask = 0;
361b7e1c893Smrg    cb_config_t     cb_conf;
362b7e1c893Smrg    tex_resource_t  tex_res;
363b7e1c893Smrg    tex_sampler_t   tex_samp;
364b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
365b7e1c893Smrg
366b7e1c893Smrg    CLEAR (cb_conf);
367b7e1c893Smrg    CLEAR (tex_res);
368b7e1c893Smrg    CLEAR (tex_samp);
369b7e1c893Smrg    CLEAR (vs_conf);
370b7e1c893Smrg    CLEAR (ps_conf);
371b7e1c893Smrg
372b7e1c893Smrg    accel_state->ib = RADEONCPGetBuffer(pScrn);
373b7e1c893Smrg
374b7e1c893Smrg    /* Init */
375b7e1c893Smrg    start_3d(pScrn, accel_state->ib);
376b7e1c893Smrg
377b7e1c893Smrg    set_default_state(pScrn, accel_state->ib);
378b7e1c893Smrg
379b7e1c893Smrg    /* Scissor / viewport */
380b7e1c893Smrg    EREG(accel_state->ib, PA_CL_VTE_CNTL,                      VTX_XY_FMT_bit);
381b7e1c893Smrg    EREG(accel_state->ib, PA_CL_CLIP_CNTL,                     CLIP_DISABLE_bit);
382b7e1c893Smrg
383b7e1c893Smrg    accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
384b7e1c893Smrg	accel_state->copy_vs_offset;
385b7e1c893Smrg    accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
386b7e1c893Smrg	accel_state->copy_ps_offset;
387b7e1c893Smrg    accel_state->vs_size = 512;
388b7e1c893Smrg    accel_state->ps_size = 512;
389b7e1c893Smrg
390b7e1c893Smrg    /* Shader */
391b7e1c893Smrg
392b7e1c893Smrg    /* flush SQ cache */
393b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
394b7e1c893Smrg			accel_state->vs_size, accel_state->vs_mc_addr);
395b7e1c893Smrg
396b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
397b7e1c893Smrg    vs_conf.num_gprs            = 2;
398b7e1c893Smrg    vs_conf.stack_size          = 0;
399b7e1c893Smrg    vs_setup                    (pScrn, accel_state->ib, &vs_conf);
400b7e1c893Smrg
401b7e1c893Smrg    /* flush SQ cache */
402b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
403b7e1c893Smrg			accel_state->ps_size, accel_state->ps_mc_addr);
404b7e1c893Smrg
405b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
406b7e1c893Smrg    ps_conf.num_gprs            = 1;
407b7e1c893Smrg    ps_conf.stack_size          = 0;
408b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
409b7e1c893Smrg    ps_conf.clamp_consts        = 0;
410b7e1c893Smrg    ps_conf.export_mode         = 2;
411b7e1c893Smrg    ps_setup                    (pScrn, accel_state->ib, &ps_conf);
412b7e1c893Smrg
413b7e1c893Smrg    accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8);
414b7e1c893Smrg    accel_state->src_mc_addr[0] = src_offset;
415b7e1c893Smrg    accel_state->src_pitch[0] = src_pitch;
416b7e1c893Smrg    accel_state->src_width[0] = src_width;
417b7e1c893Smrg    accel_state->src_height[0] = src_height;
418b7e1c893Smrg    accel_state->src_bpp[0] = src_bpp;
419b7e1c893Smrg
420b7e1c893Smrg    /* flush texture cache */
421b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
422b7e1c893Smrg			accel_state->src_size[0], accel_state->src_mc_addr[0]);
423b7e1c893Smrg
424b7e1c893Smrg    /* Texture */
425b7e1c893Smrg    tex_res.id                  = 0;
426b7e1c893Smrg    tex_res.w                   = src_width;
427b7e1c893Smrg    tex_res.h                   = src_height;
428b7e1c893Smrg    tex_res.pitch               = accel_state->src_pitch[0];
429b7e1c893Smrg    tex_res.depth               = 0;
430b7e1c893Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
431b7e1c893Smrg    tex_res.base                = accel_state->src_mc_addr[0];
432b7e1c893Smrg    tex_res.mip_base            = accel_state->src_mc_addr[0];
433b7e1c893Smrg    if (src_bpp == 8) {
434b7e1c893Smrg	tex_res.format              = FMT_8;
435b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
436b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
437b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
438b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
439b7e1c893Smrg    } else if (src_bpp == 16) {
440b7e1c893Smrg	tex_res.format              = FMT_5_6_5;
441b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
442b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
443b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
444b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
445b7e1c893Smrg    } else {
446b7e1c893Smrg	tex_res.format              = FMT_8_8_8_8;
447b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
448b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
449b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
450b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
451b7e1c893Smrg    }
452b7e1c893Smrg
453b7e1c893Smrg    tex_res.request_size        = 1;
454b7e1c893Smrg    tex_res.base_level          = 0;
455b7e1c893Smrg    tex_res.last_level          = 0;
456b7e1c893Smrg    tex_res.perf_modulation     = 0;
457b7e1c893Smrg    set_tex_resource            (pScrn, accel_state->ib, &tex_res);
458b7e1c893Smrg
459b7e1c893Smrg    tex_samp.id                 = 0;
460b7e1c893Smrg    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
461b7e1c893Smrg    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
462b7e1c893Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
463b7e1c893Smrg    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
464b7e1c893Smrg    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
465b7e1c893Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
466b7e1c893Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
467b7e1c893Smrg    set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
468b7e1c893Smrg
469b7e1c893Smrg
470b7e1c893Smrg    /* Render setup */
471b7e1c893Smrg    if (planemask & 0x000000ff)
472b7e1c893Smrg	pmask |= 4; /* B */
473b7e1c893Smrg    if (planemask & 0x0000ff00)
474b7e1c893Smrg	pmask |= 2; /* G */
475b7e1c893Smrg    if (planemask & 0x00ff0000)
476b7e1c893Smrg	pmask |= 1; /* R */
477b7e1c893Smrg    if (planemask & 0xff000000)
478b7e1c893Smrg	pmask |= 8; /* A */
479b7e1c893Smrg    EREG(accel_state->ib, CB_SHADER_MASK,                      (pmask << OUTPUT0_ENABLE_shift));
480b7e1c893Smrg    EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));
481b7e1c893Smrg    EREG(accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[rop]);
482b7e1c893Smrg
483b7e1c893Smrg    accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8);
484b7e1c893Smrg    accel_state->dst_mc_addr = dst_offset;
485b7e1c893Smrg    accel_state->dst_pitch = dst_pitch;
486b7e1c893Smrg    accel_state->dst_height = dst_height;
487b7e1c893Smrg    accel_state->dst_bpp = dst_bpp;
488b7e1c893Smrg
489b7e1c893Smrg    cb_conf.id = 0;
490b7e1c893Smrg    cb_conf.w = accel_state->dst_pitch;
491b7e1c893Smrg    cb_conf.h = dst_height;
492b7e1c893Smrg    cb_conf.base = accel_state->dst_mc_addr;
493b7e1c893Smrg    if (dst_bpp == 8) {
494b7e1c893Smrg	cb_conf.format = COLOR_8;
495b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
496b7e1c893Smrg    } else if (dst_bpp == 16) {
497b7e1c893Smrg	cb_conf.format = COLOR_5_6_5;
498b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
499b7e1c893Smrg    } else {
500b7e1c893Smrg	cb_conf.format = COLOR_8_8_8_8;
501b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
502b7e1c893Smrg    }
503b7e1c893Smrg    cb_conf.source_format = 1;
504b7e1c893Smrg    cb_conf.blend_clamp = 1;
505b7e1c893Smrg    set_render_target(pScrn, accel_state->ib, &cb_conf);
506b7e1c893Smrg
507b7e1c893Smrg    EREG(accel_state->ib, PA_SU_SC_MODE_CNTL,                  (FACE_bit			|
508b7e1c893Smrg								(POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift)	|
509b7e1c893Smrg								(POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
510b7e1c893Smrg    EREG(accel_state->ib, DB_SHADER_CONTROL,                   ((1 << Z_ORDER_shift)		| /* EARLY_Z_THEN_LATE_Z */
511b7e1c893Smrg								DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
512b7e1c893Smrg
513b7e1c893Smrg    /* Interpolator setup */
514b7e1c893Smrg    /* export tex coord from VS */
515b7e1c893Smrg    EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
516b7e1c893Smrg    EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
517b7e1c893Smrg
518b7e1c893Smrg    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
519b7e1c893Smrg     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
520b7e1c893Smrg    /* input tex coord from VS */
521b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_IN_CONTROL_0,                 ((1 << NUM_INTERP_shift)));
522b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
523b7e1c893Smrg    /* color semantic id 0 -> GPR[0] */
524b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)	|
525b7e1c893Smrg								(0x01 << DEFAULT_VAL_shift)	|
526b7e1c893Smrg								SEL_CENTROID_bit));
527b7e1c893Smrg    EREG(accel_state->ib, SPI_INTERP_CONTROL_0,                0);
528b7e1c893Smrg
529b7e1c893Smrg    accel_state->vb_index = 0;
530b7e1c893Smrg
531b7e1c893Smrg}
532b7e1c893Smrg
533b7e1c893Smrgstatic void
534b7e1c893SmrgR600DoCopy(ScrnInfoPtr pScrn)
535b7e1c893Smrg{
536b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
537b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
538b7e1c893Smrg    draw_config_t   draw_conf;
539b7e1c893Smrg    vtx_resource_t  vtx_res;
540b7e1c893Smrg
541b7e1c893Smrg    CLEAR (draw_conf);
542b7e1c893Smrg    CLEAR (vtx_res);
543b7e1c893Smrg
544b7e1c893Smrg    if (accel_state->vb_index == 0) {
545b7e1c893Smrg	R600IBDiscard(pScrn, accel_state->ib);
546b7e1c893Smrg	return;
547b7e1c893Smrg    }
548b7e1c893Smrg
549b7e1c893Smrg    accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart +
550b7e1c893Smrg	(accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2);
551b7e1c893Smrg    accel_state->vb_size = accel_state->vb_index * 16;
552b7e1c893Smrg
553b7e1c893Smrg    /* flush vertex cache */
554b7e1c893Smrg    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
555b7e1c893Smrg	(info->ChipFamily == CHIP_FAMILY_RV620) ||
556b7e1c893Smrg	(info->ChipFamily == CHIP_FAMILY_RS780) ||
557c503f109Smrg	(info->ChipFamily == CHIP_FAMILY_RS880) ||
558b7e1c893Smrg	(info->ChipFamily == CHIP_FAMILY_RV710))
559b7e1c893Smrg	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
560b7e1c893Smrg			    accel_state->vb_size, accel_state->vb_mc_addr);
561b7e1c893Smrg    else
562b7e1c893Smrg	cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit,
563b7e1c893Smrg			    accel_state->vb_size, accel_state->vb_mc_addr);
564b7e1c893Smrg
565b7e1c893Smrg    /* Vertex buffer setup */
566b7e1c893Smrg    vtx_res.id              = SQ_VTX_RESOURCE_vs;
567b7e1c893Smrg    vtx_res.vtx_size_dw     = 16 / 4;
568b7e1c893Smrg    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
569b7e1c893Smrg    vtx_res.mem_req_size    = 1;
570b7e1c893Smrg    vtx_res.vb_addr         = accel_state->vb_mc_addr;
571b7e1c893Smrg    set_vtx_resource        (pScrn, accel_state->ib, &vtx_res);
572b7e1c893Smrg
573b7e1c893Smrg    draw_conf.prim_type          = DI_PT_RECTLIST;
574b7e1c893Smrg    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
575b7e1c893Smrg    draw_conf.num_instances      = 1;
576b7e1c893Smrg    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
577b7e1c893Smrg    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
578b7e1c893Smrg
579b7e1c893Smrg    draw_auto(pScrn, accel_state->ib, &draw_conf);
580b7e1c893Smrg
581b7e1c893Smrg    wait_3d_idle_clean(pScrn, accel_state->ib);
582b7e1c893Smrg
583b7e1c893Smrg    /* sync dst surface */
584b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
585b7e1c893Smrg			accel_state->dst_size, accel_state->dst_mc_addr);
586b7e1c893Smrg
587b7e1c893Smrg    R600CPFlushIndirect(pScrn, accel_state->ib);
588b7e1c893Smrg}
589b7e1c893Smrg
590b7e1c893Smrgstatic void
591b7e1c893SmrgR600AppendCopyVertex(ScrnInfoPtr pScrn,
592b7e1c893Smrg		     int srcX, int srcY,
593b7e1c893Smrg		     int dstX, int dstY,
594b7e1c893Smrg		     int w, int h)
595b7e1c893Smrg{
596b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
597b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
598b7e1c893Smrg    float *vb;
599b7e1c893Smrg
600b7e1c893Smrg    if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) {
601b7e1c893Smrg	R600DoCopy(pScrn);
602b7e1c893Smrg	accel_state->vb_index = 0;
603b7e1c893Smrg	accel_state->ib = RADEONCPGetBuffer(pScrn);
604b7e1c893Smrg    }
605b7e1c893Smrg
606b7e1c893Smrg    vb = (pointer)((char*)accel_state->ib->address +
607b7e1c893Smrg		   (accel_state->ib->total / 2) +
608b7e1c893Smrg		   accel_state->vb_index * 16);
609b7e1c893Smrg
610b7e1c893Smrg    vb[0] = (float)dstX;
611b7e1c893Smrg    vb[1] = (float)dstY;
612b7e1c893Smrg    vb[2] = (float)srcX;
613b7e1c893Smrg    vb[3] = (float)srcY;
614b7e1c893Smrg
615b7e1c893Smrg    vb[4] = (float)dstX;
616b7e1c893Smrg    vb[5] = (float)(dstY + h);
617b7e1c893Smrg    vb[6] = (float)srcX;
618b7e1c893Smrg    vb[7] = (float)(srcY + h);
619b7e1c893Smrg
620b7e1c893Smrg    vb[8] = (float)(dstX + w);
621b7e1c893Smrg    vb[9] = (float)(dstY + h);
622b7e1c893Smrg    vb[10] = (float)(srcX + w);
623b7e1c893Smrg    vb[11] = (float)(srcY + h);
624b7e1c893Smrg
625b7e1c893Smrg    accel_state->vb_index += 3;
626b7e1c893Smrg}
627b7e1c893Smrg
628b7e1c893Smrgstatic Bool
629b7e1c893SmrgR600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
630b7e1c893Smrg		int xdir, int ydir,
631b7e1c893Smrg		int rop,
632b7e1c893Smrg		Pixel planemask)
633b7e1c893Smrg{
634b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
635b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
636b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
637b7e1c893Smrg
638b7e1c893Smrg    accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
639b7e1c893Smrg    accel_state->src_pitch[0] = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
640b7e1c893Smrg
641b7e1c893Smrg    accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
642b7e1c893Smrg    accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
643b7e1c893Smrg
644b7e1c893Smrg    accel_state->src_width[0] = pSrc->drawable.width;
645b7e1c893Smrg    accel_state->src_height[0] = pSrc->drawable.height;
646b7e1c893Smrg    accel_state->src_bpp[0] = pSrc->drawable.bitsPerPixel;
647b7e1c893Smrg    accel_state->dst_height = pDst->drawable.height;
648b7e1c893Smrg    accel_state->dst_bpp = pDst->drawable.bitsPerPixel;
649b7e1c893Smrg
650b7e1c893Smrg    /* bad pitch */
651b7e1c893Smrg    if (accel_state->src_pitch[0] & 7)
652b7e1c893Smrg	return FALSE;
653b7e1c893Smrg    if (accel_state->dst_pitch & 7)
654b7e1c893Smrg	return FALSE;
655b7e1c893Smrg
656b7e1c893Smrg    /* bad offset */
657b7e1c893Smrg    if (accel_state->src_mc_addr[0] & 0xff)
658b7e1c893Smrg	return FALSE;
659b7e1c893Smrg    if (accel_state->dst_mc_addr & 0xff)
660b7e1c893Smrg	return FALSE;
661b7e1c893Smrg
662b7e1c893Smrg    if (pSrc->drawable.bitsPerPixel == 24)
663b7e1c893Smrg	return FALSE;
664b7e1c893Smrg    if (pDst->drawable.bitsPerPixel == 24)
665b7e1c893Smrg	return FALSE;
666b7e1c893Smrg
667b7e1c893Smrg    /* return FALSE; */
668b7e1c893Smrg
669b7e1c893Smrg#ifdef SHOW_VERTEXES
670b7e1c893Smrg    ErrorF("src: %dx%d @ %dbpp, 0x%08x\n", pSrc->drawable.width, pSrc->drawable.height,
671b7e1c893Smrg	   pSrc->drawable.bitsPerPixel, exaGetPixmapPitch(pSrc));
672b7e1c893Smrg    ErrorF("dst: %dx%d @ %dbpp, 0x%08x\n", pDst->drawable.width, pDst->drawable.height,
673b7e1c893Smrg	   pDst->drawable.bitsPerPixel, exaGetPixmapPitch(pDst));
674b7e1c893Smrg#endif
675b7e1c893Smrg
676b7e1c893Smrg    accel_state->rop = rop;
677b7e1c893Smrg    accel_state->planemask = planemask;
678b7e1c893Smrg
679b7e1c893Smrg    if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst)) {
680b7e1c893Smrg	unsigned long size = pDst->drawable.height * accel_state->dst_pitch * pDst->drawable.bitsPerPixel/8;
681b7e1c893Smrg	accel_state->same_surface = TRUE;
682b7e1c893Smrg
683b7e1c893Smrg	if (accel_state->copy_area) {
684b7e1c893Smrg	    exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
685b7e1c893Smrg	    accel_state->copy_area = NULL;
686b7e1c893Smrg	}
687b7e1c893Smrg	accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
688b7e1c893Smrg    } else {
689b7e1c893Smrg	accel_state->same_surface = FALSE;
690b7e1c893Smrg
691b7e1c893Smrg	R600DoPrepareCopy(pScrn,
692b7e1c893Smrg			  accel_state->src_pitch[0], pSrc->drawable.width, pSrc->drawable.height,
693b7e1c893Smrg			  accel_state->src_mc_addr[0], pSrc->drawable.bitsPerPixel,
694b7e1c893Smrg			  accel_state->dst_pitch, pDst->drawable.height,
695b7e1c893Smrg			  accel_state->dst_mc_addr, pDst->drawable.bitsPerPixel,
696b7e1c893Smrg			  rop, planemask);
697b7e1c893Smrg
698b7e1c893Smrg    }
699b7e1c893Smrg
700b7e1c893Smrg    return TRUE;
701b7e1c893Smrg}
702b7e1c893Smrg
703b7e1c893Smrgstatic Bool
704b7e1c893Smrgis_overlap(int sx1, int sx2, int sy1, int sy2, int dx1, int dx2, int dy1, int dy2)
705b7e1c893Smrg{
706b7e1c893Smrg    if (((sx1 >= dx1) && (sx1 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || /* TL x1, y1 */
707b7e1c893Smrg	((sx2 >= dx1) && (sx2 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || /* TR x2, y1 */
708b7e1c893Smrg	((sx1 >= dx1) && (sx1 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2)) || /* BL x1, y2 */
709b7e1c893Smrg	((sx2 >= dx1) && (sx2 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2)))   /* BR x2, y2 */
710b7e1c893Smrg	return TRUE;
711b7e1c893Smrg    else
712b7e1c893Smrg	return FALSE;
713b7e1c893Smrg}
714b7e1c893Smrg
715b7e1c893Smrgstatic void
716b7e1c893SmrgR600OverlapCopy(PixmapPtr pDst,
717b7e1c893Smrg		int srcX, int srcY,
718b7e1c893Smrg		int dstX, int dstY,
719b7e1c893Smrg		int w, int h)
720b7e1c893Smrg{
721b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
722b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
723b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
724b7e1c893Smrg    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
725b7e1c893Smrg    uint32_t dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
726b7e1c893Smrg    int i, hchunk, vchunk;
727b7e1c893Smrg
728b7e1c893Smrg    if (is_overlap(srcX, srcX + w, srcY, srcY + h,
729b7e1c893Smrg		   dstX, dstX + w, dstY, dstY + h)) {
730b7e1c893Smrg        /* Calculate height/width of non-overlapping area */
731b7e1c893Smrg        hchunk = (srcX < dstX) ? (dstX - srcX) : (srcX - dstX);
732b7e1c893Smrg        vchunk = (srcY < dstY) ? (dstY - srcY) : (srcY - dstY);
733b7e1c893Smrg
734b7e1c893Smrg        /* Diagonally offset overlap is reduced to either horizontal or vertical offset-only
735b7e1c893Smrg         * by copying a part of the  non-overlapping portion, then adjusting coordinates
736b7e1c893Smrg         * Choose horizontal vs vertical to minimize the total number of copy operations
737b7e1c893Smrg         */
738b7e1c893Smrg        if (vchunk != 0 && hchunk != 0) { /* diagonal */
739b7e1c893Smrg            if ((w / hchunk) <= (h / vchunk)) { /* reduce to horizontal  */
740b7e1c893Smrg                if (srcY > dstY ) { /* diagonal up */
741b7e1c893Smrg                    R600DoPrepareCopy(pScrn,
742b7e1c893Smrg                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
743b7e1c893Smrg                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
744b7e1c893Smrg                                      accel_state->rop, accel_state->planemask);
745b7e1c893Smrg                    R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, vchunk);
746b7e1c893Smrg                    R600DoCopy(pScrn);
747b7e1c893Smrg
748b7e1c893Smrg                    srcY = srcY + vchunk;
749b7e1c893Smrg                    dstY = dstY + vchunk;
750b7e1c893Smrg                } else { /* diagonal down */
751b7e1c893Smrg                    R600DoPrepareCopy(pScrn,
752b7e1c893Smrg                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
753b7e1c893Smrg                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
754b7e1c893Smrg                                      accel_state->rop, accel_state->planemask);
755b7e1c893Smrg                    R600AppendCopyVertex(pScrn, srcX, srcY + h - vchunk, dstX, dstY + h - vchunk, w, vchunk);
756b7e1c893Smrg                    R600DoCopy(pScrn);
757b7e1c893Smrg                }
758b7e1c893Smrg                h = h - vchunk;
759b7e1c893Smrg                vchunk = 0;
760b7e1c893Smrg            } else { /* reduce to vertical */
761b7e1c893Smrg                if (srcX > dstX ) { /* diagonal left */
762b7e1c893Smrg                    R600DoPrepareCopy(pScrn,
763b7e1c893Smrg                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
764b7e1c893Smrg                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
765b7e1c893Smrg                                      accel_state->rop, accel_state->planemask);
766b7e1c893Smrg                    R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, hchunk, h);
767b7e1c893Smrg                    R600DoCopy(pScrn);
768b7e1c893Smrg
769b7e1c893Smrg                    srcX = srcX + hchunk;
770b7e1c893Smrg                    dstX = dstX + hchunk;
771b7e1c893Smrg                } else { /* diagonal right */
772b7e1c893Smrg                    R600DoPrepareCopy(pScrn,
773b7e1c893Smrg                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
774b7e1c893Smrg                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
775b7e1c893Smrg                                      accel_state->rop, accel_state->planemask);
776b7e1c893Smrg                    R600AppendCopyVertex(pScrn, srcX + w - hchunk, srcY, dstX + w - hchunk, dstY, hchunk, h);
777b7e1c893Smrg                    R600DoCopy(pScrn);
778b7e1c893Smrg                }
779b7e1c893Smrg                w = w - hchunk;
780b7e1c893Smrg                hchunk = 0;
781b7e1c893Smrg            }
782b7e1c893Smrg        }
783b7e1c893Smrg
784b7e1c893Smrg	if (vchunk == 0) { /* left/right */
785b7e1c893Smrg	    if (srcX < dstX) { /* right */
786b7e1c893Smrg		/* copy right to left */
787b7e1c893Smrg		for (i = w; i > 0; i -= hchunk) {
788b7e1c893Smrg		    R600DoPrepareCopy(pScrn,
789b7e1c893Smrg				      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
790b7e1c893Smrg				      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
791b7e1c893Smrg				      accel_state->rop, accel_state->planemask);
792b7e1c893Smrg		    R600AppendCopyVertex(pScrn, srcX + i - hchunk, srcY, dstX + i - hchunk, dstY, hchunk, h);
793b7e1c893Smrg		    R600DoCopy(pScrn);
794b7e1c893Smrg		}
795b7e1c893Smrg	    } else { /* left */
796b7e1c893Smrg		/* copy left to right */
797b7e1c893Smrg		for (i = 0; i < w; i += hchunk) {
798b7e1c893Smrg		    R600DoPrepareCopy(pScrn,
799b7e1c893Smrg				      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
800b7e1c893Smrg				      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
801b7e1c893Smrg				      accel_state->rop, accel_state->planemask);
802b7e1c893Smrg
803b7e1c893Smrg		    R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY, hchunk, h);
804b7e1c893Smrg		    R600DoCopy(pScrn);
805b7e1c893Smrg		}
806b7e1c893Smrg	    }
807b7e1c893Smrg	} else { /* up/down */
808b7e1c893Smrg	    if (srcY > dstY) { /* up */
809b7e1c893Smrg		/* copy top to bottom */
810b7e1c893Smrg                for (i = 0; i < h; i += vchunk) {
811b7e1c893Smrg                    R600DoPrepareCopy(pScrn,
812b7e1c893Smrg                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
813b7e1c893Smrg                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
814b7e1c893Smrg                                      accel_state->rop, accel_state->planemask);
815b7e1c893Smrg
816b7e1c893Smrg                    if (vchunk > h - i) vchunk = h - i;
817b7e1c893Smrg                    R600AppendCopyVertex(pScrn, srcX, srcY + i, dstX, dstY + i, w, vchunk);
818b7e1c893Smrg                    R600DoCopy(pScrn);
819b7e1c893Smrg                }
820b7e1c893Smrg	    } else { /* down */
821b7e1c893Smrg		/* copy bottom to top */
822b7e1c893Smrg                for (i = h; i > 0; i -= vchunk) {
823b7e1c893Smrg                    R600DoPrepareCopy(pScrn,
824b7e1c893Smrg                                      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
825b7e1c893Smrg                                      dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
826b7e1c893Smrg                                      accel_state->rop, accel_state->planemask);
827b7e1c893Smrg
828b7e1c893Smrg                    if (vchunk > i) vchunk = i;
829b7e1c893Smrg                    R600AppendCopyVertex(pScrn, srcX, srcY + i - vchunk, dstX, dstY + i - vchunk, w, vchunk);
830b7e1c893Smrg                    R600DoCopy(pScrn);
831b7e1c893Smrg                }
832b7e1c893Smrg            }
833b7e1c893Smrg	}
834b7e1c893Smrg    } else {
835b7e1c893Smrg	R600DoPrepareCopy(pScrn,
836b7e1c893Smrg			  dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
837b7e1c893Smrg			  dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
838b7e1c893Smrg			  accel_state->rop, accel_state->planemask);
839b7e1c893Smrg
840b7e1c893Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
841b7e1c893Smrg	R600DoCopy(pScrn);
842b7e1c893Smrg    }
843b7e1c893Smrg}
844b7e1c893Smrg
845b7e1c893Smrgstatic void
846b7e1c893SmrgR600Copy(PixmapPtr pDst,
847b7e1c893Smrg	 int srcX, int srcY,
848b7e1c893Smrg	 int dstX, int dstY,
849b7e1c893Smrg	 int w, int h)
850b7e1c893Smrg{
851b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
852b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
853b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
854b7e1c893Smrg
855b7e1c893Smrg    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
856b7e1c893Smrg	return;
857b7e1c893Smrg
858b7e1c893Smrg    if (accel_state->same_surface && is_overlap(srcX, srcX + w, srcY, srcY + h, dstX, dstX + w, dstY, dstY + h)) {
859b7e1c893Smrg	if (accel_state->copy_area) {
860b7e1c893Smrg	    uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
861b7e1c893Smrg	    uint32_t orig_offset, tmp_offset;
862b7e1c893Smrg
863b7e1c893Smrg	    tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset;
864b7e1c893Smrg	    orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
865b7e1c893Smrg
866b7e1c893Smrg	    R600DoPrepareCopy(pScrn,
867b7e1c893Smrg			      pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel,
868b7e1c893Smrg			      pitch,                       pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel,
869b7e1c893Smrg			      accel_state->rop, accel_state->planemask);
870b7e1c893Smrg	    R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
871b7e1c893Smrg	    R600DoCopy(pScrn);
872b7e1c893Smrg	    R600DoPrepareCopy(pScrn,
873b7e1c893Smrg			      pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel,
874b7e1c893Smrg			      pitch,                       pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel,
875b7e1c893Smrg			      accel_state->rop, accel_state->planemask);
876b7e1c893Smrg	    R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
877b7e1c893Smrg	    R600DoCopy(pScrn);
878b7e1c893Smrg	} else
879b7e1c893Smrg	    R600OverlapCopy(pDst, srcX, srcY, dstX, dstY, w, h);
880b7e1c893Smrg    } else if (accel_state->same_surface) {
881b7e1c893Smrg	uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
882b7e1c893Smrg	uint32_t offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
883b7e1c893Smrg
884b7e1c893Smrg	R600DoPrepareCopy(pScrn,
885b7e1c893Smrg			  pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel,
886b7e1c893Smrg			  pitch,                       pDst->drawable.height, offset, pDst->drawable.bitsPerPixel,
887b7e1c893Smrg			  accel_state->rop, accel_state->planemask);
888b7e1c893Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
889b7e1c893Smrg	R600DoCopy(pScrn);
890b7e1c893Smrg    } else {
891b7e1c893Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
892b7e1c893Smrg    }
893b7e1c893Smrg
894b7e1c893Smrg}
895b7e1c893Smrg
896b7e1c893Smrgstatic void
897b7e1c893SmrgR600DoneCopy(PixmapPtr pDst)
898b7e1c893Smrg{
899b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
900b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
901b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
902b7e1c893Smrg
903b7e1c893Smrg    if (!accel_state->same_surface)
904b7e1c893Smrg	R600DoCopy(pScrn);
905b7e1c893Smrg
906b7e1c893Smrg    if (accel_state->copy_area) {
907b7e1c893Smrg	exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
908b7e1c893Smrg	accel_state->copy_area = NULL;
909b7e1c893Smrg    }
910b7e1c893Smrg
911b7e1c893Smrg}
912b7e1c893Smrg
/* Compile-time debug switches; set to 1 to enable the matching tracing. */
#define RADEON_TRACE_FALL 0
#define RADEON_TRACE_DRAW 0

/*
 * RADEON_FALLBACK((fmt, ...)): leave the calling function with FALSE.
 * The argument is a parenthesised ErrorF argument list; it is only printed
 * (prefixed with the function name) when RADEON_TRACE_FALL is non-zero.
 */
#if !RADEON_TRACE_FALL
#define RADEON_FALLBACK(x) return FALSE
#else
#define RADEON_FALLBACK(x)     		\
do {					\
	ErrorF("%s: ", __FUNCTION__);	\
	ErrorF x;			\
	return FALSE;			\
} while (0)
#endif

/* Convert a 16.16 fixed-point value to float (divide by 2^16). */
#define xFixedToFloat(f) (((float) (f)) / 65536)
928b7e1c893Smrg
929b7e1c893Smrgstatic inline void transformPoint(PictTransform *transform, xPointFixed *point)
930b7e1c893Smrg{
931b7e1c893Smrg    PictVector v;
932b7e1c893Smrg    v.vector[0] = point->x;
933b7e1c893Smrg    v.vector[1] = point->y;
934b7e1c893Smrg    v.vector[2] = xFixed1;
935b7e1c893Smrg    PictureTransformPoint(transform, &v);
936b7e1c893Smrg    point->x = v.vector[0];
937b7e1c893Smrg    point->y = v.vector[1];
938b7e1c893Smrg}
939b7e1c893Smrg
940b7e1c893Smrgstruct blendinfo {
941b7e1c893Smrg    Bool dst_alpha;
942b7e1c893Smrg    Bool src_alpha;
943b7e1c893Smrg    uint32_t blend_cntl;
944b7e1c893Smrg};
945b7e1c893Smrg
946b7e1c893Smrgstatic struct blendinfo R600BlendOp[] = {
947b7e1c893Smrg    /* Clear */
948b7e1c893Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
949b7e1c893Smrg    /* Src */
950b7e1c893Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
951b7e1c893Smrg    /* Dst */
952b7e1c893Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
953b7e1c893Smrg    /* Over */
954b7e1c893Smrg    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
955b7e1c893Smrg    /* OverReverse */
956b7e1c893Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
957b7e1c893Smrg    /* In */
958b7e1c893Smrg    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
959b7e1c893Smrg    /* InReverse */
960b7e1c893Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
961b7e1c893Smrg    /* Out */
962b7e1c893Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
963b7e1c893Smrg    /* OutReverse */
964b7e1c893Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
965b7e1c893Smrg    /* Atop */
966b7e1c893Smrg    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
967b7e1c893Smrg    /* AtopReverse */
968b7e1c893Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
969b7e1c893Smrg    /* Xor */
970b7e1c893Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
971b7e1c893Smrg    /* Add */
972b7e1c893Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
973b7e1c893Smrg};
974b7e1c893Smrg
975b7e1c893Smrgstruct formatinfo {
976b7e1c893Smrg    unsigned int fmt;
977b7e1c893Smrg    uint32_t card_fmt;
978b7e1c893Smrg};
979b7e1c893Smrg
980b7e1c893Smrgstatic struct formatinfo R600TexFormats[] = {
981b7e1c893Smrg    {PICT_a8r8g8b8,	FMT_8_8_8_8},
982b7e1c893Smrg    {PICT_x8r8g8b8,	FMT_8_8_8_8},
983b7e1c893Smrg    {PICT_a8b8g8r8,	FMT_8_8_8_8},
984b7e1c893Smrg    {PICT_x8b8g8r8,	FMT_8_8_8_8},
985b7e1c893Smrg    {PICT_r5g6b5,	FMT_5_6_5},
986b7e1c893Smrg    {PICT_a1r5g5b5,	FMT_1_5_5_5},
987b7e1c893Smrg    {PICT_x1r5g5b5,     FMT_1_5_5_5},
988b7e1c893Smrg    {PICT_a8,		FMT_8},
989b7e1c893Smrg};
990b7e1c893Smrg
991b7e1c893Smrgstatic uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
992b7e1c893Smrg{
993b7e1c893Smrg    uint32_t sblend, dblend;
994b7e1c893Smrg
995b7e1c893Smrg    sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
996b7e1c893Smrg    dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
997b7e1c893Smrg
998b7e1c893Smrg    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
999b7e1c893Smrg     * it as always 1.
1000b7e1c893Smrg     */
1001b7e1c893Smrg    if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
1002b7e1c893Smrg	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
1003b7e1c893Smrg	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
1004b7e1c893Smrg	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
1005b7e1c893Smrg	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
1006b7e1c893Smrg    }
1007b7e1c893Smrg
1008b7e1c893Smrg    /* If the source alpha is being used, then we should only be in a case where
1009b7e1c893Smrg     * the source blend factor is 0, and the source blend value is the mask
1010b7e1c893Smrg     * channels multiplied by the source picture's alpha.
1011b7e1c893Smrg     */
1012b7e1c893Smrg    if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
1013b7e1c893Smrg	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
1014b7e1c893Smrg	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
1015b7e1c893Smrg	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
1016b7e1c893Smrg	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
1017b7e1c893Smrg	}
1018b7e1c893Smrg    }
1019b7e1c893Smrg
1020b7e1c893Smrg    return sblend | dblend;
1021b7e1c893Smrg}
1022b7e1c893Smrg
1023b7e1c893Smrgstatic Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
1024b7e1c893Smrg{
1025b7e1c893Smrg    switch (pDstPicture->format) {
1026b7e1c893Smrg    case PICT_a8r8g8b8:
1027b7e1c893Smrg    case PICT_x8r8g8b8:
1028b7e1c893Smrg	*dst_format = COLOR_8_8_8_8;
1029b7e1c893Smrg	break;
1030b7e1c893Smrg    case PICT_r5g6b5:
1031b7e1c893Smrg	*dst_format = COLOR_5_6_5;
1032b7e1c893Smrg	break;
1033b7e1c893Smrg    case PICT_a1r5g5b5:
1034b7e1c893Smrg    case PICT_x1r5g5b5:
1035b7e1c893Smrg	*dst_format = COLOR_1_5_5_5;
1036b7e1c893Smrg	break;
1037b7e1c893Smrg    case PICT_a8:
1038b7e1c893Smrg	*dst_format = COLOR_8;
1039b7e1c893Smrg	break;
1040b7e1c893Smrg    default:
1041b7e1c893Smrg	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
1042b7e1c893Smrg	       (int)pDstPicture->format));
1043b7e1c893Smrg    }
1044b7e1c893Smrg    return TRUE;
1045b7e1c893Smrg}
1046b7e1c893Smrg
1047b7e1c893Smrgstatic Bool R600CheckCompositeTexture(PicturePtr pPict,
1048b7e1c893Smrg				      PicturePtr pDstPict,
1049b7e1c893Smrg				      int op,
1050b7e1c893Smrg				      int unit)
1051b7e1c893Smrg{
1052b7e1c893Smrg    int w = pPict->pDrawable->width;
1053b7e1c893Smrg    int h = pPict->pDrawable->height;
1054b7e1c893Smrg    unsigned int i;
1055b7e1c893Smrg    int max_tex_w, max_tex_h;
1056b7e1c893Smrg
1057b7e1c893Smrg    max_tex_w = 8192;
1058b7e1c893Smrg    max_tex_h = 8192;
1059b7e1c893Smrg
1060b7e1c893Smrg    if ((w > max_tex_w) || (h > max_tex_h))
1061b7e1c893Smrg	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
1062b7e1c893Smrg
1063b7e1c893Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
1064b7e1c893Smrg	if (R600TexFormats[i].fmt == pPict->format)
1065b7e1c893Smrg	    break;
1066b7e1c893Smrg    }
1067b7e1c893Smrg    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
1068b7e1c893Smrg	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
1069b7e1c893Smrg			 (int)pPict->format));
1070b7e1c893Smrg
1071b7e1c893Smrg    if (pPict->filter != PictFilterNearest &&
1072b7e1c893Smrg	pPict->filter != PictFilterBilinear)
1073b7e1c893Smrg	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
1074b7e1c893Smrg
1075b7e1c893Smrg    /* for REPEAT_NONE, Render semantics are that sampling outside the source
1076b7e1c893Smrg     * picture results in alpha=0 pixels. We can implement this with a border color
1077b7e1c893Smrg     * *if* our source texture has an alpha channel, otherwise we need to fall
1078b7e1c893Smrg     * back. If we're not transformed then we hope that upper layers have clipped
1079b7e1c893Smrg     * rendering to the bounds of the source drawable, in which case it doesn't
1080b7e1c893Smrg     * matter. I have not, however, verified that the X server always does such
1081b7e1c893Smrg     * clipping.
1082b7e1c893Smrg     */
1083b7e1c893Smrg    /* FIXME R6xx */
1084b7e1c893Smrg    if (pPict->transform != 0 && !pPict->repeat && PICT_FORMAT_A(pPict->format) == 0) {
1085b7e1c893Smrg	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
1086b7e1c893Smrg	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
1087b7e1c893Smrg    }
1088b7e1c893Smrg
1089b7e1c893Smrg    return TRUE;
1090b7e1c893Smrg}
1091b7e1c893Smrg
1092b7e1c893Smrgstatic Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
1093b7e1c893Smrg					int unit)
1094b7e1c893Smrg{
1095b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
1096b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1097b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1098b7e1c893Smrg    int w = pPict->pDrawable->width;
1099b7e1c893Smrg    int h = pPict->pDrawable->height;
1100b7e1c893Smrg    unsigned int i;
1101b7e1c893Smrg    tex_resource_t  tex_res;
1102b7e1c893Smrg    tex_sampler_t   tex_samp;
1103b7e1c893Smrg    int pix_r, pix_g, pix_b, pix_a;
1104b7e1c893Smrg
1105b7e1c893Smrg    CLEAR (tex_res);
1106b7e1c893Smrg    CLEAR (tex_samp);
1107b7e1c893Smrg
1108b7e1c893Smrg    accel_state->src_mc_addr[unit] = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
1109b7e1c893Smrg    accel_state->src_pitch[unit] = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
1110b7e1c893Smrg    accel_state->src_size[unit] = exaGetPixmapPitch(pPix) * pPix->drawable.height;
1111b7e1c893Smrg
1112c503f109Smrg    if (accel_state->src_pitch[unit] & 7)
1113b7e1c893Smrg	RADEON_FALLBACK(("Bad pitch %d 0x%x\n", (int)accel_state->src_pitch[unit], unit));
1114b7e1c893Smrg
1115c503f109Smrg    if (accel_state->src_mc_addr[unit] & 0xff)
1116b7e1c893Smrg	RADEON_FALLBACK(("Bad offset %d 0x%x\n", (int)accel_state->src_mc_addr[unit], unit));
1117b7e1c893Smrg
1118b7e1c893Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
1119b7e1c893Smrg	if (R600TexFormats[i].fmt == pPict->format)
1120b7e1c893Smrg	    break;
1121b7e1c893Smrg    }
1122b7e1c893Smrg
1123b7e1c893Smrg    accel_state->texW[unit] = w;
1124b7e1c893Smrg    accel_state->texH[unit] = h;
1125b7e1c893Smrg
1126b7e1c893Smrg    /* ErrorF("Tex %d setup %dx%d\n", unit, w, h);  */
1127b7e1c893Smrg
1128b7e1c893Smrg    /* flush texture cache */
1129b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
1130b7e1c893Smrg			accel_state->src_size[unit], accel_state->src_mc_addr[unit]);
1131b7e1c893Smrg
1132b7e1c893Smrg    /* Texture */
1133b7e1c893Smrg    tex_res.id                  = unit;
1134b7e1c893Smrg    tex_res.w                   = w;
1135b7e1c893Smrg    tex_res.h                   = h;
1136b7e1c893Smrg    tex_res.pitch               = accel_state->src_pitch[unit];
1137b7e1c893Smrg    tex_res.depth               = 0;
1138b7e1c893Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
1139b7e1c893Smrg    tex_res.base                = accel_state->src_mc_addr[unit];
1140b7e1c893Smrg    tex_res.mip_base            = accel_state->src_mc_addr[unit];
1141b7e1c893Smrg    tex_res.format              = R600TexFormats[i].card_fmt;
1142b7e1c893Smrg    tex_res.request_size        = 1;
1143b7e1c893Smrg
1144b7e1c893Smrg    /* component swizzles */
1145b7e1c893Smrg    switch (pPict->format) {
1146b7e1c893Smrg    case PICT_a1r5g5b5:
1147b7e1c893Smrg    case PICT_a8r8g8b8:
1148b7e1c893Smrg	pix_r = SQ_SEL_Z; /* R */
1149b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1150b7e1c893Smrg	pix_b = SQ_SEL_X; /* B */
1151b7e1c893Smrg	pix_a = SQ_SEL_W; /* A */
1152b7e1c893Smrg	break;
1153b7e1c893Smrg    case PICT_a8b8g8r8:
1154b7e1c893Smrg	pix_r = SQ_SEL_X; /* R */
1155b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1156b7e1c893Smrg	pix_b = SQ_SEL_Z; /* B */
1157b7e1c893Smrg	pix_a = SQ_SEL_W; /* A */
1158b7e1c893Smrg	break;
1159b7e1c893Smrg    case PICT_x8b8g8r8:
1160b7e1c893Smrg	pix_r = SQ_SEL_X; /* R */
1161b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1162b7e1c893Smrg	pix_b = SQ_SEL_Z; /* B */
1163b7e1c893Smrg	pix_a = SQ_SEL_1; /* A */
1164b7e1c893Smrg	break;
1165b7e1c893Smrg    case PICT_x1r5g5b5:
1166b7e1c893Smrg    case PICT_x8r8g8b8:
1167b7e1c893Smrg    case PICT_r5g6b5:
1168b7e1c893Smrg	pix_r = SQ_SEL_Z; /* R */
1169b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1170b7e1c893Smrg	pix_b = SQ_SEL_X; /* B */
1171b7e1c893Smrg	pix_a = SQ_SEL_1; /* A */
1172b7e1c893Smrg	break;
1173b7e1c893Smrg    case PICT_a8:
1174b7e1c893Smrg	pix_r = SQ_SEL_0; /* R */
1175b7e1c893Smrg	pix_g = SQ_SEL_0; /* G */
1176b7e1c893Smrg	pix_b = SQ_SEL_0; /* B */
1177b7e1c893Smrg	pix_a = SQ_SEL_X; /* A */
1178b7e1c893Smrg	break;
1179b7e1c893Smrg    default:
1180b7e1c893Smrg	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
1181b7e1c893Smrg    }
1182b7e1c893Smrg
1183b7e1c893Smrg    if (unit == 0) {
1184b7e1c893Smrg	if (!accel_state->has_mask) {
1185b7e1c893Smrg	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
1186b7e1c893Smrg		pix_r = SQ_SEL_0;
1187b7e1c893Smrg		pix_g = SQ_SEL_0;
1188b7e1c893Smrg		pix_b = SQ_SEL_0;
1189b7e1c893Smrg	    }
1190b7e1c893Smrg
1191b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1192b7e1c893Smrg		pix_a = SQ_SEL_1;
1193b7e1c893Smrg	} else {
1194b7e1c893Smrg	    if (accel_state->component_alpha) {
1195b7e1c893Smrg		if (accel_state->src_alpha) {
1196b7e1c893Smrg		    if (PICT_FORMAT_A(pPict->format) == 0) {
1197b7e1c893Smrg			pix_r = SQ_SEL_1;
1198b7e1c893Smrg			pix_g = SQ_SEL_1;
1199b7e1c893Smrg			pix_b = SQ_SEL_1;
1200b7e1c893Smrg			pix_a = SQ_SEL_1;
1201b7e1c893Smrg		    } else {
1202b7e1c893Smrg			pix_r = pix_a;
1203b7e1c893Smrg			pix_g = pix_a;
1204b7e1c893Smrg			pix_b = pix_a;
1205b7e1c893Smrg		    }
1206b7e1c893Smrg		} else {
1207b7e1c893Smrg		    if (PICT_FORMAT_A(pPict->format) == 0)
1208b7e1c893Smrg			pix_a = SQ_SEL_1;
1209b7e1c893Smrg		}
1210b7e1c893Smrg	    } else {
1211b7e1c893Smrg		if (PICT_FORMAT_RGB(pPict->format) == 0) {
1212b7e1c893Smrg		    pix_r = SQ_SEL_0;
1213b7e1c893Smrg		    pix_g = SQ_SEL_0;
1214b7e1c893Smrg		    pix_b = SQ_SEL_0;
1215b7e1c893Smrg		}
1216b7e1c893Smrg
1217b7e1c893Smrg		if (PICT_FORMAT_A(pPict->format) == 0)
1218b7e1c893Smrg		    pix_a = SQ_SEL_1;
1219b7e1c893Smrg	    }
1220b7e1c893Smrg	}
1221b7e1c893Smrg    } else {
1222b7e1c893Smrg	if (accel_state->component_alpha) {
1223b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1224b7e1c893Smrg		pix_a = SQ_SEL_1;
1225b7e1c893Smrg	} else {
1226b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0) {
1227b7e1c893Smrg		pix_r = SQ_SEL_1;
1228b7e1c893Smrg		pix_g = SQ_SEL_1;
1229b7e1c893Smrg		pix_b = SQ_SEL_1;
1230b7e1c893Smrg		pix_a = SQ_SEL_1;
1231b7e1c893Smrg	    } else {
1232b7e1c893Smrg		pix_r = pix_a;
1233b7e1c893Smrg		pix_g = pix_a;
1234b7e1c893Smrg		pix_b = pix_a;
1235b7e1c893Smrg	    }
1236b7e1c893Smrg	}
1237b7e1c893Smrg    }
1238b7e1c893Smrg
1239b7e1c893Smrg    tex_res.dst_sel_x           = pix_r; /* R */
1240b7e1c893Smrg    tex_res.dst_sel_y           = pix_g; /* G */
1241b7e1c893Smrg    tex_res.dst_sel_z           = pix_b; /* B */
1242b7e1c893Smrg    tex_res.dst_sel_w           = pix_a; /* A */
1243b7e1c893Smrg
1244b7e1c893Smrg    tex_res.base_level          = 0;
1245b7e1c893Smrg    tex_res.last_level          = 0;
1246b7e1c893Smrg    tex_res.perf_modulation     = 0;
1247b7e1c893Smrg    set_tex_resource            (pScrn, accel_state->ib, &tex_res);
1248b7e1c893Smrg
1249b7e1c893Smrg    tex_samp.id                 = unit;
1250b7e1c893Smrg    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1251b7e1c893Smrg
1252b7e1c893Smrg    if (pPict->repeat) {
1253b7e1c893Smrg	switch (pPict->repeatType) {
1254b7e1c893Smrg	case RepeatNormal:
1255b7e1c893Smrg	    tex_samp.clamp_x            = SQ_TEX_WRAP;
1256b7e1c893Smrg	    tex_samp.clamp_y            = SQ_TEX_WRAP;
1257b7e1c893Smrg	    break;
1258b7e1c893Smrg	case RepeatPad:
1259b7e1c893Smrg	    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1260b7e1c893Smrg	    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1261b7e1c893Smrg	    break;
1262b7e1c893Smrg	case RepeatReflect:
1263b7e1c893Smrg	    tex_samp.clamp_x            = SQ_TEX_MIRROR;
1264b7e1c893Smrg	    tex_samp.clamp_y            = SQ_TEX_MIRROR;
1265b7e1c893Smrg	    break;
1266b7e1c893Smrg	case RepeatNone:
1267b7e1c893Smrg	    tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1268b7e1c893Smrg	    tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1269b7e1c893Smrg	    break;
1270b7e1c893Smrg	default:
1271b7e1c893Smrg	    RADEON_FALLBACK(("Bad repeat 0x%x\n", pPict->repeatType));
1272b7e1c893Smrg	}
1273b7e1c893Smrg    } else {
1274b7e1c893Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1275b7e1c893Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1276b7e1c893Smrg    }
1277b7e1c893Smrg
1278b7e1c893Smrg    switch (pPict->filter) {
1279b7e1c893Smrg    case PictFilterNearest:
1280b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1281b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1282b7e1c893Smrg	break;
1283b7e1c893Smrg    case PictFilterBilinear:
1284b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1285b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1286b7e1c893Smrg	break;
1287b7e1c893Smrg    default:
1288b7e1c893Smrg	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1289b7e1c893Smrg    }
1290b7e1c893Smrg
1291b7e1c893Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
1292b7e1c893Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1293b7e1c893Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
1294b7e1c893Smrg    set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
1295b7e1c893Smrg
1296b7e1c893Smrg    if (pPict->transform != 0) {
1297b7e1c893Smrg	accel_state->is_transform[unit] = TRUE;
1298b7e1c893Smrg	accel_state->transform[unit] = pPict->transform;
1299b7e1c893Smrg    } else
1300b7e1c893Smrg	accel_state->is_transform[unit] = FALSE;
1301b7e1c893Smrg
1302b7e1c893Smrg    return TRUE;
1303b7e1c893Smrg}
1304b7e1c893Smrg
1305b7e1c893Smrgstatic Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1306b7e1c893Smrg			       PicturePtr pDstPicture)
1307b7e1c893Smrg{
1308b7e1c893Smrg    uint32_t tmp1;
1309b7e1c893Smrg    PixmapPtr pSrcPixmap, pDstPixmap;
1310b7e1c893Smrg    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1311b7e1c893Smrg
1312b7e1c893Smrg    /* Check for unsupported compositing operations. */
1313b7e1c893Smrg    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1314b7e1c893Smrg	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1315b7e1c893Smrg
1316b7e1c893Smrg    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1317b7e1c893Smrg
1318b7e1c893Smrg    max_tex_w = 8192;
1319b7e1c893Smrg    max_tex_h = 8192;
1320b7e1c893Smrg    max_dst_w = 8192;
1321b7e1c893Smrg    max_dst_h = 8192;
1322b7e1c893Smrg
1323b7e1c893Smrg    if (pSrcPixmap->drawable.width >= max_tex_w ||
1324b7e1c893Smrg	pSrcPixmap->drawable.height >= max_tex_h) {
1325b7e1c893Smrg	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1326b7e1c893Smrg			 pSrcPixmap->drawable.width,
1327b7e1c893Smrg			 pSrcPixmap->drawable.height));
1328b7e1c893Smrg    }
1329b7e1c893Smrg
1330b7e1c893Smrg    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1331b7e1c893Smrg
1332b7e1c893Smrg    if (pDstPixmap->drawable.width >= max_dst_w ||
1333b7e1c893Smrg	pDstPixmap->drawable.height >= max_dst_h) {
1334b7e1c893Smrg	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1335b7e1c893Smrg			 pDstPixmap->drawable.width,
1336b7e1c893Smrg			 pDstPixmap->drawable.height));
1337b7e1c893Smrg    }
1338b7e1c893Smrg
1339b7e1c893Smrg    if (pMaskPicture) {
1340b7e1c893Smrg	PixmapPtr pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1341b7e1c893Smrg
1342b7e1c893Smrg	if (pMaskPixmap->drawable.width >= max_tex_w ||
1343b7e1c893Smrg	    pMaskPixmap->drawable.height >= max_tex_h) {
1344b7e1c893Smrg	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1345b7e1c893Smrg			     pMaskPixmap->drawable.width,
1346b7e1c893Smrg			     pMaskPixmap->drawable.height));
1347b7e1c893Smrg	}
1348b7e1c893Smrg
1349b7e1c893Smrg	if (pMaskPicture->componentAlpha) {
1350b7e1c893Smrg	    /* Check if it's component alpha that relies on a source alpha and
1351b7e1c893Smrg	     * on the source value.  We can only get one of those into the
1352b7e1c893Smrg	     * single source value that we get to blend with.
1353b7e1c893Smrg	     */
1354b7e1c893Smrg	    if (R600BlendOp[op].src_alpha &&
1355b7e1c893Smrg		(R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1356b7e1c893Smrg		(BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1357b7e1c893Smrg		RADEON_FALLBACK(("Component alpha not supported with source "
1358b7e1c893Smrg				 "alpha and source value blending.\n"));
1359b7e1c893Smrg	    }
1360b7e1c893Smrg	}
1361b7e1c893Smrg
1362b7e1c893Smrg	if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1363b7e1c893Smrg	    return FALSE;
1364b7e1c893Smrg    }
1365b7e1c893Smrg
1366b7e1c893Smrg    if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1367b7e1c893Smrg	return FALSE;
1368b7e1c893Smrg
1369b7e1c893Smrg    if (!R600GetDestFormat(pDstPicture, &tmp1))
1370b7e1c893Smrg	return FALSE;
1371b7e1c893Smrg
1372b7e1c893Smrg    return TRUE;
1373b7e1c893Smrg
1374b7e1c893Smrg}
1375b7e1c893Smrg
1376b7e1c893Smrgstatic Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1377b7e1c893Smrg				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1378b7e1c893Smrg				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1379b7e1c893Smrg{
1380b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1381b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1382b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1383b7e1c893Smrg    uint32_t blendcntl, dst_format;
1384b7e1c893Smrg    cb_config_t cb_conf;
1385b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
1386b7e1c893Smrg
1387b7e1c893Smrg    /* return FALSE; */
1388b7e1c893Smrg
1389b7e1c893Smrg    if (pMask) {
1390b7e1c893Smrg	accel_state->has_mask = TRUE;
1391b7e1c893Smrg	if (pMaskPicture->componentAlpha) {
1392b7e1c893Smrg	    accel_state->component_alpha = TRUE;
1393b7e1c893Smrg	    if (R600BlendOp[op].src_alpha)
1394b7e1c893Smrg		accel_state->src_alpha = TRUE;
1395b7e1c893Smrg	    else
1396b7e1c893Smrg		accel_state->src_alpha = FALSE;
1397b7e1c893Smrg	} else {
1398b7e1c893Smrg	    accel_state->component_alpha = FALSE;
1399b7e1c893Smrg	    accel_state->src_alpha = FALSE;
1400b7e1c893Smrg	}
1401b7e1c893Smrg    } else {
1402b7e1c893Smrg	accel_state->has_mask = FALSE;
1403b7e1c893Smrg	accel_state->component_alpha = FALSE;
1404b7e1c893Smrg	accel_state->src_alpha = FALSE;
1405b7e1c893Smrg    }
1406b7e1c893Smrg
1407b7e1c893Smrg    accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1408b7e1c893Smrg    accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1409b7e1c893Smrg    accel_state->dst_size = exaGetPixmapPitch(pDst) * pDst->drawable.height;
1410b7e1c893Smrg
1411b7e1c893Smrg    if (accel_state->dst_pitch & 7)
1412b7e1c893Smrg	RADEON_FALLBACK(("Bad dst pitch 0x%x\n", (int)accel_state->dst_pitch));
1413b7e1c893Smrg
1414b7e1c893Smrg    if (accel_state->dst_mc_addr & 0xff)
1415b7e1c893Smrg	RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)accel_state->dst_mc_addr));
1416b7e1c893Smrg
1417b7e1c893Smrg    if (!R600GetDestFormat(pDstPicture, &dst_format))
1418b7e1c893Smrg	return FALSE;
1419b7e1c893Smrg
1420b7e1c893Smrg    CLEAR (cb_conf);
1421b7e1c893Smrg    CLEAR (vs_conf);
1422b7e1c893Smrg    CLEAR (ps_conf);
1423b7e1c893Smrg
1424b7e1c893Smrg    accel_state->ib = RADEONCPGetBuffer(pScrn);
1425b7e1c893Smrg
1426b7e1c893Smrg    /* Init */
1427b7e1c893Smrg    start_3d(pScrn, accel_state->ib);
1428b7e1c893Smrg
1429b7e1c893Smrg    set_default_state(pScrn, accel_state->ib);
1430b7e1c893Smrg
1431b7e1c893Smrg    /* Scissor / viewport */
1432b7e1c893Smrg    EREG(accel_state->ib, PA_CL_VTE_CNTL,                      VTX_XY_FMT_bit);
1433b7e1c893Smrg    EREG(accel_state->ib, PA_CL_CLIP_CNTL,                     CLIP_DISABLE_bit);
1434b7e1c893Smrg
1435b7e1c893Smrg    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
1436b7e1c893Smrg	R600IBDiscard(pScrn, accel_state->ib);
1437b7e1c893Smrg	return FALSE;
1438b7e1c893Smrg    }
1439b7e1c893Smrg
1440b7e1c893Smrg    if (pMask) {
1441b7e1c893Smrg	if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
1442b7e1c893Smrg	    R600IBDiscard(pScrn, accel_state->ib);
1443b7e1c893Smrg	    return FALSE;
1444b7e1c893Smrg	}
1445b7e1c893Smrg    } else
1446b7e1c893Smrg	accel_state->is_transform[1] = FALSE;
1447b7e1c893Smrg
1448b7e1c893Smrg    if (pMask) {
1449b7e1c893Smrg	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
1450b7e1c893Smrg	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
1451b7e1c893Smrg	    accel_state->comp_mask_ps_offset;
1452b7e1c893Smrg    } else {
1453b7e1c893Smrg	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
1454b7e1c893Smrg	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
1455b7e1c893Smrg	    accel_state->comp_ps_offset;
1456b7e1c893Smrg    }
1457b7e1c893Smrg
1458b7e1c893Smrg    accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
1459b7e1c893Smrg	accel_state->comp_vs_offset;
1460b7e1c893Smrg
1461b7e1c893Smrg    accel_state->vs_size = 512;
1462b7e1c893Smrg    accel_state->ps_size = 512;
1463b7e1c893Smrg
1464b7e1c893Smrg    /* Shader */
1465b7e1c893Smrg
1466b7e1c893Smrg    /* flush SQ cache */
1467b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
1468b7e1c893Smrg			accel_state->vs_size, accel_state->vs_mc_addr);
1469b7e1c893Smrg
1470b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
1471b7e1c893Smrg    vs_conf.num_gprs            = 3;
1472b7e1c893Smrg    vs_conf.stack_size          = 1;
1473b7e1c893Smrg    vs_setup                    (pScrn, accel_state->ib, &vs_conf);
1474b7e1c893Smrg
1475b7e1c893Smrg    /* flush SQ cache */
1476b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
1477b7e1c893Smrg			accel_state->ps_size, accel_state->ps_mc_addr);
1478b7e1c893Smrg
1479b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
1480b7e1c893Smrg    ps_conf.num_gprs            = 3;
1481b7e1c893Smrg    ps_conf.stack_size          = 0;
1482b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
1483b7e1c893Smrg    ps_conf.clamp_consts        = 0;
1484b7e1c893Smrg    ps_conf.export_mode         = 2;
1485b7e1c893Smrg    ps_setup                    (pScrn, accel_state->ib, &ps_conf);
1486b7e1c893Smrg
1487b7e1c893Smrg    EREG(accel_state->ib, CB_SHADER_MASK,                      (0xf << OUTPUT0_ENABLE_shift));
1488b7e1c893Smrg    EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));
1489b7e1c893Smrg
1490b7e1c893Smrg    blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1491b7e1c893Smrg
1492b7e1c893Smrg    if (info->ChipFamily == CHIP_FAMILY_R600) {
1493b7e1c893Smrg	/* no per-MRT blend on R600 */
1494b7e1c893Smrg	EREG(accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift));
1495b7e1c893Smrg	EREG(accel_state->ib, CB_BLEND_CONTROL,                    blendcntl);
1496b7e1c893Smrg    } else {
1497b7e1c893Smrg	EREG(accel_state->ib, CB_COLOR_CONTROL,                    (RADEON_ROP[3] |
1498b7e1c893Smrg								    (1 << TARGET_BLEND_ENABLE_shift) |
1499b7e1c893Smrg								    PER_MRT_BLEND_bit));
1500b7e1c893Smrg	EREG(accel_state->ib, CB_BLEND0_CONTROL,                   blendcntl);
1501b7e1c893Smrg    }
1502b7e1c893Smrg
1503b7e1c893Smrg    cb_conf.id = 0;
1504b7e1c893Smrg    cb_conf.w = accel_state->dst_pitch;
1505b7e1c893Smrg    cb_conf.h = pDst->drawable.height;
1506b7e1c893Smrg    cb_conf.base = accel_state->dst_mc_addr;
1507b7e1c893Smrg    cb_conf.format = dst_format;
1508b7e1c893Smrg
1509b7e1c893Smrg    switch (pDstPicture->format) {
1510b7e1c893Smrg    case PICT_a8r8g8b8:
1511b7e1c893Smrg    case PICT_x8r8g8b8:
1512b7e1c893Smrg    case PICT_a1r5g5b5:
1513b7e1c893Smrg    case PICT_x1r5g5b5:
1514b7e1c893Smrg    default:
1515b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
1516b7e1c893Smrg	break;
1517b7e1c893Smrg    case PICT_r5g6b5:
1518b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
1519b7e1c893Smrg	break;
1520b7e1c893Smrg    case PICT_a8:
1521b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
1522b7e1c893Smrg	break;
1523b7e1c893Smrg    }
1524b7e1c893Smrg    cb_conf.source_format = 1;
1525b7e1c893Smrg    cb_conf.blend_clamp = 1;
1526b7e1c893Smrg    set_render_target(pScrn, accel_state->ib, &cb_conf);
1527b7e1c893Smrg
1528b7e1c893Smrg    EREG(accel_state->ib, PA_SU_SC_MODE_CNTL,                  (FACE_bit			|
1529b7e1c893Smrg								(POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift)	|
1530b7e1c893Smrg								(POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)));
1531b7e1c893Smrg    EREG(accel_state->ib, DB_SHADER_CONTROL,                   ((1 << Z_ORDER_shift)		| /* EARLY_Z_THEN_LATE_Z */
1532b7e1c893Smrg								DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
1533b7e1c893Smrg
1534b7e1c893Smrg    /* Interpolator setup */
1535b7e1c893Smrg    if (pMask) {
1536b7e1c893Smrg	/* export 2 tex coords from VS */
1537b7e1c893Smrg	EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift));
1538b7e1c893Smrg	/* src = semantic id 0; mask = semantic id 1 */
1539b7e1c893Smrg	EREG(accel_state->ib, SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1540b7e1c893Smrg						  (1 << SEMANTIC_1_shift)));
1541b7e1c893Smrg	/* input 2 tex coords from VS */
1542b7e1c893Smrg	EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (2 << NUM_INTERP_shift));
1543b7e1c893Smrg    } else {
1544b7e1c893Smrg	/* export 1 tex coords from VS */
1545b7e1c893Smrg	EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
1546b7e1c893Smrg	/* src = semantic id 0 */
1547b7e1c893Smrg	EREG(accel_state->ib, SPI_VS_OUT_ID_0,   (0 << SEMANTIC_0_shift));
1548b7e1c893Smrg	/* input 1 tex coords from VS */
1549b7e1c893Smrg	EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift));
1550b7e1c893Smrg    }
1551b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
1552b7e1c893Smrg    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1553b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)	|
1554b7e1c893Smrg								(0x01 << DEFAULT_VAL_shift)	|
1555b7e1c893Smrg								SEL_CENTROID_bit));
1556b7e1c893Smrg    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1557b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (1 <<2),       ((1    << SEMANTIC_shift)	|
1558b7e1c893Smrg								(0x01 << DEFAULT_VAL_shift)	|
1559b7e1c893Smrg								SEL_CENTROID_bit));
1560b7e1c893Smrg    EREG(accel_state->ib, SPI_INTERP_CONTROL_0,                0);
1561b7e1c893Smrg
1562b7e1c893Smrg    accel_state->vb_index = 0;
1563b7e1c893Smrg
1564b7e1c893Smrg    return TRUE;
1565b7e1c893Smrg}
1566b7e1c893Smrg
1567b7e1c893Smrgstatic void R600Composite(PixmapPtr pDst,
1568b7e1c893Smrg			  int srcX, int srcY,
1569b7e1c893Smrg			  int maskX, int maskY,
1570b7e1c893Smrg			  int dstX, int dstY,
1571b7e1c893Smrg			  int w, int h)
1572b7e1c893Smrg{
1573b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1574b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1575b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1576b7e1c893Smrg    float *vb;
1577b7e1c893Smrg    xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight;
1578b7e1c893Smrg
1579b7e1c893Smrg    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1580b7e1c893Smrg       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1581b7e1c893Smrg
1582b7e1c893Smrg    srcTopLeft.x     = IntToxFixed(srcX);
1583b7e1c893Smrg    srcTopLeft.y     = IntToxFixed(srcY);
1584b7e1c893Smrg    srcTopRight.x    = IntToxFixed(srcX + w);
1585b7e1c893Smrg    srcTopRight.y    = IntToxFixed(srcY);
1586b7e1c893Smrg    srcBottomLeft.x  = IntToxFixed(srcX);
1587b7e1c893Smrg    srcBottomLeft.y  = IntToxFixed(srcY + h);
1588b7e1c893Smrg    srcBottomRight.x = IntToxFixed(srcX + w);
1589b7e1c893Smrg    srcBottomRight.y = IntToxFixed(srcY + h);
1590b7e1c893Smrg
1591b7e1c893Smrg    /* XXX do transform in vertex shader */
1592b7e1c893Smrg    if (accel_state->is_transform[0]) {
1593b7e1c893Smrg	transformPoint(accel_state->transform[0], &srcTopLeft);
1594b7e1c893Smrg	transformPoint(accel_state->transform[0], &srcTopRight);
1595b7e1c893Smrg	transformPoint(accel_state->transform[0], &srcBottomLeft);
1596b7e1c893Smrg	transformPoint(accel_state->transform[0], &srcBottomRight);
1597b7e1c893Smrg    }
1598b7e1c893Smrg
1599b7e1c893Smrg    if (accel_state->has_mask) {
1600b7e1c893Smrg	xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight;
1601b7e1c893Smrg
1602b7e1c893Smrg	if (((accel_state->vb_index + 3) * 24) > (accel_state->ib->total / 2)) {
1603b7e1c893Smrg	    R600DoneComposite(pDst);
1604b7e1c893Smrg	    accel_state->vb_index = 0;
1605b7e1c893Smrg	    accel_state->ib = RADEONCPGetBuffer(pScrn);
1606b7e1c893Smrg	}
1607b7e1c893Smrg
1608b7e1c893Smrg	vb = (pointer)((char*)accel_state->ib->address +
1609b7e1c893Smrg		       (accel_state->ib->total / 2) +
1610b7e1c893Smrg		       accel_state->vb_index * 24);
1611b7e1c893Smrg
1612b7e1c893Smrg	maskTopLeft.x     = IntToxFixed(maskX);
1613b7e1c893Smrg	maskTopLeft.y     = IntToxFixed(maskY);
1614b7e1c893Smrg	maskTopRight.x    = IntToxFixed(maskX + w);
1615b7e1c893Smrg	maskTopRight.y    = IntToxFixed(maskY);
1616b7e1c893Smrg	maskBottomLeft.x  = IntToxFixed(maskX);
1617b7e1c893Smrg	maskBottomLeft.y  = IntToxFixed(maskY + h);
1618b7e1c893Smrg	maskBottomRight.x = IntToxFixed(maskX + w);
1619b7e1c893Smrg	maskBottomRight.y = IntToxFixed(maskY + h);
1620b7e1c893Smrg
1621b7e1c893Smrg	if (accel_state->is_transform[1]) {
1622b7e1c893Smrg	    transformPoint(accel_state->transform[1], &maskTopLeft);
1623b7e1c893Smrg	    transformPoint(accel_state->transform[1], &maskTopRight);
1624b7e1c893Smrg	    transformPoint(accel_state->transform[1], &maskBottomLeft);
1625b7e1c893Smrg	    transformPoint(accel_state->transform[1], &maskBottomRight);
1626b7e1c893Smrg	}
1627b7e1c893Smrg
1628b7e1c893Smrg	vb[0] = (float)dstX;
1629b7e1c893Smrg	vb[1] = (float)dstY;
1630b7e1c893Smrg	vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0];
1631b7e1c893Smrg	vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0];
1632b7e1c893Smrg	vb[4] = xFixedToFloat(maskTopLeft.x) / accel_state->texW[1];
1633b7e1c893Smrg	vb[5] = xFixedToFloat(maskTopLeft.y) / accel_state->texH[1];
1634b7e1c893Smrg
1635b7e1c893Smrg	vb[6] = (float)dstX;
1636b7e1c893Smrg	vb[7] = (float)(dstY + h);
1637b7e1c893Smrg	vb[8] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0];
1638b7e1c893Smrg	vb[9] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0];
1639b7e1c893Smrg	vb[10] = xFixedToFloat(maskBottomLeft.x) / accel_state->texW[1];
1640b7e1c893Smrg	vb[11] = xFixedToFloat(maskBottomLeft.y) / accel_state->texH[1];
1641b7e1c893Smrg
1642b7e1c893Smrg	vb[12] = (float)(dstX + w);
1643b7e1c893Smrg	vb[13] = (float)(dstY + h);
1644b7e1c893Smrg	vb[14] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0];
1645b7e1c893Smrg	vb[15] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0];
1646b7e1c893Smrg	vb[16] = xFixedToFloat(maskBottomRight.x) / accel_state->texW[1];
1647b7e1c893Smrg	vb[17] = xFixedToFloat(maskBottomRight.y) / accel_state->texH[1];
1648b7e1c893Smrg
1649b7e1c893Smrg    } else {
1650b7e1c893Smrg	if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) {
1651b7e1c893Smrg	    R600DoneComposite(pDst);
1652b7e1c893Smrg	    accel_state->vb_index = 0;
1653b7e1c893Smrg	    accel_state->ib = RADEONCPGetBuffer(pScrn);
1654b7e1c893Smrg	}
1655b7e1c893Smrg
1656b7e1c893Smrg	vb = (pointer)((char*)accel_state->ib->address +
1657b7e1c893Smrg		       (accel_state->ib->total / 2) +
1658b7e1c893Smrg		       accel_state->vb_index * 16);
1659b7e1c893Smrg
1660b7e1c893Smrg	vb[0] = (float)dstX;
1661b7e1c893Smrg	vb[1] = (float)dstY;
1662b7e1c893Smrg	vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0];
1663b7e1c893Smrg	vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0];
1664b7e1c893Smrg
1665b7e1c893Smrg	vb[4] = (float)dstX;
1666b7e1c893Smrg	vb[5] = (float)(dstY + h);
1667b7e1c893Smrg	vb[6] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0];
1668b7e1c893Smrg	vb[7] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0];
1669b7e1c893Smrg
1670b7e1c893Smrg	vb[8] = (float)(dstX + w);
1671b7e1c893Smrg	vb[9] = (float)(dstY + h);
1672b7e1c893Smrg	vb[10] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0];
1673b7e1c893Smrg	vb[11] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0];
1674b7e1c893Smrg    }
1675b7e1c893Smrg
1676b7e1c893Smrg    accel_state->vb_index += 3;
1677b7e1c893Smrg
1678b7e1c893Smrg}
1679b7e1c893Smrg
1680b7e1c893Smrgstatic void R600DoneComposite(PixmapPtr pDst)
1681b7e1c893Smrg{
1682b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1683b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1684b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1685b7e1c893Smrg    draw_config_t   draw_conf;
1686b7e1c893Smrg    vtx_resource_t  vtx_res;
1687b7e1c893Smrg
1688b7e1c893Smrg    CLEAR (draw_conf);
1689b7e1c893Smrg    CLEAR (vtx_res);
1690b7e1c893Smrg
1691b7e1c893Smrg    if (accel_state->vb_index == 0) {
1692b7e1c893Smrg	R600IBDiscard(pScrn, accel_state->ib);
1693b7e1c893Smrg	return;
1694b7e1c893Smrg    }
1695b7e1c893Smrg
1696b7e1c893Smrg    accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart +
1697b7e1c893Smrg	(accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2);
1698b7e1c893Smrg
1699b7e1c893Smrg
1700b7e1c893Smrg    /* Vertex buffer setup */
1701b7e1c893Smrg    if (accel_state->has_mask) {
1702b7e1c893Smrg	accel_state->vb_size = accel_state->vb_index * 24;
1703b7e1c893Smrg	vtx_res.id              = SQ_VTX_RESOURCE_vs;
1704b7e1c893Smrg	vtx_res.vtx_size_dw     = 24 / 4;
1705b7e1c893Smrg	vtx_res.vtx_num_entries = accel_state->vb_size / 4;
1706b7e1c893Smrg	vtx_res.mem_req_size    = 1;
1707b7e1c893Smrg	vtx_res.vb_addr         = accel_state->vb_mc_addr;
1708b7e1c893Smrg    } else {
1709b7e1c893Smrg	accel_state->vb_size = accel_state->vb_index * 16;
1710b7e1c893Smrg	vtx_res.id              = SQ_VTX_RESOURCE_vs;
1711b7e1c893Smrg	vtx_res.vtx_size_dw     = 16 / 4;
1712b7e1c893Smrg	vtx_res.vtx_num_entries = accel_state->vb_size / 4;
1713b7e1c893Smrg	vtx_res.mem_req_size    = 1;
1714b7e1c893Smrg	vtx_res.vb_addr         = accel_state->vb_mc_addr;
1715b7e1c893Smrg    }
1716b7e1c893Smrg    /* flush vertex cache */
1717b7e1c893Smrg    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
1718b7e1c893Smrg	(info->ChipFamily == CHIP_FAMILY_RV620) ||
1719b7e1c893Smrg	(info->ChipFamily == CHIP_FAMILY_RS780) ||
1720c503f109Smrg	(info->ChipFamily == CHIP_FAMILY_RS880) ||
1721b7e1c893Smrg	(info->ChipFamily == CHIP_FAMILY_RV710))
1722b7e1c893Smrg	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
1723b7e1c893Smrg			    accel_state->vb_size, accel_state->vb_mc_addr);
1724b7e1c893Smrg    else
1725b7e1c893Smrg	cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit,
1726b7e1c893Smrg			    accel_state->vb_size, accel_state->vb_mc_addr);
1727b7e1c893Smrg
1728b7e1c893Smrg    set_vtx_resource        (pScrn, accel_state->ib, &vtx_res);
1729b7e1c893Smrg
1730b7e1c893Smrg    draw_conf.prim_type          = DI_PT_RECTLIST;
1731b7e1c893Smrg    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
1732b7e1c893Smrg    draw_conf.num_instances      = 1;
1733b7e1c893Smrg    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
1734b7e1c893Smrg    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
1735b7e1c893Smrg
1736b7e1c893Smrg    draw_auto(pScrn, accel_state->ib, &draw_conf);
1737b7e1c893Smrg
1738b7e1c893Smrg    wait_3d_idle_clean(pScrn, accel_state->ib);
1739b7e1c893Smrg
1740b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
1741b7e1c893Smrg			accel_state->dst_size, accel_state->dst_mc_addr);
1742b7e1c893Smrg
1743b7e1c893Smrg    R600CPFlushIndirect(pScrn, accel_state->ib);
1744b7e1c893Smrg}
1745b7e1c893Smrg
1746b7e1c893SmrgBool
1747b7e1c893SmrgR600CopyToVRAM(ScrnInfoPtr pScrn,
1748b7e1c893Smrg	       char *src, int src_pitch,
1749b7e1c893Smrg	       uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_height, int bpp,
1750b7e1c893Smrg	       int x, int y, int w, int h)
1751b7e1c893Smrg{
1752b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1753b7e1c893Smrg    uint32_t scratch_mc_addr;
1754b7e1c893Smrg    int wpass = w * (bpp/8);
1755b7e1c893Smrg    int scratch_pitch_bytes = (wpass + 255) & ~255;
1756b7e1c893Smrg    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1757b7e1c893Smrg    int scratch_offset = 0, hpass, temph;
1758b7e1c893Smrg    char *dst;
1759b7e1c893Smrg    drmBufPtr scratch;
1760b7e1c893Smrg
1761b7e1c893Smrg    if (dst_pitch & 7)
1762b7e1c893Smrg	return FALSE;
1763b7e1c893Smrg
1764b7e1c893Smrg    if (dst_mc_addr & 0xff)
1765b7e1c893Smrg	return FALSE;
1766b7e1c893Smrg
1767b7e1c893Smrg    scratch = RADEONCPGetBuffer(pScrn);
1768b7e1c893Smrg    if (scratch == NULL)
1769b7e1c893Smrg	return FALSE;
1770b7e1c893Smrg
1771b7e1c893Smrg    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1772b7e1c893Smrg    temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1773b7e1c893Smrg    dst = (char *)scratch->address;
1774b7e1c893Smrg
1775b7e1c893Smrg    /* memcopy from sys to scratch */
1776b7e1c893Smrg    while (temph--) {
1777b7e1c893Smrg	memcpy (dst, src, wpass);
1778b7e1c893Smrg	src += src_pitch;
1779b7e1c893Smrg	dst += scratch_pitch_bytes;
1780b7e1c893Smrg    }
1781b7e1c893Smrg
1782b7e1c893Smrg    while (h) {
1783b7e1c893Smrg	uint32_t offset = scratch_mc_addr + scratch_offset;
1784b7e1c893Smrg	int oldhpass = hpass;
1785b7e1c893Smrg	h -= oldhpass;
1786b7e1c893Smrg	temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1787b7e1c893Smrg
1788b7e1c893Smrg	if (hpass) {
1789b7e1c893Smrg	    scratch_offset = scratch->total/2 - scratch_offset;
1790b7e1c893Smrg	    dst = (char *)scratch->address + scratch_offset;
1791b7e1c893Smrg	    /* wait for the engine to be idle */
1792b7e1c893Smrg	    RADEONWaitForIdleCP(pScrn);
1793b7e1c893Smrg	    //memcopy from sys to scratch
1794b7e1c893Smrg	    while (temph--) {
1795b7e1c893Smrg		memcpy (dst, src, wpass);
1796b7e1c893Smrg		src += src_pitch;
1797b7e1c893Smrg		dst += scratch_pitch_bytes;
1798b7e1c893Smrg	    }
1799b7e1c893Smrg	}
1800b7e1c893Smrg	/* blit from scratch to vram */
1801b7e1c893Smrg	R600DoPrepareCopy(pScrn,
1802b7e1c893Smrg			  scratch_pitch, w, oldhpass, offset, bpp,
1803b7e1c893Smrg			  dst_pitch, dst_height, dst_mc_addr, bpp,
1804b7e1c893Smrg			  3, 0xffffffff);
1805b7e1c893Smrg	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
1806b7e1c893Smrg	R600DoCopy(pScrn);
1807b7e1c893Smrg	y += oldhpass;
1808b7e1c893Smrg    }
1809b7e1c893Smrg
1810b7e1c893Smrg    R600IBDiscard(pScrn, scratch);
1811b7e1c893Smrg
1812b7e1c893Smrg    return TRUE;
1813b7e1c893Smrg}
1814b7e1c893Smrg
1815b7e1c893Smrgstatic Bool
1816b7e1c893SmrgR600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1817b7e1c893Smrg		   char *src, int src_pitch)
1818b7e1c893Smrg{
1819b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1820b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1821b7e1c893Smrg    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1822b7e1c893Smrg    uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1823b7e1c893Smrg    uint32_t dst_height = pDst->drawable.height;
1824b7e1c893Smrg    int bpp = pDst->drawable.bitsPerPixel;
1825b7e1c893Smrg
1826b7e1c893Smrg    return R600CopyToVRAM(pScrn,
1827b7e1c893Smrg			  src, src_pitch,
1828b7e1c893Smrg			  dst_pitch, dst_mc_addr, dst_height, bpp,
1829b7e1c893Smrg			  x, y, w, h);
1830b7e1c893Smrg}
1831b7e1c893Smrg
1832b7e1c893Smrgstatic Bool
1833b7e1c893SmrgR600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
1834b7e1c893Smrg		       char *dst, int dst_pitch)
1835b7e1c893Smrg{
1836b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1837b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1838b7e1c893Smrg    uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1839b7e1c893Smrg    uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1840b7e1c893Smrg    uint32_t src_width = pSrc->drawable.width;
1841b7e1c893Smrg    uint32_t src_height = pSrc->drawable.height;
1842b7e1c893Smrg    int bpp = pSrc->drawable.bitsPerPixel;
1843b7e1c893Smrg    uint32_t scratch_mc_addr;
1844b7e1c893Smrg    int scratch_pitch_bytes = (dst_pitch + 255) & ~255;
1845b7e1c893Smrg    int scratch_offset = 0, hpass;
1846b7e1c893Smrg    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1847b7e1c893Smrg    int wpass = w * (bpp/8);
1848b7e1c893Smrg    drmBufPtr scratch;
1849b7e1c893Smrg
1850c503f109Smrg    /* RV740 seems to be particularly problematic with small xfers */
1851c503f109Smrg    if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32))
1852c503f109Smrg	return FALSE;
1853c503f109Smrg
1854b7e1c893Smrg    if (src_pitch & 7)
1855b7e1c893Smrg	return FALSE;
1856b7e1c893Smrg
1857b7e1c893Smrg    scratch = RADEONCPGetBuffer(pScrn);
1858b7e1c893Smrg    if (scratch == NULL)
1859b7e1c893Smrg	return FALSE;
1860b7e1c893Smrg
1861b7e1c893Smrg    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1862b7e1c893Smrg    hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1863b7e1c893Smrg
1864b7e1c893Smrg    /* blit from vram to scratch */
1865b7e1c893Smrg    R600DoPrepareCopy(pScrn,
1866b7e1c893Smrg		      src_pitch, src_width, src_height, src_mc_addr, bpp,
1867b7e1c893Smrg		      scratch_pitch, hpass, scratch_mc_addr, bpp,
1868b7e1c893Smrg		      3, 0xffffffff);
1869b7e1c893Smrg    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1870b7e1c893Smrg    R600DoCopy(pScrn);
1871b7e1c893Smrg
1872b7e1c893Smrg    while (h) {
1873b7e1c893Smrg	char *src = (char *)scratch->address + scratch_offset;
1874b7e1c893Smrg	int oldhpass = hpass;
1875b7e1c893Smrg	h -= oldhpass;
1876b7e1c893Smrg	y += oldhpass;
1877b7e1c893Smrg	hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1878b7e1c893Smrg
1879b7e1c893Smrg	if (hpass) {
1880b7e1c893Smrg	    scratch_offset = scratch->total/2 - scratch_offset;
1881b7e1c893Smrg	    /* blit from vram to scratch */
1882b7e1c893Smrg	    R600DoPrepareCopy(pScrn,
1883b7e1c893Smrg			      src_pitch, src_width, src_height, src_mc_addr, bpp,
1884b7e1c893Smrg			      scratch_pitch, hpass, scratch_mc_addr + scratch_offset, bpp,
1885b7e1c893Smrg			      3, 0xffffffff);
1886b7e1c893Smrg	    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1887b7e1c893Smrg	    R600DoCopy(pScrn);
1888b7e1c893Smrg	}
1889b7e1c893Smrg
1890b7e1c893Smrg	/* wait for the engine to be idle */
1891b7e1c893Smrg	RADEONWaitForIdleCP(pScrn);
1892b7e1c893Smrg	/* memcopy from scratch to sys */
1893b7e1c893Smrg	while (oldhpass--) {
1894b7e1c893Smrg	    memcpy (dst, src, wpass);
1895b7e1c893Smrg	    dst += dst_pitch;
1896b7e1c893Smrg	    src += scratch_pitch_bytes;
1897b7e1c893Smrg	}
1898b7e1c893Smrg    }
1899b7e1c893Smrg
1900b7e1c893Smrg    R600IBDiscard(pScrn, scratch);
1901b7e1c893Smrg
1902b7e1c893Smrg    return TRUE;
1903b7e1c893Smrg
1904b7e1c893Smrg}
1905b7e1c893Smrg
1906b7e1c893Smrgstatic int
1907b7e1c893SmrgR600MarkSync(ScreenPtr pScreen)
1908b7e1c893Smrg{
1909b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
1910b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1911b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1912b7e1c893Smrg
1913b7e1c893Smrg    return ++accel_state->exaSyncMarker;
1914b7e1c893Smrg
1915b7e1c893Smrg}
1916b7e1c893Smrg
1917b7e1c893Smrgstatic void
1918b7e1c893SmrgR600Sync(ScreenPtr pScreen, int marker)
1919b7e1c893Smrg{
1920b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
1921b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1922b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1923b7e1c893Smrg
1924b7e1c893Smrg    if (accel_state->exaMarkerSynced != marker) {
1925b7e1c893Smrg	RADEONWaitForIdleCP(pScrn);
1926b7e1c893Smrg	accel_state->exaMarkerSynced = marker;
1927b7e1c893Smrg    }
1928b7e1c893Smrg
1929b7e1c893Smrg}
1930b7e1c893Smrg
1931b7e1c893Smrgstatic Bool
1932b7e1c893SmrgR600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
1933b7e1c893Smrg{
1934b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1935b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1936b7e1c893Smrg
1937b7e1c893Smrg    /* 512 bytes per shader for now */
1938b7e1c893Smrg    int size = 512 * 9;
1939b7e1c893Smrg
1940b7e1c893Smrg    accel_state->shaders = NULL;
1941b7e1c893Smrg
1942b7e1c893Smrg    accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
1943b7e1c893Smrg					     TRUE, NULL, NULL);
1944b7e1c893Smrg
1945b7e1c893Smrg    if (accel_state->shaders == NULL)
1946b7e1c893Smrg	return FALSE;
1947b7e1c893Smrg    return TRUE;
1948b7e1c893Smrg}
1949b7e1c893Smrg
1950b7e1c893SmrgBool
1951b7e1c893SmrgR600LoadShaders(ScrnInfoPtr pScrn)
1952b7e1c893Smrg{
1953b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1954b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1955b7e1c893Smrg    RADEONChipFamily ChipSet = info->ChipFamily;
1956b7e1c893Smrg    uint32_t *shader;
1957b7e1c893Smrg
1958b7e1c893Smrg    shader = (pointer)((char *)info->FB + accel_state->shaders->offset);
1959b7e1c893Smrg
1960b7e1c893Smrg    /*  solid vs --------------------------------------- */
1961b7e1c893Smrg    accel_state->solid_vs_offset = 0;
1962b7e1c893Smrg    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
1963b7e1c893Smrg
1964b7e1c893Smrg    /*  solid ps --------------------------------------- */
1965b7e1c893Smrg    accel_state->solid_ps_offset = 512;
1966b7e1c893Smrg    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
1967b7e1c893Smrg
1968b7e1c893Smrg    /*  copy vs --------------------------------------- */
1969b7e1c893Smrg    accel_state->copy_vs_offset = 1024;
1970b7e1c893Smrg    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
1971b7e1c893Smrg
1972b7e1c893Smrg    /*  copy ps --------------------------------------- */
1973b7e1c893Smrg    accel_state->copy_ps_offset = 1536;
1974b7e1c893Smrg    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
1975b7e1c893Smrg
1976b7e1c893Smrg    /*  comp vs --------------------------------------- */
1977b7e1c893Smrg    accel_state->comp_vs_offset = 2048;
1978b7e1c893Smrg    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
1979b7e1c893Smrg
1980b7e1c893Smrg    /*  comp ps --------------------------------------- */
1981b7e1c893Smrg    accel_state->comp_ps_offset = 2560;
1982b7e1c893Smrg    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
1983b7e1c893Smrg
1984b7e1c893Smrg    /*  comp mask ps --------------------------------------- */
1985b7e1c893Smrg    accel_state->comp_mask_ps_offset = 3072;
1986b7e1c893Smrg    R600_comp_mask_ps(ChipSet, shader + accel_state->comp_mask_ps_offset / 4);
1987b7e1c893Smrg
1988b7e1c893Smrg    /*  xv vs --------------------------------------- */
1989b7e1c893Smrg    accel_state->xv_vs_offset = 3584;
1990b7e1c893Smrg    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
1991b7e1c893Smrg
1992b7e1c893Smrg    /*  xv ps --------------------------------------- */
1993b7e1c893Smrg    accel_state->xv_ps_offset = 4096;
1994b7e1c893Smrg    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
1995b7e1c893Smrg
1996b7e1c893Smrg    return TRUE;
1997b7e1c893Smrg}
1998b7e1c893Smrg
1999b7e1c893Smrgstatic Bool
2000b7e1c893SmrgR600PrepareAccess(PixmapPtr pPix, int index)
2001b7e1c893Smrg{
2002b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2003b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2004b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
2005b7e1c893Smrg
2006b7e1c893Smrg    /* flush HDP read/write caches */
2007b7e1c893Smrg    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2008b7e1c893Smrg
2009b7e1c893Smrg    return TRUE;
2010b7e1c893Smrg}
2011b7e1c893Smrg
2012b7e1c893Smrgstatic void
2013b7e1c893SmrgR600FinishAccess(PixmapPtr pPix, int index)
2014b7e1c893Smrg{
2015b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2016b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2017b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
2018b7e1c893Smrg
2019b7e1c893Smrg    /* flush HDP read/write caches */
2020b7e1c893Smrg    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2021b7e1c893Smrg
2022b7e1c893Smrg}
2023b7e1c893Smrg
2024b7e1c893Smrg
2025b7e1c893SmrgBool
2026b7e1c893SmrgR600DrawInit(ScreenPtr pScreen)
2027b7e1c893Smrg{
2028b7e1c893Smrg    ScrnInfoPtr pScrn =  xf86Screens[pScreen->myNum];
2029b7e1c893Smrg    RADEONInfoPtr info   = RADEONPTR(pScrn);
2030b7e1c893Smrg
2031b7e1c893Smrg    if (info->accel_state->exa == NULL) {
2032b7e1c893Smrg	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2033b7e1c893Smrg	return FALSE;
2034b7e1c893Smrg    }
2035b7e1c893Smrg
2036b7e1c893Smrg    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2037b7e1c893Smrg    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2038b7e1c893Smrg
2039b7e1c893Smrg    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2040b7e1c893Smrg    info->accel_state->exa->Solid = R600Solid;
2041b7e1c893Smrg    info->accel_state->exa->DoneSolid = R600DoneSolid;
2042b7e1c893Smrg
2043b7e1c893Smrg    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2044b7e1c893Smrg    info->accel_state->exa->Copy = R600Copy;
2045b7e1c893Smrg    info->accel_state->exa->DoneCopy = R600DoneCopy;
2046b7e1c893Smrg
2047b7e1c893Smrg    info->accel_state->exa->MarkSync = R600MarkSync;
2048b7e1c893Smrg    info->accel_state->exa->WaitMarker = R600Sync;
2049b7e1c893Smrg
2050b7e1c893Smrg    info->accel_state->exa->PrepareAccess = R600PrepareAccess;
2051b7e1c893Smrg    info->accel_state->exa->FinishAccess = R600FinishAccess;
2052b7e1c893Smrg
2053b7e1c893Smrg    /* AGP seems to have problems with gart transfers */
2054b7e1c893Smrg    if (info->accelDFS) {
2055b7e1c893Smrg	info->accel_state->exa->UploadToScreen = R600UploadToScreen;
2056b7e1c893Smrg	info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen;
2057b7e1c893Smrg    }
2058b7e1c893Smrg
2059b7e1c893Smrg    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
2060b7e1c893Smrg#ifdef EXA_SUPPORTS_PREPARE_AUX
2061b7e1c893Smrg    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
2062b7e1c893Smrg#endif
2063b7e1c893Smrg    info->accel_state->exa->pixmapOffsetAlign = 256;
2064b7e1c893Smrg    info->accel_state->exa->pixmapPitchAlign = 256;
2065b7e1c893Smrg
2066b7e1c893Smrg    info->accel_state->exa->CheckComposite = R600CheckComposite;
2067b7e1c893Smrg    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2068b7e1c893Smrg    info->accel_state->exa->Composite = R600Composite;
2069b7e1c893Smrg    info->accel_state->exa->DoneComposite = R600DoneComposite;
2070b7e1c893Smrg
2071b7e1c893Smrg#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
2072b7e1c893Smrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
2073b7e1c893Smrg
2074b7e1c893Smrg    info->accel_state->exa->maxPitchBytes = 32768;
2075b7e1c893Smrg    info->accel_state->exa->maxX = 8192;
2076b7e1c893Smrg#else
2077b7e1c893Smrg    info->accel_state->exa->maxX = 8192;
2078b7e1c893Smrg#endif
2079b7e1c893Smrg    info->accel_state->exa->maxY = 8192;
2080b7e1c893Smrg
2081b7e1c893Smrg    /* not supported yet */
2082b7e1c893Smrg    info->accel_state->vsync = FALSE;
2083b7e1c893Smrg
2084b7e1c893Smrg    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
2085b7e1c893Smrg	xfree(info->accel_state->exa);
2086b7e1c893Smrg	return FALSE;
2087b7e1c893Smrg    }
2088b7e1c893Smrg
2089b7e1c893Smrg    if (!info->gartLocation)
2090b7e1c893Smrg	return FALSE;
2091b7e1c893Smrg
2092b7e1c893Smrg    info->accel_state->XInited3D = FALSE;
2093b7e1c893Smrg    info->accel_state->copy_area = NULL;
2094b7e1c893Smrg
2095b7e1c893Smrg    if (!R600AllocShaders(pScrn, pScreen))
2096b7e1c893Smrg	return FALSE;
2097b7e1c893Smrg
2098b7e1c893Smrg    if (!R600LoadShaders(pScrn))
2099b7e1c893Smrg	return FALSE;
2100b7e1c893Smrg
2101b7e1c893Smrg    exaMarkSync(pScreen);
2102b7e1c893Smrg
2103b7e1c893Smrg    return TRUE;
2104b7e1c893Smrg
2105b7e1c893Smrg}
2106b7e1c893Smrg
2107