r600_exa.c revision ad43ddac
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Author: Alex Deucher <alexander.deucher@amd.com>
 *
 */
26b7e1c893Smrg
27b7e1c893Smrg#ifdef HAVE_CONFIG_H
28b7e1c893Smrg#include "config.h"
29b7e1c893Smrg#endif
30b7e1c893Smrg
31b7e1c893Smrg#include "xf86.h"
32b7e1c893Smrg
33b7e1c893Smrg#include "exa.h"
34b7e1c893Smrg
35b7e1c893Smrg#include "radeon.h"
36b7e1c893Smrg#include "radeon_macros.h"
37b7e1c893Smrg#include "radeon_reg.h"
38b7e1c893Smrg#include "r600_shader.h"
39b7e1c893Smrg#include "r600_reg.h"
40b7e1c893Smrg#include "r600_state.h"
41ad43ddacSmrg#include "radeon_vbo.h"
42ad43ddacSmrg
/*
 * Compile-time trace switches.  RADEON_TRACE_FALL selects which form of
 * RADEON_FALLBACK is compiled in; RADEON_TRACE_DRAW is only defined here.
 */
#define RADEON_TRACE_FALL 0
#define RADEON_TRACE_DRAW 0

/*
 * RADEON_FALLBACK(x): make the enclosing function return FALSE.
 *
 * x is a parenthesised ErrorF() argument list.  It is printed, prefixed with
 * the name of the enclosing function, only when RADEON_TRACE_FALL is
 * non-zero; otherwise it is discarded and the macro is a plain return.
 */
#if !RADEON_TRACE_FALL
#define RADEON_FALLBACK(x) return FALSE
#else
#define RADEON_FALLBACK(x)              \
do {                                    \
    ErrorF("%s: ", __FUNCTION__);       \
    ErrorF x;                           \
    return FALSE;                       \
} while (0)
#endif
56b7e1c893Smrg
57b7e1c893Smrgextern PixmapPtr
58b7e1c893SmrgRADEONGetDrawablePixmap(DrawablePtr pDrawable);
59b7e1c893Smrg
60b7e1c893Smrg/* #define SHOW_VERTEXES */
61b7e1c893Smrg
/*
 * ROP3 codes, one per X11 raster operation.  Each value carries its code in
 * bits 16-23; the table below is indexed by the GX* function number and its
 * entries are written to CB_COLOR_CONTROL by the prepare routines.
 */
#define RADEON_ROP3_ZERO    0x00000000
#define RADEON_ROP3_DSa     0x00880000
#define RADEON_ROP3_SDna    0x00440000
#define RADEON_ROP3_S       0x00cc0000
#define RADEON_ROP3_DSna    0x00220000
#define RADEON_ROP3_D       0x00aa0000
#define RADEON_ROP3_DSx     0x00660000
#define RADEON_ROP3_DSo     0x00ee0000
#define RADEON_ROP3_DSon    0x00110000
#define RADEON_ROP3_DSxn    0x00990000
#define RADEON_ROP3_Dn      0x00550000
#define RADEON_ROP3_SDno    0x00dd0000
#define RADEON_ROP3_Sn      0x00330000
#define RADEON_ROP3_DSno    0x00bb0000
#define RADEON_ROP3_DSan    0x00770000
#define RADEON_ROP3_ONE     0x00ff0000

/* Raster-op lookup: index is the X11 GC function (0 = GXclear .. 15 = GXset). */
uint32_t RADEON_ROP[16] = {
    RADEON_ROP3_ZERO,   /*  0: GXclear        */
    RADEON_ROP3_DSa,    /*  1: GXand          */
    RADEON_ROP3_SDna,   /*  2: GXandReverse   */
    RADEON_ROP3_S,      /*  3: GXcopy         */
    RADEON_ROP3_DSna,   /*  4: GXandInverted  */
    RADEON_ROP3_D,      /*  5: GXnoop         */
    RADEON_ROP3_DSx,    /*  6: GXxor          */
    RADEON_ROP3_DSo,    /*  7: GXor           */
    RADEON_ROP3_DSon,   /*  8: GXnor          */
    RADEON_ROP3_DSxn,   /*  9: GXequiv        */
    RADEON_ROP3_Dn,     /* 10: GXinvert       */
    RADEON_ROP3_SDno,   /* 11: GXorReverse    */
    RADEON_ROP3_Sn,     /* 12: GXcopyInverted */
    RADEON_ROP3_DSno,   /* 13: GXorInverted   */
    RADEON_ROP3_DSan,   /* 14: GXnand         */
    RADEON_ROP3_ONE,    /* 15: GXset          */
};
97b7e1c893Smrg
98ad43ddacSmrgstatic void R600VlineHelperClear(ScrnInfoPtr pScrn)
99ad43ddacSmrg{
100ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
101ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
102ad43ddacSmrg
103ad43ddacSmrg    accel_state->vline_crtc = NULL;
104ad43ddacSmrg    accel_state->vline_y1 = -1;
105ad43ddacSmrg    accel_state->vline_y2 = 0;
106ad43ddacSmrg}
107ad43ddacSmrg
108ad43ddacSmrgstatic void R600VlineHelperSet(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
109ad43ddacSmrg{
110ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
111ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
112ad43ddacSmrg
113ad43ddacSmrg    accel_state->vline_crtc = radeon_pick_best_crtc(pScrn, x1, x2, y1, y2);
114ad43ddacSmrg    if (accel_state->vline_y1 == -1)
115ad43ddacSmrg	accel_state->vline_y1 = y1;
116ad43ddacSmrg    if (y1 < accel_state->vline_y1)
117ad43ddacSmrg	accel_state->vline_y1 = y1;
118ad43ddacSmrg    if (y2 > accel_state->vline_y2)
119ad43ddacSmrg	accel_state->vline_y2 = y2;
120ad43ddacSmrg}
121ad43ddacSmrg
122ad43ddacSmrgstatic Bool R600ValidPM(uint32_t pm, int bpp)
123ad43ddacSmrg{
124ad43ddacSmrg    uint8_t r, g, b, a;
125ad43ddacSmrg    Bool ret = FALSE;
126ad43ddacSmrg
127ad43ddacSmrg    switch (bpp) {
128ad43ddacSmrg    case 8:
129ad43ddacSmrg	a = pm & 0xff;
130ad43ddacSmrg	if ((a == 0) || (a == 0xff))
131ad43ddacSmrg	    ret = TRUE;
132ad43ddacSmrg	break;
133ad43ddacSmrg    case 16:
134ad43ddacSmrg	r = (pm >> 11) & 0x1f;
135ad43ddacSmrg	g = (pm >> 5) & 0x3f;
136ad43ddacSmrg	b = (pm >> 0) & 0x1f;
137ad43ddacSmrg	if (((r == 0) || (r == 0x1f)) &&
138ad43ddacSmrg	    ((g == 0) || (g == 0x3f)) &&
139ad43ddacSmrg	    ((b == 0) || (b == 0x1f)))
140ad43ddacSmrg	    ret = TRUE;
141ad43ddacSmrg	break;
142ad43ddacSmrg    case 32:
143ad43ddacSmrg	a = (pm >> 24) & 0xff;
144ad43ddacSmrg	r = (pm >> 16) & 0xff;
145ad43ddacSmrg	g = (pm >> 8) & 0xff;
146ad43ddacSmrg	b = (pm >> 0) & 0xff;
147ad43ddacSmrg	if (((a == 0) || (a == 0xff)) &&
148ad43ddacSmrg	    ((r == 0) || (r == 0xff)) &&
149ad43ddacSmrg	    ((g == 0) || (g == 0xff)) &&
150ad43ddacSmrg	    ((b == 0) || (b == 0xff)))
151ad43ddacSmrg	    ret = TRUE;
152ad43ddacSmrg	break;
153ad43ddacSmrg    default:
154ad43ddacSmrg	break;
155ad43ddacSmrg    }
156ad43ddacSmrg    return ret;
157ad43ddacSmrg}
158ad43ddacSmrg
159ad43ddacSmrgstatic Bool R600CheckBPP(int bpp)
160ad43ddacSmrg{
161ad43ddacSmrg	switch (bpp) {
162ad43ddacSmrg	case 8:
163ad43ddacSmrg	case 16:
164ad43ddacSmrg	case 32:
165ad43ddacSmrg		return TRUE;
166ad43ddacSmrg	default:
167ad43ddacSmrg		break;
168ad43ddacSmrg	}
169ad43ddacSmrg	return FALSE;
170ad43ddacSmrg}
171ad43ddacSmrg
172ad43ddacSmrgBool
173ad43ddacSmrgR600SetAccelState(ScrnInfoPtr pScrn,
174ad43ddacSmrg		  struct r600_accel_object *src0,
175ad43ddacSmrg		  struct r600_accel_object *src1,
176ad43ddacSmrg		  struct r600_accel_object *dst,
177ad43ddacSmrg		  uint32_t vs_offset, uint32_t ps_offset,
178ad43ddacSmrg		  int rop, Pixel planemask)
179ad43ddacSmrg{
180ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
181ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
182ad43ddacSmrg    int ret;
183ad43ddacSmrg
184ad43ddacSmrg    if (src0) {
185ad43ddacSmrg	memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object));
186ad43ddacSmrg	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
187ad43ddacSmrg    } else {
188ad43ddacSmrg	memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
189ad43ddacSmrg	accel_state->src_size[0] = 0;
190ad43ddacSmrg    }
191ad43ddacSmrg
192ad43ddacSmrg    if (src1) {
193ad43ddacSmrg	memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object));
194ad43ddacSmrg	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
195ad43ddacSmrg    } else {
196ad43ddacSmrg	memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
197ad43ddacSmrg	accel_state->src_size[1] = 0;
198ad43ddacSmrg    }
199ad43ddacSmrg
200ad43ddacSmrg    if (dst) {
201ad43ddacSmrg	memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object));
202ad43ddacSmrg	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
203ad43ddacSmrg    } else {
204ad43ddacSmrg	memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
205ad43ddacSmrg	accel_state->dst_size = 0;
206ad43ddacSmrg    }
207ad43ddacSmrg
208ad43ddacSmrg    accel_state->rop = rop;
209ad43ddacSmrg    accel_state->planemask = planemask;
210ad43ddacSmrg
211ad43ddacSmrg    /* bad pitch */
212ad43ddacSmrg    if (accel_state->src_obj[0].pitch & 7)
213ad43ddacSmrg	RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
214ad43ddacSmrg
215ad43ddacSmrg    /* bad offset */
216ad43ddacSmrg    if (accel_state->src_obj[0].offset & 0xff)
217ad43ddacSmrg	RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[0].offset));
218ad43ddacSmrg
219ad43ddacSmrg    /* bad pitch */
220ad43ddacSmrg    if (accel_state->src_obj[1].pitch & 7)
221ad43ddacSmrg	RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
222ad43ddacSmrg
223ad43ddacSmrg    /* bad offset */
224ad43ddacSmrg    if (accel_state->src_obj[1].offset & 0xff)
225ad43ddacSmrg	RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[1].offset));
226ad43ddacSmrg
227ad43ddacSmrg    if (accel_state->dst_obj.pitch & 7)
228ad43ddacSmrg	RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
229ad43ddacSmrg
230ad43ddacSmrg    if (accel_state->dst_obj.offset & 0xff)
231ad43ddacSmrg	RADEON_FALLBACK(("Bad dst offset 0x%08x\n", accel_state->dst_obj.offset));
232ad43ddacSmrg
233ad43ddacSmrg    accel_state->vs_size = 512;
234ad43ddacSmrg    accel_state->ps_size = 512;
235ad43ddacSmrg#if defined(XF86DRM_MODE)
236ad43ddacSmrg    if (info->cs) {
237ad43ddacSmrg	accel_state->vs_mc_addr = vs_offset;
238ad43ddacSmrg	accel_state->ps_mc_addr = ps_offset;
239ad43ddacSmrg
240ad43ddacSmrg	radeon_cs_space_reset_bos(info->cs);
241ad43ddacSmrg	radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
242ad43ddacSmrg					  RADEON_GEM_DOMAIN_VRAM, 0);
243ad43ddacSmrg	if (accel_state->src_obj[0].bo)
244ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
245ad43ddacSmrg					      accel_state->src_obj[0].domain, 0);
246ad43ddacSmrg	if (accel_state->src_obj[1].bo)
247ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
248ad43ddacSmrg					      accel_state->src_obj[1].domain, 0);
249ad43ddacSmrg	if (accel_state->dst_obj.bo)
250ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
251ad43ddacSmrg					      0, accel_state->dst_obj.domain);
252ad43ddacSmrg	ret = radeon_cs_space_check(info->cs);
253ad43ddacSmrg	if (ret)
254ad43ddacSmrg	    RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
255ad43ddacSmrg
256ad43ddacSmrg    } else
257ad43ddacSmrg#endif
258ad43ddacSmrg    {
259ad43ddacSmrg	accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
260ad43ddacSmrg	    vs_offset;
261ad43ddacSmrg	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
262ad43ddacSmrg	    ps_offset;
263ad43ddacSmrg    }
264ad43ddacSmrg
265ad43ddacSmrg    return TRUE;
266ad43ddacSmrg}
267ad43ddacSmrg
#if defined(XF86DRM_MODE)
/*
 * Add the buffer object stored in a pixmap's EXA driver-private data to the
 * command stream's persistent buffer list with the given read/write domains.
 */
static inline void radeon_add_pixmap(struct radeon_cs *cs, PixmapPtr pPix, int read_domains, int write_domain)
{
    struct radeon_exa_pixmap_priv *priv = exaGetPixmapDriverPrivate(pPix);

    radeon_cs_space_add_persistent_bo(cs, priv->bo, read_domains, write_domain);
}
#endif
276ad43ddacSmrg
277b7e1c893Smrgstatic void
278b7e1c893SmrgR600DoneSolid(PixmapPtr pPix);
279b7e1c893Smrg
280b7e1c893Smrgstatic void
281b7e1c893SmrgR600DoneComposite(PixmapPtr pDst);
282b7e1c893Smrg
283b7e1c893Smrg
284b7e1c893Smrgstatic Bool
285b7e1c893SmrgR600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
286b7e1c893Smrg{
287b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
288b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
289b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
290b7e1c893Smrg    cb_config_t     cb_conf;
291b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
292b7e1c893Smrg    int pmask = 0;
293b7e1c893Smrg    uint32_t a, r, g, b;
294b7e1c893Smrg    float ps_alu_consts[4];
295ad43ddacSmrg    struct r600_accel_object dst;
296b7e1c893Smrg
297ad43ddacSmrg    if (!R600CheckBPP(pPix->drawable.bitsPerPixel))
298ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype failed\n"));
299ad43ddacSmrg    if (!R600ValidPM(pm, pPix->drawable.bitsPerPixel))
300ad43ddacSmrg	RADEON_FALLBACK(("invalid planemask\n"));
301b7e1c893Smrg
302ad43ddacSmrg#if defined(XF86DRM_MODE)
303ad43ddacSmrg    if (info->cs) {
304ad43ddacSmrg	dst.offset = 0;
305ad43ddacSmrg	dst.bo = radeon_get_pixmap_bo(pPix);
306ad43ddacSmrg    } else
307ad43ddacSmrg#endif
308ad43ddacSmrg    {
309ad43ddacSmrg	dst.offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
310ad43ddacSmrg	dst.bo = NULL;
311ad43ddacSmrg    }
312b7e1c893Smrg
313ad43ddacSmrg    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
314ad43ddacSmrg    dst.width = pPix->drawable.width;
315ad43ddacSmrg    dst.height = pPix->drawable.height;
316ad43ddacSmrg    dst.bpp = pPix->drawable.bitsPerPixel;
317ad43ddacSmrg    dst.domain = RADEON_GEM_DOMAIN_VRAM;
318ad43ddacSmrg
319ad43ddacSmrg    if (!R600SetAccelState(pScrn,
320ad43ddacSmrg			   NULL,
321ad43ddacSmrg			   NULL,
322ad43ddacSmrg			   &dst,
323ad43ddacSmrg			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
324ad43ddacSmrg			   alu, pm))
325b7e1c893Smrg	return FALSE;
326b7e1c893Smrg
327b7e1c893Smrg    CLEAR (cb_conf);
328b7e1c893Smrg    CLEAR (vs_conf);
329b7e1c893Smrg    CLEAR (ps_conf);
330b7e1c893Smrg
331ad43ddacSmrg    radeon_vbo_check(pScrn, 16);
332ad43ddacSmrg    r600_cp_start(pScrn);
333b7e1c893Smrg
334b7e1c893Smrg    set_default_state(pScrn, accel_state->ib);
335b7e1c893Smrg
336ad43ddacSmrg    set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
337ad43ddacSmrg    set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
338ad43ddacSmrg    set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
339b7e1c893Smrg
340b7e1c893Smrg    /* Shader */
341b7e1c893Smrg
342b7e1c893Smrg    /* flush SQ cache */
343b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
344ad43ddacSmrg			accel_state->vs_size, accel_state->vs_mc_addr,
345ad43ddacSmrg			accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
346b7e1c893Smrg
347b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
348b7e1c893Smrg    vs_conf.num_gprs            = 2;
349b7e1c893Smrg    vs_conf.stack_size          = 0;
350ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
351ad43ddacSmrg    vs_setup                    (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
352b7e1c893Smrg
353b7e1c893Smrg    /* flush SQ cache */
354b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
355ad43ddacSmrg			accel_state->ps_size, accel_state->ps_mc_addr,
356ad43ddacSmrg			accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
357b7e1c893Smrg
358b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
359b7e1c893Smrg    ps_conf.num_gprs            = 1;
360b7e1c893Smrg    ps_conf.stack_size          = 0;
361b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
362b7e1c893Smrg    ps_conf.clamp_consts        = 0;
363b7e1c893Smrg    ps_conf.export_mode         = 2;
364ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
365ad43ddacSmrg    ps_setup                    (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
366b7e1c893Smrg
367b7e1c893Smrg    /* Render setup */
368ad43ddacSmrg    if (accel_state->planemask & 0x000000ff)
369b7e1c893Smrg	pmask |= 4; /* B */
370ad43ddacSmrg    if (accel_state->planemask & 0x0000ff00)
371b7e1c893Smrg	pmask |= 2; /* G */
372ad43ddacSmrg    if (accel_state->planemask & 0x00ff0000)
373b7e1c893Smrg	pmask |= 1; /* R */
374ad43ddacSmrg    if (accel_state->planemask & 0xff000000)
375b7e1c893Smrg	pmask |= 8; /* A */
376ad43ddacSmrg    BEGIN_BATCH(6);
377ad43ddacSmrg    EREG(accel_state->ib, CB_TARGET_MASK,                      (pmask << TARGET0_ENABLE_shift));
378ad43ddacSmrg    EREG(accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[accel_state->rop]);
379ad43ddacSmrg    END_BATCH();
380b7e1c893Smrg
381b7e1c893Smrg    cb_conf.id = 0;
382ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
383ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
384ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
385ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
386b7e1c893Smrg
387ad43ddacSmrg    if (accel_state->dst_obj.bpp == 8) {
388b7e1c893Smrg	cb_conf.format = COLOR_8;
389b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
390ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 16) {
391b7e1c893Smrg	cb_conf.format = COLOR_5_6_5;
392b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
393b7e1c893Smrg    } else {
394b7e1c893Smrg	cb_conf.format = COLOR_8_8_8_8;
395b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
396b7e1c893Smrg    }
397b7e1c893Smrg    cb_conf.source_format = 1;
398b7e1c893Smrg    cb_conf.blend_clamp = 1;
399ad43ddacSmrg    set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
400b7e1c893Smrg
401b7e1c893Smrg    /* Interpolator setup */
402b7e1c893Smrg    /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */
403ad43ddacSmrg    BEGIN_BATCH(18);
404b7e1c893Smrg    EREG(accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift));
405b7e1c893Smrg    EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
406b7e1c893Smrg
407b7e1c893Smrg    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
408b7e1c893Smrg     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
409b7e1c893Smrg    /* no VS exports as PS input (NUM_INTERP is not zero based, no minus one) */
410b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_IN_CONTROL_0,                 (0 << NUM_INTERP_shift));
411b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
412b7e1c893Smrg    /* color semantic id 0 -> GPR[0] */
413b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)	|
414b7e1c893Smrg								  (0x03 << DEFAULT_VAL_shift)	|
415b7e1c893Smrg								  FLAT_SHADE_bit		|
416b7e1c893Smrg								  SEL_CENTROID_bit));
417ad43ddacSmrg    EREG(accel_state->ib, SPI_INTERP_CONTROL_0,                FLAT_SHADE_ENA_bit);
418ad43ddacSmrg    END_BATCH();
419b7e1c893Smrg
420b7e1c893Smrg    /* PS alu constants */
421ad43ddacSmrg    if (accel_state->dst_obj.bpp == 16) {
422b7e1c893Smrg	r = (fg >> 11) & 0x1f;
423b7e1c893Smrg	g = (fg >> 5) & 0x3f;
424b7e1c893Smrg	b = (fg >> 0) & 0x1f;
425b7e1c893Smrg	ps_alu_consts[0] = (float)r / 31; /* R */
426b7e1c893Smrg	ps_alu_consts[1] = (float)g / 63; /* G */
427b7e1c893Smrg	ps_alu_consts[2] = (float)b / 31; /* B */
428b7e1c893Smrg	ps_alu_consts[3] = 1.0; /* A */
429ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 8) {
430b7e1c893Smrg	a = (fg >> 0) & 0xff;
431b7e1c893Smrg	ps_alu_consts[0] = 0.0; /* R */
432b7e1c893Smrg	ps_alu_consts[1] = 0.0; /* G */
433b7e1c893Smrg	ps_alu_consts[2] = 0.0; /* B */
434b7e1c893Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
435b7e1c893Smrg    } else {
436b7e1c893Smrg	a = (fg >> 24) & 0xff;
437b7e1c893Smrg	r = (fg >> 16) & 0xff;
438b7e1c893Smrg	g = (fg >> 8) & 0xff;
439b7e1c893Smrg	b = (fg >> 0) & 0xff;
440b7e1c893Smrg	ps_alu_consts[0] = (float)r / 255; /* R */
441b7e1c893Smrg	ps_alu_consts[1] = (float)g / 255; /* G */
442b7e1c893Smrg	ps_alu_consts[2] = (float)b / 255; /* B */
443b7e1c893Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
444b7e1c893Smrg    }
445b7e1c893Smrg    set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
446b7e1c893Smrg		   sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
447b7e1c893Smrg
448ad43ddacSmrg    if (accel_state->vsync)
449ad43ddacSmrg	R600VlineHelperClear(pScrn);
450b7e1c893Smrg
451b7e1c893Smrg    return TRUE;
452b7e1c893Smrg}
453b7e1c893Smrg
454b7e1c893Smrg
455b7e1c893Smrgstatic void
456b7e1c893SmrgR600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
457b7e1c893Smrg{
458b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
459b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
460b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
461b7e1c893Smrg    float *vb;
462b7e1c893Smrg
463ad43ddacSmrg    if (accel_state->vsync)
464ad43ddacSmrg	R600VlineHelperSet(pScrn, x1, y1, x2, y2);
465b7e1c893Smrg
466ad43ddacSmrg    vb = radeon_vbo_space(pScrn, 8);
467b7e1c893Smrg
468b7e1c893Smrg    vb[0] = (float)x1;
469b7e1c893Smrg    vb[1] = (float)y1;
470b7e1c893Smrg
471b7e1c893Smrg    vb[2] = (float)x1;
472b7e1c893Smrg    vb[3] = (float)y2;
473b7e1c893Smrg
474b7e1c893Smrg    vb[4] = (float)x2;
475b7e1c893Smrg    vb[5] = (float)y2;
476b7e1c893Smrg
477ad43ddacSmrg    radeon_vbo_commit(pScrn);
478b7e1c893Smrg}
479b7e1c893Smrg
480b7e1c893Smrgstatic void
481b7e1c893SmrgR600DoneSolid(PixmapPtr pPix)
482b7e1c893Smrg{
483b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
484b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
485b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
486b7e1c893Smrg
487ad43ddacSmrg    if (accel_state->vsync)
488ad43ddacSmrg	cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
489ad43ddacSmrg			   accel_state->vline_crtc,
490ad43ddacSmrg			   accel_state->vline_y1,
491ad43ddacSmrg			   accel_state->vline_y2);
492b7e1c893Smrg
493ad43ddacSmrg    r600_finish_op(pScrn, 8);
494b7e1c893Smrg}
495b7e1c893Smrg
496b7e1c893Smrgstatic void
497ad43ddacSmrgR600DoPrepareCopy(ScrnInfoPtr pScrn)
498b7e1c893Smrg{
499b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
500b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
501b7e1c893Smrg    int pmask = 0;
502b7e1c893Smrg    cb_config_t     cb_conf;
503b7e1c893Smrg    tex_resource_t  tex_res;
504b7e1c893Smrg    tex_sampler_t   tex_samp;
505b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
506b7e1c893Smrg
507b7e1c893Smrg    CLEAR (cb_conf);
508b7e1c893Smrg    CLEAR (tex_res);
509b7e1c893Smrg    CLEAR (tex_samp);
510b7e1c893Smrg    CLEAR (vs_conf);
511b7e1c893Smrg    CLEAR (ps_conf);
512b7e1c893Smrg
513ad43ddacSmrg    radeon_vbo_check(pScrn, 16);
514ad43ddacSmrg    r600_cp_start(pScrn);
515b7e1c893Smrg
516b7e1c893Smrg    set_default_state(pScrn, accel_state->ib);
517b7e1c893Smrg
518ad43ddacSmrg    set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
519ad43ddacSmrg    set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
520ad43ddacSmrg    set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
521b7e1c893Smrg
522b7e1c893Smrg    /* Shader */
523b7e1c893Smrg
524b7e1c893Smrg    /* flush SQ cache */
525b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
526ad43ddacSmrg			accel_state->vs_size, accel_state->vs_mc_addr,
527ad43ddacSmrg			accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
528b7e1c893Smrg
529b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
530b7e1c893Smrg    vs_conf.num_gprs            = 2;
531b7e1c893Smrg    vs_conf.stack_size          = 0;
532ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
533ad43ddacSmrg    vs_setup                    (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
534b7e1c893Smrg
535b7e1c893Smrg    /* flush SQ cache */
536b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
537ad43ddacSmrg			accel_state->ps_size, accel_state->ps_mc_addr,
538ad43ddacSmrg			accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
539b7e1c893Smrg
540b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
541b7e1c893Smrg    ps_conf.num_gprs            = 1;
542b7e1c893Smrg    ps_conf.stack_size          = 0;
543b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
544b7e1c893Smrg    ps_conf.clamp_consts        = 0;
545b7e1c893Smrg    ps_conf.export_mode         = 2;
546ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
547ad43ddacSmrg    ps_setup                    (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
548b7e1c893Smrg
549b7e1c893Smrg    /* flush texture cache */
550b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
551ad43ddacSmrg			accel_state->src_size[0], accel_state->src_obj[0].offset,
552ad43ddacSmrg			accel_state->src_obj[0].bo, accel_state->src_obj[0].domain, 0);
553b7e1c893Smrg
554b7e1c893Smrg    /* Texture */
555b7e1c893Smrg    tex_res.id                  = 0;
556ad43ddacSmrg    tex_res.w                   = accel_state->src_obj[0].width;
557ad43ddacSmrg    tex_res.h                   = accel_state->src_obj[0].height;
558ad43ddacSmrg    tex_res.pitch               = accel_state->src_obj[0].pitch;
559b7e1c893Smrg    tex_res.depth               = 0;
560b7e1c893Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
561ad43ddacSmrg    tex_res.base                = accel_state->src_obj[0].offset;
562ad43ddacSmrg    tex_res.mip_base            = accel_state->src_obj[0].offset;
563ad43ddacSmrg    tex_res.bo                  = accel_state->src_obj[0].bo;
564ad43ddacSmrg    tex_res.mip_bo              = accel_state->src_obj[0].bo;
565ad43ddacSmrg    if (accel_state->src_obj[0].bpp == 8) {
566b7e1c893Smrg	tex_res.format              = FMT_8;
567b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
568b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
569b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
570b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
571ad43ddacSmrg    } else if (accel_state->src_obj[0].bpp == 16) {
572b7e1c893Smrg	tex_res.format              = FMT_5_6_5;
573b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
574b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
575b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
576b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
577b7e1c893Smrg    } else {
578b7e1c893Smrg	tex_res.format              = FMT_8_8_8_8;
579b7e1c893Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
580b7e1c893Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
581b7e1c893Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
582b7e1c893Smrg	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
583b7e1c893Smrg    }
584b7e1c893Smrg
585b7e1c893Smrg    tex_res.request_size        = 1;
586b7e1c893Smrg    tex_res.base_level          = 0;
587b7e1c893Smrg    tex_res.last_level          = 0;
588b7e1c893Smrg    tex_res.perf_modulation     = 0;
589ad43ddacSmrg    set_tex_resource            (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
590b7e1c893Smrg
591b7e1c893Smrg    tex_samp.id                 = 0;
592b7e1c893Smrg    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
593b7e1c893Smrg    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
594b7e1c893Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
595b7e1c893Smrg    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
596b7e1c893Smrg    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
597b7e1c893Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
598b7e1c893Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
599b7e1c893Smrg    set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
600b7e1c893Smrg
601b7e1c893Smrg
602b7e1c893Smrg    /* Render setup */
603ad43ddacSmrg    if (accel_state->planemask & 0x000000ff)
604b7e1c893Smrg	pmask |= 4; /* B */
605ad43ddacSmrg    if (accel_state->planemask & 0x0000ff00)
606b7e1c893Smrg	pmask |= 2; /* G */
607ad43ddacSmrg    if (accel_state->planemask & 0x00ff0000)
608b7e1c893Smrg	pmask |= 1; /* R */
609ad43ddacSmrg    if (accel_state->planemask & 0xff000000)
610b7e1c893Smrg	pmask |= 8; /* A */
611ad43ddacSmrg    BEGIN_BATCH(6);
612ad43ddacSmrg    EREG(accel_state->ib, CB_TARGET_MASK,                      (pmask << TARGET0_ENABLE_shift));
613ad43ddacSmrg    EREG(accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[accel_state->rop]);
614ad43ddacSmrg    END_BATCH();
615b7e1c893Smrg
616b7e1c893Smrg    cb_conf.id = 0;
617ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
618ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
619ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
620ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
621ad43ddacSmrg    if (accel_state->dst_obj.bpp == 8) {
622b7e1c893Smrg	cb_conf.format = COLOR_8;
623b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
624ad43ddacSmrg    } else if (accel_state->dst_obj.bpp == 16) {
625b7e1c893Smrg	cb_conf.format = COLOR_5_6_5;
626b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
627b7e1c893Smrg    } else {
628b7e1c893Smrg	cb_conf.format = COLOR_8_8_8_8;
629b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
630b7e1c893Smrg    }
631b7e1c893Smrg    cb_conf.source_format = 1;
632b7e1c893Smrg    cb_conf.blend_clamp = 1;
633ad43ddacSmrg    set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
634b7e1c893Smrg
635b7e1c893Smrg    /* Interpolator setup */
636b7e1c893Smrg    /* export tex coord from VS */
637ad43ddacSmrg    BEGIN_BATCH(18);
638b7e1c893Smrg    EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
639b7e1c893Smrg    EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
640b7e1c893Smrg
641b7e1c893Smrg    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
642b7e1c893Smrg     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
643b7e1c893Smrg    /* input tex coord from VS */
644b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_IN_CONTROL_0,                 ((1 << NUM_INTERP_shift)));
645b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
646b7e1c893Smrg    /* color semantic id 0 -> GPR[0] */
647b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)	|
648b7e1c893Smrg								(0x01 << DEFAULT_VAL_shift)	|
649b7e1c893Smrg								SEL_CENTROID_bit));
650b7e1c893Smrg    EREG(accel_state->ib, SPI_INTERP_CONTROL_0,                0);
651ad43ddacSmrg    END_BATCH();
652b7e1c893Smrg
653b7e1c893Smrg}
654b7e1c893Smrg
655b7e1c893Smrgstatic void
656b7e1c893SmrgR600DoCopy(ScrnInfoPtr pScrn)
657b7e1c893Smrg{
658ad43ddacSmrg    r600_finish_op(pScrn, 16);
659ad43ddacSmrg}
660ad43ddacSmrg
661ad43ddacSmrgstatic void
662ad43ddacSmrgR600DoCopyVline(PixmapPtr pPix)
663ad43ddacSmrg{
664ad43ddacSmrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
665b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
666b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
667b7e1c893Smrg
668ad43ddacSmrg    if (accel_state->vsync)
669ad43ddacSmrg	cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
670ad43ddacSmrg			   accel_state->vline_crtc,
671ad43ddacSmrg			   accel_state->vline_y1,
672ad43ddacSmrg			   accel_state->vline_y2);
673b7e1c893Smrg
674ad43ddacSmrg    r600_finish_op(pScrn, 16);
675b7e1c893Smrg}
676b7e1c893Smrg
677b7e1c893Smrgstatic void
678b7e1c893SmrgR600AppendCopyVertex(ScrnInfoPtr pScrn,
679b7e1c893Smrg		     int srcX, int srcY,
680b7e1c893Smrg		     int dstX, int dstY,
681b7e1c893Smrg		     int w, int h)
682b7e1c893Smrg{
683b7e1c893Smrg    float *vb;
684b7e1c893Smrg
685ad43ddacSmrg    vb = radeon_vbo_space(pScrn, 16);
686b7e1c893Smrg
687b7e1c893Smrg    vb[0] = (float)dstX;
688b7e1c893Smrg    vb[1] = (float)dstY;
689b7e1c893Smrg    vb[2] = (float)srcX;
690b7e1c893Smrg    vb[3] = (float)srcY;
691b7e1c893Smrg
692b7e1c893Smrg    vb[4] = (float)dstX;
693b7e1c893Smrg    vb[5] = (float)(dstY + h);
694b7e1c893Smrg    vb[6] = (float)srcX;
695b7e1c893Smrg    vb[7] = (float)(srcY + h);
696b7e1c893Smrg
697b7e1c893Smrg    vb[8] = (float)(dstX + w);
698b7e1c893Smrg    vb[9] = (float)(dstY + h);
699b7e1c893Smrg    vb[10] = (float)(srcX + w);
700b7e1c893Smrg    vb[11] = (float)(srcY + h);
701b7e1c893Smrg
702ad43ddacSmrg    radeon_vbo_commit(pScrn);
703b7e1c893Smrg}
704b7e1c893Smrg
705b7e1c893Smrgstatic Bool
706b7e1c893SmrgR600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
707b7e1c893Smrg		int xdir, int ydir,
708b7e1c893Smrg		int rop,
709b7e1c893Smrg		Pixel planemask)
710b7e1c893Smrg{
711b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
712b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
713b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
714ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
715ad43ddacSmrg
716ad43ddacSmrg    if (!R600CheckBPP(pSrc->drawable.bitsPerPixel))
717ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype src failed\n"));
718ad43ddacSmrg    if (!R600CheckBPP(pDst->drawable.bitsPerPixel))
719ad43ddacSmrg	RADEON_FALLBACK(("R600CheckDatatype dst failed\n"));
720ad43ddacSmrg    if (!R600ValidPM(planemask, pDst->drawable.bitsPerPixel))
721ad43ddacSmrg	RADEON_FALLBACK(("Invalid planemask\n"));
722ad43ddacSmrg
723ad43ddacSmrg    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
724ad43ddacSmrg    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
725ad43ddacSmrg
726ad43ddacSmrg    accel_state->same_surface = FALSE;
727ad43ddacSmrg
728ad43ddacSmrg#if defined(XF86DRM_MODE)
729ad43ddacSmrg    if (info->cs) {
730ad43ddacSmrg	src_obj.offset = 0;
731ad43ddacSmrg	dst_obj.offset = 0;
732ad43ddacSmrg	src_obj.bo = radeon_get_pixmap_bo(pSrc);
733ad43ddacSmrg	dst_obj.bo = radeon_get_pixmap_bo(pDst);
734ad43ddacSmrg	if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
735ad43ddacSmrg	    accel_state->same_surface = TRUE;
736ad43ddacSmrg    } else
737b7e1c893Smrg#endif
738ad43ddacSmrg    {
739ad43ddacSmrg	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
740ad43ddacSmrg	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
741ad43ddacSmrg	if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst))
742ad43ddacSmrg	    accel_state->same_surface = TRUE;
743ad43ddacSmrg	src_obj.bo = NULL;
744ad43ddacSmrg	dst_obj.bo = NULL;
745b7e1c893Smrg    }
746b7e1c893Smrg
747ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
748ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
749ad43ddacSmrg    src_obj.bpp = pSrc->drawable.bitsPerPixel;
750ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
751ad43ddacSmrg
752ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
753ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
754ad43ddacSmrg    dst_obj.bpp = pDst->drawable.bitsPerPixel;
755ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
756ad43ddacSmrg
757ad43ddacSmrg    if (!R600SetAccelState(pScrn,
758ad43ddacSmrg			   &src_obj,
759ad43ddacSmrg			   NULL,
760ad43ddacSmrg			   &dst_obj,
761ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
762ad43ddacSmrg			   rop, planemask))
763b7e1c893Smrg	return FALSE;
764b7e1c893Smrg
765ad43ddacSmrg    if (accel_state->same_surface == TRUE) {
766ad43ddacSmrg	unsigned long size = pDst->drawable.height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
767b7e1c893Smrg
768ad43ddacSmrg#if defined(XF86DRM_MODE)
769ad43ddacSmrg	if (info->cs) {
770ad43ddacSmrg	    if (accel_state->copy_area_bo) {
771ad43ddacSmrg		radeon_bo_unref(accel_state->copy_area_bo);
772ad43ddacSmrg		accel_state->copy_area_bo = NULL;
773b7e1c893Smrg	    }
774ad43ddacSmrg	    accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
775ad43ddacSmrg						       RADEON_GEM_DOMAIN_VRAM,
776ad43ddacSmrg						       0);
777ad43ddacSmrg	    if (accel_state->copy_area_bo == NULL)
778ad43ddacSmrg		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
779ad43ddacSmrg
780ad43ddacSmrg	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
781ad43ddacSmrg					      RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_DOMAIN_VRAM);
782ad43ddacSmrg	    if (radeon_cs_space_check(info->cs)) {
783ad43ddacSmrg		radeon_bo_unref(accel_state->copy_area_bo);
784ad43ddacSmrg		accel_state->copy_area_bo = NULL;
785ad43ddacSmrg		return FALSE;
786ad43ddacSmrg	    }
787ad43ddacSmrg	    accel_state->copy_area = (void*)accel_state->copy_area_bo;
788ad43ddacSmrg	} else
789ad43ddacSmrg#endif
790ad43ddacSmrg	{
791ad43ddacSmrg	    if (accel_state->copy_area) {
792ad43ddacSmrg		exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
793ad43ddacSmrg		accel_state->copy_area = NULL;
794ad43ddacSmrg	    }
795ad43ddacSmrg	    accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
796ad43ddacSmrg	    if (!accel_state->copy_area)
797ad43ddacSmrg		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
798b7e1c893Smrg	}
799ad43ddacSmrg    } else
800ad43ddacSmrg	R600DoPrepareCopy(pScrn);
801b7e1c893Smrg
802ad43ddacSmrg    if (accel_state->vsync)
803ad43ddacSmrg	R600VlineHelperClear(pScrn);
804ad43ddacSmrg
805ad43ddacSmrg    return TRUE;
806b7e1c893Smrg}
807b7e1c893Smrg
808b7e1c893Smrgstatic void
809b7e1c893SmrgR600Copy(PixmapPtr pDst,
810b7e1c893Smrg	 int srcX, int srcY,
811b7e1c893Smrg	 int dstX, int dstY,
812b7e1c893Smrg	 int w, int h)
813b7e1c893Smrg{
814b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
815b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
816b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
817b7e1c893Smrg
818b7e1c893Smrg    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
819b7e1c893Smrg	return;
820b7e1c893Smrg
821ad43ddacSmrg    if (accel_state->vsync)
822ad43ddacSmrg	R600VlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
823b7e1c893Smrg
824ad43ddacSmrg    if (accel_state->same_surface && accel_state->copy_area) {
825ad43ddacSmrg	uint32_t orig_offset, tmp_offset;
826ad43ddacSmrg	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
827ad43ddacSmrg	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
828ad43ddacSmrg	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
829ad43ddacSmrg
830ad43ddacSmrg#if defined(XF86DRM_MODE)
831ad43ddacSmrg	if (info->cs) {
832ad43ddacSmrg	    tmp_offset = 0;
833ad43ddacSmrg	    orig_offset = 0;
834ad43ddacSmrg	} else
835ad43ddacSmrg#endif
836ad43ddacSmrg	{
837b7e1c893Smrg	    tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset;
838b7e1c893Smrg	    orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
839ad43ddacSmrg	}
840b7e1c893Smrg
841ad43ddacSmrg	/* src to tmp */
842ad43ddacSmrg	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
843ad43ddacSmrg	accel_state->dst_obj.bo = accel_state->copy_area_bo;
844ad43ddacSmrg	accel_state->dst_obj.offset = tmp_offset;
845ad43ddacSmrg	R600DoPrepareCopy(pScrn);
846b7e1c893Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
847b7e1c893Smrg	R600DoCopy(pScrn);
848ad43ddacSmrg
849ad43ddacSmrg	/* tmp to dst */
850ad43ddacSmrg	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
851ad43ddacSmrg	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
852ad43ddacSmrg	accel_state->src_obj[0].offset = tmp_offset;
853ad43ddacSmrg	accel_state->dst_obj.domain = orig_dst_domain;
854ad43ddacSmrg	accel_state->dst_obj.bo = orig_bo;
855ad43ddacSmrg	accel_state->dst_obj.offset = orig_offset;
856ad43ddacSmrg	R600DoPrepareCopy(pScrn);
857ad43ddacSmrg	R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
858ad43ddacSmrg	R600DoCopyVline(pDst);
859ad43ddacSmrg
860ad43ddacSmrg	/* restore state */
861ad43ddacSmrg	accel_state->src_obj[0].domain = orig_src_domain;
862ad43ddacSmrg	accel_state->src_obj[0].bo = orig_bo;
863ad43ddacSmrg	accel_state->src_obj[0].offset = orig_offset;
864ad43ddacSmrg    } else
865b7e1c893Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
866b7e1c893Smrg
867b7e1c893Smrg}
868b7e1c893Smrg
869b7e1c893Smrgstatic void
870b7e1c893SmrgR600DoneCopy(PixmapPtr pDst)
871b7e1c893Smrg{
872b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
873b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
874b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
875b7e1c893Smrg
876b7e1c893Smrg    if (!accel_state->same_surface)
877ad43ddacSmrg	R600DoCopyVline(pDst);
878b7e1c893Smrg
879b7e1c893Smrg    if (accel_state->copy_area) {
880ad43ddacSmrg	if (!info->cs)
881ad43ddacSmrg	    exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
882b7e1c893Smrg	accel_state->copy_area = NULL;
883b7e1c893Smrg    }
884b7e1c893Smrg
885b7e1c893Smrg}
886b7e1c893Smrg
887b7e1c893Smrg
/* Convert a fixed-point value with 16 fractional bits to float (f / 2^16). */
#define xFixedToFloat(f) ((float)(f) / 65536.0f)
889b7e1c893Smrg
890b7e1c893Smrgstruct blendinfo {
891b7e1c893Smrg    Bool dst_alpha;
892b7e1c893Smrg    Bool src_alpha;
893b7e1c893Smrg    uint32_t blend_cntl;
894b7e1c893Smrg};
895b7e1c893Smrg
896b7e1c893Smrgstatic struct blendinfo R600BlendOp[] = {
897b7e1c893Smrg    /* Clear */
898b7e1c893Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
899b7e1c893Smrg    /* Src */
900b7e1c893Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
901b7e1c893Smrg    /* Dst */
902b7e1c893Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
903b7e1c893Smrg    /* Over */
904b7e1c893Smrg    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
905b7e1c893Smrg    /* OverReverse */
906b7e1c893Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
907b7e1c893Smrg    /* In */
908b7e1c893Smrg    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
909b7e1c893Smrg    /* InReverse */
910b7e1c893Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
911b7e1c893Smrg    /* Out */
912b7e1c893Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
913b7e1c893Smrg    /* OutReverse */
914b7e1c893Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
915b7e1c893Smrg    /* Atop */
916b7e1c893Smrg    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
917b7e1c893Smrg    /* AtopReverse */
918b7e1c893Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
919b7e1c893Smrg    /* Xor */
920b7e1c893Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
921b7e1c893Smrg    /* Add */
922b7e1c893Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
923b7e1c893Smrg};
924b7e1c893Smrg
925b7e1c893Smrgstruct formatinfo {
926b7e1c893Smrg    unsigned int fmt;
927b7e1c893Smrg    uint32_t card_fmt;
928b7e1c893Smrg};
929b7e1c893Smrg
930b7e1c893Smrgstatic struct formatinfo R600TexFormats[] = {
931b7e1c893Smrg    {PICT_a8r8g8b8,	FMT_8_8_8_8},
932b7e1c893Smrg    {PICT_x8r8g8b8,	FMT_8_8_8_8},
933b7e1c893Smrg    {PICT_a8b8g8r8,	FMT_8_8_8_8},
934b7e1c893Smrg    {PICT_x8b8g8r8,	FMT_8_8_8_8},
935ad43ddacSmrg#ifdef PICT_TYPE_BGRA
936ad43ddacSmrg    {PICT_b8g8r8a8,	FMT_8_8_8_8},
937ad43ddacSmrg    {PICT_b8g8r8x8,	FMT_8_8_8_8},
938ad43ddacSmrg#endif
939b7e1c893Smrg    {PICT_r5g6b5,	FMT_5_6_5},
940b7e1c893Smrg    {PICT_a1r5g5b5,	FMT_1_5_5_5},
941b7e1c893Smrg    {PICT_x1r5g5b5,     FMT_1_5_5_5},
942b7e1c893Smrg    {PICT_a8,		FMT_8},
943b7e1c893Smrg};
944b7e1c893Smrg
945b7e1c893Smrgstatic uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
946b7e1c893Smrg{
947b7e1c893Smrg    uint32_t sblend, dblend;
948b7e1c893Smrg
949b7e1c893Smrg    sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
950b7e1c893Smrg    dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
951b7e1c893Smrg
952b7e1c893Smrg    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
953b7e1c893Smrg     * it as always 1.
954b7e1c893Smrg     */
955b7e1c893Smrg    if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
956b7e1c893Smrg	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
957b7e1c893Smrg	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
958b7e1c893Smrg	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
959b7e1c893Smrg	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
960b7e1c893Smrg    }
961b7e1c893Smrg
962b7e1c893Smrg    /* If the source alpha is being used, then we should only be in a case where
963b7e1c893Smrg     * the source blend factor is 0, and the source blend value is the mask
964b7e1c893Smrg     * channels multiplied by the source picture's alpha.
965b7e1c893Smrg     */
966b7e1c893Smrg    if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
967b7e1c893Smrg	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
968b7e1c893Smrg	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
969b7e1c893Smrg	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
970b7e1c893Smrg	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
971b7e1c893Smrg	}
972b7e1c893Smrg    }
973b7e1c893Smrg
974b7e1c893Smrg    return sblend | dblend;
975b7e1c893Smrg}
976b7e1c893Smrg
977b7e1c893Smrgstatic Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
978b7e1c893Smrg{
979b7e1c893Smrg    switch (pDstPicture->format) {
980b7e1c893Smrg    case PICT_a8r8g8b8:
981b7e1c893Smrg    case PICT_x8r8g8b8:
982ad43ddacSmrg    case PICT_a8b8g8r8:
983ad43ddacSmrg    case PICT_x8b8g8r8:
984ad43ddacSmrg#ifdef PICT_TYPE_BGRA
985ad43ddacSmrg    case PICT_b8g8r8a8:
986ad43ddacSmrg    case PICT_b8g8r8x8:
987ad43ddacSmrg#endif
988b7e1c893Smrg	*dst_format = COLOR_8_8_8_8;
989b7e1c893Smrg	break;
990b7e1c893Smrg    case PICT_r5g6b5:
991b7e1c893Smrg	*dst_format = COLOR_5_6_5;
992b7e1c893Smrg	break;
993b7e1c893Smrg    case PICT_a1r5g5b5:
994b7e1c893Smrg    case PICT_x1r5g5b5:
995b7e1c893Smrg	*dst_format = COLOR_1_5_5_5;
996b7e1c893Smrg	break;
997b7e1c893Smrg    case PICT_a8:
998b7e1c893Smrg	*dst_format = COLOR_8;
999b7e1c893Smrg	break;
1000b7e1c893Smrg    default:
1001b7e1c893Smrg	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
1002b7e1c893Smrg	       (int)pDstPicture->format));
1003b7e1c893Smrg    }
1004b7e1c893Smrg    return TRUE;
1005b7e1c893Smrg}
1006b7e1c893Smrg
1007b7e1c893Smrgstatic Bool R600CheckCompositeTexture(PicturePtr pPict,
1008b7e1c893Smrg				      PicturePtr pDstPict,
1009b7e1c893Smrg				      int op,
1010b7e1c893Smrg				      int unit)
1011b7e1c893Smrg{
1012b7e1c893Smrg    int w = pPict->pDrawable->width;
1013b7e1c893Smrg    int h = pPict->pDrawable->height;
1014ad43ddacSmrg    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
1015b7e1c893Smrg    unsigned int i;
1016b7e1c893Smrg    int max_tex_w, max_tex_h;
1017b7e1c893Smrg
1018b7e1c893Smrg    max_tex_w = 8192;
1019b7e1c893Smrg    max_tex_h = 8192;
1020b7e1c893Smrg
1021b7e1c893Smrg    if ((w > max_tex_w) || (h > max_tex_h))
1022b7e1c893Smrg	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
1023b7e1c893Smrg
1024b7e1c893Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
1025b7e1c893Smrg	if (R600TexFormats[i].fmt == pPict->format)
1026b7e1c893Smrg	    break;
1027b7e1c893Smrg    }
1028b7e1c893Smrg    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
1029b7e1c893Smrg	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
1030b7e1c893Smrg			 (int)pPict->format));
1031b7e1c893Smrg
1032b7e1c893Smrg    if (pPict->filter != PictFilterNearest &&
1033b7e1c893Smrg	pPict->filter != PictFilterBilinear)
1034b7e1c893Smrg	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
1035b7e1c893Smrg
1036b7e1c893Smrg    /* for REPEAT_NONE, Render semantics are that sampling outside the source
1037b7e1c893Smrg     * picture results in alpha=0 pixels. We can implement this with a border color
1038b7e1c893Smrg     * *if* our source texture has an alpha channel, otherwise we need to fall
1039b7e1c893Smrg     * back. If we're not transformed then we hope that upper layers have clipped
1040b7e1c893Smrg     * rendering to the bounds of the source drawable, in which case it doesn't
1041b7e1c893Smrg     * matter. I have not, however, verified that the X server always does such
1042b7e1c893Smrg     * clipping.
1043b7e1c893Smrg     */
1044b7e1c893Smrg    /* FIXME R6xx */
1045ad43ddacSmrg    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
1046b7e1c893Smrg	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
1047b7e1c893Smrg	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
1048b7e1c893Smrg    }
1049b7e1c893Smrg
1050b7e1c893Smrg    return TRUE;
1051b7e1c893Smrg}
1052b7e1c893Smrg
1053b7e1c893Smrgstatic Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
1054b7e1c893Smrg					int unit)
1055b7e1c893Smrg{
1056b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
1057b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1058b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1059b7e1c893Smrg    int w = pPict->pDrawable->width;
1060b7e1c893Smrg    int h = pPict->pDrawable->height;
1061ad43ddacSmrg    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
1062b7e1c893Smrg    unsigned int i;
1063b7e1c893Smrg    tex_resource_t  tex_res;
1064b7e1c893Smrg    tex_sampler_t   tex_samp;
1065b7e1c893Smrg    int pix_r, pix_g, pix_b, pix_a;
1066ad43ddacSmrg    float vs_alu_consts[8];
1067b7e1c893Smrg
1068b7e1c893Smrg    CLEAR (tex_res);
1069b7e1c893Smrg    CLEAR (tex_samp);
1070b7e1c893Smrg
1071b7e1c893Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
1072b7e1c893Smrg	if (R600TexFormats[i].fmt == pPict->format)
1073b7e1c893Smrg	    break;
1074b7e1c893Smrg    }
1075b7e1c893Smrg
1076b7e1c893Smrg    /* flush texture cache */
1077b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
1078ad43ddacSmrg			accel_state->src_size[unit], accel_state->src_obj[unit].offset,
1079ad43ddacSmrg			accel_state->src_obj[unit].bo, accel_state->src_obj[unit].domain, 0);
1080b7e1c893Smrg
1081b7e1c893Smrg    /* Texture */
1082b7e1c893Smrg    tex_res.id                  = unit;
1083b7e1c893Smrg    tex_res.w                   = w;
1084b7e1c893Smrg    tex_res.h                   = h;
1085ad43ddacSmrg    tex_res.pitch               = accel_state->src_obj[unit].pitch;
1086b7e1c893Smrg    tex_res.depth               = 0;
1087b7e1c893Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
1088ad43ddacSmrg    tex_res.base                = accel_state->src_obj[unit].offset;
1089ad43ddacSmrg    tex_res.mip_base            = accel_state->src_obj[unit].offset;
1090b7e1c893Smrg    tex_res.format              = R600TexFormats[i].card_fmt;
1091ad43ddacSmrg    tex_res.bo                  = accel_state->src_obj[unit].bo;
1092ad43ddacSmrg    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
1093b7e1c893Smrg    tex_res.request_size        = 1;
1094b7e1c893Smrg
1095b7e1c893Smrg    /* component swizzles */
1096b7e1c893Smrg    switch (pPict->format) {
1097b7e1c893Smrg    case PICT_a1r5g5b5:
1098b7e1c893Smrg    case PICT_a8r8g8b8:
1099b7e1c893Smrg	pix_r = SQ_SEL_Z; /* R */
1100b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1101b7e1c893Smrg	pix_b = SQ_SEL_X; /* B */
1102b7e1c893Smrg	pix_a = SQ_SEL_W; /* A */
1103b7e1c893Smrg	break;
1104b7e1c893Smrg    case PICT_a8b8g8r8:
1105b7e1c893Smrg	pix_r = SQ_SEL_X; /* R */
1106b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1107b7e1c893Smrg	pix_b = SQ_SEL_Z; /* B */
1108b7e1c893Smrg	pix_a = SQ_SEL_W; /* A */
1109b7e1c893Smrg	break;
1110b7e1c893Smrg    case PICT_x8b8g8r8:
1111b7e1c893Smrg	pix_r = SQ_SEL_X; /* R */
1112b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1113b7e1c893Smrg	pix_b = SQ_SEL_Z; /* B */
1114b7e1c893Smrg	pix_a = SQ_SEL_1; /* A */
1115b7e1c893Smrg	break;
1116ad43ddacSmrg#ifdef PICT_TYPE_BGRA
1117ad43ddacSmrg    case PICT_b8g8r8a8:
1118ad43ddacSmrg	pix_r = SQ_SEL_Y; /* R */
1119ad43ddacSmrg	pix_g = SQ_SEL_Z; /* G */
1120ad43ddacSmrg	pix_b = SQ_SEL_W; /* B */
1121ad43ddacSmrg	pix_a = SQ_SEL_X; /* A */
1122ad43ddacSmrg	break;
1123ad43ddacSmrg    case PICT_b8g8r8x8:
1124ad43ddacSmrg	pix_r = SQ_SEL_Y; /* R */
1125ad43ddacSmrg	pix_g = SQ_SEL_Z; /* G */
1126ad43ddacSmrg	pix_b = SQ_SEL_W; /* B */
1127ad43ddacSmrg	pix_a = SQ_SEL_1; /* A */
1128ad43ddacSmrg	break;
1129ad43ddacSmrg#endif
1130b7e1c893Smrg    case PICT_x1r5g5b5:
1131b7e1c893Smrg    case PICT_x8r8g8b8:
1132b7e1c893Smrg    case PICT_r5g6b5:
1133b7e1c893Smrg	pix_r = SQ_SEL_Z; /* R */
1134b7e1c893Smrg	pix_g = SQ_SEL_Y; /* G */
1135b7e1c893Smrg	pix_b = SQ_SEL_X; /* B */
1136b7e1c893Smrg	pix_a = SQ_SEL_1; /* A */
1137b7e1c893Smrg	break;
1138b7e1c893Smrg    case PICT_a8:
1139b7e1c893Smrg	pix_r = SQ_SEL_0; /* R */
1140b7e1c893Smrg	pix_g = SQ_SEL_0; /* G */
1141b7e1c893Smrg	pix_b = SQ_SEL_0; /* B */
1142b7e1c893Smrg	pix_a = SQ_SEL_X; /* A */
1143b7e1c893Smrg	break;
1144b7e1c893Smrg    default:
1145b7e1c893Smrg	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
1146b7e1c893Smrg    }
1147b7e1c893Smrg
1148b7e1c893Smrg    if (unit == 0) {
1149ad43ddacSmrg	if (!accel_state->msk_pic) {
1150b7e1c893Smrg	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
1151b7e1c893Smrg		pix_r = SQ_SEL_0;
1152b7e1c893Smrg		pix_g = SQ_SEL_0;
1153b7e1c893Smrg		pix_b = SQ_SEL_0;
1154b7e1c893Smrg	    }
1155b7e1c893Smrg
1156b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1157b7e1c893Smrg		pix_a = SQ_SEL_1;
1158b7e1c893Smrg	} else {
1159b7e1c893Smrg	    if (accel_state->component_alpha) {
1160b7e1c893Smrg		if (accel_state->src_alpha) {
1161b7e1c893Smrg		    if (PICT_FORMAT_A(pPict->format) == 0) {
1162b7e1c893Smrg			pix_r = SQ_SEL_1;
1163b7e1c893Smrg			pix_g = SQ_SEL_1;
1164b7e1c893Smrg			pix_b = SQ_SEL_1;
1165b7e1c893Smrg			pix_a = SQ_SEL_1;
1166b7e1c893Smrg		    } else {
1167b7e1c893Smrg			pix_r = pix_a;
1168b7e1c893Smrg			pix_g = pix_a;
1169b7e1c893Smrg			pix_b = pix_a;
1170b7e1c893Smrg		    }
1171b7e1c893Smrg		} else {
1172b7e1c893Smrg		    if (PICT_FORMAT_A(pPict->format) == 0)
1173b7e1c893Smrg			pix_a = SQ_SEL_1;
1174b7e1c893Smrg		}
1175b7e1c893Smrg	    } else {
1176b7e1c893Smrg		if (PICT_FORMAT_RGB(pPict->format) == 0) {
1177b7e1c893Smrg		    pix_r = SQ_SEL_0;
1178b7e1c893Smrg		    pix_g = SQ_SEL_0;
1179b7e1c893Smrg		    pix_b = SQ_SEL_0;
1180b7e1c893Smrg		}
1181b7e1c893Smrg
1182b7e1c893Smrg		if (PICT_FORMAT_A(pPict->format) == 0)
1183b7e1c893Smrg		    pix_a = SQ_SEL_1;
1184b7e1c893Smrg	    }
1185b7e1c893Smrg	}
1186b7e1c893Smrg    } else {
1187b7e1c893Smrg	if (accel_state->component_alpha) {
1188b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1189b7e1c893Smrg		pix_a = SQ_SEL_1;
1190b7e1c893Smrg	} else {
1191b7e1c893Smrg	    if (PICT_FORMAT_A(pPict->format) == 0) {
1192b7e1c893Smrg		pix_r = SQ_SEL_1;
1193b7e1c893Smrg		pix_g = SQ_SEL_1;
1194b7e1c893Smrg		pix_b = SQ_SEL_1;
1195b7e1c893Smrg		pix_a = SQ_SEL_1;
1196b7e1c893Smrg	    } else {
1197b7e1c893Smrg		pix_r = pix_a;
1198b7e1c893Smrg		pix_g = pix_a;
1199b7e1c893Smrg		pix_b = pix_a;
1200b7e1c893Smrg	    }
1201b7e1c893Smrg	}
1202b7e1c893Smrg    }
1203b7e1c893Smrg
1204b7e1c893Smrg    tex_res.dst_sel_x           = pix_r; /* R */
1205b7e1c893Smrg    tex_res.dst_sel_y           = pix_g; /* G */
1206b7e1c893Smrg    tex_res.dst_sel_z           = pix_b; /* B */
1207b7e1c893Smrg    tex_res.dst_sel_w           = pix_a; /* A */
1208b7e1c893Smrg
1209b7e1c893Smrg    tex_res.base_level          = 0;
1210b7e1c893Smrg    tex_res.last_level          = 0;
1211b7e1c893Smrg    tex_res.perf_modulation     = 0;
1212ad43ddacSmrg    set_tex_resource            (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
1213b7e1c893Smrg
1214b7e1c893Smrg    tex_samp.id                 = unit;
1215b7e1c893Smrg    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1216b7e1c893Smrg
1217ad43ddacSmrg    switch (repeatType) {
1218ad43ddacSmrg    case RepeatNormal:
1219ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_WRAP;
1220ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_WRAP;
1221ad43ddacSmrg	break;
1222ad43ddacSmrg    case RepeatPad:
1223ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1224ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1225ad43ddacSmrg	break;
1226ad43ddacSmrg    case RepeatReflect:
1227ad43ddacSmrg	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1228ad43ddacSmrg	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1229ad43ddacSmrg	break;
1230ad43ddacSmrg    case RepeatNone:
1231b7e1c893Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1232b7e1c893Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1233ad43ddacSmrg	break;
1234ad43ddacSmrg    default:
1235ad43ddacSmrg	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1236b7e1c893Smrg    }
1237b7e1c893Smrg
1238b7e1c893Smrg    switch (pPict->filter) {
1239b7e1c893Smrg    case PictFilterNearest:
1240b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1241b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1242b7e1c893Smrg	break;
1243b7e1c893Smrg    case PictFilterBilinear:
1244b7e1c893Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1245b7e1c893Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1246b7e1c893Smrg	break;
1247b7e1c893Smrg    default:
1248b7e1c893Smrg	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1249b7e1c893Smrg    }
1250b7e1c893Smrg
1251b7e1c893Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
1252b7e1c893Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1253b7e1c893Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
1254b7e1c893Smrg    set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
1255b7e1c893Smrg
1256b7e1c893Smrg    if (pPict->transform != 0) {
1257b7e1c893Smrg	accel_state->is_transform[unit] = TRUE;
1258b7e1c893Smrg	accel_state->transform[unit] = pPict->transform;
1259ad43ddacSmrg
1260ad43ddacSmrg	vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
1261ad43ddacSmrg	vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
1262ad43ddacSmrg	vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
1263ad43ddacSmrg	vs_alu_consts[3] = 1.0 / w;
1264ad43ddacSmrg
1265ad43ddacSmrg	vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
1266ad43ddacSmrg	vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
1267ad43ddacSmrg	vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
1268ad43ddacSmrg	vs_alu_consts[7] = 1.0 / h;
1269ad43ddacSmrg    } else {
1270b7e1c893Smrg	accel_state->is_transform[unit] = FALSE;
1271b7e1c893Smrg
1272ad43ddacSmrg	vs_alu_consts[0] = 1.0;
1273ad43ddacSmrg	vs_alu_consts[1] = 0.0;
1274ad43ddacSmrg	vs_alu_consts[2] = 0.0;
1275ad43ddacSmrg	vs_alu_consts[3] = 1.0 / w;
1276ad43ddacSmrg
1277ad43ddacSmrg	vs_alu_consts[4] = 0.0;
1278ad43ddacSmrg	vs_alu_consts[5] = 1.0;
1279ad43ddacSmrg	vs_alu_consts[6] = 0.0;
1280ad43ddacSmrg	vs_alu_consts[7] = 1.0 / h;
1281ad43ddacSmrg    }
1282ad43ddacSmrg
1283ad43ddacSmrg    /* VS alu constants */
1284ad43ddacSmrg    set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
1285ad43ddacSmrg		   sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
1286ad43ddacSmrg
1287b7e1c893Smrg    return TRUE;
1288b7e1c893Smrg}
1289b7e1c893Smrg
1290b7e1c893Smrgstatic Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1291b7e1c893Smrg			       PicturePtr pDstPicture)
1292b7e1c893Smrg{
1293b7e1c893Smrg    uint32_t tmp1;
1294b7e1c893Smrg    PixmapPtr pSrcPixmap, pDstPixmap;
1295b7e1c893Smrg    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1296b7e1c893Smrg
1297b7e1c893Smrg    /* Check for unsupported compositing operations. */
1298b7e1c893Smrg    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1299b7e1c893Smrg	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1300b7e1c893Smrg
1301ad43ddacSmrg    if (!pSrcPicture->pDrawable)
1302ad43ddacSmrg	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1303ad43ddacSmrg
1304b7e1c893Smrg    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1305b7e1c893Smrg
1306b7e1c893Smrg    max_tex_w = 8192;
1307b7e1c893Smrg    max_tex_h = 8192;
1308b7e1c893Smrg    max_dst_w = 8192;
1309b7e1c893Smrg    max_dst_h = 8192;
1310b7e1c893Smrg
1311b7e1c893Smrg    if (pSrcPixmap->drawable.width >= max_tex_w ||
1312b7e1c893Smrg	pSrcPixmap->drawable.height >= max_tex_h) {
1313b7e1c893Smrg	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1314b7e1c893Smrg			 pSrcPixmap->drawable.width,
1315b7e1c893Smrg			 pSrcPixmap->drawable.height));
1316b7e1c893Smrg    }
1317b7e1c893Smrg
1318b7e1c893Smrg    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1319b7e1c893Smrg
1320b7e1c893Smrg    if (pDstPixmap->drawable.width >= max_dst_w ||
1321b7e1c893Smrg	pDstPixmap->drawable.height >= max_dst_h) {
1322b7e1c893Smrg	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1323b7e1c893Smrg			 pDstPixmap->drawable.width,
1324b7e1c893Smrg			 pDstPixmap->drawable.height));
1325b7e1c893Smrg    }
1326b7e1c893Smrg
1327b7e1c893Smrg    if (pMaskPicture) {
1328ad43ddacSmrg	PixmapPtr pMaskPixmap;
1329ad43ddacSmrg
1330ad43ddacSmrg	if (!pMaskPicture->pDrawable)
1331ad43ddacSmrg	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1332ad43ddacSmrg
1333ad43ddacSmrg	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1334b7e1c893Smrg
1335b7e1c893Smrg	if (pMaskPixmap->drawable.width >= max_tex_w ||
1336b7e1c893Smrg	    pMaskPixmap->drawable.height >= max_tex_h) {
1337b7e1c893Smrg	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1338b7e1c893Smrg			     pMaskPixmap->drawable.width,
1339b7e1c893Smrg			     pMaskPixmap->drawable.height));
1340b7e1c893Smrg	}
1341b7e1c893Smrg
1342b7e1c893Smrg	if (pMaskPicture->componentAlpha) {
1343b7e1c893Smrg	    /* Check if it's component alpha that relies on a source alpha and
1344b7e1c893Smrg	     * on the source value.  We can only get one of those into the
1345b7e1c893Smrg	     * single source value that we get to blend with.
1346b7e1c893Smrg	     */
1347b7e1c893Smrg	    if (R600BlendOp[op].src_alpha &&
1348b7e1c893Smrg		(R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1349b7e1c893Smrg		(BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1350b7e1c893Smrg		RADEON_FALLBACK(("Component alpha not supported with source "
1351b7e1c893Smrg				 "alpha and source value blending.\n"));
1352b7e1c893Smrg	    }
1353b7e1c893Smrg	}
1354b7e1c893Smrg
1355b7e1c893Smrg	if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1356b7e1c893Smrg	    return FALSE;
1357b7e1c893Smrg    }
1358b7e1c893Smrg
1359b7e1c893Smrg    if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1360b7e1c893Smrg	return FALSE;
1361b7e1c893Smrg
1362b7e1c893Smrg    if (!R600GetDestFormat(pDstPicture, &tmp1))
1363b7e1c893Smrg	return FALSE;
1364b7e1c893Smrg
1365b7e1c893Smrg    return TRUE;
1366b7e1c893Smrg
1367b7e1c893Smrg}
1368b7e1c893Smrg
1369b7e1c893Smrgstatic Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1370b7e1c893Smrg				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1371b7e1c893Smrg				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1372b7e1c893Smrg{
1373b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1374b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1375b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1376b7e1c893Smrg    uint32_t blendcntl, dst_format;
1377b7e1c893Smrg    cb_config_t cb_conf;
1378b7e1c893Smrg    shader_config_t vs_conf, ps_conf;
1379ad43ddacSmrg    struct r600_accel_object src_obj, mask_obj, dst_obj;
1380b7e1c893Smrg
1381ad43ddacSmrg    if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8)
1382ad43ddacSmrg	return FALSE;
1383ad43ddacSmrg
1384ad43ddacSmrg#if defined(XF86DRM_MODE)
1385ad43ddacSmrg    if (info->cs) {
1386ad43ddacSmrg	src_obj.offset = 0;
1387ad43ddacSmrg	dst_obj.offset = 0;
1388ad43ddacSmrg	src_obj.bo = radeon_get_pixmap_bo(pSrc);
1389ad43ddacSmrg	dst_obj.bo = radeon_get_pixmap_bo(pDst);
1390ad43ddacSmrg    } else
1391ad43ddacSmrg#endif
1392ad43ddacSmrg    {
1393ad43ddacSmrg	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1394ad43ddacSmrg	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1395ad43ddacSmrg	src_obj.bo = NULL;
1396ad43ddacSmrg	dst_obj.bo = NULL;
1397ad43ddacSmrg    }
1398ad43ddacSmrg    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1399ad43ddacSmrg    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1400ad43ddacSmrg
1401ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
1402ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
1403ad43ddacSmrg    src_obj.bpp = pSrc->drawable.bitsPerPixel;
1404ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1405ad43ddacSmrg
1406ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
1407ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
1408ad43ddacSmrg    dst_obj.bpp = pDst->drawable.bitsPerPixel;
1409ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1410b7e1c893Smrg
1411b7e1c893Smrg    if (pMask) {
1412ad43ddacSmrg#if defined(XF86DRM_MODE)
1413ad43ddacSmrg	if (info->cs) {
1414ad43ddacSmrg	    mask_obj.offset = 0;
1415ad43ddacSmrg	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
1416ad43ddacSmrg	} else
1417ad43ddacSmrg#endif
1418ad43ddacSmrg	{
1419ad43ddacSmrg	    mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset;
1420ad43ddacSmrg	    mask_obj.bo = NULL;
1421ad43ddacSmrg	}
1422ad43ddacSmrg	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1423ad43ddacSmrg
1424ad43ddacSmrg	mask_obj.width = pMask->drawable.width;
1425ad43ddacSmrg	mask_obj.height = pMask->drawable.height;
1426ad43ddacSmrg	mask_obj.bpp = pMask->drawable.bitsPerPixel;
1427ad43ddacSmrg	mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1428ad43ddacSmrg
1429ad43ddacSmrg	if (!R600SetAccelState(pScrn,
1430ad43ddacSmrg			       &src_obj,
1431ad43ddacSmrg			       &mask_obj,
1432ad43ddacSmrg			       &dst_obj,
1433ad43ddacSmrg			       accel_state->comp_vs_offset, accel_state->comp_mask_ps_offset,
1434ad43ddacSmrg			       3, 0xffffffff))
1435ad43ddacSmrg	    return FALSE;
1436ad43ddacSmrg
1437ad43ddacSmrg	accel_state->msk_pic = pMaskPicture;
1438b7e1c893Smrg	if (pMaskPicture->componentAlpha) {
1439b7e1c893Smrg	    accel_state->component_alpha = TRUE;
1440b7e1c893Smrg	    if (R600BlendOp[op].src_alpha)
1441b7e1c893Smrg		accel_state->src_alpha = TRUE;
1442b7e1c893Smrg	    else
1443b7e1c893Smrg		accel_state->src_alpha = FALSE;
1444b7e1c893Smrg	} else {
1445b7e1c893Smrg	    accel_state->component_alpha = FALSE;
1446b7e1c893Smrg	    accel_state->src_alpha = FALSE;
1447b7e1c893Smrg	}
1448b7e1c893Smrg    } else {
1449ad43ddacSmrg	if (!R600SetAccelState(pScrn,
1450ad43ddacSmrg			       &src_obj,
1451ad43ddacSmrg			       NULL,
1452ad43ddacSmrg			       &dst_obj,
1453ad43ddacSmrg			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1454ad43ddacSmrg			       3, 0xffffffff))
1455ad43ddacSmrg	    return FALSE;
1456ad43ddacSmrg
1457ad43ddacSmrg	accel_state->msk_pic = NULL;
1458b7e1c893Smrg	accel_state->component_alpha = FALSE;
1459b7e1c893Smrg	accel_state->src_alpha = FALSE;
1460b7e1c893Smrg    }
1461b7e1c893Smrg
1462b7e1c893Smrg    if (!R600GetDestFormat(pDstPicture, &dst_format))
1463b7e1c893Smrg	return FALSE;
1464b7e1c893Smrg
1465b7e1c893Smrg    CLEAR (cb_conf);
1466b7e1c893Smrg    CLEAR (vs_conf);
1467b7e1c893Smrg    CLEAR (ps_conf);
1468b7e1c893Smrg
1469ad43ddacSmrg    if (pMask)
1470ad43ddacSmrg        radeon_vbo_check(pScrn, 24);
1471ad43ddacSmrg    else
1472ad43ddacSmrg        radeon_vbo_check(pScrn, 16);
1473b7e1c893Smrg
1474ad43ddacSmrg    r600_cp_start(pScrn);
1475b7e1c893Smrg
1476b7e1c893Smrg    set_default_state(pScrn, accel_state->ib);
1477b7e1c893Smrg
1478ad43ddacSmrg    set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1479ad43ddacSmrg    set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1480ad43ddacSmrg    set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1481b7e1c893Smrg
1482b7e1c893Smrg    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
1483ad43ddacSmrg        R600IBDiscard(pScrn, accel_state->ib);
1484ad43ddacSmrg        r600_vb_discard(pScrn);
1485ad43ddacSmrg        return FALSE;
1486b7e1c893Smrg    }
1487b7e1c893Smrg
1488b7e1c893Smrg    if (pMask) {
1489ad43ddacSmrg        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
1490ad43ddacSmrg            R600IBDiscard(pScrn, accel_state->ib);
1491ad43ddacSmrg            r600_vb_discard(pScrn);
1492ad43ddacSmrg            return FALSE;
1493ad43ddacSmrg        }
1494b7e1c893Smrg    } else
1495ad43ddacSmrg        accel_state->is_transform[1] = FALSE;
1496b7e1c893Smrg
1497ad43ddacSmrg    if (pMask)
1498b7e1c893Smrg	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
1499ad43ddacSmrg    else
1500b7e1c893Smrg	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
1501b7e1c893Smrg
1502b7e1c893Smrg    /* Shader */
1503b7e1c893Smrg
1504b7e1c893Smrg    /* flush SQ cache */
1505b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
1506ad43ddacSmrg			accel_state->vs_size, accel_state->vs_mc_addr,
1507ad43ddacSmrg			accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1508b7e1c893Smrg
1509b7e1c893Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
1510b7e1c893Smrg    vs_conf.num_gprs            = 3;
1511b7e1c893Smrg    vs_conf.stack_size          = 1;
1512ad43ddacSmrg    vs_conf.bo                  = accel_state->shaders_bo;
1513ad43ddacSmrg    vs_setup                    (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1514b7e1c893Smrg
1515b7e1c893Smrg    /* flush SQ cache */
1516b7e1c893Smrg    cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit,
1517ad43ddacSmrg			accel_state->ps_size, accel_state->ps_mc_addr,
1518ad43ddacSmrg			accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
1519b7e1c893Smrg
1520b7e1c893Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
1521b7e1c893Smrg    ps_conf.num_gprs            = 3;
1522b7e1c893Smrg    ps_conf.stack_size          = 0;
1523b7e1c893Smrg    ps_conf.uncached_first_inst = 1;
1524b7e1c893Smrg    ps_conf.clamp_consts        = 0;
1525b7e1c893Smrg    ps_conf.export_mode         = 2;
1526ad43ddacSmrg    ps_conf.bo                  = accel_state->shaders_bo;
1527ad43ddacSmrg    ps_setup                    (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1528b7e1c893Smrg
1529ad43ddacSmrg    BEGIN_BATCH(9);
1530ad43ddacSmrg    EREG(accel_state->ib, CB_TARGET_MASK,                      (0xf << TARGET0_ENABLE_shift));
1531b7e1c893Smrg
1532b7e1c893Smrg    blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1533b7e1c893Smrg
1534b7e1c893Smrg    if (info->ChipFamily == CHIP_FAMILY_R600) {
1535b7e1c893Smrg	/* no per-MRT blend on R600 */
1536b7e1c893Smrg	EREG(accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift));
1537b7e1c893Smrg	EREG(accel_state->ib, CB_BLEND_CONTROL,                    blendcntl);
1538b7e1c893Smrg    } else {
1539b7e1c893Smrg	EREG(accel_state->ib, CB_COLOR_CONTROL,                    (RADEON_ROP[3] |
1540b7e1c893Smrg								    (1 << TARGET_BLEND_ENABLE_shift) |
1541b7e1c893Smrg								    PER_MRT_BLEND_bit));
1542b7e1c893Smrg	EREG(accel_state->ib, CB_BLEND0_CONTROL,                   blendcntl);
1543b7e1c893Smrg    }
1544ad43ddacSmrg    END_BATCH();
1545b7e1c893Smrg
1546b7e1c893Smrg    cb_conf.id = 0;
1547ad43ddacSmrg    cb_conf.w = accel_state->dst_obj.pitch;
1548ad43ddacSmrg    cb_conf.h = accel_state->dst_obj.height;
1549ad43ddacSmrg    cb_conf.base = accel_state->dst_obj.offset;
1550b7e1c893Smrg    cb_conf.format = dst_format;
1551ad43ddacSmrg    cb_conf.bo = accel_state->dst_obj.bo;
1552b7e1c893Smrg
1553b7e1c893Smrg    switch (pDstPicture->format) {
1554b7e1c893Smrg    case PICT_a8r8g8b8:
1555b7e1c893Smrg    case PICT_x8r8g8b8:
1556b7e1c893Smrg    case PICT_a1r5g5b5:
1557b7e1c893Smrg    case PICT_x1r5g5b5:
1558b7e1c893Smrg    default:
1559b7e1c893Smrg	cb_conf.comp_swap = 1; /* ARGB */
1560b7e1c893Smrg	break;
1561ad43ddacSmrg    case PICT_a8b8g8r8:
1562ad43ddacSmrg    case PICT_x8b8g8r8:
1563ad43ddacSmrg	cb_conf.comp_swap = 0; /* ABGR */
1564ad43ddacSmrg	break;
1565ad43ddacSmrg#ifdef PICT_TYPE_BGRA
1566ad43ddacSmrg    case PICT_b8g8r8a8:
1567ad43ddacSmrg    case PICT_b8g8r8x8:
1568ad43ddacSmrg	cb_conf.comp_swap = 3; /* BGRA */
1569ad43ddacSmrg	break;
1570ad43ddacSmrg#endif
1571b7e1c893Smrg    case PICT_r5g6b5:
1572b7e1c893Smrg	cb_conf.comp_swap = 2; /* RGB */
1573b7e1c893Smrg	break;
1574b7e1c893Smrg    case PICT_a8:
1575b7e1c893Smrg	cb_conf.comp_swap = 3; /* A */
1576b7e1c893Smrg	break;
1577b7e1c893Smrg    }
1578b7e1c893Smrg    cb_conf.source_format = 1;
1579b7e1c893Smrg    cb_conf.blend_clamp = 1;
1580ad43ddacSmrg    set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
1581b7e1c893Smrg
1582b7e1c893Smrg    /* Interpolator setup */
1583ad43ddacSmrg    BEGIN_BATCH(21);
1584b7e1c893Smrg    if (pMask) {
1585b7e1c893Smrg	/* export 2 tex coords from VS */
1586b7e1c893Smrg	EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift));
1587b7e1c893Smrg	/* src = semantic id 0; mask = semantic id 1 */
1588b7e1c893Smrg	EREG(accel_state->ib, SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1589b7e1c893Smrg						  (1 << SEMANTIC_1_shift)));
1590b7e1c893Smrg	/* input 2 tex coords from VS */
1591b7e1c893Smrg	EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (2 << NUM_INTERP_shift));
1592b7e1c893Smrg    } else {
1593b7e1c893Smrg	/* export 1 tex coords from VS */
1594b7e1c893Smrg	EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
1595b7e1c893Smrg	/* src = semantic id 0 */
1596b7e1c893Smrg	EREG(accel_state->ib, SPI_VS_OUT_ID_0,   (0 << SEMANTIC_0_shift));
1597b7e1c893Smrg	/* input 1 tex coords from VS */
1598b7e1c893Smrg	EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift));
1599b7e1c893Smrg    }
1600b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
1601b7e1c893Smrg    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1602b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)	|
1603b7e1c893Smrg								(0x01 << DEFAULT_VAL_shift)	|
1604b7e1c893Smrg								SEL_CENTROID_bit));
1605b7e1c893Smrg    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1606b7e1c893Smrg    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (1 <<2),       ((1    << SEMANTIC_shift)	|
1607b7e1c893Smrg								(0x01 << DEFAULT_VAL_shift)	|
1608b7e1c893Smrg								SEL_CENTROID_bit));
1609b7e1c893Smrg    EREG(accel_state->ib, SPI_INTERP_CONTROL_0,                0);
1610ad43ddacSmrg    END_BATCH();
1611b7e1c893Smrg
1612ad43ddacSmrg    if (accel_state->vsync)
1613ad43ddacSmrg	R600VlineHelperClear(pScrn);
1614b7e1c893Smrg
1615b7e1c893Smrg    return TRUE;
1616b7e1c893Smrg}
1617b7e1c893Smrg
1618b7e1c893Smrgstatic void R600Composite(PixmapPtr pDst,
1619b7e1c893Smrg			  int srcX, int srcY,
1620b7e1c893Smrg			  int maskX, int maskY,
1621b7e1c893Smrg			  int dstX, int dstY,
1622b7e1c893Smrg			  int w, int h)
1623b7e1c893Smrg{
1624b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1625b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1626b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1627b7e1c893Smrg    float *vb;
1628b7e1c893Smrg
1629b7e1c893Smrg    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1630b7e1c893Smrg       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1631b7e1c893Smrg
1632ad43ddacSmrg    if (accel_state->vsync)
1633ad43ddacSmrg	R600VlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1634b7e1c893Smrg
1635ad43ddacSmrg    if (accel_state->msk_pic) {
1636b7e1c893Smrg
1637ad43ddacSmrg	vb = radeon_vbo_space(pScrn, 24);
1638b7e1c893Smrg
1639b7e1c893Smrg	vb[0] = (float)dstX;
1640b7e1c893Smrg	vb[1] = (float)dstY;
1641ad43ddacSmrg	vb[2] = (float)srcX;
1642ad43ddacSmrg	vb[3] = (float)srcY;
1643ad43ddacSmrg	vb[4] = (float)maskX;
1644ad43ddacSmrg	vb[5] = (float)maskY;
1645b7e1c893Smrg
1646b7e1c893Smrg	vb[6] = (float)dstX;
1647b7e1c893Smrg	vb[7] = (float)(dstY + h);
1648ad43ddacSmrg	vb[8] = (float)srcX;
1649ad43ddacSmrg	vb[9] = (float)(srcY + h);
1650ad43ddacSmrg	vb[10] = (float)maskX;
1651ad43ddacSmrg	vb[11] = (float)(maskY + h);
1652b7e1c893Smrg
1653b7e1c893Smrg	vb[12] = (float)(dstX + w);
1654b7e1c893Smrg	vb[13] = (float)(dstY + h);
1655ad43ddacSmrg	vb[14] = (float)(srcX + w);
1656ad43ddacSmrg	vb[15] = (float)(srcY + h);
1657ad43ddacSmrg	vb[16] = (float)(maskX + w);
1658ad43ddacSmrg	vb[17] = (float)(maskY + h);
1659ad43ddacSmrg
1660ad43ddacSmrg	radeon_vbo_commit(pScrn);
1661b7e1c893Smrg
1662b7e1c893Smrg    } else {
1663b7e1c893Smrg
1664ad43ddacSmrg	vb = radeon_vbo_space(pScrn, 16);
1665b7e1c893Smrg
1666b7e1c893Smrg	vb[0] = (float)dstX;
1667b7e1c893Smrg	vb[1] = (float)dstY;
1668ad43ddacSmrg	vb[2] = (float)srcX;
1669ad43ddacSmrg	vb[3] = (float)srcY;
1670b7e1c893Smrg
1671b7e1c893Smrg	vb[4] = (float)dstX;
1672b7e1c893Smrg	vb[5] = (float)(dstY + h);
1673ad43ddacSmrg	vb[6] = (float)srcX;
1674ad43ddacSmrg	vb[7] = (float)(srcY + h);
1675b7e1c893Smrg
1676b7e1c893Smrg	vb[8] = (float)(dstX + w);
1677b7e1c893Smrg	vb[9] = (float)(dstY + h);
1678ad43ddacSmrg	vb[10] = (float)(srcX + w);
1679ad43ddacSmrg	vb[11] = (float)(srcY + h);
1680ad43ddacSmrg
1681ad43ddacSmrg	radeon_vbo_commit(pScrn);
1682b7e1c893Smrg    }
1683b7e1c893Smrg
1684b7e1c893Smrg
1685b7e1c893Smrg}
1686b7e1c893Smrg
1687b7e1c893Smrgstatic void R600DoneComposite(PixmapPtr pDst)
1688b7e1c893Smrg{
1689b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1690b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1691b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1692ad43ddacSmrg    int vtx_size;
1693b7e1c893Smrg
1694ad43ddacSmrg    if (accel_state->vsync)
1695ad43ddacSmrg	cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
1696ad43ddacSmrg			   accel_state->vline_crtc,
1697ad43ddacSmrg			   accel_state->vline_y1,
1698ad43ddacSmrg			   accel_state->vline_y2);
1699b7e1c893Smrg
1700ad43ddacSmrg    vtx_size = accel_state->msk_pic ? 24 : 16;
1701b7e1c893Smrg
1702ad43ddacSmrg    r600_finish_op(pScrn, vtx_size);
1703b7e1c893Smrg}
1704b7e1c893Smrg
1705b7e1c893SmrgBool
1706b7e1c893SmrgR600CopyToVRAM(ScrnInfoPtr pScrn,
1707b7e1c893Smrg	       char *src, int src_pitch,
1708ad43ddacSmrg	       uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_width, uint32_t dst_height, int bpp,
1709b7e1c893Smrg	       int x, int y, int w, int h)
1710b7e1c893Smrg{
1711b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1712ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1713b7e1c893Smrg    uint32_t scratch_mc_addr;
1714b7e1c893Smrg    int wpass = w * (bpp/8);
1715ad43ddacSmrg    int scratch_pitch_bytes = RADEON_ALIGN(wpass, 256);
1716b7e1c893Smrg    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1717b7e1c893Smrg    int scratch_offset = 0, hpass, temph;
1718b7e1c893Smrg    char *dst;
1719b7e1c893Smrg    drmBufPtr scratch;
1720ad43ddacSmrg    struct r600_accel_object scratch_obj, dst_obj;
1721b7e1c893Smrg
1722b7e1c893Smrg    if (dst_pitch & 7)
1723b7e1c893Smrg	return FALSE;
1724b7e1c893Smrg
1725b7e1c893Smrg    if (dst_mc_addr & 0xff)
1726b7e1c893Smrg	return FALSE;
1727b7e1c893Smrg
1728b7e1c893Smrg    scratch = RADEONCPGetBuffer(pScrn);
1729b7e1c893Smrg    if (scratch == NULL)
1730b7e1c893Smrg	return FALSE;
1731b7e1c893Smrg
1732b7e1c893Smrg    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1733b7e1c893Smrg    temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1734b7e1c893Smrg    dst = (char *)scratch->address;
1735b7e1c893Smrg
1736ad43ddacSmrg    scratch_obj.pitch = scratch_pitch;
1737ad43ddacSmrg    scratch_obj.width = w;
1738ad43ddacSmrg    scratch_obj.height = hpass;
1739ad43ddacSmrg    scratch_obj.offset = scratch_mc_addr;
1740ad43ddacSmrg    scratch_obj.bpp = bpp;
1741ad43ddacSmrg    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1742ad43ddacSmrg    scratch_obj.bo = NULL;
1743ad43ddacSmrg
1744ad43ddacSmrg    dst_obj.pitch = dst_pitch;
1745ad43ddacSmrg    dst_obj.width = dst_width;
1746ad43ddacSmrg    dst_obj.height = dst_height;
1747ad43ddacSmrg    dst_obj.offset = dst_mc_addr;
1748ad43ddacSmrg    dst_obj.bo = NULL;
1749ad43ddacSmrg    dst_obj.bpp = bpp;
1750ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1751ad43ddacSmrg
1752ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1753ad43ddacSmrg			   &scratch_obj,
1754ad43ddacSmrg			   NULL,
1755ad43ddacSmrg			   &dst_obj,
1756ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1757ad43ddacSmrg			   3, 0xffffffff))
1758ad43ddacSmrg	return FALSE;
1759ad43ddacSmrg
1760b7e1c893Smrg    /* memcopy from sys to scratch */
1761b7e1c893Smrg    while (temph--) {
1762b7e1c893Smrg	memcpy (dst, src, wpass);
1763b7e1c893Smrg	src += src_pitch;
1764b7e1c893Smrg	dst += scratch_pitch_bytes;
1765b7e1c893Smrg    }
1766b7e1c893Smrg
1767b7e1c893Smrg    while (h) {
1768b7e1c893Smrg	uint32_t offset = scratch_mc_addr + scratch_offset;
1769b7e1c893Smrg	int oldhpass = hpass;
1770b7e1c893Smrg	h -= oldhpass;
1771b7e1c893Smrg	temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1772b7e1c893Smrg
1773b7e1c893Smrg	if (hpass) {
1774b7e1c893Smrg	    scratch_offset = scratch->total/2 - scratch_offset;
1775b7e1c893Smrg	    dst = (char *)scratch->address + scratch_offset;
1776b7e1c893Smrg	    /* wait for the engine to be idle */
1777b7e1c893Smrg	    RADEONWaitForIdleCP(pScrn);
1778b7e1c893Smrg	    //memcopy from sys to scratch
1779b7e1c893Smrg	    while (temph--) {
1780b7e1c893Smrg		memcpy (dst, src, wpass);
1781b7e1c893Smrg		src += src_pitch;
1782b7e1c893Smrg		dst += scratch_pitch_bytes;
1783b7e1c893Smrg	    }
1784b7e1c893Smrg	}
1785b7e1c893Smrg	/* blit from scratch to vram */
1786ad43ddacSmrg	info->accel_state->src_obj[0].height = oldhpass;
1787ad43ddacSmrg	info->accel_state->src_obj[0].offset = offset;
1788ad43ddacSmrg	R600DoPrepareCopy(pScrn);
1789b7e1c893Smrg	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
1790b7e1c893Smrg	R600DoCopy(pScrn);
1791b7e1c893Smrg	y += oldhpass;
1792b7e1c893Smrg    }
1793b7e1c893Smrg
1794b7e1c893Smrg    R600IBDiscard(pScrn, scratch);
1795ad43ddacSmrg    r600_vb_discard(pScrn);
1796b7e1c893Smrg
1797b7e1c893Smrg    return TRUE;
1798b7e1c893Smrg}
1799b7e1c893Smrg
1800b7e1c893Smrgstatic Bool
1801b7e1c893SmrgR600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1802b7e1c893Smrg		   char *src, int src_pitch)
1803b7e1c893Smrg{
1804b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1805b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1806b7e1c893Smrg    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1807b7e1c893Smrg    uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1808b7e1c893Smrg    int bpp = pDst->drawable.bitsPerPixel;
1809b7e1c893Smrg
1810b7e1c893Smrg    return R600CopyToVRAM(pScrn,
1811b7e1c893Smrg			  src, src_pitch,
1812ad43ddacSmrg			  dst_pitch, dst_mc_addr, pDst->drawable.width, pDst->drawable.height, bpp,
1813b7e1c893Smrg			  x, y, w, h);
1814b7e1c893Smrg}
1815b7e1c893Smrg
1816b7e1c893Smrgstatic Bool
1817b7e1c893SmrgR600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
1818b7e1c893Smrg		       char *dst, int dst_pitch)
1819b7e1c893Smrg{
1820b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1821b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1822ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1823b7e1c893Smrg    uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1824b7e1c893Smrg    uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1825b7e1c893Smrg    uint32_t src_width = pSrc->drawable.width;
1826b7e1c893Smrg    uint32_t src_height = pSrc->drawable.height;
1827b7e1c893Smrg    int bpp = pSrc->drawable.bitsPerPixel;
1828b7e1c893Smrg    uint32_t scratch_mc_addr;
1829ad43ddacSmrg    int scratch_pitch_bytes = RADEON_ALIGN(dst_pitch, 256);
1830b7e1c893Smrg    int scratch_offset = 0, hpass;
1831b7e1c893Smrg    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1832b7e1c893Smrg    int wpass = w * (bpp/8);
1833b7e1c893Smrg    drmBufPtr scratch;
1834ad43ddacSmrg    struct r600_accel_object scratch_obj, src_obj;
1835b7e1c893Smrg
1836ad43ddacSmrg    /* bad pipe setup in drm prior to 1.32 */
1837ad43ddacSmrg    if (info->dri->pKernelDRMVersion->version_minor < 32) {
1838ad43ddacSmrg	    if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32))
1839ad43ddacSmrg		    return FALSE;
1840ad43ddacSmrg    }
1841c503f109Smrg
1842b7e1c893Smrg    if (src_pitch & 7)
1843b7e1c893Smrg	return FALSE;
1844b7e1c893Smrg
1845b7e1c893Smrg    scratch = RADEONCPGetBuffer(pScrn);
1846b7e1c893Smrg    if (scratch == NULL)
1847b7e1c893Smrg	return FALSE;
1848b7e1c893Smrg
1849b7e1c893Smrg    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1850b7e1c893Smrg    hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1851b7e1c893Smrg
1852ad43ddacSmrg    src_obj.pitch = src_pitch;
1853ad43ddacSmrg    src_obj.width = src_width;
1854ad43ddacSmrg    src_obj.height = src_height;
1855ad43ddacSmrg    src_obj.offset = src_mc_addr;
1856ad43ddacSmrg    src_obj.bo = NULL;
1857ad43ddacSmrg    src_obj.bpp = bpp;
1858ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1859ad43ddacSmrg
1860ad43ddacSmrg    scratch_obj.pitch = scratch_pitch;
1861ad43ddacSmrg    scratch_obj.width = src_width;
1862ad43ddacSmrg    scratch_obj.height = hpass;
1863ad43ddacSmrg    scratch_obj.offset = scratch_mc_addr;
1864ad43ddacSmrg    scratch_obj.bpp = bpp;
1865ad43ddacSmrg    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1866ad43ddacSmrg    scratch_obj.bo = NULL;
1867ad43ddacSmrg
1868ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1869ad43ddacSmrg			   &src_obj,
1870ad43ddacSmrg			   NULL,
1871ad43ddacSmrg			   &scratch_obj,
1872ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1873ad43ddacSmrg			   3, 0xffffffff))
1874ad43ddacSmrg	return FALSE;
1875ad43ddacSmrg
1876b7e1c893Smrg    /* blit from vram to scratch */
1877ad43ddacSmrg    R600DoPrepareCopy(pScrn);
1878b7e1c893Smrg    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1879b7e1c893Smrg    R600DoCopy(pScrn);
1880b7e1c893Smrg
1881b7e1c893Smrg    while (h) {
1882b7e1c893Smrg	char *src = (char *)scratch->address + scratch_offset;
1883b7e1c893Smrg	int oldhpass = hpass;
1884b7e1c893Smrg	h -= oldhpass;
1885b7e1c893Smrg	y += oldhpass;
1886b7e1c893Smrg	hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1887b7e1c893Smrg
1888b7e1c893Smrg	if (hpass) {
1889b7e1c893Smrg	    scratch_offset = scratch->total/2 - scratch_offset;
1890b7e1c893Smrg	    /* blit from vram to scratch */
1891ad43ddacSmrg	    info->accel_state->dst_obj.height = hpass;
1892ad43ddacSmrg	    info->accel_state->dst_obj.offset = scratch_mc_addr + scratch_offset;
1893ad43ddacSmrg	    R600DoPrepareCopy(pScrn);
1894b7e1c893Smrg	    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1895b7e1c893Smrg	    R600DoCopy(pScrn);
1896b7e1c893Smrg	}
1897b7e1c893Smrg
1898b7e1c893Smrg	/* wait for the engine to be idle */
1899b7e1c893Smrg	RADEONWaitForIdleCP(pScrn);
1900b7e1c893Smrg	/* memcopy from scratch to sys */
1901b7e1c893Smrg	while (oldhpass--) {
1902b7e1c893Smrg	    memcpy (dst, src, wpass);
1903b7e1c893Smrg	    dst += dst_pitch;
1904b7e1c893Smrg	    src += scratch_pitch_bytes;
1905b7e1c893Smrg	}
1906b7e1c893Smrg    }
1907b7e1c893Smrg
1908b7e1c893Smrg    R600IBDiscard(pScrn, scratch);
1909ad43ddacSmrg    r600_vb_discard(pScrn);
1910b7e1c893Smrg
1911b7e1c893Smrg    return TRUE;
1912b7e1c893Smrg
1913b7e1c893Smrg}
1914b7e1c893Smrg
1915ad43ddacSmrg#if defined(XF86DRM_MODE)
1916ad43ddacSmrg
1917ad43ddacSmrgstatic Bool
1918ad43ddacSmrgR600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
1919ad43ddacSmrg		     char *src, int src_pitch)
1920ad43ddacSmrg{
1921ad43ddacSmrg    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1922ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1923ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
1924ad43ddacSmrg    struct radeon_exa_pixmap_priv *driver_priv;
1925ad43ddacSmrg    struct radeon_bo *scratch;
1926ad43ddacSmrg    unsigned size;
1927ad43ddacSmrg    uint32_t dst_domain;
1928ad43ddacSmrg    int bpp = pDst->drawable.bitsPerPixel;
1929ad43ddacSmrg    uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
1930ad43ddacSmrg    uint32_t src_pitch_hw = scratch_pitch / (bpp / 8);
1931ad43ddacSmrg    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
1932ad43ddacSmrg    Bool r;
1933ad43ddacSmrg    int i;
1934ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
1935ad43ddacSmrg
1936ad43ddacSmrg    if (bpp < 8)
1937ad43ddacSmrg	return FALSE;
1938ad43ddacSmrg
1939ad43ddacSmrg    driver_priv = exaGetPixmapDriverPrivate(pDst);
1940ad43ddacSmrg
1941ad43ddacSmrg    /* If we know the BO won't be busy, don't bother */
1942ad43ddacSmrg    if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs) &&
1943ad43ddacSmrg	!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
1944ad43ddacSmrg	return FALSE;
1945ad43ddacSmrg
1946ad43ddacSmrg    size = scratch_pitch * h;
1947ad43ddacSmrg    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
1948ad43ddacSmrg    if (scratch == NULL) {
1949ad43ddacSmrg	return FALSE;
1950ad43ddacSmrg    }
1951ad43ddacSmrg
1952ad43ddacSmrg    src_obj.pitch = src_pitch_hw;
1953ad43ddacSmrg    src_obj.width = w;
1954ad43ddacSmrg    src_obj.height = h;
1955ad43ddacSmrg    src_obj.offset = 0;
1956ad43ddacSmrg    src_obj.bpp = bpp;
1957ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1958ad43ddacSmrg    src_obj.bo = scratch;
1959ad43ddacSmrg
1960ad43ddacSmrg    dst_obj.pitch = dst_pitch_hw;
1961ad43ddacSmrg    dst_obj.width = pDst->drawable.width;
1962ad43ddacSmrg    dst_obj.height = pDst->drawable.height;
1963ad43ddacSmrg    dst_obj.offset = 0;
1964ad43ddacSmrg    dst_obj.bpp = bpp;
1965ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1966ad43ddacSmrg    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1967ad43ddacSmrg
1968ad43ddacSmrg    if (!R600SetAccelState(pScrn,
1969ad43ddacSmrg			   &src_obj,
1970ad43ddacSmrg			   NULL,
1971ad43ddacSmrg			   &dst_obj,
1972ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1973ad43ddacSmrg			   3, 0xffffffff)) {
1974ad43ddacSmrg        r = FALSE;
1975ad43ddacSmrg        goto out;
1976ad43ddacSmrg    }
1977ad43ddacSmrg
1978ad43ddacSmrg    r = radeon_bo_map(scratch, 0);
1979ad43ddacSmrg    if (r) {
1980ad43ddacSmrg        r = FALSE;
1981ad43ddacSmrg        goto out;
1982ad43ddacSmrg    }
1983ad43ddacSmrg    r = TRUE;
1984ad43ddacSmrg    size = w * bpp / 8;
1985ad43ddacSmrg    for (i = 0; i < h; i++) {
1986ad43ddacSmrg        memcpy(scratch->ptr + i * scratch_pitch, src, size);
1987ad43ddacSmrg        src += src_pitch;
1988ad43ddacSmrg    }
1989ad43ddacSmrg    radeon_bo_unmap(scratch);
1990ad43ddacSmrg
1991ad43ddacSmrg    if (info->accel_state->vsync)
1992ad43ddacSmrg	R600VlineHelperSet(pScrn, x, y, x + w, y + h);
1993ad43ddacSmrg
1994ad43ddacSmrg    /* blit from gart to vram */
1995ad43ddacSmrg    R600DoPrepareCopy(pScrn);
1996ad43ddacSmrg    R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
1997ad43ddacSmrg    R600DoCopyVline(pDst);
1998ad43ddacSmrg
1999ad43ddacSmrgout:
2000ad43ddacSmrg    radeon_bo_unref(scratch);
2001ad43ddacSmrg    return r;
2002ad43ddacSmrg}
2003ad43ddacSmrg
2004ad43ddacSmrgstatic Bool
2005ad43ddacSmrgR600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
2006ad43ddacSmrg			 int h, char *dst, int dst_pitch)
2007ad43ddacSmrg{
2008ad43ddacSmrg    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
2009ad43ddacSmrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2010ad43ddacSmrg    struct radeon_accel_state *accel_state = info->accel_state;
2011ad43ddacSmrg    struct radeon_exa_pixmap_priv *driver_priv;
2012ad43ddacSmrg    struct radeon_bo *scratch;
2013ad43ddacSmrg    unsigned size;
2014ad43ddacSmrg    uint32_t src_domain = 0;
2015ad43ddacSmrg    int bpp = pSrc->drawable.bitsPerPixel;
2016ad43ddacSmrg    uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
2017ad43ddacSmrg    uint32_t dst_pitch_hw = scratch_pitch / (bpp / 8);
2018ad43ddacSmrg    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
2019ad43ddacSmrg    Bool r;
2020ad43ddacSmrg    struct r600_accel_object src_obj, dst_obj;
2021ad43ddacSmrg
2022ad43ddacSmrg    if (bpp < 8)
2023ad43ddacSmrg	return FALSE;
2024ad43ddacSmrg
2025ad43ddacSmrg    driver_priv = exaGetPixmapDriverPrivate(pSrc);
2026ad43ddacSmrg
2027ad43ddacSmrg    /* If we know the BO won't end up in VRAM anyway, don't bother */
2028ad43ddacSmrg    if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
2029ad43ddacSmrg	src_domain = radeon_bo_get_src_domain(driver_priv->bo);
2030ad43ddacSmrg	if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
2031ad43ddacSmrg	    (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
2032ad43ddacSmrg	    src_domain = 0;
2033ad43ddacSmrg    }
2034ad43ddacSmrg
2035ad43ddacSmrg    if (!src_domain)
2036ad43ddacSmrg	radeon_bo_is_busy(driver_priv->bo, &src_domain);
2037ad43ddacSmrg
2038ad43ddacSmrg    if (src_domain != RADEON_GEM_DOMAIN_VRAM)
2039ad43ddacSmrg	return FALSE;
2040ad43ddacSmrg
2041ad43ddacSmrg    size = scratch_pitch * h;
2042ad43ddacSmrg    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
2043ad43ddacSmrg    if (scratch == NULL) {
2044ad43ddacSmrg	return FALSE;
2045ad43ddacSmrg    }
2046ad43ddacSmrg    radeon_cs_space_reset_bos(info->cs);
2047ad43ddacSmrg    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
2048ad43ddacSmrg				      RADEON_GEM_DOMAIN_VRAM, 0);
2049ad43ddacSmrg    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
2050ad43ddacSmrg    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
2051ad43ddacSmrg    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
2052ad43ddacSmrg    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
2053ad43ddacSmrg    r = radeon_cs_space_check(info->cs);
2054ad43ddacSmrg    if (r) {
2055ad43ddacSmrg        r = FALSE;
2056ad43ddacSmrg        goto out;
2057ad43ddacSmrg    }
2058ad43ddacSmrg
2059ad43ddacSmrg    src_obj.pitch = src_pitch_hw;
2060ad43ddacSmrg    src_obj.width = pSrc->drawable.width;
2061ad43ddacSmrg    src_obj.height = pSrc->drawable.height;
2062ad43ddacSmrg    src_obj.offset = 0;
2063ad43ddacSmrg    src_obj.bpp = bpp;
2064ad43ddacSmrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
2065ad43ddacSmrg    src_obj.bo = radeon_get_pixmap_bo(pSrc);
2066ad43ddacSmrg
2067ad43ddacSmrg    dst_obj.pitch = dst_pitch_hw;
2068ad43ddacSmrg    dst_obj.width = w;
2069ad43ddacSmrg    dst_obj.height = h;
2070ad43ddacSmrg    dst_obj.offset = 0;
2071ad43ddacSmrg    dst_obj.bo = scratch;
2072ad43ddacSmrg    dst_obj.bpp = bpp;
2073ad43ddacSmrg    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
2074ad43ddacSmrg
2075ad43ddacSmrg    if (!R600SetAccelState(pScrn,
2076ad43ddacSmrg			   &src_obj,
2077ad43ddacSmrg			   NULL,
2078ad43ddacSmrg			   &dst_obj,
2079ad43ddacSmrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
2080ad43ddacSmrg			   3, 0xffffffff)) {
2081ad43ddacSmrg        r = FALSE;
2082ad43ddacSmrg        goto out;
2083ad43ddacSmrg    }
2084ad43ddacSmrg
2085ad43ddacSmrg    /* blit from vram to gart */
2086ad43ddacSmrg    R600DoPrepareCopy(pScrn);
2087ad43ddacSmrg    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
2088ad43ddacSmrg    R600DoCopy(pScrn);
2089ad43ddacSmrg
2090ad43ddacSmrg    if (info->cs)
2091ad43ddacSmrg	radeon_cs_flush_indirect(pScrn);
2092ad43ddacSmrg
2093ad43ddacSmrg    r = radeon_bo_map(scratch, 0);
2094ad43ddacSmrg    if (r) {
2095ad43ddacSmrg        r = FALSE;
2096ad43ddacSmrg        goto out;
2097ad43ddacSmrg    }
2098ad43ddacSmrg    r = TRUE;
2099ad43ddacSmrg    w *= bpp / 8;
2100ad43ddacSmrg    size = 0;
2101ad43ddacSmrg    while (h--) {
2102ad43ddacSmrg        memcpy(dst, scratch->ptr + size, w);
2103ad43ddacSmrg        size += scratch_pitch;
2104ad43ddacSmrg        dst += dst_pitch;
2105ad43ddacSmrg    }
2106ad43ddacSmrg    radeon_bo_unmap(scratch);
2107ad43ddacSmrgout:
2108ad43ddacSmrg    radeon_bo_unref(scratch);
2109ad43ddacSmrg    return r;
2110ad43ddacSmrg}
2111ad43ddacSmrg#endif
2112ad43ddacSmrg
2113b7e1c893Smrgstatic int
2114b7e1c893SmrgR600MarkSync(ScreenPtr pScreen)
2115b7e1c893Smrg{
2116b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
2117b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2118b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2119b7e1c893Smrg
2120b7e1c893Smrg    return ++accel_state->exaSyncMarker;
2121b7e1c893Smrg
2122b7e1c893Smrg}
2123b7e1c893Smrg
2124b7e1c893Smrgstatic void
2125b7e1c893SmrgR600Sync(ScreenPtr pScreen, int marker)
2126b7e1c893Smrg{
2127b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
2128b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2129b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2130b7e1c893Smrg
2131b7e1c893Smrg    if (accel_state->exaMarkerSynced != marker) {
2132ad43ddacSmrg#ifdef XF86DRM_MODE
2133ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2134ad43ddacSmrg	if (!info->cs)
2135ad43ddacSmrg#endif
2136ad43ddacSmrg#endif
2137ad43ddacSmrg	    RADEONWaitForIdleCP(pScrn);
2138b7e1c893Smrg	accel_state->exaMarkerSynced = marker;
2139b7e1c893Smrg    }
2140b7e1c893Smrg
2141b7e1c893Smrg}
2142b7e1c893Smrg
2143b7e1c893Smrgstatic Bool
2144b7e1c893SmrgR600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
2145b7e1c893Smrg{
2146b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2147b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2148b7e1c893Smrg
2149b7e1c893Smrg    /* 512 bytes per shader for now */
2150b7e1c893Smrg    int size = 512 * 9;
2151b7e1c893Smrg
2152b7e1c893Smrg    accel_state->shaders = NULL;
2153b7e1c893Smrg
2154ad43ddacSmrg#ifdef XF86DRM_MODE
2155ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2156ad43ddacSmrg    if (info->cs) {
2157ad43ddacSmrg	accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
2158ad43ddacSmrg						 RADEON_GEM_DOMAIN_VRAM, 0);
2159ad43ddacSmrg	if (accel_state->shaders_bo == NULL) {
2160ad43ddacSmrg	    ErrorF("Allocating shader failed\n");
2161ad43ddacSmrg	    return FALSE;
2162ad43ddacSmrg	}
2163ad43ddacSmrg	return TRUE;
2164ad43ddacSmrg    } else
2165ad43ddacSmrg#endif
2166ad43ddacSmrg#endif
2167ad43ddacSmrg    {
2168ad43ddacSmrg	accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
2169ad43ddacSmrg						 TRUE, NULL, NULL);
2170ad43ddacSmrg
2171ad43ddacSmrg	if (accel_state->shaders == NULL)
2172ad43ddacSmrg	    return FALSE;
2173ad43ddacSmrg    }
2174b7e1c893Smrg
2175b7e1c893Smrg    return TRUE;
2176b7e1c893Smrg}
2177b7e1c893Smrg
2178b7e1c893SmrgBool
2179b7e1c893SmrgR600LoadShaders(ScrnInfoPtr pScrn)
2180b7e1c893Smrg{
2181b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2182b7e1c893Smrg    struct radeon_accel_state *accel_state = info->accel_state;
2183b7e1c893Smrg    RADEONChipFamily ChipSet = info->ChipFamily;
2184b7e1c893Smrg    uint32_t *shader;
2185ad43ddacSmrg#ifdef XF86DRM_MODE
2186ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2187ad43ddacSmrg    int ret;
2188ad43ddacSmrg
2189ad43ddacSmrg    if (info->cs) {
2190ad43ddacSmrg	ret = radeon_bo_map(accel_state->shaders_bo, 1);
2191ad43ddacSmrg	if (ret) {
2192ad43ddacSmrg	    FatalError("failed to map shader %d\n", ret);
2193ad43ddacSmrg	    return FALSE;
2194ad43ddacSmrg	}
2195ad43ddacSmrg	shader = accel_state->shaders_bo->ptr;
2196ad43ddacSmrg    } else
2197ad43ddacSmrg#endif
2198ad43ddacSmrg#endif
2199ad43ddacSmrg	shader = (pointer)((char *)info->FB + accel_state->shaders->offset);
2200b7e1c893Smrg
2201b7e1c893Smrg    /*  solid vs --------------------------------------- */
2202b7e1c893Smrg    accel_state->solid_vs_offset = 0;
2203b7e1c893Smrg    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
2204b7e1c893Smrg
2205b7e1c893Smrg    /*  solid ps --------------------------------------- */
2206b7e1c893Smrg    accel_state->solid_ps_offset = 512;
2207b7e1c893Smrg    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
2208b7e1c893Smrg
2209b7e1c893Smrg    /*  copy vs --------------------------------------- */
2210b7e1c893Smrg    accel_state->copy_vs_offset = 1024;
2211b7e1c893Smrg    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
2212b7e1c893Smrg
2213b7e1c893Smrg    /*  copy ps --------------------------------------- */
2214b7e1c893Smrg    accel_state->copy_ps_offset = 1536;
2215b7e1c893Smrg    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2216b7e1c893Smrg
2217b7e1c893Smrg    /*  comp vs --------------------------------------- */
2218b7e1c893Smrg    accel_state->comp_vs_offset = 2048;
2219b7e1c893Smrg    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2220b7e1c893Smrg
2221b7e1c893Smrg    /*  comp ps --------------------------------------- */
2222b7e1c893Smrg    accel_state->comp_ps_offset = 2560;
2223b7e1c893Smrg    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2224b7e1c893Smrg
2225b7e1c893Smrg    /*  comp mask ps --------------------------------------- */
2226b7e1c893Smrg    accel_state->comp_mask_ps_offset = 3072;
2227b7e1c893Smrg    R600_comp_mask_ps(ChipSet, shader + accel_state->comp_mask_ps_offset / 4);
2228b7e1c893Smrg
2229b7e1c893Smrg    /*  xv vs --------------------------------------- */
2230b7e1c893Smrg    accel_state->xv_vs_offset = 3584;
2231b7e1c893Smrg    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2232b7e1c893Smrg
2233b7e1c893Smrg    /*  xv ps --------------------------------------- */
2234b7e1c893Smrg    accel_state->xv_ps_offset = 4096;
2235b7e1c893Smrg    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2236b7e1c893Smrg
2237ad43ddacSmrg#ifdef XF86DRM_MODE
2238ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2239ad43ddacSmrg    if (info->cs) {
2240ad43ddacSmrg	radeon_bo_unmap(accel_state->shaders_bo);
2241ad43ddacSmrg    }
2242ad43ddacSmrg#endif
2243ad43ddacSmrg#endif
2244ad43ddacSmrg
2245b7e1c893Smrg    return TRUE;
2246b7e1c893Smrg}
2247b7e1c893Smrg
2248b7e1c893Smrgstatic Bool
2249b7e1c893SmrgR600PrepareAccess(PixmapPtr pPix, int index)
2250b7e1c893Smrg{
2251b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2252b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2253b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
2254b7e1c893Smrg
2255b7e1c893Smrg    /* flush HDP read/write caches */
2256b7e1c893Smrg    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2257b7e1c893Smrg
2258b7e1c893Smrg    return TRUE;
2259b7e1c893Smrg}
2260b7e1c893Smrg
2261b7e1c893Smrgstatic void
2262b7e1c893SmrgR600FinishAccess(PixmapPtr pPix, int index)
2263b7e1c893Smrg{
2264b7e1c893Smrg    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2265b7e1c893Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
2266b7e1c893Smrg    unsigned char *RADEONMMIO = info->MMIO;
2267b7e1c893Smrg
2268b7e1c893Smrg    /* flush HDP read/write caches */
2269b7e1c893Smrg    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2270b7e1c893Smrg
2271b7e1c893Smrg}
2272b7e1c893Smrg
2273b7e1c893SmrgBool
2274b7e1c893SmrgR600DrawInit(ScreenPtr pScreen)
2275b7e1c893Smrg{
2276b7e1c893Smrg    ScrnInfoPtr pScrn =  xf86Screens[pScreen->myNum];
2277b7e1c893Smrg    RADEONInfoPtr info   = RADEONPTR(pScrn);
2278b7e1c893Smrg
2279b7e1c893Smrg    if (info->accel_state->exa == NULL) {
2280b7e1c893Smrg	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2281b7e1c893Smrg	return FALSE;
2282b7e1c893Smrg    }
2283b7e1c893Smrg
2284b7e1c893Smrg    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2285b7e1c893Smrg    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2286b7e1c893Smrg
2287b7e1c893Smrg    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2288b7e1c893Smrg    info->accel_state->exa->Solid = R600Solid;
2289b7e1c893Smrg    info->accel_state->exa->DoneSolid = R600DoneSolid;
2290b7e1c893Smrg
2291b7e1c893Smrg    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2292b7e1c893Smrg    info->accel_state->exa->Copy = R600Copy;
2293b7e1c893Smrg    info->accel_state->exa->DoneCopy = R600DoneCopy;
2294b7e1c893Smrg
2295b7e1c893Smrg    info->accel_state->exa->MarkSync = R600MarkSync;
2296b7e1c893Smrg    info->accel_state->exa->WaitMarker = R600Sync;
2297b7e1c893Smrg
2298ad43ddacSmrg#ifdef XF86DRM_MODE
2299ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2300ad43ddacSmrg    if (info->cs) {
2301ad43ddacSmrg	info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
2302ad43ddacSmrg	info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
2303ad43ddacSmrg	info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
2304ad43ddacSmrg	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
2305ad43ddacSmrg	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
2306ad43ddacSmrg	info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
2307ad43ddacSmrg	info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
2308ad43ddacSmrg    } else
2309ad43ddacSmrg#endif
2310ad43ddacSmrg#endif
2311ad43ddacSmrg    {
2312ad43ddacSmrg	info->accel_state->exa->PrepareAccess = R600PrepareAccess;
2313ad43ddacSmrg	info->accel_state->exa->FinishAccess = R600FinishAccess;
2314ad43ddacSmrg
2315ad43ddacSmrg	/* AGP seems to have problems with gart transfers */
2316ad43ddacSmrg	if (info->accelDFS) {
2317ad43ddacSmrg	    info->accel_state->exa->UploadToScreen = R600UploadToScreen;
2318ad43ddacSmrg	    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen;
2319ad43ddacSmrg	}
2320b7e1c893Smrg    }
2321b7e1c893Smrg
2322b7e1c893Smrg    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
2323b7e1c893Smrg#ifdef EXA_SUPPORTS_PREPARE_AUX
2324b7e1c893Smrg    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
2325ad43ddacSmrg#endif
2326ad43ddacSmrg
2327ad43ddacSmrg#ifdef XF86DRM_MODE
2328ad43ddacSmrg#ifdef EXA_HANDLES_PIXMAPS
2329ad43ddacSmrg    if (info->cs) {
2330ad43ddacSmrg	info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
2331ad43ddacSmrg#ifdef EXA_MIXED_PIXMAPS
2332ad43ddacSmrg	info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
2333ad43ddacSmrg#endif
2334ad43ddacSmrg    }
2335ad43ddacSmrg#endif
2336b7e1c893Smrg#endif
2337b7e1c893Smrg    info->accel_state->exa->pixmapOffsetAlign = 256;
2338b7e1c893Smrg    info->accel_state->exa->pixmapPitchAlign = 256;
2339b7e1c893Smrg
2340b7e1c893Smrg    info->accel_state->exa->CheckComposite = R600CheckComposite;
2341b7e1c893Smrg    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2342b7e1c893Smrg    info->accel_state->exa->Composite = R600Composite;
2343b7e1c893Smrg    info->accel_state->exa->DoneComposite = R600DoneComposite;
2344b7e1c893Smrg
2345b7e1c893Smrg#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
2346b7e1c893Smrg    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
2347b7e1c893Smrg
2348b7e1c893Smrg    info->accel_state->exa->maxPitchBytes = 32768;
2349b7e1c893Smrg    info->accel_state->exa->maxX = 8192;
2350b7e1c893Smrg#else
2351b7e1c893Smrg    info->accel_state->exa->maxX = 8192;
2352b7e1c893Smrg#endif
2353b7e1c893Smrg    info->accel_state->exa->maxY = 8192;
2354b7e1c893Smrg
2355b7e1c893Smrg    /* not supported yet */
2356ad43ddacSmrg    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2357ad43ddacSmrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2358ad43ddacSmrg	info->accel_state->vsync = TRUE;
2359ad43ddacSmrg    } else
2360ad43ddacSmrg	info->accel_state->vsync = FALSE;
2361b7e1c893Smrg
2362b7e1c893Smrg    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
2363b7e1c893Smrg	xfree(info->accel_state->exa);
2364b7e1c893Smrg	return FALSE;
2365b7e1c893Smrg    }
2366b7e1c893Smrg
2367ad43ddacSmrg#ifdef XF86DRM_MODE
2368ad43ddacSmrg#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2369ad43ddacSmrg    if (!info->cs)
2370ad43ddacSmrg#endif
2371ad43ddacSmrg#endif
2372ad43ddacSmrg	if (!info->gartLocation)
2373ad43ddacSmrg	    return FALSE;
2374b7e1c893Smrg
2375b7e1c893Smrg    info->accel_state->XInited3D = FALSE;
2376b7e1c893Smrg    info->accel_state->copy_area = NULL;
2377ad43ddacSmrg    info->accel_state->src_obj[0].bo = NULL;
2378ad43ddacSmrg    info->accel_state->src_obj[1].bo = NULL;
2379ad43ddacSmrg    info->accel_state->dst_obj.bo = NULL;
2380ad43ddacSmrg    info->accel_state->copy_area_bo = NULL;
2381ad43ddacSmrg    info->accel_state->vb_start_op = -1;
2382ad43ddacSmrg    R600VlineHelperClear(pScrn);
2383ad43ddacSmrg
2384ad43ddacSmrg#ifdef XF86DRM_MODE
2385ad43ddacSmrg    radeon_vbo_init_lists(pScrn);
2386ad43ddacSmrg#endif
2387b7e1c893Smrg
2388b7e1c893Smrg    if (!R600AllocShaders(pScrn, pScreen))
2389b7e1c893Smrg	return FALSE;
2390b7e1c893Smrg
2391b7e1c893Smrg    if (!R600LoadShaders(pScrn))
2392b7e1c893Smrg	return FALSE;
2393b7e1c893Smrg
2394b7e1c893Smrg    exaMarkSync(pScreen);
2395b7e1c893Smrg
2396b7e1c893Smrg    return TRUE;
2397b7e1c893Smrg
2398b7e1c893Smrg}
2399b7e1c893Smrg
2400