r600_exa.c revision f3a0071a
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "exa.h"
34
35#include "radeon.h"
36#include "radeon_macros.h"
37#include "radeon_reg.h"
38#include "r600_shader.h"
39#include "r600_reg.h"
40#include "r600_state.h"
41#include "radeon_exa_shared.h"
42#include "radeon_vbo.h"
43
44/* #define SHOW_VERTEXES */
45
46Bool
47R600SetAccelState(ScrnInfoPtr pScrn,
48		  struct r600_accel_object *src0,
49		  struct r600_accel_object *src1,
50		  struct r600_accel_object *dst,
51		  uint32_t vs_offset, uint32_t ps_offset,
52		  int rop, Pixel planemask)
53{
54    RADEONInfoPtr info = RADEONPTR(pScrn);
55    struct radeon_accel_state *accel_state = info->accel_state;
56    uint32_t pitch_align = 0x7, base_align = 0xff;
57#if defined(XF86DRM_MODE)
58    int ret;
59#endif
60
61    if (src0) {
62	memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object));
63	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
64#if defined(XF86DRM_MODE)
65	if (info->cs && src0->surface) {
66		accel_state->src_size[0] = src0->surface->bo_size;
67	}
68#endif
69
70	/* bad pitch */
71	if (accel_state->src_obj[0].pitch & pitch_align)
72	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
73
74	/* bad offset */
75	if (accel_state->src_obj[0].offset & base_align)
76	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[0].offset));
77
78    } else {
79	memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
80	accel_state->src_size[0] = 0;
81    }
82
83    if (src1) {
84	memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object));
85	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
86#if defined(XF86DRM_MODE)
87	if (info->cs && src1->surface) {
88		accel_state->src_size[1] = src1->surface->bo_size;
89	}
90#endif
91
92	/* bad pitch */
93	if (accel_state->src_obj[1].pitch & pitch_align)
94	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
95
96	/* bad offset */
97	if (accel_state->src_obj[1].offset & base_align)
98	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[1].offset));
99    } else {
100	memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
101	accel_state->src_size[1] = 0;
102    }
103
104    if (dst) {
105	memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object));
106	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
107#if defined(XF86DRM_MODE)
108	if (info->cs && dst->surface) {
109		accel_state->dst_size = dst->surface->bo_size;
110	} else
111#endif
112	{
113		accel_state->dst_obj.tiling_flags = 0;
114	}
115	if (accel_state->dst_obj.pitch & pitch_align)
116	    RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
117
118	if (accel_state->dst_obj.offset & base_align)
119	    RADEON_FALLBACK(("Bad dst offset 0x%08x\n", accel_state->dst_obj.offset));
120    } else {
121	memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
122	accel_state->dst_size = 0;
123    }
124
125#ifdef XF86DRM_MODE
126    if (info->cs && CS_FULL(info->cs))
127	radeon_cs_flush_indirect(pScrn);
128#endif
129
130    accel_state->rop = rop;
131    accel_state->planemask = planemask;
132
133    accel_state->vs_size = 512;
134    accel_state->ps_size = 512;
135#if defined(XF86DRM_MODE)
136    if (info->cs) {
137	accel_state->vs_mc_addr = vs_offset;
138	accel_state->ps_mc_addr = ps_offset;
139
140	radeon_cs_space_reset_bos(info->cs);
141	radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
142					  RADEON_GEM_DOMAIN_VRAM, 0);
143	if (accel_state->src_obj[0].bo)
144	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
145					      accel_state->src_obj[0].domain, 0);
146	if (accel_state->src_obj[1].bo)
147	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
148					      accel_state->src_obj[1].domain, 0);
149	if (accel_state->dst_obj.bo)
150	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
151					      0, accel_state->dst_obj.domain);
152	ret = radeon_cs_space_check(info->cs);
153	if (ret)
154	    RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
155
156    } else
157#endif
158    {
159	accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
160	    vs_offset;
161	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
162	    ps_offset;
163    }
164
165    return TRUE;
166}
167
168static Bool
169R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
170{
171    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
172    RADEONInfoPtr info = RADEONPTR(pScrn);
173    struct radeon_accel_state *accel_state = info->accel_state;
174    cb_config_t     cb_conf;
175    shader_config_t vs_conf, ps_conf;
176    uint32_t a, r, g, b;
177    float ps_alu_consts[4];
178    struct r600_accel_object dst;
179
180    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
181	RADEON_FALLBACK(("R600CheckDatatype failed\n"));
182    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
183	RADEON_FALLBACK(("invalid planemask\n"));
184
185#if defined(XF86DRM_MODE)
186    if (info->cs) {
187	dst.offset = 0;
188	dst.bo = radeon_get_pixmap_bo(pPix);
189	dst.tiling_flags = radeon_get_pixmap_tiling(pPix);
190	dst.surface = radeon_get_pixmap_surface(pPix);
191    } else
192#endif
193    {
194	dst.offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
195	dst.bo = NULL;
196    }
197
198    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
199    dst.width = pPix->drawable.width;
200    dst.height = pPix->drawable.height;
201    dst.bpp = pPix->drawable.bitsPerPixel;
202    dst.domain = RADEON_GEM_DOMAIN_VRAM;
203
204    if (!R600SetAccelState(pScrn,
205			   NULL,
206			   NULL,
207			   &dst,
208			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
209			   alu, pm))
210	return FALSE;
211
212    CLEAR (cb_conf);
213    CLEAR (vs_conf);
214    CLEAR (ps_conf);
215
216    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
217    radeon_cp_start(pScrn);
218
219    r600_set_default_state(pScrn, accel_state->ib);
220
221    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
222    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
223    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
224
225    /* Shader */
226    vs_conf.shader_addr         = accel_state->vs_mc_addr;
227    vs_conf.shader_size         = accel_state->vs_size;
228    vs_conf.num_gprs            = 2;
229    vs_conf.stack_size          = 0;
230    vs_conf.bo                  = accel_state->shaders_bo;
231    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
232
233    ps_conf.shader_addr         = accel_state->ps_mc_addr;
234    ps_conf.shader_size         = accel_state->ps_size;
235    ps_conf.num_gprs            = 1;
236    ps_conf.stack_size          = 0;
237    ps_conf.uncached_first_inst = 1;
238    ps_conf.clamp_consts        = 0;
239    ps_conf.export_mode         = 2;
240    ps_conf.bo                  = accel_state->shaders_bo;
241    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
242
243    cb_conf.id = 0;
244    cb_conf.w = accel_state->dst_obj.pitch;
245    cb_conf.h = accel_state->dst_obj.height;
246    cb_conf.base = accel_state->dst_obj.offset;
247    cb_conf.bo = accel_state->dst_obj.bo;
248#ifdef XF86DRM_MODE
249    cb_conf.surface = accel_state->dst_obj.surface;
250#endif
251
252    if (accel_state->dst_obj.bpp == 8) {
253	cb_conf.format = COLOR_8;
254	cb_conf.comp_swap = 3; /* A */
255    } else if (accel_state->dst_obj.bpp == 16) {
256	cb_conf.format = COLOR_5_6_5;
257	cb_conf.comp_swap = 2; /* RGB */
258#if X_BYTE_ORDER == X_BIG_ENDIAN
259	cb_conf.endian = ENDIAN_8IN16;
260#endif
261    } else {
262	cb_conf.format = COLOR_8_8_8_8;
263	cb_conf.comp_swap = 1; /* ARGB */
264#if X_BYTE_ORDER == X_BIG_ENDIAN
265	cb_conf.endian = ENDIAN_8IN32;
266#endif
267    }
268    cb_conf.source_format = 1;
269    cb_conf.blend_clamp = 1;
270    /* Render setup */
271    if (accel_state->planemask & 0x000000ff)
272	cb_conf.pmask |= 4; /* B */
273    if (accel_state->planemask & 0x0000ff00)
274	cb_conf.pmask |= 2; /* G */
275    if (accel_state->planemask & 0x00ff0000)
276	cb_conf.pmask |= 1; /* R */
277    if (accel_state->planemask & 0xff000000)
278	cb_conf.pmask |= 8; /* A */
279    cb_conf.rop = accel_state->rop;
280    if (accel_state->dst_obj.tiling_flags == 0)
281	cb_conf.array_mode = 0;
282    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
283
284    r600_set_spi(pScrn, accel_state->ib, 0, 0);
285
286    /* PS alu constants */
287    if (accel_state->dst_obj.bpp == 16) {
288	r = (fg >> 11) & 0x1f;
289	g = (fg >> 5) & 0x3f;
290	b = (fg >> 0) & 0x1f;
291	ps_alu_consts[0] = (float)r / 31; /* R */
292	ps_alu_consts[1] = (float)g / 63; /* G */
293	ps_alu_consts[2] = (float)b / 31; /* B */
294	ps_alu_consts[3] = 1.0; /* A */
295    } else if (accel_state->dst_obj.bpp == 8) {
296	a = (fg >> 0) & 0xff;
297	ps_alu_consts[0] = 0.0; /* R */
298	ps_alu_consts[1] = 0.0; /* G */
299	ps_alu_consts[2] = 0.0; /* B */
300	ps_alu_consts[3] = (float)a / 255; /* A */
301    } else {
302	a = (fg >> 24) & 0xff;
303	r = (fg >> 16) & 0xff;
304	g = (fg >> 8) & 0xff;
305	b = (fg >> 0) & 0xff;
306	ps_alu_consts[0] = (float)r / 255; /* R */
307	ps_alu_consts[1] = (float)g / 255; /* G */
308	ps_alu_consts[2] = (float)b / 255; /* B */
309	ps_alu_consts[3] = (float)a / 255; /* A */
310    }
311    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
312			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
313
314    if (accel_state->vsync)
315	RADEONVlineHelperClear(pScrn);
316
317    accel_state->dst_pix = pPix;
318    accel_state->fg = fg;
319
320    return TRUE;
321}
322
323static void
324R600DoneSolid(PixmapPtr pPix)
325{
326    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
327    RADEONInfoPtr info = RADEONPTR(pScrn);
328    struct radeon_accel_state *accel_state = info->accel_state;
329
330    if (accel_state->vsync)
331	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
332				accel_state->vline_crtc,
333				accel_state->vline_y1,
334				accel_state->vline_y2);
335
336    r600_finish_op(pScrn, 8);
337}
338
339static void
340R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
341{
342    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
343    RADEONInfoPtr info = RADEONPTR(pScrn);
344    struct radeon_accel_state *accel_state = info->accel_state;
345    float *vb;
346
347#ifdef XF86DRM_MODE
348    if (info->cs && CS_FULL(info->cs)) {
349	R600DoneSolid(info->accel_state->dst_pix);
350	radeon_cs_flush_indirect(pScrn);
351	R600PrepareSolid(accel_state->dst_pix,
352			 accel_state->rop,
353			 accel_state->planemask,
354			 accel_state->fg);
355    }
356#endif
357
358    if (accel_state->vsync)
359	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
360
361    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
362
363    vb[0] = (float)x1;
364    vb[1] = (float)y1;
365
366    vb[2] = (float)x1;
367    vb[3] = (float)y2;
368
369    vb[4] = (float)x2;
370    vb[5] = (float)y2;
371
372    radeon_vbo_commit(pScrn, &accel_state->vbo);
373}
374
375static void
376R600DoPrepareCopy(ScrnInfoPtr pScrn)
377{
378    RADEONInfoPtr info = RADEONPTR(pScrn);
379    struct radeon_accel_state *accel_state = info->accel_state;
380    cb_config_t     cb_conf;
381    tex_resource_t  tex_res;
382    tex_sampler_t   tex_samp;
383    shader_config_t vs_conf, ps_conf;
384
385    CLEAR (cb_conf);
386    CLEAR (tex_res);
387    CLEAR (tex_samp);
388    CLEAR (vs_conf);
389    CLEAR (ps_conf);
390
391    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
392    radeon_cp_start(pScrn);
393
394    r600_set_default_state(pScrn, accel_state->ib);
395
396    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
397    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
398    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
399
400    /* Shader */
401    vs_conf.shader_addr         = accel_state->vs_mc_addr;
402    vs_conf.shader_size         = accel_state->vs_size;
403    vs_conf.num_gprs            = 2;
404    vs_conf.stack_size          = 0;
405    vs_conf.bo                  = accel_state->shaders_bo;
406    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
407
408    ps_conf.shader_addr         = accel_state->ps_mc_addr;
409    ps_conf.shader_size         = accel_state->ps_size;
410    ps_conf.num_gprs            = 1;
411    ps_conf.stack_size          = 0;
412    ps_conf.uncached_first_inst = 1;
413    ps_conf.clamp_consts        = 0;
414    ps_conf.export_mode         = 2;
415    ps_conf.bo                  = accel_state->shaders_bo;
416    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
417
418    /* Texture */
419    tex_res.id                  = 0;
420    tex_res.w                   = accel_state->src_obj[0].width;
421    tex_res.h                   = accel_state->src_obj[0].height;
422    tex_res.pitch               = accel_state->src_obj[0].pitch;
423    tex_res.depth               = 0;
424    tex_res.dim                 = SQ_TEX_DIM_2D;
425    tex_res.base                = accel_state->src_obj[0].offset;
426    tex_res.mip_base            = accel_state->src_obj[0].offset;
427    tex_res.size                = accel_state->src_size[0];
428    tex_res.bo                  = accel_state->src_obj[0].bo;
429    tex_res.mip_bo              = accel_state->src_obj[0].bo;
430#ifdef XF86DRM_MODE
431    tex_res.surface             = accel_state->src_obj[0].surface;
432#endif
433    if (accel_state->src_obj[0].bpp == 8) {
434	tex_res.format              = FMT_8;
435	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
436	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
437	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
438	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
439    } else if (accel_state->src_obj[0].bpp == 16) {
440	tex_res.format              = FMT_5_6_5;
441	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
442	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
443	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
444	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
445    } else {
446	tex_res.format              = FMT_8_8_8_8;
447	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
448	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
449	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
450	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
451    }
452
453    tex_res.request_size        = 1;
454    tex_res.base_level          = 0;
455    tex_res.last_level          = 0;
456    tex_res.perf_modulation     = 0;
457    if (accel_state->src_obj[0].tiling_flags == 0)
458	tex_res.tile_mode           = 1;
459    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
460
461    tex_samp.id                 = 0;
462    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
463    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
464    tex_samp.clamp_z            = SQ_TEX_WRAP;
465    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
466    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
467    tex_samp.mc_coord_truncate  = 1;
468    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
469    tex_samp.mip_filter         = 0;			/* no mipmap */
470    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
471
472    cb_conf.id = 0;
473    cb_conf.w = accel_state->dst_obj.pitch;
474    cb_conf.h = accel_state->dst_obj.height;
475    cb_conf.base = accel_state->dst_obj.offset;
476    cb_conf.bo = accel_state->dst_obj.bo;
477#ifdef XF86DRM_MODE
478    cb_conf.surface = accel_state->dst_obj.surface;
479#endif
480    if (accel_state->dst_obj.bpp == 8) {
481	cb_conf.format = COLOR_8;
482	cb_conf.comp_swap = 3; /* A */
483    } else if (accel_state->dst_obj.bpp == 16) {
484	cb_conf.format = COLOR_5_6_5;
485	cb_conf.comp_swap = 2; /* RGB */
486    } else {
487	cb_conf.format = COLOR_8_8_8_8;
488	cb_conf.comp_swap = 1; /* ARGB */
489    }
490    cb_conf.source_format = 1;
491    cb_conf.blend_clamp = 1;
492
493    /* Render setup */
494    if (accel_state->planemask & 0x000000ff)
495	cb_conf.pmask |= 4; /* B */
496    if (accel_state->planemask & 0x0000ff00)
497	cb_conf.pmask |= 2; /* G */
498    if (accel_state->planemask & 0x00ff0000)
499	cb_conf.pmask |= 1; /* R */
500    if (accel_state->planemask & 0xff000000)
501	cb_conf.pmask |= 8; /* A */
502    cb_conf.rop = accel_state->rop;
503    if (accel_state->dst_obj.tiling_flags == 0)
504	cb_conf.array_mode = 0;
505    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
506
507    r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
508
509}
510
511static void
512R600DoCopy(ScrnInfoPtr pScrn)
513{
514    r600_finish_op(pScrn, 16);
515}
516
517static void
518R600DoCopyVline(PixmapPtr pPix)
519{
520    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
521    RADEONInfoPtr info = RADEONPTR(pScrn);
522    struct radeon_accel_state *accel_state = info->accel_state;
523
524    if (accel_state->vsync)
525	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
526				accel_state->vline_crtc,
527				accel_state->vline_y1,
528				accel_state->vline_y2);
529
530    r600_finish_op(pScrn, 16);
531}
532
533static void
534R600AppendCopyVertex(ScrnInfoPtr pScrn,
535		     int srcX, int srcY,
536		     int dstX, int dstY,
537		     int w, int h)
538{
539    RADEONInfoPtr info = RADEONPTR(pScrn);
540    struct radeon_accel_state *accel_state = info->accel_state;
541    float *vb;
542
543    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
544
545    vb[0] = (float)dstX;
546    vb[1] = (float)dstY;
547    vb[2] = (float)srcX;
548    vb[3] = (float)srcY;
549
550    vb[4] = (float)dstX;
551    vb[5] = (float)(dstY + h);
552    vb[6] = (float)srcX;
553    vb[7] = (float)(srcY + h);
554
555    vb[8] = (float)(dstX + w);
556    vb[9] = (float)(dstY + h);
557    vb[10] = (float)(srcX + w);
558    vb[11] = (float)(srcY + h);
559
560    radeon_vbo_commit(pScrn, &accel_state->vbo);
561}
562
563static Bool
564R600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
565		int xdir, int ydir,
566		int rop,
567		Pixel planemask)
568{
569    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
570    RADEONInfoPtr info = RADEONPTR(pScrn);
571    struct radeon_accel_state *accel_state = info->accel_state;
572    struct r600_accel_object src_obj, dst_obj;
573
574    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
575	RADEON_FALLBACK(("R600CheckDatatype src failed\n"));
576    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
577	RADEON_FALLBACK(("R600CheckDatatype dst failed\n"));
578    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
579	RADEON_FALLBACK(("Invalid planemask\n"));
580
581    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
582    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
583
584    accel_state->same_surface = FALSE;
585
586#if defined(XF86DRM_MODE)
587    if (info->cs) {
588	src_obj.offset = 0;
589	dst_obj.offset = 0;
590	src_obj.bo = radeon_get_pixmap_bo(pSrc);
591	dst_obj.bo = radeon_get_pixmap_bo(pDst);
592	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
593	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
594	src_obj.surface = radeon_get_pixmap_surface(pSrc);
595	dst_obj.surface = radeon_get_pixmap_surface(pDst);
596	if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
597	    accel_state->same_surface = TRUE;
598    } else
599#endif
600    {
601	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
602	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
603	if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst))
604	    accel_state->same_surface = TRUE;
605	src_obj.bo = NULL;
606	dst_obj.bo = NULL;
607    }
608
609    src_obj.width = pSrc->drawable.width;
610    src_obj.height = pSrc->drawable.height;
611    src_obj.bpp = pSrc->drawable.bitsPerPixel;
612    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
613
614    dst_obj.width = pDst->drawable.width;
615    dst_obj.height = pDst->drawable.height;
616    dst_obj.bpp = pDst->drawable.bitsPerPixel;
617    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
618
619    if (!R600SetAccelState(pScrn,
620			   &src_obj,
621			   NULL,
622			   &dst_obj,
623			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
624			   rop, planemask))
625	return FALSE;
626
627    if (accel_state->same_surface == TRUE) {
628#if defined(XF86DRM_MODE)
629	unsigned long size = accel_state->dst_obj.surface->bo_size;
630	unsigned long align = accel_state->dst_obj.surface->bo_alignment;
631#else
632	unsigned height = pDst->drawable.height;
633	unsigned long size = height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
634#endif
635
636#if defined(XF86DRM_MODE)
637	if (info->cs) {
638	    if (accel_state->copy_area_bo) {
639		radeon_bo_unref(accel_state->copy_area_bo);
640		accel_state->copy_area_bo = NULL;
641	    }
642	    accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, align,
643						       RADEON_GEM_DOMAIN_VRAM,
644						       0);
645	    if (accel_state->copy_area_bo == NULL)
646		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
647
648	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
649					      0, RADEON_GEM_DOMAIN_VRAM);
650	    if (radeon_cs_space_check(info->cs)) {
651		radeon_bo_unref(accel_state->copy_area_bo);
652		accel_state->copy_area_bo = NULL;
653		return FALSE;
654	    }
655	    accel_state->copy_area = (void*)accel_state->copy_area_bo;
656	} else
657#endif
658	{
659	    if (accel_state->copy_area) {
660		exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
661		accel_state->copy_area = NULL;
662	    }
663	    accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
664	    if (!accel_state->copy_area)
665		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
666	}
667    } else
668	R600DoPrepareCopy(pScrn);
669
670    if (accel_state->vsync)
671	RADEONVlineHelperClear(pScrn);
672
673    accel_state->dst_pix = pDst;
674    accel_state->src_pix = pSrc;
675    accel_state->xdir = xdir;
676    accel_state->ydir = ydir;
677
678    return TRUE;
679}
680
681static void
682R600DoneCopy(PixmapPtr pDst)
683{
684    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
685    RADEONInfoPtr info = RADEONPTR(pScrn);
686    struct radeon_accel_state *accel_state = info->accel_state;
687
688    if (!accel_state->same_surface)
689	R600DoCopyVline(pDst);
690
691    if (accel_state->copy_area) {
692	if (!info->cs)
693	    exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
694	accel_state->copy_area = NULL;
695    }
696
697}
698
699static void
700R600Copy(PixmapPtr pDst,
701	 int srcX, int srcY,
702	 int dstX, int dstY,
703	 int w, int h)
704{
705    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
706    RADEONInfoPtr info = RADEONPTR(pScrn);
707    struct radeon_accel_state *accel_state = info->accel_state;
708
709    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
710	return;
711
712#ifdef XF86DRM_MODE
713    if (info->cs && CS_FULL(info->cs)) {
714	R600DoneCopy(info->accel_state->dst_pix);
715	radeon_cs_flush_indirect(pScrn);
716	R600PrepareCopy(accel_state->src_pix,
717			accel_state->dst_pix,
718			accel_state->xdir,
719			accel_state->ydir,
720			accel_state->rop,
721			accel_state->planemask);
722    }
723#endif
724
725    if (accel_state->vsync)
726	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
727
728    if (accel_state->same_surface && accel_state->copy_area) {
729	uint32_t orig_offset, tmp_offset;
730	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
731	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
732	uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags;
733	uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags;
734	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
735	int orig_rop = accel_state->rop;
736
737#if defined(XF86DRM_MODE)
738	if (info->cs) {
739	    tmp_offset = 0;
740	    orig_offset = 0;
741	} else
742#endif
743	{
744	    tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset;
745	    orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
746	}
747
748	/* src to tmp */
749	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
750	accel_state->dst_obj.bo = accel_state->copy_area_bo;
751	accel_state->dst_obj.offset = tmp_offset;
752	accel_state->dst_obj.tiling_flags = 0;
753	accel_state->rop = 3;
754	R600DoPrepareCopy(pScrn);
755	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
756	R600DoCopy(pScrn);
757
758	/* tmp to dst */
759	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
760	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
761	accel_state->src_obj[0].offset = tmp_offset;
762	accel_state->src_obj[0].tiling_flags = 0;
763	accel_state->dst_obj.domain = orig_dst_domain;
764	accel_state->dst_obj.bo = orig_bo;
765	accel_state->dst_obj.offset = orig_offset;
766	accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags;
767	accel_state->rop = orig_rop;
768	R600DoPrepareCopy(pScrn);
769	R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
770	R600DoCopyVline(pDst);
771
772	/* restore state */
773	accel_state->src_obj[0].domain = orig_src_domain;
774	accel_state->src_obj[0].bo = orig_bo;
775	accel_state->src_obj[0].offset = orig_offset;
776	accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags;
777    } else
778	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
779
780}
781
782struct blendinfo {
783    Bool dst_alpha;
784    Bool src_alpha;
785    uint32_t blend_cntl;
786};
787
788static struct blendinfo R600BlendOp[] = {
789    /* Clear */
790    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
791    /* Src */
792    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
793    /* Dst */
794    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
795    /* Over */
796    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
797    /* OverReverse */
798    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
799    /* In */
800    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
801    /* InReverse */
802    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
803    /* Out */
804    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
805    /* OutReverse */
806    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
807    /* Atop */
808    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
809    /* AtopReverse */
810    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
811    /* Xor */
812    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
813    /* Add */
814    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
815};
816
817struct formatinfo {
818    unsigned int fmt;
819    uint32_t card_fmt;
820};
821
822static struct formatinfo R600TexFormats[] = {
823    {PICT_a8r8g8b8,	FMT_8_8_8_8},
824    {PICT_x8r8g8b8,	FMT_8_8_8_8},
825    {PICT_a8b8g8r8,	FMT_8_8_8_8},
826    {PICT_x8b8g8r8,	FMT_8_8_8_8},
827#ifdef PICT_TYPE_BGRA
828    {PICT_b8g8r8a8,	FMT_8_8_8_8},
829    {PICT_b8g8r8x8,	FMT_8_8_8_8},
830#endif
831    {PICT_r5g6b5,	FMT_5_6_5},
832    {PICT_a1r5g5b5,	FMT_1_5_5_5},
833    {PICT_x1r5g5b5,     FMT_1_5_5_5},
834    {PICT_a8,		FMT_8},
835};
836
837static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
838{
839    uint32_t sblend, dblend;
840
841    sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
842    dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
843
844    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
845     * it as always 1.
846     */
847    if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
848	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
849	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
850	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
851	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
852    }
853
854    /* If the source alpha is being used, then we should only be in a case where
855     * the source blend factor is 0, and the source blend value is the mask
856     * channels multiplied by the source picture's alpha.
857     */
858    if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
859	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
860	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
861	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
862	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
863	}
864    }
865
866    return sblend | dblend;
867}
868
869static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
870{
871    switch (pDstPicture->format) {
872    case PICT_a8r8g8b8:
873    case PICT_x8r8g8b8:
874    case PICT_a8b8g8r8:
875    case PICT_x8b8g8r8:
876#ifdef PICT_TYPE_BGRA
877    case PICT_b8g8r8a8:
878    case PICT_b8g8r8x8:
879#endif
880	*dst_format = COLOR_8_8_8_8;
881	break;
882    case PICT_r5g6b5:
883	*dst_format = COLOR_5_6_5;
884	break;
885    case PICT_a1r5g5b5:
886    case PICT_x1r5g5b5:
887	*dst_format = COLOR_1_5_5_5;
888	break;
889    case PICT_a8:
890	*dst_format = COLOR_8;
891	break;
892    default:
893	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
894	       (int)pDstPicture->format));
895    }
896    return TRUE;
897}
898
899static Bool R600CheckCompositeTexture(PicturePtr pPict,
900				      PicturePtr pDstPict,
901				      int op,
902				      int unit)
903{
904    int w = pPict->pDrawable->width;
905    int h = pPict->pDrawable->height;
906    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
907    unsigned int i;
908    int max_tex_w, max_tex_h;
909
910    max_tex_w = 8192;
911    max_tex_h = 8192;
912
913    if ((w > max_tex_w) || (h > max_tex_h))
914	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
915
916    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
917	if (R600TexFormats[i].fmt == pPict->format)
918	    break;
919    }
920    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
921	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
922			 (int)pPict->format));
923
924    if (pPict->filter != PictFilterNearest &&
925	pPict->filter != PictFilterBilinear)
926	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
927
928    /* for REPEAT_NONE, Render semantics are that sampling outside the source
929     * picture results in alpha=0 pixels. We can implement this with a border color
930     * *if* our source texture has an alpha channel, otherwise we need to fall
931     * back. If we're not transformed then we hope that upper layers have clipped
932     * rendering to the bounds of the source drawable, in which case it doesn't
933     * matter. I have not, however, verified that the X server always does such
934     * clipping.
935     */
936    /* FIXME R6xx */
937    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
938	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
939	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
940    }
941
942    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
943	RADEON_FALLBACK(("non-affine transforms not supported\n"));
944
945    return TRUE;
946}
947
948static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
949					int unit)
950{
951    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
952    RADEONInfoPtr info = RADEONPTR(pScrn);
953    struct radeon_accel_state *accel_state = info->accel_state;
954    int w = pPict->pDrawable->width;
955    int h = pPict->pDrawable->height;
956    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
957    unsigned int i;
958    tex_resource_t  tex_res;
959    tex_sampler_t   tex_samp;
960    int pix_r, pix_g, pix_b, pix_a;
961    float vs_alu_consts[8];
962
963    CLEAR (tex_res);
964    CLEAR (tex_samp);
965
966    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
967	if (R600TexFormats[i].fmt == pPict->format)
968	    break;
969    }
970
971    /* Texture */
972    tex_res.id                  = unit;
973    tex_res.w                   = w;
974    tex_res.h                   = h;
975    tex_res.pitch               = accel_state->src_obj[unit].pitch;
976    tex_res.depth               = 0;
977    tex_res.dim                 = SQ_TEX_DIM_2D;
978    tex_res.base                = accel_state->src_obj[unit].offset;
979    tex_res.mip_base            = accel_state->src_obj[unit].offset;
980    tex_res.size                = accel_state->src_size[unit];
981    tex_res.format              = R600TexFormats[i].card_fmt;
982    tex_res.bo                  = accel_state->src_obj[unit].bo;
983    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
984#ifdef XF86DRM_MODE
985    tex_res.surface             = accel_state->src_obj[unit].surface;
986#endif
987    tex_res.request_size        = 1;
988
989#if X_BYTE_ORDER == X_BIG_ENDIAN
990    switch (accel_state->src_obj[unit].bpp) {
991    case 16:
992	tex_res.endian = SQ_ENDIAN_8IN16;
993	break;
994    case 32:
995	tex_res.endian = SQ_ENDIAN_8IN32;
996	break;
997    default :
998	break;
999    }
1000#endif
1001
1002    /* component swizzles */
1003    switch (pPict->format) {
1004    case PICT_a1r5g5b5:
1005    case PICT_a8r8g8b8:
1006	pix_r = SQ_SEL_Z; /* R */
1007	pix_g = SQ_SEL_Y; /* G */
1008	pix_b = SQ_SEL_X; /* B */
1009	pix_a = SQ_SEL_W; /* A */
1010	break;
1011    case PICT_a8b8g8r8:
1012	pix_r = SQ_SEL_X; /* R */
1013	pix_g = SQ_SEL_Y; /* G */
1014	pix_b = SQ_SEL_Z; /* B */
1015	pix_a = SQ_SEL_W; /* A */
1016	break;
1017    case PICT_x8b8g8r8:
1018	pix_r = SQ_SEL_X; /* R */
1019	pix_g = SQ_SEL_Y; /* G */
1020	pix_b = SQ_SEL_Z; /* B */
1021	pix_a = SQ_SEL_1; /* A */
1022	break;
1023#ifdef PICT_TYPE_BGRA
1024    case PICT_b8g8r8a8:
1025	pix_r = SQ_SEL_Y; /* R */
1026	pix_g = SQ_SEL_Z; /* G */
1027	pix_b = SQ_SEL_W; /* B */
1028	pix_a = SQ_SEL_X; /* A */
1029	break;
1030    case PICT_b8g8r8x8:
1031	pix_r = SQ_SEL_Y; /* R */
1032	pix_g = SQ_SEL_Z; /* G */
1033	pix_b = SQ_SEL_W; /* B */
1034	pix_a = SQ_SEL_1; /* A */
1035	break;
1036#endif
1037    case PICT_x1r5g5b5:
1038    case PICT_x8r8g8b8:
1039    case PICT_r5g6b5:
1040	pix_r = SQ_SEL_Z; /* R */
1041	pix_g = SQ_SEL_Y; /* G */
1042	pix_b = SQ_SEL_X; /* B */
1043	pix_a = SQ_SEL_1; /* A */
1044	break;
1045    case PICT_a8:
1046	pix_r = SQ_SEL_0; /* R */
1047	pix_g = SQ_SEL_0; /* G */
1048	pix_b = SQ_SEL_0; /* B */
1049	pix_a = SQ_SEL_X; /* A */
1050	break;
1051    default:
1052	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
1053    }
1054
1055    if (unit == 0) {
1056	if (!accel_state->msk_pic) {
1057	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
1058		pix_r = SQ_SEL_0;
1059		pix_g = SQ_SEL_0;
1060		pix_b = SQ_SEL_0;
1061	    }
1062
1063	    if (PICT_FORMAT_A(pPict->format) == 0)
1064		pix_a = SQ_SEL_1;
1065	} else {
1066	    if (accel_state->component_alpha) {
1067		if (accel_state->src_alpha) {
1068		    if (PICT_FORMAT_A(pPict->format) == 0) {
1069			pix_r = SQ_SEL_1;
1070			pix_g = SQ_SEL_1;
1071			pix_b = SQ_SEL_1;
1072			pix_a = SQ_SEL_1;
1073		    } else {
1074			pix_r = pix_a;
1075			pix_g = pix_a;
1076			pix_b = pix_a;
1077		    }
1078		} else {
1079		    if (PICT_FORMAT_A(pPict->format) == 0)
1080			pix_a = SQ_SEL_1;
1081		}
1082	    } else {
1083		if (PICT_FORMAT_RGB(pPict->format) == 0) {
1084		    pix_r = SQ_SEL_0;
1085		    pix_g = SQ_SEL_0;
1086		    pix_b = SQ_SEL_0;
1087		}
1088
1089		if (PICT_FORMAT_A(pPict->format) == 0)
1090		    pix_a = SQ_SEL_1;
1091	    }
1092	}
1093    } else {
1094	if (accel_state->component_alpha) {
1095	    if (PICT_FORMAT_A(pPict->format) == 0)
1096		pix_a = SQ_SEL_1;
1097	} else {
1098	    if (PICT_FORMAT_A(pPict->format) == 0) {
1099		pix_r = SQ_SEL_1;
1100		pix_g = SQ_SEL_1;
1101		pix_b = SQ_SEL_1;
1102		pix_a = SQ_SEL_1;
1103	    } else {
1104		pix_r = pix_a;
1105		pix_g = pix_a;
1106		pix_b = pix_a;
1107	    }
1108	}
1109    }
1110
1111    tex_res.dst_sel_x           = pix_r; /* R */
1112    tex_res.dst_sel_y           = pix_g; /* G */
1113    tex_res.dst_sel_z           = pix_b; /* B */
1114    tex_res.dst_sel_w           = pix_a; /* A */
1115
1116    tex_res.base_level          = 0;
1117    tex_res.last_level          = 0;
1118    tex_res.perf_modulation     = 0;
1119    if (accel_state->src_obj[unit].tiling_flags == 0)
1120	tex_res.tile_mode           = 1;
1121    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
1122
1123    tex_samp.id                 = unit;
1124    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1125
1126    switch (repeatType) {
1127    case RepeatNormal:
1128	tex_samp.clamp_x            = SQ_TEX_WRAP;
1129	tex_samp.clamp_y            = SQ_TEX_WRAP;
1130	break;
1131    case RepeatPad:
1132	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1133	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1134	break;
1135    case RepeatReflect:
1136	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1137	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1138	break;
1139    case RepeatNone:
1140	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1141	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1142	break;
1143    default:
1144	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1145    }
1146
1147    switch (pPict->filter) {
1148    case PictFilterNearest:
1149	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1150	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1151	tex_samp.mc_coord_truncate  = 1;
1152	break;
1153    case PictFilterBilinear:
1154	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1155	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1156	break;
1157    default:
1158	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1159    }
1160
1161    tex_samp.clamp_z            = SQ_TEX_WRAP;
1162    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1163    tex_samp.mip_filter         = 0;			/* no mipmap */
1164    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
1165
1166    if (pPict->transform != 0) {
1167	accel_state->is_transform[unit] = TRUE;
1168	accel_state->transform[unit] = pPict->transform;
1169
1170	vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
1171	vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
1172	vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
1173	vs_alu_consts[3] = 1.0 / w;
1174
1175	vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
1176	vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
1177	vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
1178	vs_alu_consts[7] = 1.0 / h;
1179    } else {
1180	accel_state->is_transform[unit] = FALSE;
1181
1182	vs_alu_consts[0] = 1.0;
1183	vs_alu_consts[1] = 0.0;
1184	vs_alu_consts[2] = 0.0;
1185	vs_alu_consts[3] = 1.0 / w;
1186
1187	vs_alu_consts[4] = 0.0;
1188	vs_alu_consts[5] = 1.0;
1189	vs_alu_consts[6] = 0.0;
1190	vs_alu_consts[7] = 1.0 / h;
1191    }
1192
1193    /* VS alu constants */
1194    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
1195			sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
1196
1197    return TRUE;
1198}
1199
1200static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1201			       PicturePtr pDstPicture)
1202{
1203    uint32_t tmp1;
1204    PixmapPtr pSrcPixmap, pDstPixmap;
1205    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1206
1207    /* Check for unsupported compositing operations. */
1208    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1209	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1210
1211    if (!pSrcPicture->pDrawable)
1212	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1213
1214    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1215
1216    max_tex_w = 8192;
1217    max_tex_h = 8192;
1218    max_dst_w = 8192;
1219    max_dst_h = 8192;
1220
1221    if (pSrcPixmap->drawable.width >= max_tex_w ||
1222	pSrcPixmap->drawable.height >= max_tex_h) {
1223	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1224			 pSrcPixmap->drawable.width,
1225			 pSrcPixmap->drawable.height));
1226    }
1227
1228    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1229
1230    if (pDstPixmap->drawable.width >= max_dst_w ||
1231	pDstPixmap->drawable.height >= max_dst_h) {
1232	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1233			 pDstPixmap->drawable.width,
1234			 pDstPixmap->drawable.height));
1235    }
1236
1237    if (pMaskPicture) {
1238	PixmapPtr pMaskPixmap;
1239
1240	if (!pMaskPicture->pDrawable)
1241	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1242
1243	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1244
1245	if (pMaskPixmap->drawable.width >= max_tex_w ||
1246	    pMaskPixmap->drawable.height >= max_tex_h) {
1247	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1248			     pMaskPixmap->drawable.width,
1249			     pMaskPixmap->drawable.height));
1250	}
1251
1252	if (pMaskPicture->componentAlpha) {
1253	    /* Check if it's component alpha that relies on a source alpha and
1254	     * on the source value.  We can only get one of those into the
1255	     * single source value that we get to blend with.
1256	     */
1257	    if (R600BlendOp[op].src_alpha &&
1258		(R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1259		(BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1260		RADEON_FALLBACK(("Component alpha not supported with source "
1261				 "alpha and source value blending.\n"));
1262	    }
1263	}
1264
1265	if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1266	    return FALSE;
1267    }
1268
1269    if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1270	return FALSE;
1271
1272    if (!R600GetDestFormat(pDstPicture, &tmp1))
1273	return FALSE;
1274
1275    return TRUE;
1276
1277}
1278
1279static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1280				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1281				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1282{
1283    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1284    RADEONInfoPtr info = RADEONPTR(pScrn);
1285    struct radeon_accel_state *accel_state = info->accel_state;
1286    uint32_t dst_format;
1287    cb_config_t cb_conf;
1288    shader_config_t vs_conf, ps_conf;
1289    struct r600_accel_object src_obj, mask_obj, dst_obj;
1290
1291    if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8)
1292	return FALSE;
1293
1294#if defined(XF86DRM_MODE)
1295    if (info->cs) {
1296	src_obj.offset = 0;
1297	dst_obj.offset = 0;
1298	src_obj.bo = radeon_get_pixmap_bo(pSrc);
1299	dst_obj.bo = radeon_get_pixmap_bo(pDst);
1300	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1301	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1302	dst_obj.surface = radeon_get_pixmap_surface(pDst);
1303	src_obj.surface = radeon_get_pixmap_surface(pSrc);
1304    } else
1305#endif
1306    {
1307	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1308	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1309	src_obj.bo = NULL;
1310	dst_obj.bo = NULL;
1311    }
1312    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1313    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1314
1315    src_obj.width = pSrc->drawable.width;
1316    src_obj.height = pSrc->drawable.height;
1317    src_obj.bpp = pSrc->drawable.bitsPerPixel;
1318    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1319
1320    dst_obj.width = pDst->drawable.width;
1321    dst_obj.height = pDst->drawable.height;
1322    dst_obj.bpp = pDst->drawable.bitsPerPixel;
1323    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1324
1325    if (pMask) {
1326#if defined(XF86DRM_MODE)
1327	if (info->cs) {
1328	    mask_obj.offset = 0;
1329	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
1330	    mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
1331	    mask_obj.surface = radeon_get_pixmap_surface(pMask);
1332	} else
1333#endif
1334	{
1335	    mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset;
1336	    mask_obj.bo = NULL;
1337	}
1338	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1339
1340	mask_obj.width = pMask->drawable.width;
1341	mask_obj.height = pMask->drawable.height;
1342	mask_obj.bpp = pMask->drawable.bitsPerPixel;
1343	mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1344
1345	if (!R600SetAccelState(pScrn,
1346			       &src_obj,
1347			       &mask_obj,
1348			       &dst_obj,
1349			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1350			       3, 0xffffffff))
1351	    return FALSE;
1352
1353	accel_state->msk_pic = pMaskPicture;
1354	if (pMaskPicture->componentAlpha) {
1355	    accel_state->component_alpha = TRUE;
1356	    if (R600BlendOp[op].src_alpha)
1357		accel_state->src_alpha = TRUE;
1358	    else
1359		accel_state->src_alpha = FALSE;
1360	} else {
1361	    accel_state->component_alpha = FALSE;
1362	    accel_state->src_alpha = FALSE;
1363	}
1364    } else {
1365	if (!R600SetAccelState(pScrn,
1366			       &src_obj,
1367			       NULL,
1368			       &dst_obj,
1369			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1370			       3, 0xffffffff))
1371	    return FALSE;
1372
1373	accel_state->msk_pic = NULL;
1374	accel_state->component_alpha = FALSE;
1375	accel_state->src_alpha = FALSE;
1376    }
1377
1378    if (!R600GetDestFormat(pDstPicture, &dst_format))
1379	return FALSE;
1380
1381    CLEAR (cb_conf);
1382    CLEAR (vs_conf);
1383    CLEAR (ps_conf);
1384
1385    if (pMask)
1386        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1387    else
1388        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1389
1390    radeon_cp_start(pScrn);
1391
1392    r600_set_default_state(pScrn, accel_state->ib);
1393
1394    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1395    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1396    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1397
1398    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
1399        R600IBDiscard(pScrn, accel_state->ib);
1400        return FALSE;
1401    }
1402
1403    if (pMask) {
1404        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
1405            R600IBDiscard(pScrn, accel_state->ib);
1406            return FALSE;
1407        }
1408    } else
1409        accel_state->is_transform[1] = FALSE;
1410
1411    if (pMask) {
1412	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
1413	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
1414    } else {
1415	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
1416	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
1417    }
1418
1419    /* Shader */
1420    vs_conf.shader_addr         = accel_state->vs_mc_addr;
1421    vs_conf.shader_size         = accel_state->vs_size;
1422    vs_conf.num_gprs            = 5;
1423    vs_conf.stack_size          = 1;
1424    vs_conf.bo                  = accel_state->shaders_bo;
1425    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1426
1427    ps_conf.shader_addr         = accel_state->ps_mc_addr;
1428    ps_conf.shader_size         = accel_state->ps_size;
1429    ps_conf.num_gprs            = 3;
1430    ps_conf.stack_size          = 1;
1431    ps_conf.uncached_first_inst = 1;
1432    ps_conf.clamp_consts        = 0;
1433    ps_conf.export_mode         = 2;
1434    ps_conf.bo                  = accel_state->shaders_bo;
1435    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1436
1437    cb_conf.id = 0;
1438    cb_conf.w = accel_state->dst_obj.pitch;
1439    cb_conf.h = accel_state->dst_obj.height;
1440    cb_conf.base = accel_state->dst_obj.offset;
1441    cb_conf.format = dst_format;
1442    cb_conf.bo = accel_state->dst_obj.bo;
1443#ifdef XF86DRM_MODE
1444    cb_conf.surface = accel_state->dst_obj.surface;
1445#endif
1446
1447    switch (pDstPicture->format) {
1448    case PICT_a8r8g8b8:
1449    case PICT_x8r8g8b8:
1450    case PICT_a1r5g5b5:
1451    case PICT_x1r5g5b5:
1452    default:
1453	cb_conf.comp_swap = 1; /* ARGB */
1454	break;
1455    case PICT_a8b8g8r8:
1456    case PICT_x8b8g8r8:
1457	cb_conf.comp_swap = 0; /* ABGR */
1458	break;
1459#ifdef PICT_TYPE_BGRA
1460    case PICT_b8g8r8a8:
1461    case PICT_b8g8r8x8:
1462	cb_conf.comp_swap = 3; /* BGRA */
1463	break;
1464#endif
1465    case PICT_r5g6b5:
1466	cb_conf.comp_swap = 2; /* RGB */
1467	break;
1468    case PICT_a8:
1469	cb_conf.comp_swap = 3; /* A */
1470	break;
1471    }
1472    cb_conf.source_format = 1;
1473    cb_conf.blend_clamp = 1;
1474    cb_conf.blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1475    cb_conf.blend_enable = 1;
1476    cb_conf.pmask = 0xf;
1477    cb_conf.rop = 3;
1478    if (accel_state->dst_obj.tiling_flags == 0)
1479	cb_conf.array_mode = 0;
1480#if X_BYTE_ORDER == X_BIG_ENDIAN
1481    switch (dst_obj.bpp) {
1482    case 16:
1483	cb_conf.endian = ENDIAN_8IN16;
1484	break;
1485    case 32:
1486	cb_conf.endian = ENDIAN_8IN32;
1487	break;
1488    default:
1489	break;
1490    }
1491#endif
1492    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
1493
1494    if (pMask)
1495	r600_set_spi(pScrn, accel_state->ib, (2 - 1), 2);
1496    else
1497	r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
1498
1499    if (accel_state->vsync)
1500	RADEONVlineHelperClear(pScrn);
1501
1502    accel_state->composite_op = op;
1503    accel_state->dst_pic = pDstPicture;
1504    accel_state->src_pic = pSrcPicture;
1505    accel_state->dst_pix = pDst;
1506    accel_state->msk_pix = pMask;
1507    accel_state->src_pix = pSrc;
1508
1509    return TRUE;
1510}
1511
1512static void R600DoneComposite(PixmapPtr pDst)
1513{
1514    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1515    RADEONInfoPtr info = RADEONPTR(pScrn);
1516    struct radeon_accel_state *accel_state = info->accel_state;
1517    int vtx_size;
1518
1519    if (accel_state->vsync)
1520       r600_cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
1521			       accel_state->vline_crtc,
1522			       accel_state->vline_y1,
1523			       accel_state->vline_y2);
1524
1525    vtx_size = accel_state->msk_pic ? 24 : 16;
1526
1527    r600_finish_op(pScrn, vtx_size);
1528}
1529
1530static void R600Composite(PixmapPtr pDst,
1531			  int srcX, int srcY,
1532			  int maskX, int maskY,
1533			  int dstX, int dstY,
1534			  int w, int h)
1535{
1536    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1537    RADEONInfoPtr info = RADEONPTR(pScrn);
1538    struct radeon_accel_state *accel_state = info->accel_state;
1539    float *vb;
1540
1541    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1542       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1543
1544#ifdef XF86DRM_MODE
1545    if (info->cs && CS_FULL(info->cs)) {
1546	R600DoneComposite(info->accel_state->dst_pix);
1547	radeon_cs_flush_indirect(pScrn);
1548	R600PrepareComposite(info->accel_state->composite_op,
1549			     info->accel_state->src_pic,
1550			     info->accel_state->msk_pic,
1551			     info->accel_state->dst_pic,
1552			     info->accel_state->src_pix,
1553			     info->accel_state->msk_pix,
1554			     info->accel_state->dst_pix);
1555    }
1556#endif
1557
1558    if (accel_state->vsync)
1559	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1560
1561    if (accel_state->msk_pic) {
1562
1563	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
1564
1565	vb[0] = (float)dstX;
1566	vb[1] = (float)dstY;
1567	vb[2] = (float)srcX;
1568	vb[3] = (float)srcY;
1569	vb[4] = (float)maskX;
1570	vb[5] = (float)maskY;
1571
1572	vb[6] = (float)dstX;
1573	vb[7] = (float)(dstY + h);
1574	vb[8] = (float)srcX;
1575	vb[9] = (float)(srcY + h);
1576	vb[10] = (float)maskX;
1577	vb[11] = (float)(maskY + h);
1578
1579	vb[12] = (float)(dstX + w);
1580	vb[13] = (float)(dstY + h);
1581	vb[14] = (float)(srcX + w);
1582	vb[15] = (float)(srcY + h);
1583	vb[16] = (float)(maskX + w);
1584	vb[17] = (float)(maskY + h);
1585
1586	radeon_vbo_commit(pScrn, &accel_state->vbo);
1587
1588    } else {
1589
1590	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
1591
1592	vb[0] = (float)dstX;
1593	vb[1] = (float)dstY;
1594	vb[2] = (float)srcX;
1595	vb[3] = (float)srcY;
1596
1597	vb[4] = (float)dstX;
1598	vb[5] = (float)(dstY + h);
1599	vb[6] = (float)srcX;
1600	vb[7] = (float)(srcY + h);
1601
1602	vb[8] = (float)(dstX + w);
1603	vb[9] = (float)(dstY + h);
1604	vb[10] = (float)(srcX + w);
1605	vb[11] = (float)(srcY + h);
1606
1607	radeon_vbo_commit(pScrn, &accel_state->vbo);
1608    }
1609
1610
1611}
1612
1613Bool
1614R600CopyToVRAM(ScrnInfoPtr pScrn,
1615	       char *src, int src_pitch,
1616	       uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_width, uint32_t dst_height, int bpp,
1617	       int x, int y, int w, int h)
1618{
1619    RADEONInfoPtr info = RADEONPTR(pScrn);
1620    struct radeon_accel_state *accel_state = info->accel_state;
1621    uint32_t scratch_mc_addr;
1622    int wpass = w * (bpp/8);
1623    int scratch_pitch_bytes = RADEON_ALIGN(wpass, 256);
1624    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1625    int scratch_offset = 0, hpass, temph;
1626    char *dst;
1627    drmBufPtr scratch;
1628    struct r600_accel_object scratch_obj, dst_obj;
1629
1630    if (dst_pitch & 7)
1631	return FALSE;
1632
1633    if (dst_mc_addr & 0xff)
1634	return FALSE;
1635
1636    scratch = RADEONCPGetBuffer(pScrn);
1637    if (scratch == NULL)
1638	return FALSE;
1639
1640    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1641    temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1642    dst = (char *)scratch->address;
1643
1644    scratch_obj.pitch = scratch_pitch;
1645    scratch_obj.width = w;
1646    scratch_obj.height = hpass;
1647    scratch_obj.offset = scratch_mc_addr;
1648    scratch_obj.bpp = bpp;
1649    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1650    scratch_obj.bo = NULL;
1651
1652    dst_obj.pitch = dst_pitch;
1653    dst_obj.width = dst_width;
1654    dst_obj.height = dst_height;
1655    dst_obj.offset = dst_mc_addr;
1656    dst_obj.bo = NULL;
1657    dst_obj.bpp = bpp;
1658    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1659
1660    if (!R600SetAccelState(pScrn,
1661			   &scratch_obj,
1662			   NULL,
1663			   &dst_obj,
1664			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1665			   3, 0xffffffff))
1666	return FALSE;
1667
1668    /* memcopy from sys to scratch */
1669    while (temph--) {
1670	memcpy (dst, src, wpass);
1671	src += src_pitch;
1672	dst += scratch_pitch_bytes;
1673    }
1674
1675    while (h) {
1676	uint32_t offset = scratch_mc_addr + scratch_offset;
1677	int oldhpass = hpass;
1678	h -= oldhpass;
1679	temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1680
1681	if (hpass) {
1682	    scratch_offset = scratch->total/2 - scratch_offset;
1683	    dst = (char *)scratch->address + scratch_offset;
1684	    /* wait for the engine to be idle */
1685	    RADEONWaitForIdleCP(pScrn);
1686	    //memcopy from sys to scratch
1687	    while (temph--) {
1688		memcpy (dst, src, wpass);
1689		src += src_pitch;
1690		dst += scratch_pitch_bytes;
1691	    }
1692	}
1693	/* blit from scratch to vram */
1694	info->accel_state->src_obj[0].height = oldhpass;
1695	info->accel_state->src_obj[0].offset = offset;
1696	R600DoPrepareCopy(pScrn);
1697	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
1698	R600DoCopy(pScrn);
1699	y += oldhpass;
1700    }
1701
1702    R600IBDiscard(pScrn, scratch);
1703
1704    return TRUE;
1705}
1706
1707static Bool
1708R600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1709		   char *src, int src_pitch)
1710{
1711    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1712    RADEONInfoPtr info = RADEONPTR(pScrn);
1713    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1714    uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1715    int bpp = pDst->drawable.bitsPerPixel;
1716
1717    return R600CopyToVRAM(pScrn,
1718			  src, src_pitch,
1719			  dst_pitch, dst_mc_addr, pDst->drawable.width, pDst->drawable.height, bpp,
1720			  x, y, w, h);
1721}
1722
1723static Bool
1724R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
1725		       char *dst, int dst_pitch)
1726{
1727    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1728    RADEONInfoPtr info = RADEONPTR(pScrn);
1729    struct radeon_accel_state *accel_state = info->accel_state;
1730    uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1731    uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1732    uint32_t src_width = pSrc->drawable.width;
1733    uint32_t src_height = pSrc->drawable.height;
1734    int bpp = pSrc->drawable.bitsPerPixel;
1735    uint32_t scratch_mc_addr;
1736    int scratch_pitch_bytes = RADEON_ALIGN(dst_pitch, 256);
1737    int scratch_offset = 0, hpass;
1738    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1739    int wpass = w * (bpp/8);
1740    drmBufPtr scratch;
1741    struct r600_accel_object scratch_obj, src_obj;
1742
1743    /* bad pipe setup in drm prior to 1.32 */
1744    if (info->dri->pKernelDRMVersion->version_minor < 32) {
1745	    if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32))
1746		    return FALSE;
1747    }
1748
1749    if (src_pitch & 7)
1750	return FALSE;
1751
1752    scratch = RADEONCPGetBuffer(pScrn);
1753    if (scratch == NULL)
1754	return FALSE;
1755
1756    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1757    hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1758
1759    src_obj.pitch = src_pitch;
1760    src_obj.width = src_width;
1761    src_obj.height = src_height;
1762    src_obj.offset = src_mc_addr;
1763    src_obj.bo = NULL;
1764    src_obj.bpp = bpp;
1765    src_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1766
1767    scratch_obj.pitch = scratch_pitch;
1768    scratch_obj.width = src_width;
1769    scratch_obj.height = hpass;
1770    scratch_obj.offset = scratch_mc_addr;
1771    scratch_obj.bpp = bpp;
1772    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1773    scratch_obj.bo = NULL;
1774
1775    if (!R600SetAccelState(pScrn,
1776			   &src_obj,
1777			   NULL,
1778			   &scratch_obj,
1779			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1780			   3, 0xffffffff))
1781	return FALSE;
1782
1783    /* blit from vram to scratch */
1784    R600DoPrepareCopy(pScrn);
1785    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1786    R600DoCopy(pScrn);
1787
1788    while (h) {
1789	char *src = (char *)scratch->address + scratch_offset;
1790	int oldhpass = hpass;
1791	h -= oldhpass;
1792	y += oldhpass;
1793	hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1794
1795	if (hpass) {
1796	    scratch_offset = scratch->total/2 - scratch_offset;
1797	    /* blit from vram to scratch */
1798	    info->accel_state->dst_obj.height = hpass;
1799	    info->accel_state->dst_obj.offset = scratch_mc_addr + scratch_offset;
1800	    R600DoPrepareCopy(pScrn);
1801	    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1802	    R600DoCopy(pScrn);
1803	}
1804
1805	/* wait for the engine to be idle */
1806	RADEONWaitForIdleCP(pScrn);
1807	/* memcopy from scratch to sys */
1808	while (oldhpass--) {
1809	    memcpy (dst, src, wpass);
1810	    dst += dst_pitch;
1811	    src += scratch_pitch_bytes;
1812	}
1813    }
1814
1815    R600IBDiscard(pScrn, scratch);
1816
1817    return TRUE;
1818
1819}
1820
1821#if defined(XF86DRM_MODE)
1822
1823static Bool
1824R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
1825		     char *src, int src_pitch)
1826{
1827    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1828    RADEONInfoPtr info = RADEONPTR(pScrn);
1829    struct radeon_accel_state *accel_state = info->accel_state;
1830    struct radeon_exa_pixmap_priv *driver_priv;
1831    struct radeon_bo *scratch = NULL;
1832    struct radeon_bo *copy_dst;
1833    unsigned char *dst;
1834    unsigned size;
1835    uint32_t dst_domain;
1836    int bpp = pDst->drawable.bitsPerPixel;
1837    uint32_t scratch_pitch;
1838    uint32_t copy_pitch;
1839    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
1840    int ret;
1841    Bool flush = TRUE;
1842    Bool r;
1843    int i;
1844    struct r600_accel_object src_obj, dst_obj;
1845    uint32_t height, base_align;
1846
1847    if (bpp < 8)
1848	return FALSE;
1849
1850    driver_priv = exaGetPixmapDriverPrivate(pDst);
1851    if (!driver_priv || !driver_priv->bo)
1852	return FALSE;
1853
1854    /* If we know the BO won't be busy, don't bother with a scratch */
1855    copy_dst = driver_priv->bo;
1856    copy_pitch = pDst->devKind;
1857    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1858	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1859	    flush = FALSE;
1860	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
1861		goto copy;
1862	}
1863    }
1864
1865    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1866    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1867    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1868    size = scratch_pitch * height * (bpp / 8);
1869    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1870    if (scratch == NULL) {
1871	goto copy;
1872    }
1873
1874    src_obj.pitch = scratch_pitch;
1875    src_obj.width = w;
1876    src_obj.height = h;
1877    src_obj.offset = 0;
1878    src_obj.bpp = bpp;
1879    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1880    src_obj.bo = scratch;
1881    src_obj.tiling_flags = 0;
1882#ifdef XF86DRM_MODE
1883    src_obj.surface = NULL;
1884#endif
1885
1886    dst_obj.pitch = dst_pitch_hw;
1887    dst_obj.width = pDst->drawable.width;
1888    dst_obj.height = pDst->drawable.height;
1889    dst_obj.offset = 0;
1890    dst_obj.bpp = bpp;
1891    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1892    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1893    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1894#ifdef XF86DRM_MODE
1895    dst_obj.surface = radeon_get_pixmap_surface(pDst);
1896#endif
1897
1898    if (!R600SetAccelState(pScrn,
1899			   &src_obj,
1900			   NULL,
1901			   &dst_obj,
1902			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1903			   3, 0xffffffff)) {
1904        goto copy;
1905    }
1906    copy_dst = scratch;
1907    copy_pitch = scratch_pitch * (bpp / 8);
1908    flush = FALSE;
1909
1910copy:
1911    if (flush)
1912	radeon_cs_flush_indirect(pScrn);
1913
1914    ret = radeon_bo_map(copy_dst, 0);
1915    if (ret) {
1916        r = FALSE;
1917        goto out;
1918    }
1919    r = TRUE;
1920    size = w * bpp / 8;
1921    dst = copy_dst->ptr;
1922    if (copy_dst == driver_priv->bo)
1923	dst += y * copy_pitch + x * bpp / 8;
1924    for (i = 0; i < h; i++) {
1925        memcpy(dst + i * copy_pitch, src, size);
1926        src += src_pitch;
1927    }
1928    radeon_bo_unmap(copy_dst);
1929
1930    if (copy_dst == scratch) {
1931	if (info->accel_state->vsync)
1932	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1933
1934	/* blit from gart to vram */
1935	R600DoPrepareCopy(pScrn);
1936	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
1937	R600DoCopyVline(pDst);
1938    }
1939
1940out:
1941    if (scratch)
1942	radeon_bo_unref(scratch);
1943    return r;
1944}
1945
1946static Bool
1947R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
1948			 int h, char *dst, int dst_pitch)
1949{
1950    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1951    RADEONInfoPtr info = RADEONPTR(pScrn);
1952    struct radeon_accel_state *accel_state = info->accel_state;
1953    struct radeon_exa_pixmap_priv *driver_priv;
1954    struct radeon_bo *scratch = NULL;
1955    struct radeon_bo *copy_src;
1956    unsigned size;
1957    uint32_t src_domain = 0;
1958    int bpp = pSrc->drawable.bitsPerPixel;
1959    uint32_t scratch_pitch;
1960    uint32_t copy_pitch;
1961    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
1962    int ret;
1963    Bool flush = FALSE;
1964    Bool r;
1965    struct r600_accel_object src_obj, dst_obj;
1966    uint32_t height, base_align;
1967
1968    if (bpp < 8)
1969	return FALSE;
1970
1971    driver_priv = exaGetPixmapDriverPrivate(pSrc);
1972    if (!driver_priv || !driver_priv->bo)
1973	return FALSE;
1974
1975    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
1976    copy_src = driver_priv->bo;
1977    copy_pitch = pSrc->devKind;
1978    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1979	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1980	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
1981	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
1982		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
1983		src_domain = 0;
1984	    else /* A write may be scheduled */
1985		flush = TRUE;
1986	}
1987
1988	if (!src_domain)
1989	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
1990
1991	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
1992	    goto copy;
1993    }
1994
1995    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1996    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1997    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1998    size = scratch_pitch * height * (bpp / 8);
1999    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
2000    if (scratch == NULL) {
2001	goto copy;
2002    }
2003    radeon_cs_space_reset_bos(info->cs);
2004    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
2005				      RADEON_GEM_DOMAIN_VRAM, 0);
2006    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
2007    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
2008    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
2009    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
2010    ret = radeon_cs_space_check(info->cs);
2011    if (ret) {
2012        goto copy;
2013    }
2014
2015    src_obj.pitch = src_pitch_hw;
2016    src_obj.width = pSrc->drawable.width;
2017    src_obj.height = pSrc->drawable.height;
2018    src_obj.offset = 0;
2019    src_obj.bpp = bpp;
2020    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
2021    src_obj.bo = radeon_get_pixmap_bo(pSrc);
2022    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
2023#ifdef XF86DRM_MODE
2024    src_obj.surface = radeon_get_pixmap_surface(pSrc);
2025#endif
2026
2027    dst_obj.pitch = scratch_pitch;
2028    dst_obj.width = w;
2029    dst_obj.height = h;
2030    dst_obj.offset = 0;
2031    dst_obj.bo = scratch;
2032    dst_obj.bpp = bpp;
2033    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
2034    dst_obj.tiling_flags = 0;
2035#ifdef XF86DRM_MODE
2036    dst_obj.surface = NULL;
2037#endif
2038
2039    if (!R600SetAccelState(pScrn,
2040			   &src_obj,
2041			   NULL,
2042			   &dst_obj,
2043			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
2044			   3, 0xffffffff)) {
2045        goto copy;
2046    }
2047
2048    /* blit from vram to gart */
2049    R600DoPrepareCopy(pScrn);
2050    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
2051    R600DoCopy(pScrn);
2052    copy_src = scratch;
2053    copy_pitch = scratch_pitch * (bpp / 8);
2054    flush = TRUE;
2055
2056copy:
2057    if (flush && info->cs)
2058	radeon_cs_flush_indirect(pScrn);
2059
2060    ret = radeon_bo_map(copy_src, 0);
2061    if (ret) {
2062	ErrorF("failed to map pixmap: %d\n", ret);
2063        r = FALSE;
2064        goto out;
2065    }
2066    r = TRUE;
2067    w *= bpp / 8;
2068    if (copy_src == driver_priv->bo)
2069	size = y * copy_pitch + x * bpp / 8;
2070    else
2071	size = 0;
2072    while (h--) {
2073        memcpy(dst, copy_src->ptr + size, w);
2074        size += copy_pitch;
2075        dst += dst_pitch;
2076    }
2077    radeon_bo_unmap(copy_src);
2078out:
2079    if (scratch)
2080	radeon_bo_unref(scratch);
2081    return r;
2082}
2083#endif
2084
2085static int
2086R600MarkSync(ScreenPtr pScreen)
2087{
2088    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
2089    RADEONInfoPtr info = RADEONPTR(pScrn);
2090    struct radeon_accel_state *accel_state = info->accel_state;
2091
2092    return ++accel_state->exaSyncMarker;
2093
2094}
2095
2096static void
2097R600Sync(ScreenPtr pScreen, int marker)
2098{
2099    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
2100    RADEONInfoPtr info = RADEONPTR(pScrn);
2101    struct radeon_accel_state *accel_state = info->accel_state;
2102
2103    if (accel_state->exaMarkerSynced != marker) {
2104#ifdef XF86DRM_MODE
2105#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2106	if (!info->cs)
2107#endif
2108#endif
2109	    RADEONWaitForIdleCP(pScrn);
2110	accel_state->exaMarkerSynced = marker;
2111    }
2112
2113}
2114
2115static Bool
2116R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
2117{
2118    RADEONInfoPtr info = RADEONPTR(pScrn);
2119    struct radeon_accel_state *accel_state = info->accel_state;
2120
2121    /* 512 bytes per shader for now */
2122    int size = 512 * 9;
2123
2124    accel_state->shaders = NULL;
2125
2126#ifdef XF86DRM_MODE
2127#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2128    if (info->cs) {
2129	accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
2130						 RADEON_GEM_DOMAIN_VRAM, 0);
2131	if (accel_state->shaders_bo == NULL) {
2132	    ErrorF("Allocating shader failed\n");
2133	    return FALSE;
2134	}
2135	return TRUE;
2136    } else
2137#endif
2138#endif
2139    {
2140	accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
2141						 TRUE, NULL, NULL);
2142
2143	if (accel_state->shaders == NULL)
2144	    return FALSE;
2145    }
2146
2147    return TRUE;
2148}
2149
2150Bool
2151R600LoadShaders(ScrnInfoPtr pScrn)
2152{
2153    RADEONInfoPtr info = RADEONPTR(pScrn);
2154    struct radeon_accel_state *accel_state = info->accel_state;
2155    RADEONChipFamily ChipSet = info->ChipFamily;
2156    uint32_t *shader;
2157#ifdef XF86DRM_MODE
2158#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2159    int ret;
2160
2161    if (info->cs) {
2162	ret = radeon_bo_map(accel_state->shaders_bo, 1);
2163	if (ret) {
2164	    FatalError("failed to map shader %d\n", ret);
2165	    return FALSE;
2166	}
2167	shader = accel_state->shaders_bo->ptr;
2168    } else
2169#endif
2170#endif
2171	shader = (pointer)((char *)info->FB + accel_state->shaders->offset);
2172
2173    /*  solid vs --------------------------------------- */
2174    accel_state->solid_vs_offset = 0;
2175    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
2176
2177    /*  solid ps --------------------------------------- */
2178    accel_state->solid_ps_offset = 512;
2179    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
2180
2181    /*  copy vs --------------------------------------- */
2182    accel_state->copy_vs_offset = 1024;
2183    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
2184
2185    /*  copy ps --------------------------------------- */
2186    accel_state->copy_ps_offset = 1536;
2187    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2188
2189    /*  comp vs --------------------------------------- */
2190    accel_state->comp_vs_offset = 2048;
2191    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2192
2193    /*  comp ps --------------------------------------- */
2194    accel_state->comp_ps_offset = 2560;
2195    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2196
2197    /*  xv vs --------------------------------------- */
2198    accel_state->xv_vs_offset = 3072;
2199    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2200
2201    /*  xv ps --------------------------------------- */
2202    accel_state->xv_ps_offset = 3584;
2203    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2204
2205#ifdef XF86DRM_MODE
2206#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2207    if (info->cs) {
2208	radeon_bo_unmap(accel_state->shaders_bo);
2209    }
2210#endif
2211#endif
2212
2213    return TRUE;
2214}
2215
2216static Bool
2217R600PrepareAccess(PixmapPtr pPix, int index)
2218{
2219    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2220    RADEONInfoPtr info = RADEONPTR(pScrn);
2221    unsigned char *RADEONMMIO = info->MMIO;
2222
2223    /* flush HDP read/write caches */
2224    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2225
2226    return TRUE;
2227}
2228
2229static void
2230R600FinishAccess(PixmapPtr pPix, int index)
2231{
2232    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2233    RADEONInfoPtr info = RADEONPTR(pScrn);
2234    unsigned char *RADEONMMIO = info->MMIO;
2235
2236    /* flush HDP read/write caches */
2237    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2238
2239}
2240
2241Bool
2242R600DrawInit(ScreenPtr pScreen)
2243{
2244    ScrnInfoPtr pScrn =  xf86Screens[pScreen->myNum];
2245    RADEONInfoPtr info   = RADEONPTR(pScrn);
2246
2247    if (info->accel_state->exa == NULL) {
2248	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2249	return FALSE;
2250    }
2251
2252    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2253    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2254
2255    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2256    info->accel_state->exa->Solid = R600Solid;
2257    info->accel_state->exa->DoneSolid = R600DoneSolid;
2258
2259    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2260    info->accel_state->exa->Copy = R600Copy;
2261    info->accel_state->exa->DoneCopy = R600DoneCopy;
2262
2263    info->accel_state->exa->MarkSync = R600MarkSync;
2264    info->accel_state->exa->WaitMarker = R600Sync;
2265
2266#ifdef XF86DRM_MODE
2267#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2268    if (info->cs) {
2269	info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
2270	info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
2271	info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
2272	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
2273	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
2274	info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
2275	info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
2276#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5)
2277        info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
2278#endif
2279    } else
2280#endif
2281#endif
2282    {
2283	info->accel_state->exa->PrepareAccess = R600PrepareAccess;
2284	info->accel_state->exa->FinishAccess = R600FinishAccess;
2285
2286	/* AGP seems to have problems with gart transfers */
2287	if (info->accelDFS) {
2288	    info->accel_state->exa->UploadToScreen = R600UploadToScreen;
2289	    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen;
2290	}
2291    }
2292
2293    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
2294#ifdef EXA_SUPPORTS_PREPARE_AUX
2295    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
2296#endif
2297
2298#ifdef XF86DRM_MODE
2299#ifdef EXA_HANDLES_PIXMAPS
2300    if (info->cs) {
2301	info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
2302#ifdef EXA_MIXED_PIXMAPS
2303	info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
2304#endif
2305    }
2306#endif
2307#endif
2308    info->accel_state->exa->pixmapOffsetAlign = 256;
2309    info->accel_state->exa->pixmapPitchAlign = 256;
2310
2311    info->accel_state->exa->CheckComposite = R600CheckComposite;
2312    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2313    info->accel_state->exa->Composite = R600Composite;
2314    info->accel_state->exa->DoneComposite = R600DoneComposite;
2315
2316#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
2317    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
2318
2319    info->accel_state->exa->maxPitchBytes = 32768;
2320    info->accel_state->exa->maxX = 8192;
2321#else
2322    info->accel_state->exa->maxX = 8192;
2323#endif
2324    info->accel_state->exa->maxY = 8192;
2325
2326    /* not supported yet */
2327    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2328	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2329	info->accel_state->vsync = TRUE;
2330    } else
2331	info->accel_state->vsync = FALSE;
2332
2333    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
2334	free(info->accel_state->exa);
2335	return FALSE;
2336    }
2337
2338#ifdef XF86DRM_MODE
2339#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2340    if (!info->cs)
2341#endif
2342#endif
2343	if (!info->gartLocation)
2344	    return FALSE;
2345
2346    info->accel_state->XInited3D = FALSE;
2347    info->accel_state->copy_area = NULL;
2348    info->accel_state->src_obj[0].bo = NULL;
2349    info->accel_state->src_obj[1].bo = NULL;
2350    info->accel_state->dst_obj.bo = NULL;
2351    info->accel_state->copy_area_bo = NULL;
2352    info->accel_state->vbo.vb_start_op = -1;
2353    info->accel_state->finish_op = r600_finish_op;
2354    info->accel_state->vbo.verts_per_op = 3;
2355    RADEONVlineHelperClear(pScrn);
2356
2357#ifdef XF86DRM_MODE
2358    radeon_vbo_init_lists(pScrn);
2359#endif
2360
2361    if (!R600AllocShaders(pScrn, pScreen))
2362	return FALSE;
2363
2364    if (!R600LoadShaders(pScrn))
2365	return FALSE;
2366
2367    exaMarkSync(pScreen);
2368
2369    return TRUE;
2370
2371}
2372
2373