r600_exa.c revision 30d12090
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "exa.h"
34
35#include "radeon.h"
36#include "radeon_macros.h"
37#include "radeon_reg.h"
38#include "r600_shader.h"
39#include "r600_reg.h"
40#include "r600_state.h"
41#include "radeon_exa_shared.h"
42#include "radeon_vbo.h"
43
44/* #define SHOW_VERTEXES */
45
46Bool
47R600SetAccelState(ScrnInfoPtr pScrn,
48		  struct r600_accel_object *src0,
49		  struct r600_accel_object *src1,
50		  struct r600_accel_object *dst,
51		  uint32_t vs_offset, uint32_t ps_offset,
52		  int rop, Pixel planemask)
53{
54    RADEONInfoPtr info = RADEONPTR(pScrn);
55    struct radeon_accel_state *accel_state = info->accel_state;
56    uint32_t pitch = 0;
57    uint32_t pitch_align = 0x7, base_align = 0xff;
58#if defined(XF86DRM_MODE)
59    int ret;
60#endif
61
62    if (src0) {
63	memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object));
64	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
65#if defined(XF86DRM_MODE)
66	if (info->cs) {
67	    ret = radeon_bo_get_tiling(accel_state->src_obj[0].bo,
68				       &accel_state->src_obj[0].tiling_flags,
69				       &pitch);
70	    if (ret)
71		RADEON_FALLBACK(("src0 radeon_bo_get_tiling failed\n"));
72	    pitch_align = drmmode_get_pitch_align(pScrn,
73						  accel_state->src_obj[0].bpp / 8,
74						  accel_state->src_obj[0].tiling_flags) - 1;
75	    base_align = drmmode_get_base_align(pScrn,
76						accel_state->src_obj[0].bpp / 8,
77						accel_state->src_obj[0].tiling_flags) - 1;
78	}
79#endif
80	/* bad pitch */
81	if (accel_state->src_obj[0].pitch & pitch_align)
82	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
83
84	/* bad offset */
85	if (accel_state->src_obj[0].offset & base_align)
86	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[0].offset));
87
88    } else {
89	memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
90	accel_state->src_size[0] = 0;
91    }
92
93    if (src1) {
94	memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object));
95	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
96#if defined(XF86DRM_MODE)
97	if (info->cs) {
98	    ret = radeon_bo_get_tiling(accel_state->src_obj[1].bo,
99				       &accel_state->src_obj[1].tiling_flags,
100				       &pitch);
101	    if (ret)
102		RADEON_FALLBACK(("src1 radeon_bo_get_tiling failed\n"));
103	    pitch_align = drmmode_get_pitch_align(pScrn,
104						  accel_state->src_obj[1].bpp / 8,
105						  accel_state->src_obj[1].tiling_flags) - 1;
106	    base_align = drmmode_get_base_align(pScrn,
107						accel_state->src_obj[1].bpp / 8,
108						accel_state->src_obj[1].tiling_flags) - 1;
109	}
110#endif
111	/* bad pitch */
112	if (accel_state->src_obj[1].pitch & pitch_align)
113	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
114
115	/* bad offset */
116	if (accel_state->src_obj[1].offset & base_align)
117	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[1].offset));
118    } else {
119	memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
120	accel_state->src_size[1] = 0;
121    }
122
123    if (dst) {
124	memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object));
125	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
126#if defined(XF86DRM_MODE)
127	if (info->cs) {
128	    ret = radeon_bo_get_tiling(accel_state->dst_obj.bo,
129				       &accel_state->dst_obj.tiling_flags,
130				       &pitch);
131	    if (ret)
132		RADEON_FALLBACK(("dst radeon_bo_get_tiling failed\n"));
133	    pitch_align = drmmode_get_pitch_align(pScrn,
134						  accel_state->dst_obj.bpp / 8,
135						  accel_state->dst_obj.tiling_flags) - 1;
136	    base_align = drmmode_get_base_align(pScrn,
137						accel_state->dst_obj.bpp / 8,
138						accel_state->dst_obj.tiling_flags) - 1;
139	}
140#endif
141	if (accel_state->dst_obj.pitch & pitch_align)
142	    RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
143
144	if (accel_state->dst_obj.offset & base_align)
145	    RADEON_FALLBACK(("Bad dst offset 0x%08x\n", accel_state->dst_obj.offset));
146    } else {
147	memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
148	accel_state->dst_size = 0;
149    }
150
151    accel_state->rop = rop;
152    accel_state->planemask = planemask;
153
154    accel_state->vs_size = 512;
155    accel_state->ps_size = 512;
156#if defined(XF86DRM_MODE)
157    if (info->cs) {
158	accel_state->vs_mc_addr = vs_offset;
159	accel_state->ps_mc_addr = ps_offset;
160
161	radeon_cs_space_reset_bos(info->cs);
162	radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
163					  RADEON_GEM_DOMAIN_VRAM, 0);
164	if (accel_state->src_obj[0].bo)
165	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
166					      accel_state->src_obj[0].domain, 0);
167	if (accel_state->src_obj[1].bo)
168	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
169					      accel_state->src_obj[1].domain, 0);
170	if (accel_state->dst_obj.bo)
171	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
172					      0, accel_state->dst_obj.domain);
173	ret = radeon_cs_space_check(info->cs);
174	if (ret)
175	    RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
176
177    } else
178#endif
179    {
180	accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
181	    vs_offset;
182	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
183	    ps_offset;
184    }
185
186    return TRUE;
187}
188
189static void
190R600DoneSolid(PixmapPtr pPix);
191
192static Bool
193R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
194{
195    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
196    RADEONInfoPtr info = RADEONPTR(pScrn);
197    struct radeon_accel_state *accel_state = info->accel_state;
198    cb_config_t     cb_conf;
199    shader_config_t vs_conf, ps_conf;
200    uint32_t a, r, g, b;
201    float ps_alu_consts[4];
202    struct r600_accel_object dst;
203
204    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
205	RADEON_FALLBACK(("R600CheckDatatype failed\n"));
206    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
207	RADEON_FALLBACK(("invalid planemask\n"));
208
209#if defined(XF86DRM_MODE)
210    if (info->cs) {
211	dst.offset = 0;
212	dst.bo = radeon_get_pixmap_bo(pPix);
213    } else
214#endif
215    {
216	dst.offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
217	dst.bo = NULL;
218    }
219
220    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
221    dst.width = pPix->drawable.width;
222    dst.height = pPix->drawable.height;
223    dst.bpp = pPix->drawable.bitsPerPixel;
224    dst.domain = RADEON_GEM_DOMAIN_VRAM;
225
226    if (!R600SetAccelState(pScrn,
227			   NULL,
228			   NULL,
229			   &dst,
230			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
231			   alu, pm))
232	return FALSE;
233
234    CLEAR (cb_conf);
235    CLEAR (vs_conf);
236    CLEAR (ps_conf);
237
238    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
239    radeon_cp_start(pScrn);
240
241    r600_set_default_state(pScrn, accel_state->ib);
242
243    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
244    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
245    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
246
247    /* Shader */
248    vs_conf.shader_addr         = accel_state->vs_mc_addr;
249    vs_conf.shader_size         = accel_state->vs_size;
250    vs_conf.num_gprs            = 2;
251    vs_conf.stack_size          = 0;
252    vs_conf.bo                  = accel_state->shaders_bo;
253    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
254
255    ps_conf.shader_addr         = accel_state->ps_mc_addr;
256    ps_conf.shader_size         = accel_state->ps_size;
257    ps_conf.num_gprs            = 1;
258    ps_conf.stack_size          = 0;
259    ps_conf.uncached_first_inst = 1;
260    ps_conf.clamp_consts        = 0;
261    ps_conf.export_mode         = 2;
262    ps_conf.bo                  = accel_state->shaders_bo;
263    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
264
265    cb_conf.id = 0;
266    cb_conf.w = accel_state->dst_obj.pitch;
267    cb_conf.h = accel_state->dst_obj.height;
268    cb_conf.base = accel_state->dst_obj.offset;
269    cb_conf.bo = accel_state->dst_obj.bo;
270
271    if (accel_state->dst_obj.bpp == 8) {
272	cb_conf.format = COLOR_8;
273	cb_conf.comp_swap = 3; /* A */
274    } else if (accel_state->dst_obj.bpp == 16) {
275	cb_conf.format = COLOR_5_6_5;
276	cb_conf.comp_swap = 2; /* RGB */
277#if X_BYTE_ORDER == X_BIG_ENDIAN
278	cb_conf.endian = ENDIAN_8IN16;
279#endif
280    } else {
281	cb_conf.format = COLOR_8_8_8_8;
282	cb_conf.comp_swap = 1; /* ARGB */
283#if X_BYTE_ORDER == X_BIG_ENDIAN
284	cb_conf.endian = ENDIAN_8IN32;
285#endif
286    }
287    cb_conf.source_format = 1;
288    cb_conf.blend_clamp = 1;
289    /* Render setup */
290    if (accel_state->planemask & 0x000000ff)
291	cb_conf.pmask |= 4; /* B */
292    if (accel_state->planemask & 0x0000ff00)
293	cb_conf.pmask |= 2; /* G */
294    if (accel_state->planemask & 0x00ff0000)
295	cb_conf.pmask |= 1; /* R */
296    if (accel_state->planemask & 0xff000000)
297	cb_conf.pmask |= 8; /* A */
298    cb_conf.rop = accel_state->rop;
299    if (accel_state->dst_obj.tiling_flags == 0)
300	cb_conf.array_mode = 1;
301    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
302
303    r600_set_spi(pScrn, accel_state->ib, 0, 0);
304
305    /* PS alu constants */
306    if (accel_state->dst_obj.bpp == 16) {
307	r = (fg >> 11) & 0x1f;
308	g = (fg >> 5) & 0x3f;
309	b = (fg >> 0) & 0x1f;
310	ps_alu_consts[0] = (float)r / 31; /* R */
311	ps_alu_consts[1] = (float)g / 63; /* G */
312	ps_alu_consts[2] = (float)b / 31; /* B */
313	ps_alu_consts[3] = 1.0; /* A */
314    } else if (accel_state->dst_obj.bpp == 8) {
315	a = (fg >> 0) & 0xff;
316	ps_alu_consts[0] = 0.0; /* R */
317	ps_alu_consts[1] = 0.0; /* G */
318	ps_alu_consts[2] = 0.0; /* B */
319	ps_alu_consts[3] = (float)a / 255; /* A */
320    } else {
321	a = (fg >> 24) & 0xff;
322	r = (fg >> 16) & 0xff;
323	g = (fg >> 8) & 0xff;
324	b = (fg >> 0) & 0xff;
325	ps_alu_consts[0] = (float)r / 255; /* R */
326	ps_alu_consts[1] = (float)g / 255; /* G */
327	ps_alu_consts[2] = (float)b / 255; /* B */
328	ps_alu_consts[3] = (float)a / 255; /* A */
329    }
330    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
331			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
332
333    if (accel_state->vsync)
334	RADEONVlineHelperClear(pScrn);
335
336    return TRUE;
337}
338
339
340static void
341R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
342{
343    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
344    RADEONInfoPtr info = RADEONPTR(pScrn);
345    struct radeon_accel_state *accel_state = info->accel_state;
346    float *vb;
347
348    if (accel_state->vsync)
349	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
350
351    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
352
353    vb[0] = (float)x1;
354    vb[1] = (float)y1;
355
356    vb[2] = (float)x1;
357    vb[3] = (float)y2;
358
359    vb[4] = (float)x2;
360    vb[5] = (float)y2;
361
362    radeon_vbo_commit(pScrn, &accel_state->vbo);
363}
364
365static void
366R600DoneSolid(PixmapPtr pPix)
367{
368    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
369    RADEONInfoPtr info = RADEONPTR(pScrn);
370    struct radeon_accel_state *accel_state = info->accel_state;
371
372    if (accel_state->vsync)
373	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
374				accel_state->vline_crtc,
375				accel_state->vline_y1,
376				accel_state->vline_y2);
377
378    r600_finish_op(pScrn, 8);
379}
380
381static void
382R600DoPrepareCopy(ScrnInfoPtr pScrn)
383{
384    RADEONInfoPtr info = RADEONPTR(pScrn);
385    struct radeon_accel_state *accel_state = info->accel_state;
386    cb_config_t     cb_conf;
387    tex_resource_t  tex_res;
388    tex_sampler_t   tex_samp;
389    shader_config_t vs_conf, ps_conf;
390
391    CLEAR (cb_conf);
392    CLEAR (tex_res);
393    CLEAR (tex_samp);
394    CLEAR (vs_conf);
395    CLEAR (ps_conf);
396
397    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
398    radeon_cp_start(pScrn);
399
400    r600_set_default_state(pScrn, accel_state->ib);
401
402    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
403    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
404    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
405
406    /* Shader */
407    vs_conf.shader_addr         = accel_state->vs_mc_addr;
408    vs_conf.shader_size         = accel_state->vs_size;
409    vs_conf.num_gprs            = 2;
410    vs_conf.stack_size          = 0;
411    vs_conf.bo                  = accel_state->shaders_bo;
412    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
413
414    ps_conf.shader_addr         = accel_state->ps_mc_addr;
415    ps_conf.shader_size         = accel_state->ps_size;
416    ps_conf.num_gprs            = 1;
417    ps_conf.stack_size          = 0;
418    ps_conf.uncached_first_inst = 1;
419    ps_conf.clamp_consts        = 0;
420    ps_conf.export_mode         = 2;
421    ps_conf.bo                  = accel_state->shaders_bo;
422    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
423
424    /* Texture */
425    tex_res.id                  = 0;
426    tex_res.w                   = accel_state->src_obj[0].width;
427    tex_res.h                   = accel_state->src_obj[0].height;
428    tex_res.pitch               = accel_state->src_obj[0].pitch;
429    tex_res.depth               = 0;
430    tex_res.dim                 = SQ_TEX_DIM_2D;
431    tex_res.base                = accel_state->src_obj[0].offset;
432    tex_res.mip_base            = accel_state->src_obj[0].offset;
433    tex_res.size                = accel_state->src_size[0];
434    tex_res.bo                  = accel_state->src_obj[0].bo;
435    tex_res.mip_bo              = accel_state->src_obj[0].bo;
436    if (accel_state->src_obj[0].bpp == 8) {
437	tex_res.format              = FMT_8;
438	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
439	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
440	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
441	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
442    } else if (accel_state->src_obj[0].bpp == 16) {
443	tex_res.format              = FMT_5_6_5;
444	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
445	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
446	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
447	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
448    } else {
449	tex_res.format              = FMT_8_8_8_8;
450	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
451	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
452	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
453	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
454    }
455
456    tex_res.request_size        = 1;
457    tex_res.base_level          = 0;
458    tex_res.last_level          = 0;
459    tex_res.perf_modulation     = 0;
460    if (accel_state->src_obj[0].tiling_flags == 0)
461	tex_res.tile_mode           = 1;
462    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
463
464    tex_samp.id                 = 0;
465    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
466    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
467    tex_samp.clamp_z            = SQ_TEX_WRAP;
468    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
469    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
470    tex_samp.mc_coord_truncate  = 1;
471    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
472    tex_samp.mip_filter         = 0;			/* no mipmap */
473    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
474
475    cb_conf.id = 0;
476    cb_conf.w = accel_state->dst_obj.pitch;
477    cb_conf.h = accel_state->dst_obj.height;
478    cb_conf.base = accel_state->dst_obj.offset;
479    cb_conf.bo = accel_state->dst_obj.bo;
480    if (accel_state->dst_obj.bpp == 8) {
481	cb_conf.format = COLOR_8;
482	cb_conf.comp_swap = 3; /* A */
483    } else if (accel_state->dst_obj.bpp == 16) {
484	cb_conf.format = COLOR_5_6_5;
485	cb_conf.comp_swap = 2; /* RGB */
486    } else {
487	cb_conf.format = COLOR_8_8_8_8;
488	cb_conf.comp_swap = 1; /* ARGB */
489    }
490    cb_conf.source_format = 1;
491    cb_conf.blend_clamp = 1;
492
493    /* Render setup */
494    if (accel_state->planemask & 0x000000ff)
495	cb_conf.pmask |= 4; /* B */
496    if (accel_state->planemask & 0x0000ff00)
497	cb_conf.pmask |= 2; /* G */
498    if (accel_state->planemask & 0x00ff0000)
499	cb_conf.pmask |= 1; /* R */
500    if (accel_state->planemask & 0xff000000)
501	cb_conf.pmask |= 8; /* A */
502    cb_conf.rop = accel_state->rop;
503    if (accel_state->dst_obj.tiling_flags == 0)
504	cb_conf.array_mode = 1;
505    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
506
507    r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
508
509}
510
511static void
512R600DoCopy(ScrnInfoPtr pScrn)
513{
514    r600_finish_op(pScrn, 16);
515}
516
517static void
518R600DoCopyVline(PixmapPtr pPix)
519{
520    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
521    RADEONInfoPtr info = RADEONPTR(pScrn);
522    struct radeon_accel_state *accel_state = info->accel_state;
523
524    if (accel_state->vsync)
525	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
526				accel_state->vline_crtc,
527				accel_state->vline_y1,
528				accel_state->vline_y2);
529
530    r600_finish_op(pScrn, 16);
531}
532
533static void
534R600AppendCopyVertex(ScrnInfoPtr pScrn,
535		     int srcX, int srcY,
536		     int dstX, int dstY,
537		     int w, int h)
538{
539    RADEONInfoPtr info = RADEONPTR(pScrn);
540    struct radeon_accel_state *accel_state = info->accel_state;
541    float *vb;
542
543    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
544
545    vb[0] = (float)dstX;
546    vb[1] = (float)dstY;
547    vb[2] = (float)srcX;
548    vb[3] = (float)srcY;
549
550    vb[4] = (float)dstX;
551    vb[5] = (float)(dstY + h);
552    vb[6] = (float)srcX;
553    vb[7] = (float)(srcY + h);
554
555    vb[8] = (float)(dstX + w);
556    vb[9] = (float)(dstY + h);
557    vb[10] = (float)(srcX + w);
558    vb[11] = (float)(srcY + h);
559
560    radeon_vbo_commit(pScrn, &accel_state->vbo);
561}
562
563static Bool
564R600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
565		int xdir, int ydir,
566		int rop,
567		Pixel planemask)
568{
569    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
570    RADEONInfoPtr info = RADEONPTR(pScrn);
571    struct radeon_accel_state *accel_state = info->accel_state;
572    struct r600_accel_object src_obj, dst_obj;
573
574    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
575	RADEON_FALLBACK(("R600CheckDatatype src failed\n"));
576    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
577	RADEON_FALLBACK(("R600CheckDatatype dst failed\n"));
578    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
579	RADEON_FALLBACK(("Invalid planemask\n"));
580
581    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
582    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
583
584    accel_state->same_surface = FALSE;
585
586#if defined(XF86DRM_MODE)
587    if (info->cs) {
588	src_obj.offset = 0;
589	dst_obj.offset = 0;
590	src_obj.bo = radeon_get_pixmap_bo(pSrc);
591	dst_obj.bo = radeon_get_pixmap_bo(pDst);
592	if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
593	    accel_state->same_surface = TRUE;
594    } else
595#endif
596    {
597	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
598	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
599	if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst))
600	    accel_state->same_surface = TRUE;
601	src_obj.bo = NULL;
602	dst_obj.bo = NULL;
603    }
604
605    src_obj.width = pSrc->drawable.width;
606    src_obj.height = pSrc->drawable.height;
607    src_obj.bpp = pSrc->drawable.bitsPerPixel;
608    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
609
610    dst_obj.width = pDst->drawable.width;
611    dst_obj.height = pDst->drawable.height;
612    dst_obj.bpp = pDst->drawable.bitsPerPixel;
613    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
614
615    if (!R600SetAccelState(pScrn,
616			   &src_obj,
617			   NULL,
618			   &dst_obj,
619			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
620			   rop, planemask))
621	return FALSE;
622
623    if (accel_state->same_surface == TRUE) {
624#if defined(XF86DRM_MODE)
625	unsigned height = RADEON_ALIGN(pDst->drawable.height,
626				       drmmode_get_height_align(pScrn, accel_state->dst_obj.tiling_flags));
627#else
628	unsigned height = pDst->drawable.height;
629#endif
630	unsigned long size = height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
631
632#if defined(XF86DRM_MODE)
633	if (info->cs) {
634	    if (accel_state->copy_area_bo) {
635		radeon_bo_unref(accel_state->copy_area_bo);
636		accel_state->copy_area_bo = NULL;
637	    }
638	    accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
639						       RADEON_GEM_DOMAIN_VRAM,
640						       0);
641	    if (accel_state->copy_area_bo == NULL)
642		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
643
644	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
645					      RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_DOMAIN_VRAM);
646	    if (radeon_cs_space_check(info->cs)) {
647		radeon_bo_unref(accel_state->copy_area_bo);
648		accel_state->copy_area_bo = NULL;
649		return FALSE;
650	    }
651	    accel_state->copy_area = (void*)accel_state->copy_area_bo;
652	} else
653#endif
654	{
655	    if (accel_state->copy_area) {
656		exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
657		accel_state->copy_area = NULL;
658	    }
659	    accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
660	    if (!accel_state->copy_area)
661		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
662	}
663    } else
664	R600DoPrepareCopy(pScrn);
665
666    if (accel_state->vsync)
667	RADEONVlineHelperClear(pScrn);
668
669    return TRUE;
670}
671
672static void
673R600Copy(PixmapPtr pDst,
674	 int srcX, int srcY,
675	 int dstX, int dstY,
676	 int w, int h)
677{
678    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
679    RADEONInfoPtr info = RADEONPTR(pScrn);
680    struct radeon_accel_state *accel_state = info->accel_state;
681
682    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
683	return;
684
685    if (accel_state->vsync)
686	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
687
688    if (accel_state->same_surface && accel_state->copy_area) {
689	uint32_t orig_offset, tmp_offset;
690	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
691	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
692	uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags;
693	uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags;
694	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
695
696#if defined(XF86DRM_MODE)
697	if (info->cs) {
698	    tmp_offset = 0;
699	    orig_offset = 0;
700	} else
701#endif
702	{
703	    tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset;
704	    orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
705	}
706
707	/* src to tmp */
708	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
709	accel_state->dst_obj.bo = accel_state->copy_area_bo;
710	accel_state->dst_obj.offset = tmp_offset;
711	accel_state->dst_obj.tiling_flags = 0;
712	R600DoPrepareCopy(pScrn);
713	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
714	R600DoCopy(pScrn);
715
716	/* tmp to dst */
717	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
718	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
719	accel_state->src_obj[0].offset = tmp_offset;
720	accel_state->src_obj[0].tiling_flags = 0;
721	accel_state->dst_obj.domain = orig_dst_domain;
722	accel_state->dst_obj.bo = orig_bo;
723	accel_state->dst_obj.offset = orig_offset;
724	accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags;
725	R600DoPrepareCopy(pScrn);
726	R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
727	R600DoCopyVline(pDst);
728
729	/* restore state */
730	accel_state->src_obj[0].domain = orig_src_domain;
731	accel_state->src_obj[0].bo = orig_bo;
732	accel_state->src_obj[0].offset = orig_offset;
733	accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags;
734    } else
735	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
736
737}
738
739static void
740R600DoneCopy(PixmapPtr pDst)
741{
742    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
743    RADEONInfoPtr info = RADEONPTR(pScrn);
744    struct radeon_accel_state *accel_state = info->accel_state;
745
746    if (!accel_state->same_surface)
747	R600DoCopyVline(pDst);
748
749    if (accel_state->copy_area) {
750	if (!info->cs)
751	    exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
752	accel_state->copy_area = NULL;
753    }
754
755}
756
757struct blendinfo {
758    Bool dst_alpha;
759    Bool src_alpha;
760    uint32_t blend_cntl;
761};
762
763static struct blendinfo R600BlendOp[] = {
764    /* Clear */
765    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
766    /* Src */
767    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
768    /* Dst */
769    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
770    /* Over */
771    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
772    /* OverReverse */
773    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
774    /* In */
775    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
776    /* InReverse */
777    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
778    /* Out */
779    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
780    /* OutReverse */
781    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
782    /* Atop */
783    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
784    /* AtopReverse */
785    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
786    /* Xor */
787    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
788    /* Add */
789    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
790};
791
792struct formatinfo {
793    unsigned int fmt;
794    uint32_t card_fmt;
795};
796
797static struct formatinfo R600TexFormats[] = {
798    {PICT_a8r8g8b8,	FMT_8_8_8_8},
799    {PICT_x8r8g8b8,	FMT_8_8_8_8},
800    {PICT_a8b8g8r8,	FMT_8_8_8_8},
801    {PICT_x8b8g8r8,	FMT_8_8_8_8},
802#ifdef PICT_TYPE_BGRA
803    {PICT_b8g8r8a8,	FMT_8_8_8_8},
804    {PICT_b8g8r8x8,	FMT_8_8_8_8},
805#endif
806    {PICT_r5g6b5,	FMT_5_6_5},
807    {PICT_a1r5g5b5,	FMT_1_5_5_5},
808    {PICT_x1r5g5b5,     FMT_1_5_5_5},
809    {PICT_a8,		FMT_8},
810};
811
812static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
813{
814    uint32_t sblend, dblend;
815
816    sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
817    dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
818
819    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
820     * it as always 1.
821     */
822    if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
823	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
824	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
825	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
826	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
827    }
828
829    /* If the source alpha is being used, then we should only be in a case where
830     * the source blend factor is 0, and the source blend value is the mask
831     * channels multiplied by the source picture's alpha.
832     */
833    if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
834	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
835	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
836	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
837	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
838	}
839    }
840
841    return sblend | dblend;
842}
843
844static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
845{
846    switch (pDstPicture->format) {
847    case PICT_a8r8g8b8:
848    case PICT_x8r8g8b8:
849    case PICT_a8b8g8r8:
850    case PICT_x8b8g8r8:
851#ifdef PICT_TYPE_BGRA
852    case PICT_b8g8r8a8:
853    case PICT_b8g8r8x8:
854#endif
855	*dst_format = COLOR_8_8_8_8;
856	break;
857    case PICT_r5g6b5:
858	*dst_format = COLOR_5_6_5;
859	break;
860    case PICT_a1r5g5b5:
861    case PICT_x1r5g5b5:
862	*dst_format = COLOR_1_5_5_5;
863	break;
864    case PICT_a8:
865	*dst_format = COLOR_8;
866	break;
867    default:
868	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
869	       (int)pDstPicture->format));
870    }
871    return TRUE;
872}
873
874static Bool R600CheckCompositeTexture(PicturePtr pPict,
875				      PicturePtr pDstPict,
876				      int op,
877				      int unit)
878{
879    int w = pPict->pDrawable->width;
880    int h = pPict->pDrawable->height;
881    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
882    unsigned int i;
883    int max_tex_w, max_tex_h;
884
885    max_tex_w = 8192;
886    max_tex_h = 8192;
887
888    if ((w > max_tex_w) || (h > max_tex_h))
889	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
890
891    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
892	if (R600TexFormats[i].fmt == pPict->format)
893	    break;
894    }
895    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
896	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
897			 (int)pPict->format));
898
899    if (pPict->filter != PictFilterNearest &&
900	pPict->filter != PictFilterBilinear)
901	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
902
903    /* for REPEAT_NONE, Render semantics are that sampling outside the source
904     * picture results in alpha=0 pixels. We can implement this with a border color
905     * *if* our source texture has an alpha channel, otherwise we need to fall
906     * back. If we're not transformed then we hope that upper layers have clipped
907     * rendering to the bounds of the source drawable, in which case it doesn't
908     * matter. I have not, however, verified that the X server always does such
909     * clipping.
910     */
911    /* FIXME R6xx */
912    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
913	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
914	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
915    }
916
917    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
918	RADEON_FALLBACK(("non-affine transforms not supported\n"));
919
920    return TRUE;
921}
922
923static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
924					int unit)
925{
926    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
927    RADEONInfoPtr info = RADEONPTR(pScrn);
928    struct radeon_accel_state *accel_state = info->accel_state;
929    int w = pPict->pDrawable->width;
930    int h = pPict->pDrawable->height;
931    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
932    unsigned int i;
933    tex_resource_t  tex_res;
934    tex_sampler_t   tex_samp;
935    int pix_r, pix_g, pix_b, pix_a;
936    float vs_alu_consts[8];
937
938    CLEAR (tex_res);
939    CLEAR (tex_samp);
940
941    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
942	if (R600TexFormats[i].fmt == pPict->format)
943	    break;
944    }
945
946    /* Texture */
947    tex_res.id                  = unit;
948    tex_res.w                   = w;
949    tex_res.h                   = h;
950    tex_res.pitch               = accel_state->src_obj[unit].pitch;
951    tex_res.depth               = 0;
952    tex_res.dim                 = SQ_TEX_DIM_2D;
953    tex_res.base                = accel_state->src_obj[unit].offset;
954    tex_res.mip_base            = accel_state->src_obj[unit].offset;
955    tex_res.size                = accel_state->src_size[unit];
956    tex_res.format              = R600TexFormats[i].card_fmt;
957    tex_res.bo                  = accel_state->src_obj[unit].bo;
958    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
959    tex_res.request_size        = 1;
960
961#if X_BYTE_ORDER == X_BIG_ENDIAN
962    switch (accel_state->src_obj[unit].bpp) {
963    case 16:
964	tex_res.endian = SQ_ENDIAN_8IN16;
965	break;
966    case 32:
967	tex_res.endian = SQ_ENDIAN_8IN32;
968	break;
969    default :
970	break;
971    }
972#endif
973
974    /* component swizzles */
975    switch (pPict->format) {
976    case PICT_a1r5g5b5:
977    case PICT_a8r8g8b8:
978	pix_r = SQ_SEL_Z; /* R */
979	pix_g = SQ_SEL_Y; /* G */
980	pix_b = SQ_SEL_X; /* B */
981	pix_a = SQ_SEL_W; /* A */
982	break;
983    case PICT_a8b8g8r8:
984	pix_r = SQ_SEL_X; /* R */
985	pix_g = SQ_SEL_Y; /* G */
986	pix_b = SQ_SEL_Z; /* B */
987	pix_a = SQ_SEL_W; /* A */
988	break;
989    case PICT_x8b8g8r8:
990	pix_r = SQ_SEL_X; /* R */
991	pix_g = SQ_SEL_Y; /* G */
992	pix_b = SQ_SEL_Z; /* B */
993	pix_a = SQ_SEL_1; /* A */
994	break;
995#ifdef PICT_TYPE_BGRA
996    case PICT_b8g8r8a8:
997	pix_r = SQ_SEL_Y; /* R */
998	pix_g = SQ_SEL_Z; /* G */
999	pix_b = SQ_SEL_W; /* B */
1000	pix_a = SQ_SEL_X; /* A */
1001	break;
1002    case PICT_b8g8r8x8:
1003	pix_r = SQ_SEL_Y; /* R */
1004	pix_g = SQ_SEL_Z; /* G */
1005	pix_b = SQ_SEL_W; /* B */
1006	pix_a = SQ_SEL_1; /* A */
1007	break;
1008#endif
1009    case PICT_x1r5g5b5:
1010    case PICT_x8r8g8b8:
1011    case PICT_r5g6b5:
1012	pix_r = SQ_SEL_Z; /* R */
1013	pix_g = SQ_SEL_Y; /* G */
1014	pix_b = SQ_SEL_X; /* B */
1015	pix_a = SQ_SEL_1; /* A */
1016	break;
1017    case PICT_a8:
1018	pix_r = SQ_SEL_0; /* R */
1019	pix_g = SQ_SEL_0; /* G */
1020	pix_b = SQ_SEL_0; /* B */
1021	pix_a = SQ_SEL_X; /* A */
1022	break;
1023    default:
1024	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
1025    }
1026
1027    if (unit == 0) {
1028	if (!accel_state->msk_pic) {
1029	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
1030		pix_r = SQ_SEL_0;
1031		pix_g = SQ_SEL_0;
1032		pix_b = SQ_SEL_0;
1033	    }
1034
1035	    if (PICT_FORMAT_A(pPict->format) == 0)
1036		pix_a = SQ_SEL_1;
1037	} else {
1038	    if (accel_state->component_alpha) {
1039		if (accel_state->src_alpha) {
1040		    if (PICT_FORMAT_A(pPict->format) == 0) {
1041			pix_r = SQ_SEL_1;
1042			pix_g = SQ_SEL_1;
1043			pix_b = SQ_SEL_1;
1044			pix_a = SQ_SEL_1;
1045		    } else {
1046			pix_r = pix_a;
1047			pix_g = pix_a;
1048			pix_b = pix_a;
1049		    }
1050		} else {
1051		    if (PICT_FORMAT_A(pPict->format) == 0)
1052			pix_a = SQ_SEL_1;
1053		}
1054	    } else {
1055		if (PICT_FORMAT_RGB(pPict->format) == 0) {
1056		    pix_r = SQ_SEL_0;
1057		    pix_g = SQ_SEL_0;
1058		    pix_b = SQ_SEL_0;
1059		}
1060
1061		if (PICT_FORMAT_A(pPict->format) == 0)
1062		    pix_a = SQ_SEL_1;
1063	    }
1064	}
1065    } else {
1066	if (accel_state->component_alpha) {
1067	    if (PICT_FORMAT_A(pPict->format) == 0)
1068		pix_a = SQ_SEL_1;
1069	} else {
1070	    if (PICT_FORMAT_A(pPict->format) == 0) {
1071		pix_r = SQ_SEL_1;
1072		pix_g = SQ_SEL_1;
1073		pix_b = SQ_SEL_1;
1074		pix_a = SQ_SEL_1;
1075	    } else {
1076		pix_r = pix_a;
1077		pix_g = pix_a;
1078		pix_b = pix_a;
1079	    }
1080	}
1081    }
1082
1083    tex_res.dst_sel_x           = pix_r; /* R */
1084    tex_res.dst_sel_y           = pix_g; /* G */
1085    tex_res.dst_sel_z           = pix_b; /* B */
1086    tex_res.dst_sel_w           = pix_a; /* A */
1087
1088    tex_res.base_level          = 0;
1089    tex_res.last_level          = 0;
1090    tex_res.perf_modulation     = 0;
1091    if (accel_state->src_obj[unit].tiling_flags == 0)
1092	tex_res.tile_mode           = 1;
1093    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
1094
1095    tex_samp.id                 = unit;
1096    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1097
1098    switch (repeatType) {
1099    case RepeatNormal:
1100	tex_samp.clamp_x            = SQ_TEX_WRAP;
1101	tex_samp.clamp_y            = SQ_TEX_WRAP;
1102	break;
1103    case RepeatPad:
1104	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1105	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1106	break;
1107    case RepeatReflect:
1108	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1109	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1110	break;
1111    case RepeatNone:
1112	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1113	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1114	break;
1115    default:
1116	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1117    }
1118
1119    switch (pPict->filter) {
1120    case PictFilterNearest:
1121	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1122	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1123	tex_samp.mc_coord_truncate  = 1;
1124	break;
1125    case PictFilterBilinear:
1126	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1127	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1128	break;
1129    default:
1130	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1131    }
1132
1133    tex_samp.clamp_z            = SQ_TEX_WRAP;
1134    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1135    tex_samp.mip_filter         = 0;			/* no mipmap */
1136    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
1137
1138    if (pPict->transform != 0) {
1139	accel_state->is_transform[unit] = TRUE;
1140	accel_state->transform[unit] = pPict->transform;
1141
1142	vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
1143	vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
1144	vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
1145	vs_alu_consts[3] = 1.0 / w;
1146
1147	vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
1148	vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
1149	vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
1150	vs_alu_consts[7] = 1.0 / h;
1151    } else {
1152	accel_state->is_transform[unit] = FALSE;
1153
1154	vs_alu_consts[0] = 1.0;
1155	vs_alu_consts[1] = 0.0;
1156	vs_alu_consts[2] = 0.0;
1157	vs_alu_consts[3] = 1.0 / w;
1158
1159	vs_alu_consts[4] = 0.0;
1160	vs_alu_consts[5] = 1.0;
1161	vs_alu_consts[6] = 0.0;
1162	vs_alu_consts[7] = 1.0 / h;
1163    }
1164
1165    /* VS alu constants */
1166    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
1167			sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
1168
1169    return TRUE;
1170}
1171
1172static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1173			       PicturePtr pDstPicture)
1174{
1175    uint32_t tmp1;
1176    PixmapPtr pSrcPixmap, pDstPixmap;
1177    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1178
1179    /* Check for unsupported compositing operations. */
1180    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1181	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1182
1183    if (!pSrcPicture->pDrawable)
1184	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1185
1186    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1187
1188    max_tex_w = 8192;
1189    max_tex_h = 8192;
1190    max_dst_w = 8192;
1191    max_dst_h = 8192;
1192
1193    if (pSrcPixmap->drawable.width >= max_tex_w ||
1194	pSrcPixmap->drawable.height >= max_tex_h) {
1195	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1196			 pSrcPixmap->drawable.width,
1197			 pSrcPixmap->drawable.height));
1198    }
1199
1200    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1201
1202    if (pDstPixmap->drawable.width >= max_dst_w ||
1203	pDstPixmap->drawable.height >= max_dst_h) {
1204	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1205			 pDstPixmap->drawable.width,
1206			 pDstPixmap->drawable.height));
1207    }
1208
1209    if (pMaskPicture) {
1210	PixmapPtr pMaskPixmap;
1211
1212	if (!pMaskPicture->pDrawable)
1213	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1214
1215	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1216
1217	if (pMaskPixmap->drawable.width >= max_tex_w ||
1218	    pMaskPixmap->drawable.height >= max_tex_h) {
1219	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1220			     pMaskPixmap->drawable.width,
1221			     pMaskPixmap->drawable.height));
1222	}
1223
1224	if (pMaskPicture->componentAlpha) {
1225	    /* Check if it's component alpha that relies on a source alpha and
1226	     * on the source value.  We can only get one of those into the
1227	     * single source value that we get to blend with.
1228	     */
1229	    if (R600BlendOp[op].src_alpha &&
1230		(R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1231		(BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1232		RADEON_FALLBACK(("Component alpha not supported with source "
1233				 "alpha and source value blending.\n"));
1234	    }
1235	}
1236
1237	if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1238	    return FALSE;
1239    }
1240
1241    if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1242	return FALSE;
1243
1244    if (!R600GetDestFormat(pDstPicture, &tmp1))
1245	return FALSE;
1246
1247    return TRUE;
1248
1249}
1250
1251static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1252				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1253				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1254{
1255    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1256    RADEONInfoPtr info = RADEONPTR(pScrn);
1257    struct radeon_accel_state *accel_state = info->accel_state;
1258    uint32_t dst_format;
1259    cb_config_t cb_conf;
1260    shader_config_t vs_conf, ps_conf;
1261    struct r600_accel_object src_obj, mask_obj, dst_obj;
1262
1263    if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8)
1264	return FALSE;
1265
1266#if defined(XF86DRM_MODE)
1267    if (info->cs) {
1268	src_obj.offset = 0;
1269	dst_obj.offset = 0;
1270	src_obj.bo = radeon_get_pixmap_bo(pSrc);
1271	dst_obj.bo = radeon_get_pixmap_bo(pDst);
1272    } else
1273#endif
1274    {
1275	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1276	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1277	src_obj.bo = NULL;
1278	dst_obj.bo = NULL;
1279    }
1280    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1281    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1282
1283    src_obj.width = pSrc->drawable.width;
1284    src_obj.height = pSrc->drawable.height;
1285    src_obj.bpp = pSrc->drawable.bitsPerPixel;
1286    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1287
1288    dst_obj.width = pDst->drawable.width;
1289    dst_obj.height = pDst->drawable.height;
1290    dst_obj.bpp = pDst->drawable.bitsPerPixel;
1291    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1292
1293    if (pMask) {
1294#if defined(XF86DRM_MODE)
1295	if (info->cs) {
1296	    mask_obj.offset = 0;
1297	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
1298	} else
1299#endif
1300	{
1301	    mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset;
1302	    mask_obj.bo = NULL;
1303	}
1304	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1305
1306	mask_obj.width = pMask->drawable.width;
1307	mask_obj.height = pMask->drawable.height;
1308	mask_obj.bpp = pMask->drawable.bitsPerPixel;
1309	mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1310
1311	if (!R600SetAccelState(pScrn,
1312			       &src_obj,
1313			       &mask_obj,
1314			       &dst_obj,
1315			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1316			       3, 0xffffffff))
1317	    return FALSE;
1318
1319	accel_state->msk_pic = pMaskPicture;
1320	if (pMaskPicture->componentAlpha) {
1321	    accel_state->component_alpha = TRUE;
1322	    if (R600BlendOp[op].src_alpha)
1323		accel_state->src_alpha = TRUE;
1324	    else
1325		accel_state->src_alpha = FALSE;
1326	} else {
1327	    accel_state->component_alpha = FALSE;
1328	    accel_state->src_alpha = FALSE;
1329	}
1330    } else {
1331	if (!R600SetAccelState(pScrn,
1332			       &src_obj,
1333			       NULL,
1334			       &dst_obj,
1335			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1336			       3, 0xffffffff))
1337	    return FALSE;
1338
1339	accel_state->msk_pic = NULL;
1340	accel_state->component_alpha = FALSE;
1341	accel_state->src_alpha = FALSE;
1342    }
1343
1344    if (!R600GetDestFormat(pDstPicture, &dst_format))
1345	return FALSE;
1346
1347    CLEAR (cb_conf);
1348    CLEAR (vs_conf);
1349    CLEAR (ps_conf);
1350
1351    if (pMask)
1352        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1353    else
1354        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1355
1356    radeon_cp_start(pScrn);
1357
1358    r600_set_default_state(pScrn, accel_state->ib);
1359
1360    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1361    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1362    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1363
1364    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
1365        R600IBDiscard(pScrn, accel_state->ib);
1366        return FALSE;
1367    }
1368
1369    if (pMask) {
1370        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
1371            R600IBDiscard(pScrn, accel_state->ib);
1372            return FALSE;
1373        }
1374    } else
1375        accel_state->is_transform[1] = FALSE;
1376
1377    if (pMask) {
1378	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
1379	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
1380    } else {
1381	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
1382	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
1383    }
1384
1385    /* Shader */
1386    vs_conf.shader_addr         = accel_state->vs_mc_addr;
1387    vs_conf.shader_size         = accel_state->vs_size;
1388    vs_conf.num_gprs            = 5;
1389    vs_conf.stack_size          = 1;
1390    vs_conf.bo                  = accel_state->shaders_bo;
1391    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1392
1393    ps_conf.shader_addr         = accel_state->ps_mc_addr;
1394    ps_conf.shader_size         = accel_state->ps_size;
1395    ps_conf.num_gprs            = 3;
1396    ps_conf.stack_size          = 1;
1397    ps_conf.uncached_first_inst = 1;
1398    ps_conf.clamp_consts        = 0;
1399    ps_conf.export_mode         = 2;
1400    ps_conf.bo                  = accel_state->shaders_bo;
1401    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1402
1403    cb_conf.id = 0;
1404    cb_conf.w = accel_state->dst_obj.pitch;
1405    cb_conf.h = accel_state->dst_obj.height;
1406    cb_conf.base = accel_state->dst_obj.offset;
1407    cb_conf.format = dst_format;
1408    cb_conf.bo = accel_state->dst_obj.bo;
1409
1410    switch (pDstPicture->format) {
1411    case PICT_a8r8g8b8:
1412    case PICT_x8r8g8b8:
1413    case PICT_a1r5g5b5:
1414    case PICT_x1r5g5b5:
1415    default:
1416	cb_conf.comp_swap = 1; /* ARGB */
1417	break;
1418    case PICT_a8b8g8r8:
1419    case PICT_x8b8g8r8:
1420	cb_conf.comp_swap = 0; /* ABGR */
1421	break;
1422#ifdef PICT_TYPE_BGRA
1423    case PICT_b8g8r8a8:
1424    case PICT_b8g8r8x8:
1425	cb_conf.comp_swap = 3; /* BGRA */
1426	break;
1427#endif
1428    case PICT_r5g6b5:
1429	cb_conf.comp_swap = 2; /* RGB */
1430	break;
1431    case PICT_a8:
1432	cb_conf.comp_swap = 3; /* A */
1433	break;
1434    }
1435    cb_conf.source_format = 1;
1436    cb_conf.blend_clamp = 1;
1437    cb_conf.blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1438    cb_conf.blend_enable = 1;
1439    cb_conf.pmask = 0xf;
1440    cb_conf.rop = 3;
1441    if (accel_state->dst_obj.tiling_flags == 0)
1442	cb_conf.array_mode = 1;
1443#if X_BYTE_ORDER == X_BIG_ENDIAN
1444    switch (dst_obj.bpp) {
1445    case 16:
1446	cb_conf.endian = ENDIAN_8IN16;
1447	break;
1448    case 32:
1449	cb_conf.endian = ENDIAN_8IN32;
1450	break;
1451    default:
1452	break;
1453    }
1454#endif
1455    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
1456
1457    if (pMask)
1458	r600_set_spi(pScrn, accel_state->ib, (2 - 1), 2);
1459    else
1460	r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
1461
1462    if (accel_state->vsync)
1463	RADEONVlineHelperClear(pScrn);
1464
1465    return TRUE;
1466}
1467
1468static void R600Composite(PixmapPtr pDst,
1469			  int srcX, int srcY,
1470			  int maskX, int maskY,
1471			  int dstX, int dstY,
1472			  int w, int h)
1473{
1474    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1475    RADEONInfoPtr info = RADEONPTR(pScrn);
1476    struct radeon_accel_state *accel_state = info->accel_state;
1477    float *vb;
1478
1479    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1480       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1481
1482    if (accel_state->vsync)
1483	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1484
1485    if (accel_state->msk_pic) {
1486
1487	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
1488
1489	vb[0] = (float)dstX;
1490	vb[1] = (float)dstY;
1491	vb[2] = (float)srcX;
1492	vb[3] = (float)srcY;
1493	vb[4] = (float)maskX;
1494	vb[5] = (float)maskY;
1495
1496	vb[6] = (float)dstX;
1497	vb[7] = (float)(dstY + h);
1498	vb[8] = (float)srcX;
1499	vb[9] = (float)(srcY + h);
1500	vb[10] = (float)maskX;
1501	vb[11] = (float)(maskY + h);
1502
1503	vb[12] = (float)(dstX + w);
1504	vb[13] = (float)(dstY + h);
1505	vb[14] = (float)(srcX + w);
1506	vb[15] = (float)(srcY + h);
1507	vb[16] = (float)(maskX + w);
1508	vb[17] = (float)(maskY + h);
1509
1510	radeon_vbo_commit(pScrn, &accel_state->vbo);
1511
1512    } else {
1513
1514	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
1515
1516	vb[0] = (float)dstX;
1517	vb[1] = (float)dstY;
1518	vb[2] = (float)srcX;
1519	vb[3] = (float)srcY;
1520
1521	vb[4] = (float)dstX;
1522	vb[5] = (float)(dstY + h);
1523	vb[6] = (float)srcX;
1524	vb[7] = (float)(srcY + h);
1525
1526	vb[8] = (float)(dstX + w);
1527	vb[9] = (float)(dstY + h);
1528	vb[10] = (float)(srcX + w);
1529	vb[11] = (float)(srcY + h);
1530
1531	radeon_vbo_commit(pScrn, &accel_state->vbo);
1532    }
1533
1534
1535}
1536
1537static void R600DoneComposite(PixmapPtr pDst)
1538{
1539    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1540    RADEONInfoPtr info = RADEONPTR(pScrn);
1541    struct radeon_accel_state *accel_state = info->accel_state;
1542    int vtx_size;
1543
1544    if (accel_state->vsync)
1545       r600_cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
1546			       accel_state->vline_crtc,
1547			       accel_state->vline_y1,
1548			       accel_state->vline_y2);
1549
1550    vtx_size = accel_state->msk_pic ? 24 : 16;
1551
1552    r600_finish_op(pScrn, vtx_size);
1553}
1554
1555Bool
1556R600CopyToVRAM(ScrnInfoPtr pScrn,
1557	       char *src, int src_pitch,
1558	       uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_width, uint32_t dst_height, int bpp,
1559	       int x, int y, int w, int h)
1560{
1561    RADEONInfoPtr info = RADEONPTR(pScrn);
1562    struct radeon_accel_state *accel_state = info->accel_state;
1563    uint32_t scratch_mc_addr;
1564    int wpass = w * (bpp/8);
1565    int scratch_pitch_bytes = RADEON_ALIGN(wpass, 256);
1566    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1567    int scratch_offset = 0, hpass, temph;
1568    char *dst;
1569    drmBufPtr scratch;
1570    struct r600_accel_object scratch_obj, dst_obj;
1571
1572    if (dst_pitch & 7)
1573	return FALSE;
1574
1575    if (dst_mc_addr & 0xff)
1576	return FALSE;
1577
1578    scratch = RADEONCPGetBuffer(pScrn);
1579    if (scratch == NULL)
1580	return FALSE;
1581
1582    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1583    temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1584    dst = (char *)scratch->address;
1585
1586    scratch_obj.pitch = scratch_pitch;
1587    scratch_obj.width = w;
1588    scratch_obj.height = hpass;
1589    scratch_obj.offset = scratch_mc_addr;
1590    scratch_obj.bpp = bpp;
1591    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1592    scratch_obj.bo = NULL;
1593
1594    dst_obj.pitch = dst_pitch;
1595    dst_obj.width = dst_width;
1596    dst_obj.height = dst_height;
1597    dst_obj.offset = dst_mc_addr;
1598    dst_obj.bo = NULL;
1599    dst_obj.bpp = bpp;
1600    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1601
1602    if (!R600SetAccelState(pScrn,
1603			   &scratch_obj,
1604			   NULL,
1605			   &dst_obj,
1606			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1607			   3, 0xffffffff))
1608	return FALSE;
1609
1610    /* memcopy from sys to scratch */
1611    while (temph--) {
1612	memcpy (dst, src, wpass);
1613	src += src_pitch;
1614	dst += scratch_pitch_bytes;
1615    }
1616
1617    while (h) {
1618	uint32_t offset = scratch_mc_addr + scratch_offset;
1619	int oldhpass = hpass;
1620	h -= oldhpass;
1621	temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1622
1623	if (hpass) {
1624	    scratch_offset = scratch->total/2 - scratch_offset;
1625	    dst = (char *)scratch->address + scratch_offset;
1626	    /* wait for the engine to be idle */
1627	    RADEONWaitForIdleCP(pScrn);
1628	    //memcopy from sys to scratch
1629	    while (temph--) {
1630		memcpy (dst, src, wpass);
1631		src += src_pitch;
1632		dst += scratch_pitch_bytes;
1633	    }
1634	}
1635	/* blit from scratch to vram */
1636	info->accel_state->src_obj[0].height = oldhpass;
1637	info->accel_state->src_obj[0].offset = offset;
1638	R600DoPrepareCopy(pScrn);
1639	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
1640	R600DoCopy(pScrn);
1641	y += oldhpass;
1642    }
1643
1644    R600IBDiscard(pScrn, scratch);
1645
1646    return TRUE;
1647}
1648
1649static Bool
1650R600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1651		   char *src, int src_pitch)
1652{
1653    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1654    RADEONInfoPtr info = RADEONPTR(pScrn);
1655    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1656    uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1657    int bpp = pDst->drawable.bitsPerPixel;
1658
1659    return R600CopyToVRAM(pScrn,
1660			  src, src_pitch,
1661			  dst_pitch, dst_mc_addr, pDst->drawable.width, pDst->drawable.height, bpp,
1662			  x, y, w, h);
1663}
1664
1665static Bool
1666R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
1667		       char *dst, int dst_pitch)
1668{
1669    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1670    RADEONInfoPtr info = RADEONPTR(pScrn);
1671    struct radeon_accel_state *accel_state = info->accel_state;
1672    uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1673    uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1674    uint32_t src_width = pSrc->drawable.width;
1675    uint32_t src_height = pSrc->drawable.height;
1676    int bpp = pSrc->drawable.bitsPerPixel;
1677    uint32_t scratch_mc_addr;
1678    int scratch_pitch_bytes = RADEON_ALIGN(dst_pitch, 256);
1679    int scratch_offset = 0, hpass;
1680    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1681    int wpass = w * (bpp/8);
1682    drmBufPtr scratch;
1683    struct r600_accel_object scratch_obj, src_obj;
1684
1685    /* bad pipe setup in drm prior to 1.32 */
1686    if (info->dri->pKernelDRMVersion->version_minor < 32) {
1687	    if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32))
1688		    return FALSE;
1689    }
1690
1691    if (src_pitch & 7)
1692	return FALSE;
1693
1694    scratch = RADEONCPGetBuffer(pScrn);
1695    if (scratch == NULL)
1696	return FALSE;
1697
1698    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1699    hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1700
1701    src_obj.pitch = src_pitch;
1702    src_obj.width = src_width;
1703    src_obj.height = src_height;
1704    src_obj.offset = src_mc_addr;
1705    src_obj.bo = NULL;
1706    src_obj.bpp = bpp;
1707    src_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1708
1709    scratch_obj.pitch = scratch_pitch;
1710    scratch_obj.width = src_width;
1711    scratch_obj.height = hpass;
1712    scratch_obj.offset = scratch_mc_addr;
1713    scratch_obj.bpp = bpp;
1714    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1715    scratch_obj.bo = NULL;
1716
1717    if (!R600SetAccelState(pScrn,
1718			   &src_obj,
1719			   NULL,
1720			   &scratch_obj,
1721			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1722			   3, 0xffffffff))
1723	return FALSE;
1724
1725    /* blit from vram to scratch */
1726    R600DoPrepareCopy(pScrn);
1727    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1728    R600DoCopy(pScrn);
1729
1730    while (h) {
1731	char *src = (char *)scratch->address + scratch_offset;
1732	int oldhpass = hpass;
1733	h -= oldhpass;
1734	y += oldhpass;
1735	hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1736
1737	if (hpass) {
1738	    scratch_offset = scratch->total/2 - scratch_offset;
1739	    /* blit from vram to scratch */
1740	    info->accel_state->dst_obj.height = hpass;
1741	    info->accel_state->dst_obj.offset = scratch_mc_addr + scratch_offset;
1742	    R600DoPrepareCopy(pScrn);
1743	    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1744	    R600DoCopy(pScrn);
1745	}
1746
1747	/* wait for the engine to be idle */
1748	RADEONWaitForIdleCP(pScrn);
1749	/* memcopy from scratch to sys */
1750	while (oldhpass--) {
1751	    memcpy (dst, src, wpass);
1752	    dst += dst_pitch;
1753	    src += scratch_pitch_bytes;
1754	}
1755    }
1756
1757    R600IBDiscard(pScrn, scratch);
1758
1759    return TRUE;
1760
1761}
1762
1763#if defined(XF86DRM_MODE)
1764
1765static Bool
1766R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
1767		     char *src, int src_pitch)
1768{
1769    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1770    RADEONInfoPtr info = RADEONPTR(pScrn);
1771    struct radeon_accel_state *accel_state = info->accel_state;
1772    struct radeon_exa_pixmap_priv *driver_priv;
1773    struct radeon_bo *scratch = NULL;
1774    struct radeon_bo *copy_dst;
1775    unsigned char *dst;
1776    unsigned size;
1777    uint32_t dst_domain;
1778    int bpp = pDst->drawable.bitsPerPixel;
1779    uint32_t scratch_pitch;
1780    uint32_t copy_pitch;
1781    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
1782    int ret;
1783    Bool flush = TRUE;
1784    Bool r;
1785    int i;
1786    struct r600_accel_object src_obj, dst_obj;
1787    uint32_t tiling_flags = 0, pitch = 0, height, base_align;
1788
1789    if (bpp < 8)
1790	return FALSE;
1791
1792    driver_priv = exaGetPixmapDriverPrivate(pDst);
1793    if (!driver_priv || !driver_priv->bo)
1794	return FALSE;
1795
1796    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
1797    if (ret)
1798	ErrorF("radeon_bo_get_tiling failed\n");
1799
1800    /* If we know the BO won't be busy, don't bother with a scratch */
1801    copy_dst = driver_priv->bo;
1802    copy_pitch = pDst->devKind;
1803    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1804	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1805	    flush = FALSE;
1806	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
1807		goto copy;
1808	}
1809    }
1810
1811    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1812    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1813    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1814    size = scratch_pitch * height * (bpp / 8);
1815    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1816    if (scratch == NULL) {
1817	goto copy;
1818    }
1819
1820    src_obj.pitch = scratch_pitch;
1821    src_obj.width = w;
1822    src_obj.height = h;
1823    src_obj.offset = 0;
1824    src_obj.bpp = bpp;
1825    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1826    src_obj.bo = scratch;
1827
1828    dst_obj.pitch = dst_pitch_hw;
1829    dst_obj.width = pDst->drawable.width;
1830    dst_obj.height = pDst->drawable.height;
1831    dst_obj.offset = 0;
1832    dst_obj.bpp = bpp;
1833    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1834    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1835
1836    if (!R600SetAccelState(pScrn,
1837			   &src_obj,
1838			   NULL,
1839			   &dst_obj,
1840			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1841			   3, 0xffffffff)) {
1842        goto copy;
1843    }
1844    copy_dst = scratch;
1845    copy_pitch = scratch_pitch * (bpp / 8);
1846    flush = FALSE;
1847
1848copy:
1849    if (flush)
1850	radeon_cs_flush_indirect(pScrn);
1851
1852    ret = radeon_bo_map(copy_dst, 0);
1853    if (ret) {
1854        r = FALSE;
1855        goto out;
1856    }
1857    r = TRUE;
1858    size = w * bpp / 8;
1859    dst = copy_dst->ptr;
1860    if (copy_dst == driver_priv->bo)
1861	dst += y * copy_pitch + x * bpp / 8;
1862    for (i = 0; i < h; i++) {
1863        memcpy(dst + i * copy_pitch, src, size);
1864        src += src_pitch;
1865    }
1866    radeon_bo_unmap(copy_dst);
1867
1868    if (copy_dst == scratch) {
1869	if (info->accel_state->vsync)
1870	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1871
1872	/* blit from gart to vram */
1873	R600DoPrepareCopy(pScrn);
1874	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
1875	R600DoCopyVline(pDst);
1876    }
1877
1878out:
1879    if (scratch)
1880	radeon_bo_unref(scratch);
1881    return r;
1882}
1883
1884static Bool
1885R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
1886			 int h, char *dst, int dst_pitch)
1887{
1888    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1889    RADEONInfoPtr info = RADEONPTR(pScrn);
1890    struct radeon_accel_state *accel_state = info->accel_state;
1891    struct radeon_exa_pixmap_priv *driver_priv;
1892    struct radeon_bo *scratch = NULL;
1893    struct radeon_bo *copy_src;
1894    unsigned size;
1895    uint32_t src_domain = 0;
1896    int bpp = pSrc->drawable.bitsPerPixel;
1897    uint32_t scratch_pitch;
1898    uint32_t copy_pitch;
1899    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
1900    int ret;
1901    Bool flush = FALSE;
1902    Bool r;
1903    struct r600_accel_object src_obj, dst_obj;
1904    uint32_t tiling_flags = 0, pitch = 0, height, base_align;
1905
1906    if (bpp < 8)
1907	return FALSE;
1908
1909    driver_priv = exaGetPixmapDriverPrivate(pSrc);
1910    if (!driver_priv || !driver_priv->bo)
1911	return FALSE;
1912
1913    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
1914    if (ret)
1915	ErrorF("radeon_bo_get_tiling failed\n");
1916
1917    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
1918    copy_src = driver_priv->bo;
1919    copy_pitch = pSrc->devKind;
1920    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1921	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1922	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
1923	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
1924		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
1925		src_domain = 0;
1926	    else /* A write may be scheduled */
1927		flush = TRUE;
1928	}
1929
1930	if (!src_domain)
1931	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
1932
1933	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
1934	    goto copy;
1935    }
1936
1937    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1938    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1939    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1940    size = scratch_pitch * height * (bpp / 8);
1941    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1942    if (scratch == NULL) {
1943	goto copy;
1944    }
1945    radeon_cs_space_reset_bos(info->cs);
1946    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
1947				      RADEON_GEM_DOMAIN_VRAM, 0);
1948    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
1949    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
1950    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1951    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
1952    ret = radeon_cs_space_check(info->cs);
1953    if (ret) {
1954        goto copy;
1955    }
1956
1957    src_obj.pitch = src_pitch_hw;
1958    src_obj.width = pSrc->drawable.width;
1959    src_obj.height = pSrc->drawable.height;
1960    src_obj.offset = 0;
1961    src_obj.bpp = bpp;
1962    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1963    src_obj.bo = radeon_get_pixmap_bo(pSrc);
1964
1965    dst_obj.pitch = scratch_pitch;
1966    dst_obj.width = w;
1967    dst_obj.height = h;
1968    dst_obj.offset = 0;
1969    dst_obj.bo = scratch;
1970    dst_obj.bpp = bpp;
1971    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1972
1973    if (!R600SetAccelState(pScrn,
1974			   &src_obj,
1975			   NULL,
1976			   &dst_obj,
1977			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1978			   3, 0xffffffff)) {
1979        goto copy;
1980    }
1981
1982    /* blit from vram to gart */
1983    R600DoPrepareCopy(pScrn);
1984    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
1985    R600DoCopy(pScrn);
1986    copy_src = scratch;
1987    copy_pitch = scratch_pitch * (bpp / 8);
1988    flush = TRUE;
1989
1990copy:
1991    if (flush && info->cs)
1992	radeon_cs_flush_indirect(pScrn);
1993
1994    ret = radeon_bo_map(copy_src, 0);
1995    if (ret) {
1996	ErrorF("failed to map pixmap: %d\n", ret);
1997        r = FALSE;
1998        goto out;
1999    }
2000    r = TRUE;
2001    w *= bpp / 8;
2002    if (copy_src == driver_priv->bo)
2003	size = y * copy_pitch + x * bpp / 8;
2004    else
2005	size = 0;
2006    while (h--) {
2007        memcpy(dst, copy_src->ptr + size, w);
2008        size += copy_pitch;
2009        dst += dst_pitch;
2010    }
2011    radeon_bo_unmap(copy_src);
2012out:
2013    if (scratch)
2014	radeon_bo_unref(scratch);
2015    return r;
2016}
2017#endif
2018
2019static int
2020R600MarkSync(ScreenPtr pScreen)
2021{
2022    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
2023    RADEONInfoPtr info = RADEONPTR(pScrn);
2024    struct radeon_accel_state *accel_state = info->accel_state;
2025
2026    return ++accel_state->exaSyncMarker;
2027
2028}
2029
2030static void
2031R600Sync(ScreenPtr pScreen, int marker)
2032{
2033    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
2034    RADEONInfoPtr info = RADEONPTR(pScrn);
2035    struct radeon_accel_state *accel_state = info->accel_state;
2036
2037    if (accel_state->exaMarkerSynced != marker) {
2038#ifdef XF86DRM_MODE
2039#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2040	if (!info->cs)
2041#endif
2042#endif
2043	    RADEONWaitForIdleCP(pScrn);
2044	accel_state->exaMarkerSynced = marker;
2045    }
2046
2047}
2048
2049static Bool
2050R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
2051{
2052    RADEONInfoPtr info = RADEONPTR(pScrn);
2053    struct radeon_accel_state *accel_state = info->accel_state;
2054
2055    /* 512 bytes per shader for now */
2056    int size = 512 * 9;
2057
2058    accel_state->shaders = NULL;
2059
2060#ifdef XF86DRM_MODE
2061#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2062    if (info->cs) {
2063	accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
2064						 RADEON_GEM_DOMAIN_VRAM, 0);
2065	if (accel_state->shaders_bo == NULL) {
2066	    ErrorF("Allocating shader failed\n");
2067	    return FALSE;
2068	}
2069	return TRUE;
2070    } else
2071#endif
2072#endif
2073    {
2074	accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
2075						 TRUE, NULL, NULL);
2076
2077	if (accel_state->shaders == NULL)
2078	    return FALSE;
2079    }
2080
2081    return TRUE;
2082}
2083
2084Bool
2085R600LoadShaders(ScrnInfoPtr pScrn)
2086{
2087    RADEONInfoPtr info = RADEONPTR(pScrn);
2088    struct radeon_accel_state *accel_state = info->accel_state;
2089    RADEONChipFamily ChipSet = info->ChipFamily;
2090    uint32_t *shader;
2091#ifdef XF86DRM_MODE
2092#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2093    int ret;
2094
2095    if (info->cs) {
2096	ret = radeon_bo_map(accel_state->shaders_bo, 1);
2097	if (ret) {
2098	    FatalError("failed to map shader %d\n", ret);
2099	    return FALSE;
2100	}
2101	shader = accel_state->shaders_bo->ptr;
2102    } else
2103#endif
2104#endif
2105	shader = (pointer)((char *)info->FB + accel_state->shaders->offset);
2106
2107    /*  solid vs --------------------------------------- */
2108    accel_state->solid_vs_offset = 0;
2109    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
2110
2111    /*  solid ps --------------------------------------- */
2112    accel_state->solid_ps_offset = 512;
2113    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
2114
2115    /*  copy vs --------------------------------------- */
2116    accel_state->copy_vs_offset = 1024;
2117    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
2118
2119    /*  copy ps --------------------------------------- */
2120    accel_state->copy_ps_offset = 1536;
2121    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2122
2123    /*  comp vs --------------------------------------- */
2124    accel_state->comp_vs_offset = 2048;
2125    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2126
2127    /*  comp ps --------------------------------------- */
2128    accel_state->comp_ps_offset = 2560;
2129    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2130
2131    /*  xv vs --------------------------------------- */
2132    accel_state->xv_vs_offset = 3072;
2133    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2134
2135    /*  xv ps --------------------------------------- */
2136    accel_state->xv_ps_offset = 3584;
2137    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2138
2139#ifdef XF86DRM_MODE
2140#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2141    if (info->cs) {
2142	radeon_bo_unmap(accel_state->shaders_bo);
2143    }
2144#endif
2145#endif
2146
2147    return TRUE;
2148}
2149
2150static Bool
2151R600PrepareAccess(PixmapPtr pPix, int index)
2152{
2153    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2154    RADEONInfoPtr info = RADEONPTR(pScrn);
2155    unsigned char *RADEONMMIO = info->MMIO;
2156
2157    /* flush HDP read/write caches */
2158    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2159
2160    return TRUE;
2161}
2162
2163static void
2164R600FinishAccess(PixmapPtr pPix, int index)
2165{
2166    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2167    RADEONInfoPtr info = RADEONPTR(pScrn);
2168    unsigned char *RADEONMMIO = info->MMIO;
2169
2170    /* flush HDP read/write caches */
2171    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2172
2173}
2174
2175Bool
2176R600DrawInit(ScreenPtr pScreen)
2177{
2178    ScrnInfoPtr pScrn =  xf86Screens[pScreen->myNum];
2179    RADEONInfoPtr info   = RADEONPTR(pScrn);
2180
2181    if (info->accel_state->exa == NULL) {
2182	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2183	return FALSE;
2184    }
2185
2186    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2187    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2188
2189    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2190    info->accel_state->exa->Solid = R600Solid;
2191    info->accel_state->exa->DoneSolid = R600DoneSolid;
2192
2193    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2194    info->accel_state->exa->Copy = R600Copy;
2195    info->accel_state->exa->DoneCopy = R600DoneCopy;
2196
2197    info->accel_state->exa->MarkSync = R600MarkSync;
2198    info->accel_state->exa->WaitMarker = R600Sync;
2199
2200#ifdef XF86DRM_MODE
2201#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2202    if (info->cs) {
2203	info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
2204	info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
2205	info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
2206	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
2207	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
2208	info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
2209	info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
2210#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5)
2211        info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
2212#endif
2213    } else
2214#endif
2215#endif
2216    {
2217	info->accel_state->exa->PrepareAccess = R600PrepareAccess;
2218	info->accel_state->exa->FinishAccess = R600FinishAccess;
2219
2220	/* AGP seems to have problems with gart transfers */
2221	if (info->accelDFS) {
2222	    info->accel_state->exa->UploadToScreen = R600UploadToScreen;
2223	    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen;
2224	}
2225    }
2226
2227    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
2228#ifdef EXA_SUPPORTS_PREPARE_AUX
2229    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
2230#endif
2231
2232#ifdef XF86DRM_MODE
2233#ifdef EXA_HANDLES_PIXMAPS
2234    if (info->cs) {
2235	info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
2236#ifdef EXA_MIXED_PIXMAPS
2237	info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
2238#endif
2239    }
2240#endif
2241#endif
2242    info->accel_state->exa->pixmapOffsetAlign = 256;
2243    info->accel_state->exa->pixmapPitchAlign = 256;
2244
2245    info->accel_state->exa->CheckComposite = R600CheckComposite;
2246    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2247    info->accel_state->exa->Composite = R600Composite;
2248    info->accel_state->exa->DoneComposite = R600DoneComposite;
2249
2250#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
2251    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
2252
2253    info->accel_state->exa->maxPitchBytes = 32768;
2254    info->accel_state->exa->maxX = 8192;
2255#else
2256    info->accel_state->exa->maxX = 8192;
2257#endif
2258    info->accel_state->exa->maxY = 8192;
2259
2260    /* not supported yet */
2261    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2262	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2263	info->accel_state->vsync = TRUE;
2264    } else
2265	info->accel_state->vsync = FALSE;
2266
2267    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
2268	free(info->accel_state->exa);
2269	return FALSE;
2270    }
2271
2272#ifdef XF86DRM_MODE
2273#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2274    if (!info->cs)
2275#endif
2276#endif
2277	if (!info->gartLocation)
2278	    return FALSE;
2279
2280    info->accel_state->XInited3D = FALSE;
2281    info->accel_state->copy_area = NULL;
2282    info->accel_state->src_obj[0].bo = NULL;
2283    info->accel_state->src_obj[1].bo = NULL;
2284    info->accel_state->dst_obj.bo = NULL;
2285    info->accel_state->copy_area_bo = NULL;
2286    info->accel_state->vbo.vb_start_op = -1;
2287    info->accel_state->finish_op = r600_finish_op;
2288    info->accel_state->vbo.verts_per_op = 3;
2289    RADEONVlineHelperClear(pScrn);
2290
2291#ifdef XF86DRM_MODE
2292    radeon_vbo_init_lists(pScrn);
2293#endif
2294
2295    if (!R600AllocShaders(pScrn, pScreen))
2296	return FALSE;
2297
2298    if (!R600LoadShaders(pScrn))
2299	return FALSE;
2300
2301    exaMarkSync(pScreen);
2302
2303    return TRUE;
2304
2305}
2306
2307