r600_exa.c revision 7821949a
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "exa.h"
34
35#include "radeon.h"
36#include "radeon_macros.h"
37#include "radeon_reg.h"
38#include "r600_shader.h"
39#include "r600_reg.h"
40#include "r600_state.h"
41#include "radeon_exa_shared.h"
42#include "radeon_vbo.h"
43
44/* #define SHOW_VERTEXES */
45
46Bool
47R600SetAccelState(ScrnInfoPtr pScrn,
48		  struct r600_accel_object *src0,
49		  struct r600_accel_object *src1,
50		  struct r600_accel_object *dst,
51		  uint32_t vs_offset, uint32_t ps_offset,
52		  int rop, Pixel planemask)
53{
54    RADEONInfoPtr info = RADEONPTR(pScrn);
55    struct radeon_accel_state *accel_state = info->accel_state;
56    uint32_t pitch_align = 0x7, base_align = 0xff;
57#if defined(XF86DRM_MODE)
58    int ret;
59#endif
60
61    if (src0) {
62	memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object));
63	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
64#if defined(XF86DRM_MODE)
65	if (info->cs && src0->surface) {
66		accel_state->src_size[0] = src0->surface->bo_size;
67	}
68#endif
69
70	/* bad pitch */
71	if (accel_state->src_obj[0].pitch & pitch_align)
72	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
73
74	/* bad offset */
75	if (accel_state->src_obj[0].offset & base_align)
76	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[0].offset));
77
78    } else {
79	memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
80	accel_state->src_size[0] = 0;
81    }
82
83    if (src1) {
84	memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object));
85	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
86#if defined(XF86DRM_MODE)
87	if (info->cs && src1->surface) {
88		accel_state->src_size[1] = src1->surface->bo_size;
89	}
90#endif
91
92	/* bad pitch */
93	if (accel_state->src_obj[1].pitch & pitch_align)
94	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
95
96	/* bad offset */
97	if (accel_state->src_obj[1].offset & base_align)
98	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[1].offset));
99    } else {
100	memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
101	accel_state->src_size[1] = 0;
102    }
103
104    if (dst) {
105	memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object));
106	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
107#if defined(XF86DRM_MODE)
108	if (info->cs && dst->surface) {
109		accel_state->dst_size = dst->surface->bo_size;
110	} else
111#endif
112	{
113		accel_state->dst_obj.tiling_flags = 0;
114	}
115	if (accel_state->dst_obj.pitch & pitch_align)
116	    RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
117
118	if (accel_state->dst_obj.offset & base_align)
119	    RADEON_FALLBACK(("Bad dst offset 0x%08x\n", accel_state->dst_obj.offset));
120    } else {
121	memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
122	accel_state->dst_size = 0;
123    }
124
125#ifdef XF86DRM_MODE
126    if (info->cs && CS_FULL(info->cs))
127	radeon_cs_flush_indirect(pScrn);
128#endif
129
130    accel_state->rop = rop;
131    accel_state->planemask = planemask;
132
133    accel_state->vs_size = 512;
134    accel_state->ps_size = 512;
135#if defined(XF86DRM_MODE)
136    if (info->cs) {
137	accel_state->vs_mc_addr = vs_offset;
138	accel_state->ps_mc_addr = ps_offset;
139
140	radeon_cs_space_reset_bos(info->cs);
141	radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
142					  RADEON_GEM_DOMAIN_VRAM, 0);
143	if (accel_state->src_obj[0].bo)
144	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
145					      accel_state->src_obj[0].domain, 0);
146	if (accel_state->src_obj[1].bo)
147	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
148					      accel_state->src_obj[1].domain, 0);
149	if (accel_state->dst_obj.bo)
150	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
151					      0, accel_state->dst_obj.domain);
152	ret = radeon_cs_space_check(info->cs);
153	if (ret)
154	    RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
155
156    } else
157#endif
158    {
159	accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
160	    vs_offset;
161	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
162	    ps_offset;
163    }
164
165    return TRUE;
166}
167
168static Bool
169R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
170{
171    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
172    RADEONInfoPtr info = RADEONPTR(pScrn);
173    struct radeon_accel_state *accel_state = info->accel_state;
174    cb_config_t     cb_conf;
175    shader_config_t vs_conf, ps_conf;
176    uint32_t a, r, g, b;
177    float ps_alu_consts[4];
178    struct r600_accel_object dst;
179
180    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
181	RADEON_FALLBACK(("R600CheckDatatype failed\n"));
182    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
183	RADEON_FALLBACK(("invalid planemask\n"));
184
185#if defined(XF86DRM_MODE)
186    if (info->cs) {
187	dst.offset = 0;
188	dst.bo = radeon_get_pixmap_bo(pPix);
189	dst.tiling_flags = radeon_get_pixmap_tiling(pPix);
190	dst.surface = radeon_get_pixmap_surface(pPix);
191    } else
192#endif
193    {
194	dst.offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
195	dst.bo = NULL;
196    }
197
198    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
199    dst.width = pPix->drawable.width;
200    dst.height = pPix->drawable.height;
201    dst.bpp = pPix->drawable.bitsPerPixel;
202    dst.domain = RADEON_GEM_DOMAIN_VRAM;
203
204    if (!R600SetAccelState(pScrn,
205			   NULL,
206			   NULL,
207			   &dst,
208			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
209			   alu, pm))
210	return FALSE;
211
212    CLEAR (cb_conf);
213    CLEAR (vs_conf);
214    CLEAR (ps_conf);
215
216    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
217    radeon_cp_start(pScrn);
218
219    r600_set_default_state(pScrn, accel_state->ib);
220
221    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
222    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
223    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
224
225    /* Shader */
226    vs_conf.shader_addr         = accel_state->vs_mc_addr;
227    vs_conf.shader_size         = accel_state->vs_size;
228    vs_conf.num_gprs            = 2;
229    vs_conf.stack_size          = 0;
230    vs_conf.bo                  = accel_state->shaders_bo;
231    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
232
233    ps_conf.shader_addr         = accel_state->ps_mc_addr;
234    ps_conf.shader_size         = accel_state->ps_size;
235    ps_conf.num_gprs            = 1;
236    ps_conf.stack_size          = 0;
237    ps_conf.uncached_first_inst = 1;
238    ps_conf.clamp_consts        = 0;
239    ps_conf.export_mode         = 2;
240    ps_conf.bo                  = accel_state->shaders_bo;
241    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
242
243    cb_conf.id = 0;
244    cb_conf.w = accel_state->dst_obj.pitch;
245    cb_conf.h = accel_state->dst_obj.height;
246    cb_conf.base = accel_state->dst_obj.offset;
247    cb_conf.bo = accel_state->dst_obj.bo;
248#ifdef XF86DRM_MODE
249    cb_conf.surface = accel_state->dst_obj.surface;
250#endif
251
252    if (accel_state->dst_obj.bpp == 8) {
253	cb_conf.format = COLOR_8;
254	cb_conf.comp_swap = 3; /* A */
255    } else if (accel_state->dst_obj.bpp == 16) {
256	cb_conf.format = COLOR_5_6_5;
257	cb_conf.comp_swap = 2; /* RGB */
258#if X_BYTE_ORDER == X_BIG_ENDIAN
259	cb_conf.endian = ENDIAN_8IN16;
260#endif
261    } else {
262	cb_conf.format = COLOR_8_8_8_8;
263	cb_conf.comp_swap = 1; /* ARGB */
264#if X_BYTE_ORDER == X_BIG_ENDIAN
265	cb_conf.endian = ENDIAN_8IN32;
266#endif
267    }
268    cb_conf.source_format = 1;
269    cb_conf.blend_clamp = 1;
270    /* Render setup */
271    if (accel_state->planemask & 0x000000ff)
272	cb_conf.pmask |= 4; /* B */
273    if (accel_state->planemask & 0x0000ff00)
274	cb_conf.pmask |= 2; /* G */
275    if (accel_state->planemask & 0x00ff0000)
276	cb_conf.pmask |= 1; /* R */
277    if (accel_state->planemask & 0xff000000)
278	cb_conf.pmask |= 8; /* A */
279    cb_conf.rop = accel_state->rop;
280    if (accel_state->dst_obj.tiling_flags == 0)
281	cb_conf.array_mode = 0;
282    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
283
284    r600_set_spi(pScrn, accel_state->ib, 0, 0);
285
286    /* PS alu constants */
287    if (accel_state->dst_obj.bpp == 16) {
288	r = (fg >> 11) & 0x1f;
289	g = (fg >> 5) & 0x3f;
290	b = (fg >> 0) & 0x1f;
291	ps_alu_consts[0] = (float)r / 31; /* R */
292	ps_alu_consts[1] = (float)g / 63; /* G */
293	ps_alu_consts[2] = (float)b / 31; /* B */
294	ps_alu_consts[3] = 1.0; /* A */
295    } else if (accel_state->dst_obj.bpp == 8) {
296	a = (fg >> 0) & 0xff;
297	ps_alu_consts[0] = 0.0; /* R */
298	ps_alu_consts[1] = 0.0; /* G */
299	ps_alu_consts[2] = 0.0; /* B */
300	ps_alu_consts[3] = (float)a / 255; /* A */
301    } else {
302	a = (fg >> 24) & 0xff;
303	r = (fg >> 16) & 0xff;
304	g = (fg >> 8) & 0xff;
305	b = (fg >> 0) & 0xff;
306	ps_alu_consts[0] = (float)r / 255; /* R */
307	ps_alu_consts[1] = (float)g / 255; /* G */
308	ps_alu_consts[2] = (float)b / 255; /* B */
309	ps_alu_consts[3] = (float)a / 255; /* A */
310    }
311    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
312			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
313
314    if (accel_state->vsync)
315	RADEONVlineHelperClear(pScrn);
316
317    accel_state->dst_pix = pPix;
318    accel_state->fg = fg;
319
320    return TRUE;
321}
322
323static void
324R600DoneSolid(PixmapPtr pPix)
325{
326    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
327    RADEONInfoPtr info = RADEONPTR(pScrn);
328    struct radeon_accel_state *accel_state = info->accel_state;
329
330    if (accel_state->vsync)
331	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
332				accel_state->vline_crtc,
333				accel_state->vline_y1,
334				accel_state->vline_y2);
335
336    r600_finish_op(pScrn, 8);
337}
338
339static void
340R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
341{
342    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
343    RADEONInfoPtr info = RADEONPTR(pScrn);
344    struct radeon_accel_state *accel_state = info->accel_state;
345    float *vb;
346
347#ifdef XF86DRM_MODE
348    if (info->cs && CS_FULL(info->cs)) {
349	R600DoneSolid(info->accel_state->dst_pix);
350	radeon_cs_flush_indirect(pScrn);
351	R600PrepareSolid(accel_state->dst_pix,
352			 accel_state->rop,
353			 accel_state->planemask,
354			 accel_state->fg);
355    }
356#endif
357
358    if (accel_state->vsync)
359	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
360
361    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
362
363    vb[0] = (float)x1;
364    vb[1] = (float)y1;
365
366    vb[2] = (float)x1;
367    vb[3] = (float)y2;
368
369    vb[4] = (float)x2;
370    vb[5] = (float)y2;
371
372    radeon_vbo_commit(pScrn, &accel_state->vbo);
373}
374
375static void
376R600DoPrepareCopy(ScrnInfoPtr pScrn)
377{
378    RADEONInfoPtr info = RADEONPTR(pScrn);
379    struct radeon_accel_state *accel_state = info->accel_state;
380    cb_config_t     cb_conf;
381    tex_resource_t  tex_res;
382    tex_sampler_t   tex_samp;
383    shader_config_t vs_conf, ps_conf;
384
385    CLEAR (cb_conf);
386    CLEAR (tex_res);
387    CLEAR (tex_samp);
388    CLEAR (vs_conf);
389    CLEAR (ps_conf);
390
391    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
392    radeon_cp_start(pScrn);
393
394    r600_set_default_state(pScrn, accel_state->ib);
395
396    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
397    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
398    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
399
400    /* Shader */
401    vs_conf.shader_addr         = accel_state->vs_mc_addr;
402    vs_conf.shader_size         = accel_state->vs_size;
403    vs_conf.num_gprs            = 2;
404    vs_conf.stack_size          = 0;
405    vs_conf.bo                  = accel_state->shaders_bo;
406    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
407
408    ps_conf.shader_addr         = accel_state->ps_mc_addr;
409    ps_conf.shader_size         = accel_state->ps_size;
410    ps_conf.num_gprs            = 1;
411    ps_conf.stack_size          = 0;
412    ps_conf.uncached_first_inst = 1;
413    ps_conf.clamp_consts        = 0;
414    ps_conf.export_mode         = 2;
415    ps_conf.bo                  = accel_state->shaders_bo;
416    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
417
418    /* Texture */
419    tex_res.id                  = 0;
420    tex_res.w                   = accel_state->src_obj[0].width;
421    tex_res.h                   = accel_state->src_obj[0].height;
422    tex_res.pitch               = accel_state->src_obj[0].pitch;
423    tex_res.depth               = 0;
424    tex_res.dim                 = SQ_TEX_DIM_2D;
425    tex_res.base                = accel_state->src_obj[0].offset;
426    tex_res.mip_base            = accel_state->src_obj[0].offset;
427    tex_res.size                = accel_state->src_size[0];
428    tex_res.bo                  = accel_state->src_obj[0].bo;
429    tex_res.mip_bo              = accel_state->src_obj[0].bo;
430#ifdef XF86DRM_MODE
431    tex_res.surface             = accel_state->src_obj[0].surface;
432#endif
433    if (accel_state->src_obj[0].bpp == 8) {
434	tex_res.format              = FMT_8;
435	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
436	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
437	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
438	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
439    } else if (accel_state->src_obj[0].bpp == 16) {
440	tex_res.format              = FMT_5_6_5;
441	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
442	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
443	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
444	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
445    } else {
446	tex_res.format              = FMT_8_8_8_8;
447	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
448	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
449	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
450	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
451    }
452
453    tex_res.request_size        = 1;
454    tex_res.base_level          = 0;
455    tex_res.last_level          = 0;
456    tex_res.perf_modulation     = 0;
457    if (accel_state->src_obj[0].tiling_flags == 0)
458	tex_res.tile_mode           = 1;
459    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
460
461    tex_samp.id                 = 0;
462    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
463    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
464    tex_samp.clamp_z            = SQ_TEX_WRAP;
465    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
466    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
467    tex_samp.mc_coord_truncate  = 1;
468    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
469    tex_samp.mip_filter         = 0;			/* no mipmap */
470    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
471
472    cb_conf.id = 0;
473    cb_conf.w = accel_state->dst_obj.pitch;
474    cb_conf.h = accel_state->dst_obj.height;
475    cb_conf.base = accel_state->dst_obj.offset;
476    cb_conf.bo = accel_state->dst_obj.bo;
477#ifdef XF86DRM_MODE
478    cb_conf.surface = accel_state->dst_obj.surface;
479#endif
480    if (accel_state->dst_obj.bpp == 8) {
481	cb_conf.format = COLOR_8;
482	cb_conf.comp_swap = 3; /* A */
483    } else if (accel_state->dst_obj.bpp == 16) {
484	cb_conf.format = COLOR_5_6_5;
485	cb_conf.comp_swap = 2; /* RGB */
486    } else {
487	cb_conf.format = COLOR_8_8_8_8;
488	cb_conf.comp_swap = 1; /* ARGB */
489    }
490    cb_conf.source_format = 1;
491    cb_conf.blend_clamp = 1;
492
493    /* Render setup */
494    if (accel_state->planemask & 0x000000ff)
495	cb_conf.pmask |= 4; /* B */
496    if (accel_state->planemask & 0x0000ff00)
497	cb_conf.pmask |= 2; /* G */
498    if (accel_state->planemask & 0x00ff0000)
499	cb_conf.pmask |= 1; /* R */
500    if (accel_state->planemask & 0xff000000)
501	cb_conf.pmask |= 8; /* A */
502    cb_conf.rop = accel_state->rop;
503    if (accel_state->dst_obj.tiling_flags == 0)
504	cb_conf.array_mode = 0;
505    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
506
507    r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
508
509}
510
511static void
512R600DoCopy(ScrnInfoPtr pScrn)
513{
514    r600_finish_op(pScrn, 16);
515}
516
517static void
518R600DoCopyVline(PixmapPtr pPix)
519{
520    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
521    RADEONInfoPtr info = RADEONPTR(pScrn);
522    struct radeon_accel_state *accel_state = info->accel_state;
523
524    if (accel_state->vsync)
525	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
526				accel_state->vline_crtc,
527				accel_state->vline_y1,
528				accel_state->vline_y2);
529
530    r600_finish_op(pScrn, 16);
531}
532
533static void
534R600AppendCopyVertex(ScrnInfoPtr pScrn,
535		     int srcX, int srcY,
536		     int dstX, int dstY,
537		     int w, int h)
538{
539    RADEONInfoPtr info = RADEONPTR(pScrn);
540    struct radeon_accel_state *accel_state = info->accel_state;
541    float *vb;
542
543    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
544
545    vb[0] = (float)dstX;
546    vb[1] = (float)dstY;
547    vb[2] = (float)srcX;
548    vb[3] = (float)srcY;
549
550    vb[4] = (float)dstX;
551    vb[5] = (float)(dstY + h);
552    vb[6] = (float)srcX;
553    vb[7] = (float)(srcY + h);
554
555    vb[8] = (float)(dstX + w);
556    vb[9] = (float)(dstY + h);
557    vb[10] = (float)(srcX + w);
558    vb[11] = (float)(srcY + h);
559
560    radeon_vbo_commit(pScrn, &accel_state->vbo);
561}
562
563static Bool
564R600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
565		int xdir, int ydir,
566		int rop,
567		Pixel planemask)
568{
569    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
570    RADEONInfoPtr info = RADEONPTR(pScrn);
571    struct radeon_accel_state *accel_state = info->accel_state;
572    struct r600_accel_object src_obj, dst_obj;
573
574    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
575	RADEON_FALLBACK(("R600CheckDatatype src failed\n"));
576    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
577	RADEON_FALLBACK(("R600CheckDatatype dst failed\n"));
578    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
579	RADEON_FALLBACK(("Invalid planemask\n"));
580
581    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
582    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
583
584    accel_state->same_surface = FALSE;
585
586#if defined(XF86DRM_MODE)
587    if (info->cs) {
588	src_obj.offset = 0;
589	dst_obj.offset = 0;
590	src_obj.bo = radeon_get_pixmap_bo(pSrc);
591	dst_obj.bo = radeon_get_pixmap_bo(pDst);
592	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
593	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
594	src_obj.surface = radeon_get_pixmap_surface(pSrc);
595	dst_obj.surface = radeon_get_pixmap_surface(pDst);
596	if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
597	    accel_state->same_surface = TRUE;
598    } else
599#endif
600    {
601	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
602	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
603	if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst))
604	    accel_state->same_surface = TRUE;
605	src_obj.bo = NULL;
606	dst_obj.bo = NULL;
607    }
608
609    src_obj.width = pSrc->drawable.width;
610    src_obj.height = pSrc->drawable.height;
611    src_obj.bpp = pSrc->drawable.bitsPerPixel;
612    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
613
614    dst_obj.width = pDst->drawable.width;
615    dst_obj.height = pDst->drawable.height;
616    dst_obj.bpp = pDst->drawable.bitsPerPixel;
617    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
618
619    if (!R600SetAccelState(pScrn,
620			   &src_obj,
621			   NULL,
622			   &dst_obj,
623			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
624			   rop, planemask))
625	return FALSE;
626
627    if (accel_state->same_surface == TRUE) {
628#if defined(XF86DRM_MODE)
629	unsigned long size = accel_state->dst_obj.surface->bo_size;
630	unsigned long align = accel_state->dst_obj.surface->bo_alignment;
631#else
632	unsigned height = pDst->drawable.height;
633	unsigned long size = height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
634#endif
635
636#if defined(XF86DRM_MODE)
637	if (info->cs) {
638	    if (accel_state->copy_area_bo) {
639		radeon_bo_unref(accel_state->copy_area_bo);
640		accel_state->copy_area_bo = NULL;
641	    }
642	    accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, align,
643						       RADEON_GEM_DOMAIN_VRAM,
644						       0);
645	    if (accel_state->copy_area_bo == NULL)
646		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
647
648	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
649					      0, RADEON_GEM_DOMAIN_VRAM);
650	    if (radeon_cs_space_check(info->cs)) {
651		radeon_bo_unref(accel_state->copy_area_bo);
652		accel_state->copy_area_bo = NULL;
653		return FALSE;
654	    }
655	    accel_state->copy_area = (void*)accel_state->copy_area_bo;
656	} else
657#endif
658	{
659	    if (accel_state->copy_area) {
660		exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
661		accel_state->copy_area = NULL;
662	    }
663	    accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
664	    if (!accel_state->copy_area)
665		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
666	}
667    } else
668	R600DoPrepareCopy(pScrn);
669
670    if (accel_state->vsync)
671	RADEONVlineHelperClear(pScrn);
672
673    accel_state->dst_pix = pDst;
674    accel_state->src_pix = pSrc;
675    accel_state->xdir = xdir;
676    accel_state->ydir = ydir;
677
678    return TRUE;
679}
680
681static void
682R600DoneCopy(PixmapPtr pDst)
683{
684    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
685    RADEONInfoPtr info = RADEONPTR(pScrn);
686    struct radeon_accel_state *accel_state = info->accel_state;
687
688    if (!accel_state->same_surface)
689	R600DoCopyVline(pDst);
690
691    if (accel_state->copy_area) {
692	if (!info->cs)
693	    exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
694	accel_state->copy_area = NULL;
695    }
696
697}
698
699static void
700R600Copy(PixmapPtr pDst,
701	 int srcX, int srcY,
702	 int dstX, int dstY,
703	 int w, int h)
704{
705    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
706    RADEONInfoPtr info = RADEONPTR(pScrn);
707    struct radeon_accel_state *accel_state = info->accel_state;
708
709    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
710	return;
711
712#ifdef XF86DRM_MODE
713    if (info->cs && CS_FULL(info->cs)) {
714	R600DoneCopy(info->accel_state->dst_pix);
715	radeon_cs_flush_indirect(pScrn);
716	R600PrepareCopy(accel_state->src_pix,
717			accel_state->dst_pix,
718			accel_state->xdir,
719			accel_state->ydir,
720			accel_state->rop,
721			accel_state->planemask);
722    }
723#endif
724
725    if (accel_state->vsync)
726	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
727
728    if (accel_state->same_surface && accel_state->copy_area) {
729	uint32_t orig_offset, tmp_offset;
730	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
731	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
732	uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags;
733	uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags;
734	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
735	int orig_rop = accel_state->rop;
736
737#if defined(XF86DRM_MODE)
738	if (info->cs) {
739	    tmp_offset = 0;
740	    orig_offset = 0;
741	} else
742#endif
743	{
744	    tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset;
745	    orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
746	}
747
748	/* src to tmp */
749	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
750	accel_state->dst_obj.bo = accel_state->copy_area_bo;
751	accel_state->dst_obj.offset = tmp_offset;
752	accel_state->dst_obj.tiling_flags = 0;
753	accel_state->rop = 3;
754	R600DoPrepareCopy(pScrn);
755	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
756	R600DoCopy(pScrn);
757
758	/* tmp to dst */
759	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
760	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
761	accel_state->src_obj[0].offset = tmp_offset;
762	accel_state->src_obj[0].tiling_flags = 0;
763	accel_state->dst_obj.domain = orig_dst_domain;
764	accel_state->dst_obj.bo = orig_bo;
765	accel_state->dst_obj.offset = orig_offset;
766	accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags;
767	accel_state->rop = orig_rop;
768	R600DoPrepareCopy(pScrn);
769	R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
770	R600DoCopyVline(pDst);
771
772	/* restore state */
773	accel_state->src_obj[0].domain = orig_src_domain;
774	accel_state->src_obj[0].bo = orig_bo;
775	accel_state->src_obj[0].offset = orig_offset;
776	accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags;
777    } else
778	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
779
780}
781
782struct blendinfo {
783    Bool dst_alpha;
784    Bool src_alpha;
785    uint32_t blend_cntl;
786};
787
788static struct blendinfo R600BlendOp[] = {
789    /* Clear */
790    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
791    /* Src */
792    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
793    /* Dst */
794    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
795    /* Over */
796    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
797    /* OverReverse */
798    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
799    /* In */
800    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
801    /* InReverse */
802    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
803    /* Out */
804    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
805    /* OutReverse */
806    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
807    /* Atop */
808    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
809    /* AtopReverse */
810    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
811    /* Xor */
812    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
813    /* Add */
814    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
815};
816
/* Pairs a Render picture format with the matching hardware texture format. */
struct formatinfo {
    unsigned int fmt;   /* PICT_* picture format */
    uint32_t card_fmt;  /* FMT_* hardware texture format */
};
821
822static struct formatinfo R600TexFormats[] = {
823    {PICT_a8r8g8b8,	FMT_8_8_8_8},
824    {PICT_x8r8g8b8,	FMT_8_8_8_8},
825    {PICT_a8b8g8r8,	FMT_8_8_8_8},
826    {PICT_x8b8g8r8,	FMT_8_8_8_8},
827#ifdef PICT_TYPE_BGRA
828    {PICT_b8g8r8a8,	FMT_8_8_8_8},
829    {PICT_b8g8r8x8,	FMT_8_8_8_8},
830#endif
831    {PICT_r5g6b5,	FMT_5_6_5},
832    {PICT_a1r5g5b5,	FMT_1_5_5_5},
833    {PICT_x1r5g5b5,     FMT_1_5_5_5},
834    {PICT_a8,		FMT_8},
835};
836
837static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
838{
839    uint32_t sblend, dblend;
840
841    sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
842    dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
843
844    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
845     * it as always 1.
846     */
847    if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
848	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
849	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
850	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
851	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
852    }
853
854    /* If the source alpha is being used, then we should only be in a case where
855     * the source blend factor is 0, and the source blend value is the mask
856     * channels multiplied by the source picture's alpha.
857     */
858    if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
859	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
860	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
861	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
862	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
863	}
864    }
865
866    return sblend | dblend;
867}
868
869static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
870{
871    switch (pDstPicture->format) {
872    case PICT_a8r8g8b8:
873    case PICT_x8r8g8b8:
874    case PICT_a8b8g8r8:
875    case PICT_x8b8g8r8:
876#ifdef PICT_TYPE_BGRA
877    case PICT_b8g8r8a8:
878    case PICT_b8g8r8x8:
879#endif
880	*dst_format = COLOR_8_8_8_8;
881	break;
882    case PICT_r5g6b5:
883	*dst_format = COLOR_5_6_5;
884	break;
885    case PICT_a1r5g5b5:
886    case PICT_x1r5g5b5:
887	*dst_format = COLOR_1_5_5_5;
888	break;
889    case PICT_a8:
890	*dst_format = COLOR_8;
891	break;
892    default:
893	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
894	       (int)pDstPicture->format));
895    }
896    return TRUE;
897}
898
899static Bool R600CheckCompositeTexture(PicturePtr pPict,
900				      PicturePtr pDstPict,
901				      int op,
902				      int unit)
903{
904    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
905    unsigned int i;
906
907    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
908	if (R600TexFormats[i].fmt == pPict->format)
909	    break;
910    }
911    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
912	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
913			 (int)pPict->format));
914
915    if (pPict->filter != PictFilterNearest &&
916	pPict->filter != PictFilterBilinear)
917	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
918
919    /* for REPEAT_NONE, Render semantics are that sampling outside the source
920     * picture results in alpha=0 pixels. We can implement this with a border color
921     * *if* our source texture has an alpha channel, otherwise we need to fall
922     * back. If we're not transformed then we hope that upper layers have clipped
923     * rendering to the bounds of the source drawable, in which case it doesn't
924     * matter. I have not, however, verified that the X server always does such
925     * clipping.
926     */
927    /* FIXME R6xx */
928    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
929	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
930	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
931    }
932
933    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
934	RADEON_FALLBACK(("non-affine transforms not supported\n"));
935
936    return TRUE;
937}
938
939static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
940					int unit)
941{
942    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
943    RADEONInfoPtr info = RADEONPTR(pScrn);
944    struct radeon_accel_state *accel_state = info->accel_state;
945    unsigned int repeatType;
946    unsigned int i;
947    tex_resource_t  tex_res;
948    tex_sampler_t   tex_samp;
949    int pix_r, pix_g, pix_b, pix_a;
950    float vs_alu_consts[8];
951
952    CLEAR (tex_res);
953    CLEAR (tex_samp);
954
955    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
956	if (R600TexFormats[i].fmt == pPict->format)
957	    break;
958    }
959
960    /* Texture */
961    if (pPict->pDrawable) {
962	tex_res.w               = pPict->pDrawable->width;
963	tex_res.h               = pPict->pDrawable->height;
964	repeatType              = pPict->repeat ? pPict->repeatType : RepeatNone;
965    } else {
966	tex_res.w               = 1;
967	tex_res.h               = 1;
968	repeatType              = RepeatNormal;
969    }
970    tex_res.id                  = unit;
971    tex_res.pitch               = accel_state->src_obj[unit].pitch;
972    tex_res.depth               = 0;
973    tex_res.dim                 = SQ_TEX_DIM_2D;
974    tex_res.base                = accel_state->src_obj[unit].offset;
975    tex_res.mip_base            = accel_state->src_obj[unit].offset;
976    tex_res.size                = accel_state->src_size[unit];
977    tex_res.format              = R600TexFormats[i].card_fmt;
978    tex_res.bo                  = accel_state->src_obj[unit].bo;
979    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
980#ifdef XF86DRM_MODE
981    tex_res.surface             = accel_state->src_obj[unit].surface;
982#endif
983    tex_res.request_size        = 1;
984
985#if X_BYTE_ORDER == X_BIG_ENDIAN
986    switch (accel_state->src_obj[unit].bpp) {
987    case 16:
988	tex_res.endian = SQ_ENDIAN_8IN16;
989	break;
990    case 32:
991	tex_res.endian = SQ_ENDIAN_8IN32;
992	break;
993    default :
994	break;
995    }
996#endif
997
998    /* component swizzles */
999    switch (pPict->format) {
1000    case PICT_a1r5g5b5:
1001    case PICT_a8r8g8b8:
1002	pix_r = SQ_SEL_Z; /* R */
1003	pix_g = SQ_SEL_Y; /* G */
1004	pix_b = SQ_SEL_X; /* B */
1005	pix_a = SQ_SEL_W; /* A */
1006	break;
1007    case PICT_a8b8g8r8:
1008	pix_r = SQ_SEL_X; /* R */
1009	pix_g = SQ_SEL_Y; /* G */
1010	pix_b = SQ_SEL_Z; /* B */
1011	pix_a = SQ_SEL_W; /* A */
1012	break;
1013    case PICT_x8b8g8r8:
1014	pix_r = SQ_SEL_X; /* R */
1015	pix_g = SQ_SEL_Y; /* G */
1016	pix_b = SQ_SEL_Z; /* B */
1017	pix_a = SQ_SEL_1; /* A */
1018	break;
1019#ifdef PICT_TYPE_BGRA
1020    case PICT_b8g8r8a8:
1021	pix_r = SQ_SEL_Y; /* R */
1022	pix_g = SQ_SEL_Z; /* G */
1023	pix_b = SQ_SEL_W; /* B */
1024	pix_a = SQ_SEL_X; /* A */
1025	break;
1026    case PICT_b8g8r8x8:
1027	pix_r = SQ_SEL_Y; /* R */
1028	pix_g = SQ_SEL_Z; /* G */
1029	pix_b = SQ_SEL_W; /* B */
1030	pix_a = SQ_SEL_1; /* A */
1031	break;
1032#endif
1033    case PICT_x1r5g5b5:
1034    case PICT_x8r8g8b8:
1035    case PICT_r5g6b5:
1036	pix_r = SQ_SEL_Z; /* R */
1037	pix_g = SQ_SEL_Y; /* G */
1038	pix_b = SQ_SEL_X; /* B */
1039	pix_a = SQ_SEL_1; /* A */
1040	break;
1041    case PICT_a8:
1042	pix_r = SQ_SEL_0; /* R */
1043	pix_g = SQ_SEL_0; /* G */
1044	pix_b = SQ_SEL_0; /* B */
1045	pix_a = SQ_SEL_X; /* A */
1046	break;
1047    default:
1048	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
1049    }
1050
1051    if (unit == 0) {
1052	if (!accel_state->msk_pic) {
1053	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
1054		pix_r = SQ_SEL_0;
1055		pix_g = SQ_SEL_0;
1056		pix_b = SQ_SEL_0;
1057	    }
1058
1059	    if (PICT_FORMAT_A(pPict->format) == 0)
1060		pix_a = SQ_SEL_1;
1061	} else {
1062	    if (accel_state->component_alpha) {
1063		if (accel_state->src_alpha) {
1064		    if (PICT_FORMAT_A(pPict->format) == 0) {
1065			pix_r = SQ_SEL_1;
1066			pix_g = SQ_SEL_1;
1067			pix_b = SQ_SEL_1;
1068			pix_a = SQ_SEL_1;
1069		    } else {
1070			pix_r = pix_a;
1071			pix_g = pix_a;
1072			pix_b = pix_a;
1073		    }
1074		} else {
1075		    if (PICT_FORMAT_A(pPict->format) == 0)
1076			pix_a = SQ_SEL_1;
1077		}
1078	    } else {
1079		if (PICT_FORMAT_RGB(pPict->format) == 0) {
1080		    pix_r = SQ_SEL_0;
1081		    pix_g = SQ_SEL_0;
1082		    pix_b = SQ_SEL_0;
1083		}
1084
1085		if (PICT_FORMAT_A(pPict->format) == 0)
1086		    pix_a = SQ_SEL_1;
1087	    }
1088	}
1089    } else {
1090	if (accel_state->component_alpha) {
1091	    if (PICT_FORMAT_A(pPict->format) == 0)
1092		pix_a = SQ_SEL_1;
1093	} else {
1094	    if (PICT_FORMAT_A(pPict->format) == 0) {
1095		pix_r = SQ_SEL_1;
1096		pix_g = SQ_SEL_1;
1097		pix_b = SQ_SEL_1;
1098		pix_a = SQ_SEL_1;
1099	    } else {
1100		pix_r = pix_a;
1101		pix_g = pix_a;
1102		pix_b = pix_a;
1103	    }
1104	}
1105    }
1106
1107    tex_res.dst_sel_x           = pix_r; /* R */
1108    tex_res.dst_sel_y           = pix_g; /* G */
1109    tex_res.dst_sel_z           = pix_b; /* B */
1110    tex_res.dst_sel_w           = pix_a; /* A */
1111
1112    tex_res.base_level          = 0;
1113    tex_res.last_level          = 0;
1114    tex_res.perf_modulation     = 0;
1115    if (accel_state->src_obj[unit].tiling_flags == 0)
1116	tex_res.tile_mode           = 1;
1117    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
1118
1119    tex_samp.id                 = unit;
1120    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1121
1122    switch (repeatType) {
1123    case RepeatNormal:
1124	tex_samp.clamp_x            = SQ_TEX_WRAP;
1125	tex_samp.clamp_y            = SQ_TEX_WRAP;
1126	break;
1127    case RepeatPad:
1128	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1129	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1130	break;
1131    case RepeatReflect:
1132	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1133	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1134	break;
1135    case RepeatNone:
1136	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1137	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1138	break;
1139    default:
1140	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1141    }
1142
1143    switch (pPict->filter) {
1144    case PictFilterNearest:
1145	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1146	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1147	tex_samp.mc_coord_truncate  = 1;
1148	break;
1149    case PictFilterBilinear:
1150	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1151	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1152	break;
1153    default:
1154	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1155    }
1156
1157    tex_samp.clamp_z            = SQ_TEX_WRAP;
1158    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1159    tex_samp.mip_filter         = 0;			/* no mipmap */
1160    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
1161
1162    if (pPict->transform != 0) {
1163	accel_state->is_transform[unit] = TRUE;
1164	accel_state->transform[unit] = pPict->transform;
1165
1166	vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
1167	vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
1168	vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
1169	vs_alu_consts[3] = 1.0 / tex_res.w;
1170
1171	vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
1172	vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
1173	vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
1174	vs_alu_consts[7] = 1.0 / tex_res.h;
1175    } else {
1176	accel_state->is_transform[unit] = FALSE;
1177
1178	vs_alu_consts[0] = 1.0;
1179	vs_alu_consts[1] = 0.0;
1180	vs_alu_consts[2] = 0.0;
1181	vs_alu_consts[3] = 1.0 / tex_res.w;
1182
1183	vs_alu_consts[4] = 0.0;
1184	vs_alu_consts[5] = 1.0;
1185	vs_alu_consts[6] = 0.0;
1186	vs_alu_consts[7] = 1.0 / tex_res.h;
1187    }
1188
1189    /* VS alu constants */
1190    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
1191			sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
1192
1193    return TRUE;
1194}
1195
1196static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1197			       PicturePtr pDstPicture)
1198{
1199    uint32_t tmp1;
1200    PixmapPtr pSrcPixmap, pDstPixmap;
1201
1202    /* Check for unsupported compositing operations. */
1203    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1204	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1205
1206    if (pSrcPicture->pDrawable) {
1207	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1208
1209	if (pSrcPixmap->drawable.width >= 8192 ||
1210	    pSrcPixmap->drawable.height >= 8192) {
1211	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1212			     pSrcPixmap->drawable.width,
1213			     pSrcPixmap->drawable.height));
1214	}
1215
1216	if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1217	    return FALSE;
1218    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
1219	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1220
1221    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1222
1223    if (pDstPixmap->drawable.width >= 8192 ||
1224	pDstPixmap->drawable.height >= 8192) {
1225	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1226			 pDstPixmap->drawable.width,
1227			 pDstPixmap->drawable.height));
1228    }
1229
1230    if (pMaskPicture) {
1231	PixmapPtr pMaskPixmap;
1232
1233	if (pMaskPicture->pDrawable) {
1234	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1235
1236	    if (pMaskPixmap->drawable.width >= 8192 ||
1237		pMaskPixmap->drawable.height >= 8192) {
1238	      RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1239			       pMaskPixmap->drawable.width,
1240			       pMaskPixmap->drawable.height));
1241	    }
1242
1243	    if (pMaskPicture->componentAlpha) {
1244		/* Check if it's component alpha that relies on a source alpha and
1245		 * on the source value.  We can only get one of those into the
1246		 * single source value that we get to blend with.
1247		 */
1248		if (R600BlendOp[op].src_alpha &&
1249		    (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1250		    (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1251		    RADEON_FALLBACK(("Component alpha not supported with source "
1252				     "alpha and source value blending.\n"));
1253		}
1254	    }
1255
1256	    if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1257		return FALSE;
1258	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
1259	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1260    }
1261
1262    if (!R600GetDestFormat(pDstPicture, &tmp1))
1263	return FALSE;
1264
1265    return TRUE;
1266
1267}
1268
1269static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1270				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1271				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1272{
1273    ScreenPtr pScreen = pDst->drawable.pScreen;
1274    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1275    RADEONInfoPtr info = RADEONPTR(pScrn);
1276    struct radeon_accel_state *accel_state = info->accel_state;
1277    uint32_t dst_format;
1278    cb_config_t cb_conf;
1279    shader_config_t vs_conf, ps_conf;
1280    struct r600_accel_object src_obj, mask_obj, dst_obj;
1281
1282    if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8))
1283	return FALSE;
1284
1285    if (!pSrc) {
1286	pSrc = RADEONSolidPixmap(pScreen, pSrcPicture->pSourcePict->solidFill.color);
1287	if (!pSrc)
1288	    RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
1289    }
1290
1291#if defined(XF86DRM_MODE)
1292    if (info->cs) {
1293	src_obj.offset = 0;
1294	dst_obj.offset = 0;
1295	dst_obj.bo = radeon_get_pixmap_bo(pDst);
1296	src_obj.bo = radeon_get_pixmap_bo(pSrc);
1297	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1298	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1299	dst_obj.surface = radeon_get_pixmap_surface(pDst);
1300	src_obj.surface = radeon_get_pixmap_surface(pSrc);
1301    } else
1302#endif
1303    {
1304	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1305	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1306	src_obj.bo = NULL;
1307	dst_obj.bo = NULL;
1308    }
1309    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1310    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1311
1312    src_obj.width = pSrc->drawable.width;
1313    src_obj.height = pSrc->drawable.height;
1314    src_obj.bpp = pSrc->drawable.bitsPerPixel;
1315    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1316
1317    dst_obj.width = pDst->drawable.width;
1318    dst_obj.height = pDst->drawable.height;
1319    dst_obj.bpp = pDst->drawable.bitsPerPixel;
1320    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1321
1322    if (pMaskPicture) {
1323	if (!pMask) {
1324	    pMask = RADEONSolidPixmap(pScreen, pMaskPicture->pSourcePict->solidFill.color);
1325	    if (!pMask) {
1326		if (!pSrcPicture->pDrawable)
1327		    pScreen->DestroyPixmap(pSrc);
1328		RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
1329	    }
1330	}
1331
1332#if defined(XF86DRM_MODE)
1333	if (info->cs) {
1334	    mask_obj.offset = 0;
1335	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
1336	    mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
1337	    mask_obj.surface = radeon_get_pixmap_surface(pMask);
1338	} else
1339#endif
1340	{
1341	    mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset;
1342	    mask_obj.bo = NULL;
1343	}
1344	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1345
1346	mask_obj.width = pMask->drawable.width;
1347	mask_obj.height = pMask->drawable.height;
1348	mask_obj.bpp = pMask->drawable.bitsPerPixel;
1349	mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1350
1351	if (!R600SetAccelState(pScrn,
1352			       &src_obj,
1353			       &mask_obj,
1354			       &dst_obj,
1355			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1356			       3, 0xffffffff))
1357	    return FALSE;
1358
1359	accel_state->msk_pic = pMaskPicture;
1360	if (pMaskPicture->componentAlpha) {
1361	    accel_state->component_alpha = TRUE;
1362	    if (R600BlendOp[op].src_alpha)
1363		accel_state->src_alpha = TRUE;
1364	    else
1365		accel_state->src_alpha = FALSE;
1366	} else {
1367	    accel_state->component_alpha = FALSE;
1368	    accel_state->src_alpha = FALSE;
1369	}
1370    } else {
1371	if (!R600SetAccelState(pScrn,
1372			       &src_obj,
1373			       NULL,
1374			       &dst_obj,
1375			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1376			       3, 0xffffffff))
1377	    return FALSE;
1378
1379	accel_state->msk_pic = NULL;
1380	accel_state->component_alpha = FALSE;
1381	accel_state->src_alpha = FALSE;
1382    }
1383
1384    if (!R600GetDestFormat(pDstPicture, &dst_format))
1385	return FALSE;
1386
1387    CLEAR (cb_conf);
1388    CLEAR (vs_conf);
1389    CLEAR (ps_conf);
1390
1391    if (pMask)
1392        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1393    else
1394        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1395
1396    radeon_cp_start(pScrn);
1397
1398    r600_set_default_state(pScrn, accel_state->ib);
1399
1400    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1401    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1402    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1403
1404    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
1405        R600IBDiscard(pScrn, accel_state->ib);
1406        return FALSE;
1407    }
1408
1409    if (pMask) {
1410        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
1411            R600IBDiscard(pScrn, accel_state->ib);
1412            return FALSE;
1413        }
1414    } else
1415        accel_state->is_transform[1] = FALSE;
1416
1417    if (pMask) {
1418	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
1419	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
1420    } else {
1421	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
1422	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
1423    }
1424
1425    /* Shader */
1426    vs_conf.shader_addr         = accel_state->vs_mc_addr;
1427    vs_conf.shader_size         = accel_state->vs_size;
1428    vs_conf.num_gprs            = 5;
1429    vs_conf.stack_size          = 1;
1430    vs_conf.bo                  = accel_state->shaders_bo;
1431    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1432
1433    ps_conf.shader_addr         = accel_state->ps_mc_addr;
1434    ps_conf.shader_size         = accel_state->ps_size;
1435    ps_conf.num_gprs            = 3;
1436    ps_conf.stack_size          = 1;
1437    ps_conf.uncached_first_inst = 1;
1438    ps_conf.clamp_consts        = 0;
1439    ps_conf.export_mode         = 2;
1440    ps_conf.bo                  = accel_state->shaders_bo;
1441    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1442
1443    cb_conf.id = 0;
1444    cb_conf.w = accel_state->dst_obj.pitch;
1445    cb_conf.h = accel_state->dst_obj.height;
1446    cb_conf.base = accel_state->dst_obj.offset;
1447    cb_conf.format = dst_format;
1448    cb_conf.bo = accel_state->dst_obj.bo;
1449#ifdef XF86DRM_MODE
1450    cb_conf.surface = accel_state->dst_obj.surface;
1451#endif
1452
1453    switch (pDstPicture->format) {
1454    case PICT_a8r8g8b8:
1455    case PICT_x8r8g8b8:
1456    case PICT_a1r5g5b5:
1457    case PICT_x1r5g5b5:
1458    default:
1459	cb_conf.comp_swap = 1; /* ARGB */
1460	break;
1461    case PICT_a8b8g8r8:
1462    case PICT_x8b8g8r8:
1463	cb_conf.comp_swap = 0; /* ABGR */
1464	break;
1465#ifdef PICT_TYPE_BGRA
1466    case PICT_b8g8r8a8:
1467    case PICT_b8g8r8x8:
1468	cb_conf.comp_swap = 3; /* BGRA */
1469	break;
1470#endif
1471    case PICT_r5g6b5:
1472	cb_conf.comp_swap = 2; /* RGB */
1473	break;
1474    case PICT_a8:
1475	cb_conf.comp_swap = 3; /* A */
1476	break;
1477    }
1478    cb_conf.source_format = 1;
1479    cb_conf.blend_clamp = 1;
1480    cb_conf.blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1481    cb_conf.blend_enable = 1;
1482    cb_conf.pmask = 0xf;
1483    cb_conf.rop = 3;
1484    if (accel_state->dst_obj.tiling_flags == 0)
1485	cb_conf.array_mode = 0;
1486#if X_BYTE_ORDER == X_BIG_ENDIAN
1487    switch (dst_obj.bpp) {
1488    case 16:
1489	cb_conf.endian = ENDIAN_8IN16;
1490	break;
1491    case 32:
1492	cb_conf.endian = ENDIAN_8IN32;
1493	break;
1494    default:
1495	break;
1496    }
1497#endif
1498    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
1499
1500    if (pMask)
1501	r600_set_spi(pScrn, accel_state->ib, (2 - 1), 2);
1502    else
1503	r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
1504
1505    if (accel_state->vsync)
1506	RADEONVlineHelperClear(pScrn);
1507
1508    accel_state->composite_op = op;
1509    accel_state->dst_pic = pDstPicture;
1510    accel_state->src_pic = pSrcPicture;
1511    accel_state->dst_pix = pDst;
1512    accel_state->msk_pix = pMask;
1513    accel_state->src_pix = pSrc;
1514
1515    return TRUE;
1516}
1517
1518static void R600FinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst,
1519				struct radeon_accel_state *accel_state)
1520{
1521    int vtx_size;
1522
1523    if (accel_state->vsync)
1524       r600_cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
1525			       accel_state->vline_crtc,
1526			       accel_state->vline_y1,
1527			       accel_state->vline_y2);
1528
1529    vtx_size = accel_state->msk_pic ? 24 : 16;
1530
1531    r600_finish_op(pScrn, vtx_size);
1532}
1533
1534static void R600DoneComposite(PixmapPtr pDst)
1535{
1536    ScreenPtr pScreen = pDst->drawable.pScreen;
1537    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1538    RADEONInfoPtr info = RADEONPTR(pScrn);
1539    struct radeon_accel_state *accel_state = info->accel_state;
1540
1541    R600FinishComposite(pScrn, pDst, accel_state);
1542
1543    if (!accel_state->src_pic->pDrawable)
1544	pScreen->DestroyPixmap(accel_state->src_pix);
1545
1546    if (accel_state->msk_pic && !accel_state->msk_pic->pDrawable)
1547	pScreen->DestroyPixmap(accel_state->msk_pix);
1548}
1549
1550static void R600Composite(PixmapPtr pDst,
1551			  int srcX, int srcY,
1552			  int maskX, int maskY,
1553			  int dstX, int dstY,
1554			  int w, int h)
1555{
1556    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1557    RADEONInfoPtr info = RADEONPTR(pScrn);
1558    struct radeon_accel_state *accel_state = info->accel_state;
1559    float *vb;
1560
1561    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1562       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1563
1564#ifdef XF86DRM_MODE
1565    if (info->cs && CS_FULL(info->cs)) {
1566	R600FinishComposite(pScrn, pDst, info->accel_state);
1567	radeon_cs_flush_indirect(pScrn);
1568	R600PrepareComposite(info->accel_state->composite_op,
1569			     info->accel_state->src_pic,
1570			     info->accel_state->msk_pic,
1571			     info->accel_state->dst_pic,
1572			     info->accel_state->src_pix,
1573			     info->accel_state->msk_pix,
1574			     info->accel_state->dst_pix);
1575    }
1576#endif
1577
1578    if (accel_state->vsync)
1579	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1580
1581    if (accel_state->msk_pic) {
1582
1583	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
1584
1585	vb[0] = (float)dstX;
1586	vb[1] = (float)dstY;
1587	vb[2] = (float)srcX;
1588	vb[3] = (float)srcY;
1589	vb[4] = (float)maskX;
1590	vb[5] = (float)maskY;
1591
1592	vb[6] = (float)dstX;
1593	vb[7] = (float)(dstY + h);
1594	vb[8] = (float)srcX;
1595	vb[9] = (float)(srcY + h);
1596	vb[10] = (float)maskX;
1597	vb[11] = (float)(maskY + h);
1598
1599	vb[12] = (float)(dstX + w);
1600	vb[13] = (float)(dstY + h);
1601	vb[14] = (float)(srcX + w);
1602	vb[15] = (float)(srcY + h);
1603	vb[16] = (float)(maskX + w);
1604	vb[17] = (float)(maskY + h);
1605
1606	radeon_vbo_commit(pScrn, &accel_state->vbo);
1607
1608    } else {
1609
1610	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
1611
1612	vb[0] = (float)dstX;
1613	vb[1] = (float)dstY;
1614	vb[2] = (float)srcX;
1615	vb[3] = (float)srcY;
1616
1617	vb[4] = (float)dstX;
1618	vb[5] = (float)(dstY + h);
1619	vb[6] = (float)srcX;
1620	vb[7] = (float)(srcY + h);
1621
1622	vb[8] = (float)(dstX + w);
1623	vb[9] = (float)(dstY + h);
1624	vb[10] = (float)(srcX + w);
1625	vb[11] = (float)(srcY + h);
1626
1627	radeon_vbo_commit(pScrn, &accel_state->vbo);
1628    }
1629
1630
1631}
1632
1633Bool
1634R600CopyToVRAM(ScrnInfoPtr pScrn,
1635	       char *src, int src_pitch,
1636	       uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_width, uint32_t dst_height, int bpp,
1637	       int x, int y, int w, int h)
1638{
1639    RADEONInfoPtr info = RADEONPTR(pScrn);
1640    struct radeon_accel_state *accel_state = info->accel_state;
1641    uint32_t scratch_mc_addr;
1642    int wpass = w * (bpp/8);
1643    int scratch_pitch_bytes = RADEON_ALIGN(wpass, 256);
1644    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1645    int scratch_offset = 0, hpass, temph;
1646    char *dst;
1647    drmBufPtr scratch;
1648    struct r600_accel_object scratch_obj, dst_obj;
1649
1650    if (dst_pitch & 7)
1651	return FALSE;
1652
1653    if (dst_mc_addr & 0xff)
1654	return FALSE;
1655
1656    scratch = RADEONCPGetBuffer(pScrn);
1657    if (scratch == NULL)
1658	return FALSE;
1659
1660    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1661    temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1662    dst = (char *)scratch->address;
1663
1664    scratch_obj.pitch = scratch_pitch;
1665    scratch_obj.width = w;
1666    scratch_obj.height = hpass;
1667    scratch_obj.offset = scratch_mc_addr;
1668    scratch_obj.bpp = bpp;
1669    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1670    scratch_obj.bo = NULL;
1671
1672    dst_obj.pitch = dst_pitch;
1673    dst_obj.width = dst_width;
1674    dst_obj.height = dst_height;
1675    dst_obj.offset = dst_mc_addr;
1676    dst_obj.bo = NULL;
1677    dst_obj.bpp = bpp;
1678    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1679
1680    if (!R600SetAccelState(pScrn,
1681			   &scratch_obj,
1682			   NULL,
1683			   &dst_obj,
1684			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1685			   3, 0xffffffff))
1686	return FALSE;
1687
1688    /* memcopy from sys to scratch */
1689    while (temph--) {
1690	memcpy (dst, src, wpass);
1691	src += src_pitch;
1692	dst += scratch_pitch_bytes;
1693    }
1694
1695    while (h) {
1696	uint32_t offset = scratch_mc_addr + scratch_offset;
1697	int oldhpass = hpass;
1698	h -= oldhpass;
1699	temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1700
1701	if (hpass) {
1702	    scratch_offset = scratch->total/2 - scratch_offset;
1703	    dst = (char *)scratch->address + scratch_offset;
1704	    /* wait for the engine to be idle */
1705	    RADEONWaitForIdleCP(pScrn);
1706	    //memcopy from sys to scratch
1707	    while (temph--) {
1708		memcpy (dst, src, wpass);
1709		src += src_pitch;
1710		dst += scratch_pitch_bytes;
1711	    }
1712	}
1713	/* blit from scratch to vram */
1714	info->accel_state->src_obj[0].height = oldhpass;
1715	info->accel_state->src_obj[0].offset = offset;
1716	R600DoPrepareCopy(pScrn);
1717	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
1718	R600DoCopy(pScrn);
1719	y += oldhpass;
1720    }
1721
1722    R600IBDiscard(pScrn, scratch);
1723
1724    return TRUE;
1725}
1726
1727static Bool
1728R600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1729		   char *src, int src_pitch)
1730{
1731    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1732    RADEONInfoPtr info = RADEONPTR(pScrn);
1733    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1734    uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1735    int bpp = pDst->drawable.bitsPerPixel;
1736
1737    return R600CopyToVRAM(pScrn,
1738			  src, src_pitch,
1739			  dst_pitch, dst_mc_addr, pDst->drawable.width, pDst->drawable.height, bpp,
1740			  x, y, w, h);
1741}
1742
1743static Bool
1744R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
1745		       char *dst, int dst_pitch)
1746{
1747    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
1748    RADEONInfoPtr info = RADEONPTR(pScrn);
1749    struct radeon_accel_state *accel_state = info->accel_state;
1750    uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1751    uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1752    uint32_t src_width = pSrc->drawable.width;
1753    uint32_t src_height = pSrc->drawable.height;
1754    int bpp = pSrc->drawable.bitsPerPixel;
1755    uint32_t scratch_mc_addr;
1756    int scratch_pitch_bytes = RADEON_ALIGN(dst_pitch, 256);
1757    int scratch_offset = 0, hpass;
1758    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1759    int wpass = w * (bpp/8);
1760    drmBufPtr scratch;
1761    struct r600_accel_object scratch_obj, src_obj;
1762
1763    /* bad pipe setup in drm prior to 1.32 */
1764    if (info->dri->pKernelDRMVersion->version_minor < 32) {
1765	    if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32))
1766		    return FALSE;
1767    }
1768
1769    if (src_pitch & 7)
1770	return FALSE;
1771
1772    scratch = RADEONCPGetBuffer(pScrn);
1773    if (scratch == NULL)
1774	return FALSE;
1775
1776    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1777    hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1778
1779    src_obj.pitch = src_pitch;
1780    src_obj.width = src_width;
1781    src_obj.height = src_height;
1782    src_obj.offset = src_mc_addr;
1783    src_obj.bo = NULL;
1784    src_obj.bpp = bpp;
1785    src_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1786
1787    scratch_obj.pitch = scratch_pitch;
1788    scratch_obj.width = src_width;
1789    scratch_obj.height = hpass;
1790    scratch_obj.offset = scratch_mc_addr;
1791    scratch_obj.bpp = bpp;
1792    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1793    scratch_obj.bo = NULL;
1794
1795    if (!R600SetAccelState(pScrn,
1796			   &src_obj,
1797			   NULL,
1798			   &scratch_obj,
1799			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1800			   3, 0xffffffff))
1801	return FALSE;
1802
1803    /* blit from vram to scratch */
1804    R600DoPrepareCopy(pScrn);
1805    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1806    R600DoCopy(pScrn);
1807
1808    while (h) {
1809	char *src = (char *)scratch->address + scratch_offset;
1810	int oldhpass = hpass;
1811	h -= oldhpass;
1812	y += oldhpass;
1813	hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1814
1815	if (hpass) {
1816	    scratch_offset = scratch->total/2 - scratch_offset;
1817	    /* blit from vram to scratch */
1818	    info->accel_state->dst_obj.height = hpass;
1819	    info->accel_state->dst_obj.offset = scratch_mc_addr + scratch_offset;
1820	    R600DoPrepareCopy(pScrn);
1821	    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1822	    R600DoCopy(pScrn);
1823	}
1824
1825	/* wait for the engine to be idle */
1826	RADEONWaitForIdleCP(pScrn);
1827	/* memcopy from scratch to sys */
1828	while (oldhpass--) {
1829	    memcpy (dst, src, wpass);
1830	    dst += dst_pitch;
1831	    src += scratch_pitch_bytes;
1832	}
1833    }
1834
1835    R600IBDiscard(pScrn, scratch);
1836
1837    return TRUE;
1838
1839}
1840
1841#if defined(XF86DRM_MODE)
1842
1843static Bool
1844R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
1845		     char *src, int src_pitch)
1846{
1847    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1848    RADEONInfoPtr info = RADEONPTR(pScrn);
1849    struct radeon_accel_state *accel_state = info->accel_state;
1850    struct radeon_exa_pixmap_priv *driver_priv;
1851    struct radeon_bo *scratch = NULL;
1852    struct radeon_bo *copy_dst;
1853    unsigned char *dst;
1854    unsigned size;
1855    uint32_t dst_domain;
1856    int bpp = pDst->drawable.bitsPerPixel;
1857    uint32_t scratch_pitch;
1858    uint32_t copy_pitch;
1859    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
1860    int ret;
1861    Bool flush = TRUE;
1862    Bool r;
1863    int i;
1864    struct r600_accel_object src_obj, dst_obj;
1865    uint32_t height, base_align;
1866
1867    if (bpp < 8)
1868	return FALSE;
1869
1870    driver_priv = exaGetPixmapDriverPrivate(pDst);
1871    if (!driver_priv || !driver_priv->bo)
1872	return FALSE;
1873
1874    /* If we know the BO won't be busy, don't bother with a scratch */
1875    copy_dst = driver_priv->bo;
1876    copy_pitch = pDst->devKind;
1877    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1878	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1879	    flush = FALSE;
1880	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
1881		goto copy;
1882	}
1883    }
1884
1885    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1886    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1887    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1888    size = scratch_pitch * height * (bpp / 8);
1889    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1890    if (scratch == NULL) {
1891	goto copy;
1892    }
1893
1894    src_obj.pitch = scratch_pitch;
1895    src_obj.width = w;
1896    src_obj.height = h;
1897    src_obj.offset = 0;
1898    src_obj.bpp = bpp;
1899    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1900    src_obj.bo = scratch;
1901    src_obj.tiling_flags = 0;
1902#ifdef XF86DRM_MODE
1903    src_obj.surface = NULL;
1904#endif
1905
1906    dst_obj.pitch = dst_pitch_hw;
1907    dst_obj.width = pDst->drawable.width;
1908    dst_obj.height = pDst->drawable.height;
1909    dst_obj.offset = 0;
1910    dst_obj.bpp = bpp;
1911    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1912    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1913    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1914#ifdef XF86DRM_MODE
1915    dst_obj.surface = radeon_get_pixmap_surface(pDst);
1916#endif
1917
1918    if (!R600SetAccelState(pScrn,
1919			   &src_obj,
1920			   NULL,
1921			   &dst_obj,
1922			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1923			   3, 0xffffffff)) {
1924        goto copy;
1925    }
1926    copy_dst = scratch;
1927    copy_pitch = scratch_pitch * (bpp / 8);
1928    flush = FALSE;
1929
1930copy:
1931    if (flush)
1932	radeon_cs_flush_indirect(pScrn);
1933
1934    ret = radeon_bo_map(copy_dst, 0);
1935    if (ret) {
1936        r = FALSE;
1937        goto out;
1938    }
1939    r = TRUE;
1940    size = w * bpp / 8;
1941    dst = copy_dst->ptr;
1942    if (copy_dst == driver_priv->bo)
1943	dst += y * copy_pitch + x * bpp / 8;
1944    for (i = 0; i < h; i++) {
1945        memcpy(dst + i * copy_pitch, src, size);
1946        src += src_pitch;
1947    }
1948    radeon_bo_unmap(copy_dst);
1949
1950    if (copy_dst == scratch) {
1951	if (info->accel_state->vsync)
1952	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1953
1954	/* blit from gart to vram */
1955	R600DoPrepareCopy(pScrn);
1956	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
1957	R600DoCopyVline(pDst);
1958    }
1959
1960out:
1961    if (scratch)
1962	radeon_bo_unref(scratch);
1963    return r;
1964}
1965
1966static Bool
1967R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
1968			 int h, char *dst, int dst_pitch)
1969{
1970    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
1971    RADEONInfoPtr info = RADEONPTR(pScrn);
1972    struct radeon_accel_state *accel_state = info->accel_state;
1973    struct radeon_exa_pixmap_priv *driver_priv;
1974    struct radeon_bo *scratch = NULL;
1975    struct radeon_bo *copy_src;
1976    unsigned size;
1977    uint32_t src_domain = 0;
1978    int bpp = pSrc->drawable.bitsPerPixel;
1979    uint32_t scratch_pitch;
1980    uint32_t copy_pitch;
1981    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
1982    int ret;
1983    Bool flush = FALSE;
1984    Bool r;
1985    struct r600_accel_object src_obj, dst_obj;
1986    uint32_t height, base_align;
1987
1988    if (bpp < 8)
1989	return FALSE;
1990
1991    driver_priv = exaGetPixmapDriverPrivate(pSrc);
1992    if (!driver_priv || !driver_priv->bo)
1993	return FALSE;
1994
1995    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
1996    copy_src = driver_priv->bo;
1997    copy_pitch = pSrc->devKind;
1998    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1999	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
2000	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
2001	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
2002		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
2003		src_domain = 0;
2004	    else /* A write may be scheduled */
2005		flush = TRUE;
2006	}
2007
2008	if (!src_domain)
2009	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
2010
2011	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
2012	    goto copy;
2013    }
2014
2015    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
2016    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
2017    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
2018    size = scratch_pitch * height * (bpp / 8);
2019    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
2020    if (scratch == NULL) {
2021	goto copy;
2022    }
2023    radeon_cs_space_reset_bos(info->cs);
2024    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
2025				      RADEON_GEM_DOMAIN_VRAM, 0);
2026    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
2027    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
2028    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
2029    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
2030    ret = radeon_cs_space_check(info->cs);
2031    if (ret) {
2032        goto copy;
2033    }
2034
2035    src_obj.pitch = src_pitch_hw;
2036    src_obj.width = pSrc->drawable.width;
2037    src_obj.height = pSrc->drawable.height;
2038    src_obj.offset = 0;
2039    src_obj.bpp = bpp;
2040    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
2041    src_obj.bo = radeon_get_pixmap_bo(pSrc);
2042    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
2043#ifdef XF86DRM_MODE
2044    src_obj.surface = radeon_get_pixmap_surface(pSrc);
2045#endif
2046
2047    dst_obj.pitch = scratch_pitch;
2048    dst_obj.width = w;
2049    dst_obj.height = h;
2050    dst_obj.offset = 0;
2051    dst_obj.bo = scratch;
2052    dst_obj.bpp = bpp;
2053    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
2054    dst_obj.tiling_flags = 0;
2055#ifdef XF86DRM_MODE
2056    dst_obj.surface = NULL;
2057#endif
2058
2059    if (!R600SetAccelState(pScrn,
2060			   &src_obj,
2061			   NULL,
2062			   &dst_obj,
2063			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
2064			   3, 0xffffffff)) {
2065        goto copy;
2066    }
2067
2068    /* blit from vram to gart */
2069    R600DoPrepareCopy(pScrn);
2070    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
2071    R600DoCopy(pScrn);
2072    copy_src = scratch;
2073    copy_pitch = scratch_pitch * (bpp / 8);
2074    flush = TRUE;
2075
2076copy:
2077    if (flush && info->cs)
2078	radeon_cs_flush_indirect(pScrn);
2079
2080    ret = radeon_bo_map(copy_src, 0);
2081    if (ret) {
2082	ErrorF("failed to map pixmap: %d\n", ret);
2083        r = FALSE;
2084        goto out;
2085    }
2086    r = TRUE;
2087    w *= bpp / 8;
2088    if (copy_src == driver_priv->bo)
2089	size = y * copy_pitch + x * bpp / 8;
2090    else
2091	size = 0;
2092    while (h--) {
2093        memcpy(dst, copy_src->ptr + size, w);
2094        size += copy_pitch;
2095        dst += dst_pitch;
2096    }
2097    radeon_bo_unmap(copy_src);
2098out:
2099    if (scratch)
2100	radeon_bo_unref(scratch);
2101    return r;
2102}
2103#endif
2104
2105static int
2106R600MarkSync(ScreenPtr pScreen)
2107{
2108    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2109    RADEONInfoPtr info = RADEONPTR(pScrn);
2110    struct radeon_accel_state *accel_state = info->accel_state;
2111
2112    return ++accel_state->exaSyncMarker;
2113
2114}
2115
2116static void
2117R600Sync(ScreenPtr pScreen, int marker)
2118{
2119    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2120    RADEONInfoPtr info = RADEONPTR(pScrn);
2121    struct radeon_accel_state *accel_state = info->accel_state;
2122
2123    if (accel_state->exaMarkerSynced != marker) {
2124#ifdef XF86DRM_MODE
2125#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2126	if (!info->cs)
2127#endif
2128#endif
2129	    RADEONWaitForIdleCP(pScrn);
2130	accel_state->exaMarkerSynced = marker;
2131    }
2132
2133}
2134
2135static Bool
2136R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
2137{
2138    RADEONInfoPtr info = RADEONPTR(pScrn);
2139    struct radeon_accel_state *accel_state = info->accel_state;
2140
2141    /* 512 bytes per shader for now */
2142    int size = 512 * 9;
2143
2144    accel_state->shaders = NULL;
2145
2146#ifdef XF86DRM_MODE
2147#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2148    if (info->cs) {
2149	accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
2150						 RADEON_GEM_DOMAIN_VRAM, 0);
2151	if (accel_state->shaders_bo == NULL) {
2152	    ErrorF("Allocating shader failed\n");
2153	    return FALSE;
2154	}
2155	return TRUE;
2156    } else
2157#endif
2158#endif
2159    {
2160	accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
2161						 TRUE, NULL, NULL);
2162
2163	if (accel_state->shaders == NULL)
2164	    return FALSE;
2165    }
2166
2167    return TRUE;
2168}
2169
2170Bool
2171R600LoadShaders(ScrnInfoPtr pScrn)
2172{
2173    RADEONInfoPtr info = RADEONPTR(pScrn);
2174    struct radeon_accel_state *accel_state = info->accel_state;
2175    RADEONChipFamily ChipSet = info->ChipFamily;
2176    uint32_t *shader;
2177#ifdef XF86DRM_MODE
2178#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2179    int ret;
2180
2181    if (info->cs) {
2182	ret = radeon_bo_map(accel_state->shaders_bo, 1);
2183	if (ret) {
2184	    FatalError("failed to map shader %d\n", ret);
2185	    return FALSE;
2186	}
2187	shader = accel_state->shaders_bo->ptr;
2188    } else
2189#endif
2190#endif
2191	shader = (pointer)((char *)info->FB + accel_state->shaders->offset);
2192
2193    /*  solid vs --------------------------------------- */
2194    accel_state->solid_vs_offset = 0;
2195    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
2196
2197    /*  solid ps --------------------------------------- */
2198    accel_state->solid_ps_offset = 512;
2199    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
2200
2201    /*  copy vs --------------------------------------- */
2202    accel_state->copy_vs_offset = 1024;
2203    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
2204
2205    /*  copy ps --------------------------------------- */
2206    accel_state->copy_ps_offset = 1536;
2207    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2208
2209    /*  comp vs --------------------------------------- */
2210    accel_state->comp_vs_offset = 2048;
2211    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2212
2213    /*  comp ps --------------------------------------- */
2214    accel_state->comp_ps_offset = 2560;
2215    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2216
2217    /*  xv vs --------------------------------------- */
2218    accel_state->xv_vs_offset = 3072;
2219    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2220
2221    /*  xv ps --------------------------------------- */
2222    accel_state->xv_ps_offset = 3584;
2223    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2224
2225#ifdef XF86DRM_MODE
2226#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2227    if (info->cs) {
2228	radeon_bo_unmap(accel_state->shaders_bo);
2229    }
2230#endif
2231#endif
2232
2233    return TRUE;
2234}
2235
2236static Bool
2237R600PrepareAccess(PixmapPtr pPix, int index)
2238{
2239    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
2240    RADEONInfoPtr info = RADEONPTR(pScrn);
2241    unsigned char *RADEONMMIO = info->MMIO;
2242
2243    /* flush HDP read/write caches */
2244    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2245
2246    return TRUE;
2247}
2248
2249static void
2250R600FinishAccess(PixmapPtr pPix, int index)
2251{
2252    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
2253    RADEONInfoPtr info = RADEONPTR(pScrn);
2254    unsigned char *RADEONMMIO = info->MMIO;
2255
2256    /* flush HDP read/write caches */
2257    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2258
2259}
2260
2261Bool
2262R600DrawInit(ScreenPtr pScreen)
2263{
2264    ScrnInfoPtr pScrn =  xf86ScreenToScrn(pScreen);
2265    RADEONInfoPtr info   = RADEONPTR(pScrn);
2266
2267    if (info->accel_state->exa == NULL) {
2268	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2269	return FALSE;
2270    }
2271
2272    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2273    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2274
2275    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2276    info->accel_state->exa->Solid = R600Solid;
2277    info->accel_state->exa->DoneSolid = R600DoneSolid;
2278
2279    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2280    info->accel_state->exa->Copy = R600Copy;
2281    info->accel_state->exa->DoneCopy = R600DoneCopy;
2282
2283    info->accel_state->exa->MarkSync = R600MarkSync;
2284    info->accel_state->exa->WaitMarker = R600Sync;
2285
2286#ifdef XF86DRM_MODE
2287#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2288    if (info->cs) {
2289	info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
2290	info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
2291	info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
2292	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
2293	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
2294	info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
2295	info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
2296#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5)
2297        info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
2298#endif
2299    } else
2300#endif
2301#endif
2302    {
2303	info->accel_state->exa->PrepareAccess = R600PrepareAccess;
2304	info->accel_state->exa->FinishAccess = R600FinishAccess;
2305
2306	/* AGP seems to have problems with gart transfers */
2307	if (info->accelDFS) {
2308	    info->accel_state->exa->UploadToScreen = R600UploadToScreen;
2309	    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen;
2310	}
2311    }
2312
2313    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
2314#ifdef EXA_SUPPORTS_PREPARE_AUX
2315    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
2316#endif
2317
2318#ifdef XF86DRM_MODE
2319#ifdef EXA_HANDLES_PIXMAPS
2320    if (info->cs) {
2321	info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
2322#ifdef EXA_MIXED_PIXMAPS
2323	info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
2324#endif
2325    }
2326#endif
2327#endif
2328    info->accel_state->exa->pixmapOffsetAlign = 256;
2329    info->accel_state->exa->pixmapPitchAlign = 256;
2330
2331    info->accel_state->exa->CheckComposite = R600CheckComposite;
2332    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2333    info->accel_state->exa->Composite = R600Composite;
2334    info->accel_state->exa->DoneComposite = R600DoneComposite;
2335
2336#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
2337    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
2338
2339    info->accel_state->exa->maxPitchBytes = 32768;
2340    info->accel_state->exa->maxX = 8192;
2341#else
2342    info->accel_state->exa->maxX = 8192;
2343#endif
2344    info->accel_state->exa->maxY = 8192;
2345
2346    /* not supported yet */
2347    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2348	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2349	info->accel_state->vsync = TRUE;
2350    } else
2351	info->accel_state->vsync = FALSE;
2352
2353    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
2354	free(info->accel_state->exa);
2355	return FALSE;
2356    }
2357
2358#ifdef XF86DRM_MODE
2359#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2360    if (!info->cs)
2361#endif
2362#endif
2363	if (!info->gartLocation)
2364	    return FALSE;
2365
2366    info->accel_state->XInited3D = FALSE;
2367    info->accel_state->copy_area = NULL;
2368    info->accel_state->src_obj[0].bo = NULL;
2369    info->accel_state->src_obj[1].bo = NULL;
2370    info->accel_state->dst_obj.bo = NULL;
2371    info->accel_state->copy_area_bo = NULL;
2372    info->accel_state->vbo.vb_start_op = -1;
2373    info->accel_state->finish_op = r600_finish_op;
2374    info->accel_state->vbo.verts_per_op = 3;
2375    RADEONVlineHelperClear(pScrn);
2376
2377#ifdef XF86DRM_MODE
2378    radeon_vbo_init_lists(pScrn);
2379#endif
2380
2381    if (!R600AllocShaders(pScrn, pScreen))
2382	return FALSE;
2383
2384    if (!R600LoadShaders(pScrn))
2385	return FALSE;
2386
2387    exaMarkSync(pScreen);
2388
2389    return TRUE;
2390
2391}
2392
2393