r600_exa.c revision c73da4db
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "exa.h"
34
35#include "radeon.h"
36#include "radeon_macros.h"
37#include "radeon_reg.h"
38#include "r600_shader.h"
39#include "r600_reg.h"
40#include "r600_state.h"
41#include "radeon_exa_shared.h"
42#include "radeon_vbo.h"
43
44/* #define SHOW_VERTEXES */
45
46Bool
47R600SetAccelState(ScrnInfoPtr pScrn,
48		  struct r600_accel_object *src0,
49		  struct r600_accel_object *src1,
50		  struct r600_accel_object *dst,
51		  uint32_t vs_offset, uint32_t ps_offset,
52		  int rop, Pixel planemask)
53{
54    RADEONInfoPtr info = RADEONPTR(pScrn);
55    struct radeon_accel_state *accel_state = info->accel_state;
56    uint32_t pitch_align = 0x7, base_align = 0xff;
57#if defined(XF86DRM_MODE)
58    int ret;
59#endif
60
61    if (src0) {
62	memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object));
63	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
64#if defined(XF86DRM_MODE)
65	if (info->cs && src0->surface) {
66		accel_state->src_size[0] = src0->surface->bo_size;
67	}
68#endif
69
70	/* bad pitch */
71	if (accel_state->src_obj[0].pitch & pitch_align)
72	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
73
74	/* bad offset */
75	if (accel_state->src_obj[0].offset & base_align)
76	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[0].offset));
77
78    } else {
79	memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
80	accel_state->src_size[0] = 0;
81    }
82
83    if (src1) {
84	memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object));
85	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
86#if defined(XF86DRM_MODE)
87	if (info->cs && src1->surface) {
88		accel_state->src_size[1] = src1->surface->bo_size;
89	}
90#endif
91
92	/* bad pitch */
93	if (accel_state->src_obj[1].pitch & pitch_align)
94	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
95
96	/* bad offset */
97	if (accel_state->src_obj[1].offset & base_align)
98	    RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[1].offset));
99    } else {
100	memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
101	accel_state->src_size[1] = 0;
102    }
103
104    if (dst) {
105	memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object));
106	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
107#if defined(XF86DRM_MODE)
108	if (info->cs && dst->surface) {
109		accel_state->dst_size = dst->surface->bo_size;
110	} else
111#endif
112	{
113		accel_state->dst_obj.tiling_flags = 0;
114	}
115	if (accel_state->dst_obj.pitch & pitch_align)
116	    RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
117
118	if (accel_state->dst_obj.offset & base_align)
119	    RADEON_FALLBACK(("Bad dst offset 0x%08x\n", accel_state->dst_obj.offset));
120    } else {
121	memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
122	accel_state->dst_size = 0;
123    }
124
125#ifdef XF86DRM_MODE
126    if (info->cs && CS_FULL(info->cs))
127	radeon_cs_flush_indirect(pScrn);
128#endif
129
130    accel_state->rop = rop;
131    accel_state->planemask = planemask;
132
133    accel_state->vs_size = 512;
134    accel_state->ps_size = 512;
135#if defined(XF86DRM_MODE)
136    if (info->cs) {
137	accel_state->vs_mc_addr = vs_offset;
138	accel_state->ps_mc_addr = ps_offset;
139
140	radeon_cs_space_reset_bos(info->cs);
141	radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
142					  RADEON_GEM_DOMAIN_VRAM, 0);
143	if (accel_state->src_obj[0].bo)
144	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
145					      accel_state->src_obj[0].domain, 0);
146	if (accel_state->src_obj[1].bo)
147	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
148					      accel_state->src_obj[1].domain, 0);
149	if (accel_state->dst_obj.bo)
150	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
151					      0, accel_state->dst_obj.domain);
152	ret = radeon_cs_space_check(info->cs);
153	if (ret)
154	    RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
155
156    } else
157#endif
158    {
159	accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
160	    vs_offset;
161	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
162	    ps_offset;
163    }
164
165    return TRUE;
166}
167
168static Bool
169R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
170{
171    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
172    RADEONInfoPtr info = RADEONPTR(pScrn);
173    struct radeon_accel_state *accel_state = info->accel_state;
174    cb_config_t     cb_conf;
175    shader_config_t vs_conf, ps_conf;
176    uint32_t a, r, g, b;
177    float ps_alu_consts[4];
178    struct r600_accel_object dst;
179
180    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
181	RADEON_FALLBACK(("R600CheckDatatype failed\n"));
182    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
183	RADEON_FALLBACK(("invalid planemask\n"));
184
185#if defined(XF86DRM_MODE)
186    if (info->cs) {
187	dst.offset = 0;
188	dst.bo = radeon_get_pixmap_bo(pPix);
189	dst.tiling_flags = radeon_get_pixmap_tiling(pPix);
190	dst.surface = radeon_get_pixmap_surface(pPix);
191    } else
192#endif
193    {
194	dst.offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
195	dst.bo = NULL;
196    }
197
198    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
199    dst.width = pPix->drawable.width;
200    dst.height = pPix->drawable.height;
201    dst.bpp = pPix->drawable.bitsPerPixel;
202    dst.domain = RADEON_GEM_DOMAIN_VRAM;
203
204    if (!R600SetAccelState(pScrn,
205			   NULL,
206			   NULL,
207			   &dst,
208			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
209			   alu, pm))
210	return FALSE;
211
212    CLEAR (cb_conf);
213    CLEAR (vs_conf);
214    CLEAR (ps_conf);
215
216    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
217    radeon_cp_start(pScrn);
218
219    r600_set_default_state(pScrn, accel_state->ib);
220
221    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
222    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
223    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
224
225    /* Shader */
226    vs_conf.shader_addr         = accel_state->vs_mc_addr;
227    vs_conf.shader_size         = accel_state->vs_size;
228    vs_conf.num_gprs            = 2;
229    vs_conf.stack_size          = 0;
230    vs_conf.bo                  = accel_state->shaders_bo;
231    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
232
233    ps_conf.shader_addr         = accel_state->ps_mc_addr;
234    ps_conf.shader_size         = accel_state->ps_size;
235    ps_conf.num_gprs            = 1;
236    ps_conf.stack_size          = 0;
237    ps_conf.uncached_first_inst = 1;
238    ps_conf.clamp_consts        = 0;
239    ps_conf.export_mode         = 2;
240    ps_conf.bo                  = accel_state->shaders_bo;
241    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
242
243    cb_conf.id = 0;
244    cb_conf.w = accel_state->dst_obj.pitch;
245    cb_conf.h = accel_state->dst_obj.height;
246    cb_conf.base = accel_state->dst_obj.offset;
247    cb_conf.bo = accel_state->dst_obj.bo;
248#ifdef XF86DRM_MODE
249    if (info->cs)
250        cb_conf.surface = accel_state->dst_obj.surface;
251#endif
252
253    if (accel_state->dst_obj.bpp == 8) {
254	cb_conf.format = COLOR_8;
255	cb_conf.comp_swap = 3; /* A */
256    } else if (accel_state->dst_obj.bpp == 16) {
257	cb_conf.format = COLOR_5_6_5;
258	cb_conf.comp_swap = 2; /* RGB */
259#if X_BYTE_ORDER == X_BIG_ENDIAN
260	cb_conf.endian = ENDIAN_8IN16;
261#endif
262    } else {
263	cb_conf.format = COLOR_8_8_8_8;
264	cb_conf.comp_swap = 1; /* ARGB */
265#if X_BYTE_ORDER == X_BIG_ENDIAN
266	cb_conf.endian = ENDIAN_8IN32;
267#endif
268    }
269    cb_conf.source_format = 1;
270    cb_conf.blend_clamp = 1;
271    /* Render setup */
272    if (accel_state->planemask & 0x000000ff)
273	cb_conf.pmask |= 4; /* B */
274    if (accel_state->planemask & 0x0000ff00)
275	cb_conf.pmask |= 2; /* G */
276    if (accel_state->planemask & 0x00ff0000)
277	cb_conf.pmask |= 1; /* R */
278    if (accel_state->planemask & 0xff000000)
279	cb_conf.pmask |= 8; /* A */
280    cb_conf.rop = accel_state->rop;
281    if (accel_state->dst_obj.tiling_flags == 0)
282	cb_conf.array_mode = 0;
283    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
284
285    r600_set_spi(pScrn, accel_state->ib, 0, 0);
286
287    /* PS alu constants */
288    if (accel_state->dst_obj.bpp == 16) {
289	r = (fg >> 11) & 0x1f;
290	g = (fg >> 5) & 0x3f;
291	b = (fg >> 0) & 0x1f;
292	ps_alu_consts[0] = (float)r / 31; /* R */
293	ps_alu_consts[1] = (float)g / 63; /* G */
294	ps_alu_consts[2] = (float)b / 31; /* B */
295	ps_alu_consts[3] = 1.0; /* A */
296    } else if (accel_state->dst_obj.bpp == 8) {
297	a = (fg >> 0) & 0xff;
298	ps_alu_consts[0] = 0.0; /* R */
299	ps_alu_consts[1] = 0.0; /* G */
300	ps_alu_consts[2] = 0.0; /* B */
301	ps_alu_consts[3] = (float)a / 255; /* A */
302    } else {
303	a = (fg >> 24) & 0xff;
304	r = (fg >> 16) & 0xff;
305	g = (fg >> 8) & 0xff;
306	b = (fg >> 0) & 0xff;
307	ps_alu_consts[0] = (float)r / 255; /* R */
308	ps_alu_consts[1] = (float)g / 255; /* G */
309	ps_alu_consts[2] = (float)b / 255; /* B */
310	ps_alu_consts[3] = (float)a / 255; /* A */
311    }
312    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
313			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
314
315    if (accel_state->vsync)
316	RADEONVlineHelperClear(pScrn);
317
318    accel_state->dst_pix = pPix;
319    accel_state->fg = fg;
320
321    return TRUE;
322}
323
324static void
325R600DoneSolid(PixmapPtr pPix)
326{
327    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
328    RADEONInfoPtr info = RADEONPTR(pScrn);
329    struct radeon_accel_state *accel_state = info->accel_state;
330
331    if (accel_state->vsync)
332	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
333				accel_state->vline_crtc,
334				accel_state->vline_y1,
335				accel_state->vline_y2);
336
337    r600_finish_op(pScrn, 8);
338}
339
340static void
341R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
342{
343    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
344    RADEONInfoPtr info = RADEONPTR(pScrn);
345    struct radeon_accel_state *accel_state = info->accel_state;
346    float *vb;
347
348#ifdef XF86DRM_MODE
349    if (info->cs && CS_FULL(info->cs)) {
350	R600DoneSolid(info->accel_state->dst_pix);
351	radeon_cs_flush_indirect(pScrn);
352	R600PrepareSolid(accel_state->dst_pix,
353			 accel_state->rop,
354			 accel_state->planemask,
355			 accel_state->fg);
356    }
357#endif
358
359    if (accel_state->vsync)
360	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
361
362    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
363
364    vb[0] = (float)x1;
365    vb[1] = (float)y1;
366
367    vb[2] = (float)x1;
368    vb[3] = (float)y2;
369
370    vb[4] = (float)x2;
371    vb[5] = (float)y2;
372
373    radeon_vbo_commit(pScrn, &accel_state->vbo);
374}
375
376static void
377R600DoPrepareCopy(ScrnInfoPtr pScrn)
378{
379    RADEONInfoPtr info = RADEONPTR(pScrn);
380    struct radeon_accel_state *accel_state = info->accel_state;
381    cb_config_t     cb_conf;
382    tex_resource_t  tex_res;
383    tex_sampler_t   tex_samp;
384    shader_config_t vs_conf, ps_conf;
385
386    CLEAR (cb_conf);
387    CLEAR (tex_res);
388    CLEAR (tex_samp);
389    CLEAR (vs_conf);
390    CLEAR (ps_conf);
391
392    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
393    radeon_cp_start(pScrn);
394
395    r600_set_default_state(pScrn, accel_state->ib);
396
397    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
398    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
399    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
400
401    /* Shader */
402    vs_conf.shader_addr         = accel_state->vs_mc_addr;
403    vs_conf.shader_size         = accel_state->vs_size;
404    vs_conf.num_gprs            = 2;
405    vs_conf.stack_size          = 0;
406    vs_conf.bo                  = accel_state->shaders_bo;
407    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
408
409    ps_conf.shader_addr         = accel_state->ps_mc_addr;
410    ps_conf.shader_size         = accel_state->ps_size;
411    ps_conf.num_gprs            = 1;
412    ps_conf.stack_size          = 0;
413    ps_conf.uncached_first_inst = 1;
414    ps_conf.clamp_consts        = 0;
415    ps_conf.export_mode         = 2;
416    ps_conf.bo                  = accel_state->shaders_bo;
417    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
418
419    /* Texture */
420    tex_res.id                  = 0;
421    tex_res.w                   = accel_state->src_obj[0].width;
422    tex_res.h                   = accel_state->src_obj[0].height;
423    tex_res.pitch               = accel_state->src_obj[0].pitch;
424    tex_res.depth               = 0;
425    tex_res.dim                 = SQ_TEX_DIM_2D;
426    tex_res.base                = accel_state->src_obj[0].offset;
427    tex_res.mip_base            = accel_state->src_obj[0].offset;
428    tex_res.size                = accel_state->src_size[0];
429    tex_res.bo                  = accel_state->src_obj[0].bo;
430    tex_res.mip_bo              = accel_state->src_obj[0].bo;
431#ifdef XF86DRM_MODE
432    if (info->cs)
433        tex_res.surface             = accel_state->src_obj[0].surface;
434#endif
435    if (accel_state->src_obj[0].bpp == 8) {
436	tex_res.format              = FMT_8;
437	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
438	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
439	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
440	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
441    } else if (accel_state->src_obj[0].bpp == 16) {
442	tex_res.format              = FMT_5_6_5;
443	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
444	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
445	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
446	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
447    } else {
448	tex_res.format              = FMT_8_8_8_8;
449	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
450	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
451	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
452	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
453    }
454
455    tex_res.request_size        = 1;
456    tex_res.base_level          = 0;
457    tex_res.last_level          = 0;
458    tex_res.perf_modulation     = 0;
459    if (accel_state->src_obj[0].tiling_flags == 0)
460	tex_res.tile_mode           = 1;
461    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
462
463    tex_samp.id                 = 0;
464    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
465    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
466    tex_samp.clamp_z            = SQ_TEX_WRAP;
467    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
468    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
469    tex_samp.mc_coord_truncate  = 1;
470    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
471    tex_samp.mip_filter         = 0;			/* no mipmap */
472    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
473
474    cb_conf.id = 0;
475    cb_conf.w = accel_state->dst_obj.pitch;
476    cb_conf.h = accel_state->dst_obj.height;
477    cb_conf.base = accel_state->dst_obj.offset;
478    cb_conf.bo = accel_state->dst_obj.bo;
479#ifdef XF86DRM_MODE
480    if (info->cs)
481        cb_conf.surface = accel_state->dst_obj.surface;
482#endif
483    if (accel_state->dst_obj.bpp == 8) {
484	cb_conf.format = COLOR_8;
485	cb_conf.comp_swap = 3; /* A */
486    } else if (accel_state->dst_obj.bpp == 16) {
487	cb_conf.format = COLOR_5_6_5;
488	cb_conf.comp_swap = 2; /* RGB */
489    } else {
490	cb_conf.format = COLOR_8_8_8_8;
491	cb_conf.comp_swap = 1; /* ARGB */
492    }
493    cb_conf.source_format = 1;
494    cb_conf.blend_clamp = 1;
495
496    /* Render setup */
497    if (accel_state->planemask & 0x000000ff)
498	cb_conf.pmask |= 4; /* B */
499    if (accel_state->planemask & 0x0000ff00)
500	cb_conf.pmask |= 2; /* G */
501    if (accel_state->planemask & 0x00ff0000)
502	cb_conf.pmask |= 1; /* R */
503    if (accel_state->planemask & 0xff000000)
504	cb_conf.pmask |= 8; /* A */
505    cb_conf.rop = accel_state->rop;
506    if (accel_state->dst_obj.tiling_flags == 0)
507	cb_conf.array_mode = 0;
508    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
509
510    r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
511
512}
513
514static void
515R600DoCopy(ScrnInfoPtr pScrn)
516{
517    r600_finish_op(pScrn, 16);
518}
519
520static void
521R600DoCopyVline(PixmapPtr pPix)
522{
523    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
524    RADEONInfoPtr info = RADEONPTR(pScrn);
525    struct radeon_accel_state *accel_state = info->accel_state;
526
527    if (accel_state->vsync)
528	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
529				accel_state->vline_crtc,
530				accel_state->vline_y1,
531				accel_state->vline_y2);
532
533    r600_finish_op(pScrn, 16);
534}
535
536static void
537R600AppendCopyVertex(ScrnInfoPtr pScrn,
538		     int srcX, int srcY,
539		     int dstX, int dstY,
540		     int w, int h)
541{
542    RADEONInfoPtr info = RADEONPTR(pScrn);
543    struct radeon_accel_state *accel_state = info->accel_state;
544    float *vb;
545
546    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
547
548    vb[0] = (float)dstX;
549    vb[1] = (float)dstY;
550    vb[2] = (float)srcX;
551    vb[3] = (float)srcY;
552
553    vb[4] = (float)dstX;
554    vb[5] = (float)(dstY + h);
555    vb[6] = (float)srcX;
556    vb[7] = (float)(srcY + h);
557
558    vb[8] = (float)(dstX + w);
559    vb[9] = (float)(dstY + h);
560    vb[10] = (float)(srcX + w);
561    vb[11] = (float)(srcY + h);
562
563    radeon_vbo_commit(pScrn, &accel_state->vbo);
564}
565
566static Bool
567R600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
568		int xdir, int ydir,
569		int rop,
570		Pixel planemask)
571{
572    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
573    RADEONInfoPtr info = RADEONPTR(pScrn);
574    struct radeon_accel_state *accel_state = info->accel_state;
575    struct r600_accel_object src_obj, dst_obj;
576
577    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
578	RADEON_FALLBACK(("R600CheckDatatype src failed\n"));
579    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
580	RADEON_FALLBACK(("R600CheckDatatype dst failed\n"));
581    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
582	RADEON_FALLBACK(("Invalid planemask\n"));
583
584    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
585    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
586
587    accel_state->same_surface = FALSE;
588
589#if defined(XF86DRM_MODE)
590    if (info->cs) {
591	src_obj.offset = 0;
592	dst_obj.offset = 0;
593	src_obj.bo = radeon_get_pixmap_bo(pSrc);
594	dst_obj.bo = radeon_get_pixmap_bo(pDst);
595	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
596	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
597	src_obj.surface = radeon_get_pixmap_surface(pSrc);
598	dst_obj.surface = radeon_get_pixmap_surface(pDst);
599	if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
600	    accel_state->same_surface = TRUE;
601    } else
602#endif
603    {
604	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
605	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
606	if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst))
607	    accel_state->same_surface = TRUE;
608	src_obj.bo = NULL;
609	dst_obj.bo = NULL;
610    }
611
612    src_obj.width = pSrc->drawable.width;
613    src_obj.height = pSrc->drawable.height;
614    src_obj.bpp = pSrc->drawable.bitsPerPixel;
615    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
616
617    dst_obj.width = pDst->drawable.width;
618    dst_obj.height = pDst->drawable.height;
619    dst_obj.bpp = pDst->drawable.bitsPerPixel;
620    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
621
622    if (!R600SetAccelState(pScrn,
623			   &src_obj,
624			   NULL,
625			   &dst_obj,
626			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
627			   rop, planemask))
628	return FALSE;
629
630    if (accel_state->same_surface == TRUE) {
631#if defined(XF86DRM_MODE)
632	if (info->cs) {
633	    unsigned long size = accel_state->dst_obj.surface->bo_size;
634	    unsigned long align = accel_state->dst_obj.surface->bo_alignment;
635
636	    if (accel_state->copy_area_bo) {
637		radeon_bo_unref(accel_state->copy_area_bo);
638		accel_state->copy_area_bo = NULL;
639	    }
640	    accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, align,
641						       RADEON_GEM_DOMAIN_VRAM,
642						       0);
643	    if (accel_state->copy_area_bo == NULL)
644		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
645
646	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
647					      0, RADEON_GEM_DOMAIN_VRAM);
648	    if (radeon_cs_space_check(info->cs)) {
649		radeon_bo_unref(accel_state->copy_area_bo);
650		accel_state->copy_area_bo = NULL;
651		return FALSE;
652	    }
653	    accel_state->copy_area = (void*)accel_state->copy_area_bo;
654	} else
655#endif
656	{
657	    unsigned height = pDst->drawable.height;
658	    unsigned long size = height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
659
660	    if (accel_state->copy_area) {
661		exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
662		accel_state->copy_area = NULL;
663	    }
664	    accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
665	    if (!accel_state->copy_area)
666		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
667	}
668    } else
669	R600DoPrepareCopy(pScrn);
670
671    if (accel_state->vsync)
672	RADEONVlineHelperClear(pScrn);
673
674    accel_state->dst_pix = pDst;
675    accel_state->src_pix = pSrc;
676    accel_state->xdir = xdir;
677    accel_state->ydir = ydir;
678
679    return TRUE;
680}
681
682static void
683R600DoneCopy(PixmapPtr pDst)
684{
685    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
686    RADEONInfoPtr info = RADEONPTR(pScrn);
687    struct radeon_accel_state *accel_state = info->accel_state;
688
689    if (!accel_state->same_surface)
690	R600DoCopyVline(pDst);
691
692    if (accel_state->copy_area) {
693	if (!info->cs)
694	    exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
695	accel_state->copy_area = NULL;
696    }
697
698}
699
700static void
701R600Copy(PixmapPtr pDst,
702	 int srcX, int srcY,
703	 int dstX, int dstY,
704	 int w, int h)
705{
706    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
707    RADEONInfoPtr info = RADEONPTR(pScrn);
708    struct radeon_accel_state *accel_state = info->accel_state;
709
710    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
711	return;
712
713#ifdef XF86DRM_MODE
714    if (info->cs && CS_FULL(info->cs)) {
715	R600DoneCopy(info->accel_state->dst_pix);
716	radeon_cs_flush_indirect(pScrn);
717	R600PrepareCopy(accel_state->src_pix,
718			accel_state->dst_pix,
719			accel_state->xdir,
720			accel_state->ydir,
721			accel_state->rop,
722			accel_state->planemask);
723    }
724#endif
725
726    if (accel_state->vsync)
727	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
728
729    if (accel_state->same_surface && accel_state->copy_area) {
730	uint32_t orig_offset, tmp_offset;
731	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
732	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
733	uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags;
734	uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags;
735	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
736	int orig_rop = accel_state->rop;
737
738#if defined(XF86DRM_MODE)
739	if (info->cs) {
740	    tmp_offset = 0;
741	    orig_offset = 0;
742	} else
743#endif
744	{
745	    tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset;
746	    orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
747	}
748
749	/* src to tmp */
750	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
751	accel_state->dst_obj.bo = accel_state->copy_area_bo;
752	accel_state->dst_obj.offset = tmp_offset;
753	accel_state->dst_obj.tiling_flags = 0;
754	accel_state->rop = 3;
755	R600DoPrepareCopy(pScrn);
756	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
757	R600DoCopy(pScrn);
758
759	/* tmp to dst */
760	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
761	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
762	accel_state->src_obj[0].offset = tmp_offset;
763	accel_state->src_obj[0].tiling_flags = 0;
764	accel_state->dst_obj.domain = orig_dst_domain;
765	accel_state->dst_obj.bo = orig_bo;
766	accel_state->dst_obj.offset = orig_offset;
767	accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags;
768	accel_state->rop = orig_rop;
769	R600DoPrepareCopy(pScrn);
770	R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
771	R600DoCopyVline(pDst);
772
773	/* restore state */
774	accel_state->src_obj[0].domain = orig_src_domain;
775	accel_state->src_obj[0].bo = orig_bo;
776	accel_state->src_obj[0].offset = orig_offset;
777	accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags;
778    } else
779	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
780
781}
782
783struct blendinfo {
784    Bool dst_alpha;
785    Bool src_alpha;
786    uint32_t blend_cntl;
787};
788
789static struct blendinfo R600BlendOp[] = {
790    /* Clear */
791    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
792    /* Src */
793    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
794    /* Dst */
795    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
796    /* Over */
797    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
798    /* OverReverse */
799    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
800    /* In */
801    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
802    /* InReverse */
803    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
804    /* Out */
805    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
806    /* OutReverse */
807    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
808    /* Atop */
809    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
810    /* AtopReverse */
811    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
812    /* Xor */
813    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
814    /* Add */
815    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
816};
817
818struct formatinfo {
819    unsigned int fmt;
820    uint32_t card_fmt;
821};
822
823static struct formatinfo R600TexFormats[] = {
824    {PICT_a8r8g8b8,	FMT_8_8_8_8},
825    {PICT_x8r8g8b8,	FMT_8_8_8_8},
826    {PICT_a8b8g8r8,	FMT_8_8_8_8},
827    {PICT_x8b8g8r8,	FMT_8_8_8_8},
828#ifdef PICT_TYPE_BGRA
829    {PICT_b8g8r8a8,	FMT_8_8_8_8},
830    {PICT_b8g8r8x8,	FMT_8_8_8_8},
831#endif
832    {PICT_r5g6b5,	FMT_5_6_5},
833    {PICT_a1r5g5b5,	FMT_1_5_5_5},
834    {PICT_x1r5g5b5,     FMT_1_5_5_5},
835    {PICT_a8,		FMT_8},
836};
837
838static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
839{
840    uint32_t sblend, dblend;
841
842    sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
843    dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
844
845    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
846     * it as always 1.
847     */
848    if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
849	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
850	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
851	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
852	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
853    }
854
855    /* If the source alpha is being used, then we should only be in a case where
856     * the source blend factor is 0, and the source blend value is the mask
857     * channels multiplied by the source picture's alpha.
858     */
859    if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
860	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
861	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
862	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
863	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
864	}
865    }
866
867    return sblend | dblend;
868}
869
870static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
871{
872    switch (pDstPicture->format) {
873    case PICT_a8r8g8b8:
874    case PICT_x8r8g8b8:
875    case PICT_a8b8g8r8:
876    case PICT_x8b8g8r8:
877#ifdef PICT_TYPE_BGRA
878    case PICT_b8g8r8a8:
879    case PICT_b8g8r8x8:
880#endif
881	*dst_format = COLOR_8_8_8_8;
882	break;
883    case PICT_r5g6b5:
884	*dst_format = COLOR_5_6_5;
885	break;
886    case PICT_a1r5g5b5:
887    case PICT_x1r5g5b5:
888	*dst_format = COLOR_1_5_5_5;
889	break;
890    case PICT_a8:
891	*dst_format = COLOR_8;
892	break;
893    default:
894	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
895	       (int)pDstPicture->format));
896    }
897    return TRUE;
898}
899
900static Bool R600CheckCompositeTexture(PicturePtr pPict,
901				      PicturePtr pDstPict,
902				      int op,
903				      int unit)
904{
905    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
906    unsigned int i;
907
908    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
909	if (R600TexFormats[i].fmt == pPict->format)
910	    break;
911    }
912    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
913	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
914			 (int)pPict->format));
915
916    if (pPict->filter != PictFilterNearest &&
917	pPict->filter != PictFilterBilinear)
918	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
919
920    /* for REPEAT_NONE, Render semantics are that sampling outside the source
921     * picture results in alpha=0 pixels. We can implement this with a border color
922     * *if* our source texture has an alpha channel, otherwise we need to fall
923     * back. If we're not transformed then we hope that upper layers have clipped
924     * rendering to the bounds of the source drawable, in which case it doesn't
925     * matter. I have not, however, verified that the X server always does such
926     * clipping.
927     */
928    /* FIXME R6xx */
929    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
930	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
931	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
932    }
933
934    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
935	RADEON_FALLBACK(("non-affine transforms not supported\n"));
936
937    return TRUE;
938}
939
940static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
941					int unit)
942{
943    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
944    RADEONInfoPtr info = RADEONPTR(pScrn);
945    struct radeon_accel_state *accel_state = info->accel_state;
946    unsigned int repeatType;
947    unsigned int i;
948    tex_resource_t  tex_res;
949    tex_sampler_t   tex_samp;
950    int pix_r, pix_g, pix_b, pix_a;
951    float vs_alu_consts[8];
952
953    CLEAR (tex_res);
954    CLEAR (tex_samp);
955
956    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
957	if (R600TexFormats[i].fmt == pPict->format)
958	    break;
959    }
960
961    /* Texture */
962    if (pPict->pDrawable) {
963	tex_res.w               = pPict->pDrawable->width;
964	tex_res.h               = pPict->pDrawable->height;
965	repeatType              = pPict->repeat ? pPict->repeatType : RepeatNone;
966    } else {
967	tex_res.w               = 1;
968	tex_res.h               = 1;
969	repeatType              = RepeatNormal;
970    }
971    tex_res.id                  = unit;
972    tex_res.pitch               = accel_state->src_obj[unit].pitch;
973    tex_res.depth               = 0;
974    tex_res.dim                 = SQ_TEX_DIM_2D;
975    tex_res.base                = accel_state->src_obj[unit].offset;
976    tex_res.mip_base            = accel_state->src_obj[unit].offset;
977    tex_res.size                = accel_state->src_size[unit];
978    tex_res.format              = R600TexFormats[i].card_fmt;
979    tex_res.bo                  = accel_state->src_obj[unit].bo;
980    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
981#ifdef XF86DRM_MODE
982    if (info->cs)
983        tex_res.surface             = accel_state->src_obj[unit].surface;
984#endif
985    tex_res.request_size        = 1;
986
987#if X_BYTE_ORDER == X_BIG_ENDIAN
988    switch (accel_state->src_obj[unit].bpp) {
989    case 16:
990	tex_res.endian = SQ_ENDIAN_8IN16;
991	break;
992    case 32:
993	tex_res.endian = SQ_ENDIAN_8IN32;
994	break;
995    default :
996	break;
997    }
998#endif
999
1000    /* component swizzles */
1001    switch (pPict->format) {
1002    case PICT_a1r5g5b5:
1003    case PICT_a8r8g8b8:
1004	pix_r = SQ_SEL_Z; /* R */
1005	pix_g = SQ_SEL_Y; /* G */
1006	pix_b = SQ_SEL_X; /* B */
1007	pix_a = SQ_SEL_W; /* A */
1008	break;
1009    case PICT_a8b8g8r8:
1010	pix_r = SQ_SEL_X; /* R */
1011	pix_g = SQ_SEL_Y; /* G */
1012	pix_b = SQ_SEL_Z; /* B */
1013	pix_a = SQ_SEL_W; /* A */
1014	break;
1015    case PICT_x8b8g8r8:
1016	pix_r = SQ_SEL_X; /* R */
1017	pix_g = SQ_SEL_Y; /* G */
1018	pix_b = SQ_SEL_Z; /* B */
1019	pix_a = SQ_SEL_1; /* A */
1020	break;
1021#ifdef PICT_TYPE_BGRA
1022    case PICT_b8g8r8a8:
1023	pix_r = SQ_SEL_Y; /* R */
1024	pix_g = SQ_SEL_Z; /* G */
1025	pix_b = SQ_SEL_W; /* B */
1026	pix_a = SQ_SEL_X; /* A */
1027	break;
1028    case PICT_b8g8r8x8:
1029	pix_r = SQ_SEL_Y; /* R */
1030	pix_g = SQ_SEL_Z; /* G */
1031	pix_b = SQ_SEL_W; /* B */
1032	pix_a = SQ_SEL_1; /* A */
1033	break;
1034#endif
1035    case PICT_x1r5g5b5:
1036    case PICT_x8r8g8b8:
1037    case PICT_r5g6b5:
1038	pix_r = SQ_SEL_Z; /* R */
1039	pix_g = SQ_SEL_Y; /* G */
1040	pix_b = SQ_SEL_X; /* B */
1041	pix_a = SQ_SEL_1; /* A */
1042	break;
1043    case PICT_a8:
1044	pix_r = SQ_SEL_0; /* R */
1045	pix_g = SQ_SEL_0; /* G */
1046	pix_b = SQ_SEL_0; /* B */
1047	pix_a = SQ_SEL_X; /* A */
1048	break;
1049    default:
1050	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
1051    }
1052
1053    if (unit == 0) {
1054	if (!accel_state->msk_pic) {
1055	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
1056		pix_r = SQ_SEL_0;
1057		pix_g = SQ_SEL_0;
1058		pix_b = SQ_SEL_0;
1059	    }
1060
1061	    if (PICT_FORMAT_A(pPict->format) == 0)
1062		pix_a = SQ_SEL_1;
1063	} else {
1064	    if (accel_state->component_alpha) {
1065		if (accel_state->src_alpha) {
1066		    if (PICT_FORMAT_A(pPict->format) == 0) {
1067			pix_r = SQ_SEL_1;
1068			pix_g = SQ_SEL_1;
1069			pix_b = SQ_SEL_1;
1070			pix_a = SQ_SEL_1;
1071		    } else {
1072			pix_r = pix_a;
1073			pix_g = pix_a;
1074			pix_b = pix_a;
1075		    }
1076		} else {
1077		    if (PICT_FORMAT_A(pPict->format) == 0)
1078			pix_a = SQ_SEL_1;
1079		}
1080	    } else {
1081		if (PICT_FORMAT_RGB(pPict->format) == 0) {
1082		    pix_r = SQ_SEL_0;
1083		    pix_g = SQ_SEL_0;
1084		    pix_b = SQ_SEL_0;
1085		}
1086
1087		if (PICT_FORMAT_A(pPict->format) == 0)
1088		    pix_a = SQ_SEL_1;
1089	    }
1090	}
1091    } else {
1092	if (accel_state->component_alpha) {
1093	    if (PICT_FORMAT_A(pPict->format) == 0)
1094		pix_a = SQ_SEL_1;
1095	} else {
1096	    if (PICT_FORMAT_A(pPict->format) == 0) {
1097		pix_r = SQ_SEL_1;
1098		pix_g = SQ_SEL_1;
1099		pix_b = SQ_SEL_1;
1100		pix_a = SQ_SEL_1;
1101	    } else {
1102		pix_r = pix_a;
1103		pix_g = pix_a;
1104		pix_b = pix_a;
1105	    }
1106	}
1107    }
1108
1109    tex_res.dst_sel_x           = pix_r; /* R */
1110    tex_res.dst_sel_y           = pix_g; /* G */
1111    tex_res.dst_sel_z           = pix_b; /* B */
1112    tex_res.dst_sel_w           = pix_a; /* A */
1113
1114    tex_res.base_level          = 0;
1115    tex_res.last_level          = 0;
1116    tex_res.perf_modulation     = 0;
1117    if (accel_state->src_obj[unit].tiling_flags == 0)
1118	tex_res.tile_mode           = 1;
1119    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
1120
1121    tex_samp.id                 = unit;
1122    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1123
1124    switch (repeatType) {
1125    case RepeatNormal:
1126	tex_samp.clamp_x            = SQ_TEX_WRAP;
1127	tex_samp.clamp_y            = SQ_TEX_WRAP;
1128	break;
1129    case RepeatPad:
1130	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1131	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1132	break;
1133    case RepeatReflect:
1134	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1135	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1136	break;
1137    case RepeatNone:
1138	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1139	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1140	break;
1141    default:
1142	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1143    }
1144
1145    switch (pPict->filter) {
1146    case PictFilterNearest:
1147	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1148	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1149	tex_samp.mc_coord_truncate  = 1;
1150	break;
1151    case PictFilterBilinear:
1152	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1153	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1154	break;
1155    default:
1156	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1157    }
1158
1159    tex_samp.clamp_z            = SQ_TEX_WRAP;
1160    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1161    tex_samp.mip_filter         = 0;			/* no mipmap */
1162    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
1163
1164    if (pPict->transform != 0) {
1165	accel_state->is_transform[unit] = TRUE;
1166	accel_state->transform[unit] = pPict->transform;
1167
1168	vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
1169	vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
1170	vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
1171	vs_alu_consts[3] = 1.0 / tex_res.w;
1172
1173	vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
1174	vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
1175	vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
1176	vs_alu_consts[7] = 1.0 / tex_res.h;
1177    } else {
1178	accel_state->is_transform[unit] = FALSE;
1179
1180	vs_alu_consts[0] = 1.0;
1181	vs_alu_consts[1] = 0.0;
1182	vs_alu_consts[2] = 0.0;
1183	vs_alu_consts[3] = 1.0 / tex_res.w;
1184
1185	vs_alu_consts[4] = 0.0;
1186	vs_alu_consts[5] = 1.0;
1187	vs_alu_consts[6] = 0.0;
1188	vs_alu_consts[7] = 1.0 / tex_res.h;
1189    }
1190
1191    /* VS alu constants */
1192    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
1193			sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
1194
1195    return TRUE;
1196}
1197
1198static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1199			       PicturePtr pDstPicture)
1200{
1201    uint32_t tmp1;
1202    PixmapPtr pSrcPixmap, pDstPixmap;
1203
1204    /* Check for unsupported compositing operations. */
1205    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1206	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1207
1208    if (pSrcPicture->pDrawable) {
1209	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1210
1211	if (pSrcPixmap->drawable.width >= 8192 ||
1212	    pSrcPixmap->drawable.height >= 8192) {
1213	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1214			     pSrcPixmap->drawable.width,
1215			     pSrcPixmap->drawable.height));
1216	}
1217
1218	if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1219	    return FALSE;
1220    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
1221	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1222
1223    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1224
1225    if (pDstPixmap->drawable.width >= 8192 ||
1226	pDstPixmap->drawable.height >= 8192) {
1227	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1228			 pDstPixmap->drawable.width,
1229			 pDstPixmap->drawable.height));
1230    }
1231
1232    if (pMaskPicture) {
1233	PixmapPtr pMaskPixmap;
1234
1235	if (pMaskPicture->pDrawable) {
1236	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1237
1238	    if (pMaskPixmap->drawable.width >= 8192 ||
1239		pMaskPixmap->drawable.height >= 8192) {
1240	      RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1241			       pMaskPixmap->drawable.width,
1242			       pMaskPixmap->drawable.height));
1243	    }
1244
1245	    if (pMaskPicture->componentAlpha) {
1246		/* Check if it's component alpha that relies on a source alpha and
1247		 * on the source value.  We can only get one of those into the
1248		 * single source value that we get to blend with.
1249		 */
1250		if (R600BlendOp[op].src_alpha &&
1251		    (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1252		    (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1253		    RADEON_FALLBACK(("Component alpha not supported with source "
1254				     "alpha and source value blending.\n"));
1255		}
1256	    }
1257
1258	    if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1259		return FALSE;
1260	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
1261	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1262    }
1263
1264    if (!R600GetDestFormat(pDstPicture, &tmp1))
1265	return FALSE;
1266
1267    return TRUE;
1268
1269}
1270
1271static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1272				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1273				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1274{
1275    ScreenPtr pScreen = pDst->drawable.pScreen;
1276    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1277    RADEONInfoPtr info = RADEONPTR(pScrn);
1278    struct radeon_accel_state *accel_state = info->accel_state;
1279    uint32_t dst_format;
1280    cb_config_t cb_conf;
1281    shader_config_t vs_conf, ps_conf;
1282    struct r600_accel_object src_obj, mask_obj, dst_obj;
1283
1284    if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8))
1285	return FALSE;
1286
1287    if (!pSrc) {
1288	pSrc = RADEONSolidPixmap(pScreen, pSrcPicture->pSourcePict->solidFill.color);
1289	if (!pSrc)
1290	    RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
1291    }
1292
1293#if defined(XF86DRM_MODE)
1294    if (info->cs) {
1295	src_obj.offset = 0;
1296	dst_obj.offset = 0;
1297	dst_obj.bo = radeon_get_pixmap_bo(pDst);
1298	src_obj.bo = radeon_get_pixmap_bo(pSrc);
1299	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1300	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1301	dst_obj.surface = radeon_get_pixmap_surface(pDst);
1302	src_obj.surface = radeon_get_pixmap_surface(pSrc);
1303    } else
1304#endif
1305    {
1306	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1307	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1308	src_obj.bo = NULL;
1309	dst_obj.bo = NULL;
1310    }
1311    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1312    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1313
1314    src_obj.width = pSrc->drawable.width;
1315    src_obj.height = pSrc->drawable.height;
1316    src_obj.bpp = pSrc->drawable.bitsPerPixel;
1317    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1318
1319    dst_obj.width = pDst->drawable.width;
1320    dst_obj.height = pDst->drawable.height;
1321    dst_obj.bpp = pDst->drawable.bitsPerPixel;
1322    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1323
1324    if (pMaskPicture) {
1325	if (!pMask) {
1326	    pMask = RADEONSolidPixmap(pScreen, pMaskPicture->pSourcePict->solidFill.color);
1327	    if (!pMask) {
1328		if (!pSrcPicture->pDrawable)
1329		    pScreen->DestroyPixmap(pSrc);
1330		RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
1331	    }
1332	}
1333
1334#if defined(XF86DRM_MODE)
1335	if (info->cs) {
1336	    mask_obj.offset = 0;
1337	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
1338	    mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
1339	    mask_obj.surface = radeon_get_pixmap_surface(pMask);
1340	} else
1341#endif
1342	{
1343	    mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset;
1344	    mask_obj.bo = NULL;
1345	}
1346	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1347
1348	mask_obj.width = pMask->drawable.width;
1349	mask_obj.height = pMask->drawable.height;
1350	mask_obj.bpp = pMask->drawable.bitsPerPixel;
1351	mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1352
1353	if (!R600SetAccelState(pScrn,
1354			       &src_obj,
1355			       &mask_obj,
1356			       &dst_obj,
1357			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1358			       3, 0xffffffff))
1359	    return FALSE;
1360
1361	accel_state->msk_pic = pMaskPicture;
1362	if (pMaskPicture->componentAlpha) {
1363	    accel_state->component_alpha = TRUE;
1364	    if (R600BlendOp[op].src_alpha)
1365		accel_state->src_alpha = TRUE;
1366	    else
1367		accel_state->src_alpha = FALSE;
1368	} else {
1369	    accel_state->component_alpha = FALSE;
1370	    accel_state->src_alpha = FALSE;
1371	}
1372    } else {
1373	if (!R600SetAccelState(pScrn,
1374			       &src_obj,
1375			       NULL,
1376			       &dst_obj,
1377			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1378			       3, 0xffffffff))
1379	    return FALSE;
1380
1381	accel_state->msk_pic = NULL;
1382	accel_state->component_alpha = FALSE;
1383	accel_state->src_alpha = FALSE;
1384    }
1385
1386    if (!R600GetDestFormat(pDstPicture, &dst_format))
1387	return FALSE;
1388
1389    CLEAR (cb_conf);
1390    CLEAR (vs_conf);
1391    CLEAR (ps_conf);
1392
1393    if (pMask)
1394        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1395    else
1396        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1397
1398    radeon_cp_start(pScrn);
1399
1400    r600_set_default_state(pScrn, accel_state->ib);
1401
1402    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1403    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1404    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1405
1406    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
1407        R600IBDiscard(pScrn, accel_state->ib);
1408        return FALSE;
1409    }
1410
1411    if (pMask) {
1412        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
1413            R600IBDiscard(pScrn, accel_state->ib);
1414            return FALSE;
1415        }
1416    } else
1417        accel_state->is_transform[1] = FALSE;
1418
1419    if (pMask) {
1420	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
1421	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
1422    } else {
1423	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
1424	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
1425    }
1426
1427    /* Shader */
1428    vs_conf.shader_addr         = accel_state->vs_mc_addr;
1429    vs_conf.shader_size         = accel_state->vs_size;
1430    vs_conf.num_gprs            = 5;
1431    vs_conf.stack_size          = 1;
1432    vs_conf.bo                  = accel_state->shaders_bo;
1433    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1434
1435    ps_conf.shader_addr         = accel_state->ps_mc_addr;
1436    ps_conf.shader_size         = accel_state->ps_size;
1437    ps_conf.num_gprs            = 3;
1438    ps_conf.stack_size          = 1;
1439    ps_conf.uncached_first_inst = 1;
1440    ps_conf.clamp_consts        = 0;
1441    ps_conf.export_mode         = 2;
1442    ps_conf.bo                  = accel_state->shaders_bo;
1443    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1444
1445    cb_conf.id = 0;
1446    cb_conf.w = accel_state->dst_obj.pitch;
1447    cb_conf.h = accel_state->dst_obj.height;
1448    cb_conf.base = accel_state->dst_obj.offset;
1449    cb_conf.format = dst_format;
1450    cb_conf.bo = accel_state->dst_obj.bo;
1451#ifdef XF86DRM_MODE
1452    if (info->cs)
1453        cb_conf.surface = accel_state->dst_obj.surface;
1454#endif
1455
1456    switch (pDstPicture->format) {
1457    case PICT_a8r8g8b8:
1458    case PICT_x8r8g8b8:
1459    case PICT_a1r5g5b5:
1460    case PICT_x1r5g5b5:
1461    default:
1462	cb_conf.comp_swap = 1; /* ARGB */
1463	break;
1464    case PICT_a8b8g8r8:
1465    case PICT_x8b8g8r8:
1466	cb_conf.comp_swap = 0; /* ABGR */
1467	break;
1468#ifdef PICT_TYPE_BGRA
1469    case PICT_b8g8r8a8:
1470    case PICT_b8g8r8x8:
1471	cb_conf.comp_swap = 3; /* BGRA */
1472	break;
1473#endif
1474    case PICT_r5g6b5:
1475	cb_conf.comp_swap = 2; /* RGB */
1476	break;
1477    case PICT_a8:
1478	cb_conf.comp_swap = 3; /* A */
1479	break;
1480    }
1481    cb_conf.source_format = 1;
1482    cb_conf.blend_clamp = 1;
1483    cb_conf.blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1484    cb_conf.blend_enable = 1;
1485    cb_conf.pmask = 0xf;
1486    cb_conf.rop = 3;
1487    if (accel_state->dst_obj.tiling_flags == 0)
1488	cb_conf.array_mode = 0;
1489#if X_BYTE_ORDER == X_BIG_ENDIAN
1490    switch (dst_obj.bpp) {
1491    case 16:
1492	cb_conf.endian = ENDIAN_8IN16;
1493	break;
1494    case 32:
1495	cb_conf.endian = ENDIAN_8IN32;
1496	break;
1497    default:
1498	break;
1499    }
1500#endif
1501    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
1502
1503    if (pMask)
1504	r600_set_spi(pScrn, accel_state->ib, (2 - 1), 2);
1505    else
1506	r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1);
1507
1508    if (accel_state->vsync)
1509	RADEONVlineHelperClear(pScrn);
1510
1511    accel_state->composite_op = op;
1512    accel_state->dst_pic = pDstPicture;
1513    accel_state->src_pic = pSrcPicture;
1514    accel_state->dst_pix = pDst;
1515    accel_state->msk_pix = pMask;
1516    accel_state->src_pix = pSrc;
1517
1518    return TRUE;
1519}
1520
1521static void R600FinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst,
1522				struct radeon_accel_state *accel_state)
1523{
1524    int vtx_size;
1525
1526    if (accel_state->vsync)
1527       r600_cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
1528			       accel_state->vline_crtc,
1529			       accel_state->vline_y1,
1530			       accel_state->vline_y2);
1531
1532    vtx_size = accel_state->msk_pic ? 24 : 16;
1533
1534    r600_finish_op(pScrn, vtx_size);
1535}
1536
1537static void R600DoneComposite(PixmapPtr pDst)
1538{
1539    ScreenPtr pScreen = pDst->drawable.pScreen;
1540    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1541    RADEONInfoPtr info = RADEONPTR(pScrn);
1542    struct radeon_accel_state *accel_state = info->accel_state;
1543
1544    R600FinishComposite(pScrn, pDst, accel_state);
1545
1546    if (!accel_state->src_pic->pDrawable)
1547	pScreen->DestroyPixmap(accel_state->src_pix);
1548
1549    if (accel_state->msk_pic && !accel_state->msk_pic->pDrawable)
1550	pScreen->DestroyPixmap(accel_state->msk_pix);
1551}
1552
1553static void R600Composite(PixmapPtr pDst,
1554			  int srcX, int srcY,
1555			  int maskX, int maskY,
1556			  int dstX, int dstY,
1557			  int w, int h)
1558{
1559    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1560    RADEONInfoPtr info = RADEONPTR(pScrn);
1561    struct radeon_accel_state *accel_state = info->accel_state;
1562    float *vb;
1563
1564    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1565       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1566
1567#ifdef XF86DRM_MODE
1568    if (info->cs && CS_FULL(info->cs)) {
1569	R600FinishComposite(pScrn, pDst, info->accel_state);
1570	radeon_cs_flush_indirect(pScrn);
1571	R600PrepareComposite(info->accel_state->composite_op,
1572			     info->accel_state->src_pic,
1573			     info->accel_state->msk_pic,
1574			     info->accel_state->dst_pic,
1575			     info->accel_state->src_pix,
1576			     info->accel_state->msk_pix,
1577			     info->accel_state->dst_pix);
1578    }
1579#endif
1580
1581    if (accel_state->vsync)
1582	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1583
1584    if (accel_state->msk_pic) {
1585
1586	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
1587
1588	vb[0] = (float)dstX;
1589	vb[1] = (float)dstY;
1590	vb[2] = (float)srcX;
1591	vb[3] = (float)srcY;
1592	vb[4] = (float)maskX;
1593	vb[5] = (float)maskY;
1594
1595	vb[6] = (float)dstX;
1596	vb[7] = (float)(dstY + h);
1597	vb[8] = (float)srcX;
1598	vb[9] = (float)(srcY + h);
1599	vb[10] = (float)maskX;
1600	vb[11] = (float)(maskY + h);
1601
1602	vb[12] = (float)(dstX + w);
1603	vb[13] = (float)(dstY + h);
1604	vb[14] = (float)(srcX + w);
1605	vb[15] = (float)(srcY + h);
1606	vb[16] = (float)(maskX + w);
1607	vb[17] = (float)(maskY + h);
1608
1609	radeon_vbo_commit(pScrn, &accel_state->vbo);
1610
1611    } else {
1612
1613	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
1614
1615	vb[0] = (float)dstX;
1616	vb[1] = (float)dstY;
1617	vb[2] = (float)srcX;
1618	vb[3] = (float)srcY;
1619
1620	vb[4] = (float)dstX;
1621	vb[5] = (float)(dstY + h);
1622	vb[6] = (float)srcX;
1623	vb[7] = (float)(srcY + h);
1624
1625	vb[8] = (float)(dstX + w);
1626	vb[9] = (float)(dstY + h);
1627	vb[10] = (float)(srcX + w);
1628	vb[11] = (float)(srcY + h);
1629
1630	radeon_vbo_commit(pScrn, &accel_state->vbo);
1631    }
1632
1633
1634}
1635
1636Bool
1637R600CopyToVRAM(ScrnInfoPtr pScrn,
1638	       char *src, int src_pitch,
1639	       uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_width, uint32_t dst_height, int bpp,
1640	       int x, int y, int w, int h)
1641{
1642    RADEONInfoPtr info = RADEONPTR(pScrn);
1643    struct radeon_accel_state *accel_state = info->accel_state;
1644    uint32_t scratch_mc_addr;
1645    int wpass = w * (bpp/8);
1646    int scratch_pitch_bytes = RADEON_ALIGN(wpass, 256);
1647    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1648    int scratch_offset = 0, hpass, temph;
1649    char *dst;
1650    drmBufPtr scratch;
1651    struct r600_accel_object scratch_obj, dst_obj;
1652
1653    if (dst_pitch & 7)
1654	return FALSE;
1655
1656    if (dst_mc_addr & 0xff)
1657	return FALSE;
1658
1659    scratch = RADEONCPGetBuffer(pScrn);
1660    if (scratch == NULL)
1661	return FALSE;
1662
1663    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1664    temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1665    dst = (char *)scratch->address;
1666
1667    scratch_obj.pitch = scratch_pitch;
1668    scratch_obj.width = w;
1669    scratch_obj.height = hpass;
1670    scratch_obj.offset = scratch_mc_addr;
1671    scratch_obj.bpp = bpp;
1672    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1673    scratch_obj.bo = NULL;
1674
1675    dst_obj.pitch = dst_pitch;
1676    dst_obj.width = dst_width;
1677    dst_obj.height = dst_height;
1678    dst_obj.offset = dst_mc_addr;
1679    dst_obj.bo = NULL;
1680    dst_obj.bpp = bpp;
1681    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1682
1683    if (!R600SetAccelState(pScrn,
1684			   &scratch_obj,
1685			   NULL,
1686			   &dst_obj,
1687			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1688			   3, 0xffffffff))
1689	return FALSE;
1690
1691    /* memcopy from sys to scratch */
1692    while (temph--) {
1693	memcpy (dst, src, wpass);
1694	src += src_pitch;
1695	dst += scratch_pitch_bytes;
1696    }
1697
1698    while (h) {
1699	uint32_t offset = scratch_mc_addr + scratch_offset;
1700	int oldhpass = hpass;
1701	h -= oldhpass;
1702	temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1703
1704	if (hpass) {
1705	    scratch_offset = scratch->total/2 - scratch_offset;
1706	    dst = (char *)scratch->address + scratch_offset;
1707	    /* wait for the engine to be idle */
1708	    RADEONWaitForIdleCP(pScrn);
1709	    //memcopy from sys to scratch
1710	    while (temph--) {
1711		memcpy (dst, src, wpass);
1712		src += src_pitch;
1713		dst += scratch_pitch_bytes;
1714	    }
1715	}
1716	/* blit from scratch to vram */
1717	info->accel_state->src_obj[0].height = oldhpass;
1718	info->accel_state->src_obj[0].offset = offset;
1719	R600DoPrepareCopy(pScrn);
1720	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
1721	R600DoCopy(pScrn);
1722	y += oldhpass;
1723    }
1724
1725    R600IBDiscard(pScrn, scratch);
1726
1727    return TRUE;
1728}
1729
1730static Bool
1731R600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1732		   char *src, int src_pitch)
1733{
1734    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1735    RADEONInfoPtr info = RADEONPTR(pScrn);
1736    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1737    uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1738    int bpp = pDst->drawable.bitsPerPixel;
1739
1740    return R600CopyToVRAM(pScrn,
1741			  src, src_pitch,
1742			  dst_pitch, dst_mc_addr, pDst->drawable.width, pDst->drawable.height, bpp,
1743			  x, y, w, h);
1744}
1745
1746static Bool
1747R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
1748		       char *dst, int dst_pitch)
1749{
1750    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
1751    RADEONInfoPtr info = RADEONPTR(pScrn);
1752    struct radeon_accel_state *accel_state = info->accel_state;
1753    uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1754    uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1755    uint32_t src_width = pSrc->drawable.width;
1756    uint32_t src_height = pSrc->drawable.height;
1757    int bpp = pSrc->drawable.bitsPerPixel;
1758    uint32_t scratch_mc_addr;
1759    int scratch_pitch_bytes = RADEON_ALIGN(dst_pitch, 256);
1760    int scratch_offset = 0, hpass;
1761    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1762    int wpass = w * (bpp/8);
1763    drmBufPtr scratch;
1764    struct r600_accel_object scratch_obj, src_obj;
1765
1766    /* bad pipe setup in drm prior to 1.32 */
1767    if (info->dri->pKernelDRMVersion->version_minor < 32) {
1768	    if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32))
1769		    return FALSE;
1770    }
1771
1772    if (src_pitch & 7)
1773	return FALSE;
1774
1775    scratch = RADEONCPGetBuffer(pScrn);
1776    if (scratch == NULL)
1777	return FALSE;
1778
1779    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1780    hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1781
1782    src_obj.pitch = src_pitch;
1783    src_obj.width = src_width;
1784    src_obj.height = src_height;
1785    src_obj.offset = src_mc_addr;
1786    src_obj.bo = NULL;
1787    src_obj.bpp = bpp;
1788    src_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1789
1790    scratch_obj.pitch = scratch_pitch;
1791    scratch_obj.width = src_width;
1792    scratch_obj.height = hpass;
1793    scratch_obj.offset = scratch_mc_addr;
1794    scratch_obj.bpp = bpp;
1795    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1796    scratch_obj.bo = NULL;
1797
1798    if (!R600SetAccelState(pScrn,
1799			   &src_obj,
1800			   NULL,
1801			   &scratch_obj,
1802			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1803			   3, 0xffffffff))
1804	return FALSE;
1805
1806    /* blit from vram to scratch */
1807    R600DoPrepareCopy(pScrn);
1808    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1809    R600DoCopy(pScrn);
1810
1811    while (h) {
1812	char *src = (char *)scratch->address + scratch_offset;
1813	int oldhpass = hpass;
1814	h -= oldhpass;
1815	y += oldhpass;
1816	hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1817
1818	if (hpass) {
1819	    scratch_offset = scratch->total/2 - scratch_offset;
1820	    /* blit from vram to scratch */
1821	    info->accel_state->dst_obj.height = hpass;
1822	    info->accel_state->dst_obj.offset = scratch_mc_addr + scratch_offset;
1823	    R600DoPrepareCopy(pScrn);
1824	    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1825	    R600DoCopy(pScrn);
1826	}
1827
1828	/* wait for the engine to be idle */
1829	RADEONWaitForIdleCP(pScrn);
1830	/* memcopy from scratch to sys */
1831	while (oldhpass--) {
1832	    memcpy (dst, src, wpass);
1833	    dst += dst_pitch;
1834	    src += scratch_pitch_bytes;
1835	}
1836    }
1837
1838    R600IBDiscard(pScrn, scratch);
1839
1840    return TRUE;
1841
1842}
1843
1844#if defined(XF86DRM_MODE)
1845
1846static Bool
1847R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
1848		     char *src, int src_pitch)
1849{
1850    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1851    RADEONInfoPtr info = RADEONPTR(pScrn);
1852    struct radeon_accel_state *accel_state = info->accel_state;
1853    struct radeon_exa_pixmap_priv *driver_priv;
1854    struct radeon_bo *scratch = NULL;
1855    struct radeon_bo *copy_dst;
1856    unsigned char *dst;
1857    unsigned size;
1858    uint32_t dst_domain;
1859    int bpp = pDst->drawable.bitsPerPixel;
1860    uint32_t scratch_pitch;
1861    uint32_t copy_pitch;
1862    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
1863    int ret;
1864    Bool flush = TRUE;
1865    Bool r;
1866    int i;
1867    struct r600_accel_object src_obj, dst_obj;
1868    uint32_t height, base_align;
1869
1870    if (bpp < 8)
1871	return FALSE;
1872
1873    driver_priv = exaGetPixmapDriverPrivate(pDst);
1874    if (!driver_priv || !driver_priv->bo)
1875	return FALSE;
1876
1877    /* If we know the BO won't be busy, don't bother with a scratch */
1878    copy_dst = driver_priv->bo;
1879    copy_pitch = pDst->devKind;
1880    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1881	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1882	    flush = FALSE;
1883	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
1884		goto copy;
1885	}
1886    }
1887
1888    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1889    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1890    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1891    size = scratch_pitch * height * (bpp / 8);
1892    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1893    if (scratch == NULL) {
1894	goto copy;
1895    }
1896
1897    src_obj.pitch = scratch_pitch;
1898    src_obj.width = w;
1899    src_obj.height = h;
1900    src_obj.offset = 0;
1901    src_obj.bpp = bpp;
1902    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1903    src_obj.bo = scratch;
1904    src_obj.tiling_flags = 0;
1905    src_obj.surface = NULL;
1906
1907    dst_obj.pitch = dst_pitch_hw;
1908    dst_obj.width = pDst->drawable.width;
1909    dst_obj.height = pDst->drawable.height;
1910    dst_obj.offset = 0;
1911    dst_obj.bpp = bpp;
1912    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1913    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1914    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1915    dst_obj.surface = radeon_get_pixmap_surface(pDst);
1916
1917    if (!R600SetAccelState(pScrn,
1918			   &src_obj,
1919			   NULL,
1920			   &dst_obj,
1921			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1922			   3, 0xffffffff)) {
1923        goto copy;
1924    }
1925    copy_dst = scratch;
1926    copy_pitch = scratch_pitch * (bpp / 8);
1927    flush = FALSE;
1928
1929copy:
1930    if (flush)
1931	radeon_cs_flush_indirect(pScrn);
1932
1933    ret = radeon_bo_map(copy_dst, 0);
1934    if (ret) {
1935        r = FALSE;
1936        goto out;
1937    }
1938    r = TRUE;
1939    size = w * bpp / 8;
1940    dst = copy_dst->ptr;
1941    if (copy_dst == driver_priv->bo)
1942	dst += y * copy_pitch + x * bpp / 8;
1943    for (i = 0; i < h; i++) {
1944        memcpy(dst + i * copy_pitch, src, size);
1945        src += src_pitch;
1946    }
1947    radeon_bo_unmap(copy_dst);
1948
1949    if (copy_dst == scratch) {
1950	if (info->accel_state->vsync)
1951	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1952
1953	/* blit from gart to vram */
1954	R600DoPrepareCopy(pScrn);
1955	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
1956	R600DoCopyVline(pDst);
1957    }
1958
1959out:
1960    if (scratch)
1961	radeon_bo_unref(scratch);
1962    return r;
1963}
1964
1965static Bool
1966R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
1967			 int h, char *dst, int dst_pitch)
1968{
1969    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
1970    RADEONInfoPtr info = RADEONPTR(pScrn);
1971    struct radeon_accel_state *accel_state = info->accel_state;
1972    struct radeon_exa_pixmap_priv *driver_priv;
1973    struct radeon_bo *scratch = NULL;
1974    struct radeon_bo *copy_src;
1975    unsigned size;
1976    uint32_t src_domain = 0;
1977    int bpp = pSrc->drawable.bitsPerPixel;
1978    uint32_t scratch_pitch;
1979    uint32_t copy_pitch;
1980    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
1981    int ret;
1982    Bool flush = FALSE;
1983    Bool r;
1984    struct r600_accel_object src_obj, dst_obj;
1985    uint32_t height, base_align;
1986
1987    if (bpp < 8)
1988	return FALSE;
1989
1990    driver_priv = exaGetPixmapDriverPrivate(pSrc);
1991    if (!driver_priv || !driver_priv->bo)
1992	return FALSE;
1993
1994    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
1995    copy_src = driver_priv->bo;
1996    copy_pitch = pSrc->devKind;
1997    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1998	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1999	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
2000	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
2001		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
2002		src_domain = 0;
2003	    else /* A write may be scheduled */
2004		flush = TRUE;
2005	}
2006
2007	if (!src_domain)
2008	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
2009
2010	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
2011	    goto copy;
2012    }
2013
2014    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
2015    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
2016    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
2017    size = scratch_pitch * height * (bpp / 8);
2018    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
2019    if (scratch == NULL) {
2020	goto copy;
2021    }
2022    radeon_cs_space_reset_bos(info->cs);
2023    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
2024				      RADEON_GEM_DOMAIN_VRAM, 0);
2025    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
2026    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
2027    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
2028    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
2029    ret = radeon_cs_space_check(info->cs);
2030    if (ret) {
2031        goto copy;
2032    }
2033
2034    src_obj.pitch = src_pitch_hw;
2035    src_obj.width = pSrc->drawable.width;
2036    src_obj.height = pSrc->drawable.height;
2037    src_obj.offset = 0;
2038    src_obj.bpp = bpp;
2039    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
2040    src_obj.bo = radeon_get_pixmap_bo(pSrc);
2041    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
2042    src_obj.surface = radeon_get_pixmap_surface(pSrc);
2043
2044    dst_obj.pitch = scratch_pitch;
2045    dst_obj.width = w;
2046    dst_obj.height = h;
2047    dst_obj.offset = 0;
2048    dst_obj.bo = scratch;
2049    dst_obj.bpp = bpp;
2050    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
2051    dst_obj.tiling_flags = 0;
2052    dst_obj.surface = NULL;
2053
2054    if (!R600SetAccelState(pScrn,
2055			   &src_obj,
2056			   NULL,
2057			   &dst_obj,
2058			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
2059			   3, 0xffffffff)) {
2060        goto copy;
2061    }
2062
2063    /* blit from vram to gart */
2064    R600DoPrepareCopy(pScrn);
2065    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
2066    R600DoCopy(pScrn);
2067    copy_src = scratch;
2068    copy_pitch = scratch_pitch * (bpp / 8);
2069    flush = TRUE;
2070
2071copy:
2072    if (flush && info->cs)
2073	radeon_cs_flush_indirect(pScrn);
2074
2075    ret = radeon_bo_map(copy_src, 0);
2076    if (ret) {
2077	ErrorF("failed to map pixmap: %d\n", ret);
2078        r = FALSE;
2079        goto out;
2080    }
2081    r = TRUE;
2082    w *= bpp / 8;
2083    if (copy_src == driver_priv->bo)
2084	size = y * copy_pitch + x * bpp / 8;
2085    else
2086	size = 0;
2087    while (h--) {
2088        memcpy(dst, copy_src->ptr + size, w);
2089        size += copy_pitch;
2090        dst += dst_pitch;
2091    }
2092    radeon_bo_unmap(copy_src);
2093out:
2094    if (scratch)
2095	radeon_bo_unref(scratch);
2096    return r;
2097}
2098#endif
2099
2100static int
2101R600MarkSync(ScreenPtr pScreen)
2102{
2103    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2104    RADEONInfoPtr info = RADEONPTR(pScrn);
2105    struct radeon_accel_state *accel_state = info->accel_state;
2106
2107    return ++accel_state->exaSyncMarker;
2108
2109}
2110
2111static void
2112R600Sync(ScreenPtr pScreen, int marker)
2113{
2114    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2115    RADEONInfoPtr info = RADEONPTR(pScrn);
2116    struct radeon_accel_state *accel_state = info->accel_state;
2117
2118    if (accel_state->exaMarkerSynced != marker) {
2119#ifdef XF86DRM_MODE
2120#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2121	if (!info->cs)
2122#endif
2123#endif
2124	    RADEONWaitForIdleCP(pScrn);
2125	accel_state->exaMarkerSynced = marker;
2126    }
2127
2128}
2129
2130static Bool
2131R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
2132{
2133    RADEONInfoPtr info = RADEONPTR(pScrn);
2134    struct radeon_accel_state *accel_state = info->accel_state;
2135
2136    /* 512 bytes per shader for now */
2137    int size = 512 * 9;
2138
2139    accel_state->shaders = NULL;
2140
2141#ifdef XF86DRM_MODE
2142#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2143    if (info->cs) {
2144	accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
2145						 RADEON_GEM_DOMAIN_VRAM, 0);
2146	if (accel_state->shaders_bo == NULL) {
2147	    ErrorF("Allocating shader failed\n");
2148	    return FALSE;
2149	}
2150	return TRUE;
2151    } else
2152#endif
2153#endif
2154    {
2155	accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
2156						 TRUE, NULL, NULL);
2157
2158	if (accel_state->shaders == NULL)
2159	    return FALSE;
2160    }
2161
2162    return TRUE;
2163}
2164
2165Bool
2166R600LoadShaders(ScrnInfoPtr pScrn)
2167{
2168    RADEONInfoPtr info = RADEONPTR(pScrn);
2169    struct radeon_accel_state *accel_state = info->accel_state;
2170    RADEONChipFamily ChipSet = info->ChipFamily;
2171    uint32_t *shader;
2172#ifdef XF86DRM_MODE
2173#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2174    int ret;
2175
2176    if (info->cs) {
2177	ret = radeon_bo_map(accel_state->shaders_bo, 1);
2178	if (ret) {
2179	    FatalError("failed to map shader %d\n", ret);
2180	    return FALSE;
2181	}
2182	shader = accel_state->shaders_bo->ptr;
2183    } else
2184#endif
2185#endif
2186	shader = (pointer)((char *)info->FB + accel_state->shaders->offset);
2187
2188    /*  solid vs --------------------------------------- */
2189    accel_state->solid_vs_offset = 0;
2190    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
2191
2192    /*  solid ps --------------------------------------- */
2193    accel_state->solid_ps_offset = 512;
2194    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
2195
2196    /*  copy vs --------------------------------------- */
2197    accel_state->copy_vs_offset = 1024;
2198    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
2199
2200    /*  copy ps --------------------------------------- */
2201    accel_state->copy_ps_offset = 1536;
2202    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2203
2204    /*  comp vs --------------------------------------- */
2205    accel_state->comp_vs_offset = 2048;
2206    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2207
2208    /*  comp ps --------------------------------------- */
2209    accel_state->comp_ps_offset = 2560;
2210    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2211
2212    /*  xv vs --------------------------------------- */
2213    accel_state->xv_vs_offset = 3072;
2214    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2215
2216    /*  xv ps --------------------------------------- */
2217    accel_state->xv_ps_offset = 3584;
2218    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2219
2220#ifdef XF86DRM_MODE
2221#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2222    if (info->cs) {
2223	radeon_bo_unmap(accel_state->shaders_bo);
2224    }
2225#endif
2226#endif
2227
2228    return TRUE;
2229}
2230
2231static Bool
2232R600PrepareAccess(PixmapPtr pPix, int index)
2233{
2234    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
2235    RADEONInfoPtr info = RADEONPTR(pScrn);
2236    unsigned char *RADEONMMIO = info->MMIO;
2237
2238    /* flush HDP read/write caches */
2239    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2240
2241    return TRUE;
2242}
2243
2244static void
2245R600FinishAccess(PixmapPtr pPix, int index)
2246{
2247    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
2248    RADEONInfoPtr info = RADEONPTR(pScrn);
2249    unsigned char *RADEONMMIO = info->MMIO;
2250
2251    /* flush HDP read/write caches */
2252    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2253
2254}
2255
2256Bool
2257R600DrawInit(ScreenPtr pScreen)
2258{
2259    ScrnInfoPtr pScrn =  xf86ScreenToScrn(pScreen);
2260    RADEONInfoPtr info   = RADEONPTR(pScrn);
2261
2262    if (info->accel_state->exa == NULL) {
2263	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2264	return FALSE;
2265    }
2266
2267    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2268    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2269
2270    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2271    info->accel_state->exa->Solid = R600Solid;
2272    info->accel_state->exa->DoneSolid = R600DoneSolid;
2273
2274    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2275    info->accel_state->exa->Copy = R600Copy;
2276    info->accel_state->exa->DoneCopy = R600DoneCopy;
2277
2278    info->accel_state->exa->MarkSync = R600MarkSync;
2279    info->accel_state->exa->WaitMarker = R600Sync;
2280
2281#ifdef XF86DRM_MODE
2282#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2283    if (info->cs) {
2284	info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
2285	info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
2286	info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
2287	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
2288	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
2289	info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
2290	info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
2291#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5)
2292        info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
2293#endif
2294    } else
2295#endif
2296#endif
2297    {
2298	info->accel_state->exa->PrepareAccess = R600PrepareAccess;
2299	info->accel_state->exa->FinishAccess = R600FinishAccess;
2300
2301	/* AGP seems to have problems with gart transfers */
2302	if (info->accelDFS) {
2303	    info->accel_state->exa->UploadToScreen = R600UploadToScreen;
2304	    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen;
2305	}
2306    }
2307
2308    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
2309#ifdef EXA_SUPPORTS_PREPARE_AUX
2310    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
2311#endif
2312
2313#ifdef XF86DRM_MODE
2314#ifdef EXA_HANDLES_PIXMAPS
2315    if (info->cs) {
2316	info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
2317#ifdef EXA_MIXED_PIXMAPS
2318	info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
2319#endif
2320    }
2321#endif
2322#endif
2323    info->accel_state->exa->pixmapOffsetAlign = 256;
2324    info->accel_state->exa->pixmapPitchAlign = 256;
2325
2326    info->accel_state->exa->CheckComposite = R600CheckComposite;
2327    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2328    info->accel_state->exa->Composite = R600Composite;
2329    info->accel_state->exa->DoneComposite = R600DoneComposite;
2330
2331#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
2332    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
2333
2334    info->accel_state->exa->maxPitchBytes = 32768;
2335    info->accel_state->exa->maxX = 8192;
2336#else
2337    info->accel_state->exa->maxX = 8192;
2338#endif
2339    info->accel_state->exa->maxY = 8192;
2340
2341    /* not supported yet */
2342    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2343	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2344	info->accel_state->vsync = TRUE;
2345    } else
2346	info->accel_state->vsync = FALSE;
2347
2348    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
2349	free(info->accel_state->exa);
2350	return FALSE;
2351    }
2352
2353#ifdef XF86DRM_MODE
2354#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2355    if (!info->cs)
2356#endif
2357#endif
2358	if (!info->gartLocation)
2359	    return FALSE;
2360
2361    info->accel_state->XInited3D = FALSE;
2362    info->accel_state->copy_area = NULL;
2363    info->accel_state->src_obj[0].bo = NULL;
2364    info->accel_state->src_obj[1].bo = NULL;
2365    info->accel_state->dst_obj.bo = NULL;
2366    info->accel_state->copy_area_bo = NULL;
2367    info->accel_state->vbo.vb_start_op = -1;
2368    info->accel_state->finish_op = r600_finish_op;
2369    info->accel_state->vbo.verts_per_op = 3;
2370    RADEONVlineHelperClear(pScrn);
2371
2372#ifdef XF86DRM_MODE
2373    radeon_vbo_init_lists(pScrn);
2374#endif
2375
2376    if (!R600AllocShaders(pScrn, pScreen))
2377	return FALSE;
2378
2379    if (!R600LoadShaders(pScrn))
2380	return FALSE;
2381
2382    exaMarkSync(pScreen);
2383
2384    return TRUE;
2385
2386}
2387
2388