r600_exa.c revision 921a55d8
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "exa.h"
34
35#include "radeon.h"
36#include "radeon_macros.h"
37#include "radeon_reg.h"
38#include "r600_shader.h"
39#include "r600_reg.h"
40#include "r600_state.h"
41#include "radeon_exa_shared.h"
42#include "radeon_vbo.h"
43
44/* #define SHOW_VERTEXES */
45
46uint32_t R600_ROP[16] = {
47    RADEON_ROP3_ZERO, /* GXclear        */
48    RADEON_ROP3_DSa,  /* Gxand          */
49    RADEON_ROP3_SDna, /* GXandReverse   */
50    RADEON_ROP3_S,    /* GXcopy         */
51    RADEON_ROP3_DSna, /* GXandInverted  */
52    RADEON_ROP3_D,    /* GXnoop         */
53    RADEON_ROP3_DSx,  /* GXxor          */
54    RADEON_ROP3_DSo,  /* GXor           */
55    RADEON_ROP3_DSon, /* GXnor          */
56    RADEON_ROP3_DSxn, /* GXequiv        */
57    RADEON_ROP3_Dn,   /* GXinvert       */
58    RADEON_ROP3_SDno, /* GXorReverse    */
59    RADEON_ROP3_Sn,   /* GXcopyInverted */
60    RADEON_ROP3_DSno, /* GXorInverted   */
61    RADEON_ROP3_DSan, /* GXnand         */
62    RADEON_ROP3_ONE,  /* GXset          */
63};
64
65Bool
66R600SetAccelState(ScrnInfoPtr pScrn,
67		  struct r600_accel_object *src0,
68		  struct r600_accel_object *src1,
69		  struct r600_accel_object *dst,
70		  uint32_t vs_offset, uint32_t ps_offset,
71		  int rop, Pixel planemask)
72{
73    RADEONInfoPtr info = RADEONPTR(pScrn);
74    struct radeon_accel_state *accel_state = info->accel_state;
75
76    if (src0) {
77	memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object));
78	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
79    } else {
80	memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
81	accel_state->src_size[0] = 0;
82    }
83
84    if (src1) {
85	memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object));
86	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
87    } else {
88	memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
89	accel_state->src_size[1] = 0;
90    }
91
92    if (dst) {
93	memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object));
94	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
95    } else {
96	memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
97	accel_state->dst_size = 0;
98    }
99
100    accel_state->rop = rop;
101    accel_state->planemask = planemask;
102
103    /* bad pitch */
104    if (accel_state->src_obj[0].pitch & 7)
105	RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
106
107    /* bad offset */
108    if (accel_state->src_obj[0].offset & 0xff)
109	RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[0].offset));
110
111    /* bad pitch */
112    if (accel_state->src_obj[1].pitch & 7)
113	RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
114
115    /* bad offset */
116    if (accel_state->src_obj[1].offset & 0xff)
117	RADEON_FALLBACK(("Bad src offset 0x%08x\n", accel_state->src_obj[1].offset));
118
119    if (accel_state->dst_obj.pitch & 7)
120	RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
121
122    if (accel_state->dst_obj.offset & 0xff)
123	RADEON_FALLBACK(("Bad dst offset 0x%08x\n", accel_state->dst_obj.offset));
124
125    accel_state->vs_size = 512;
126    accel_state->ps_size = 512;
127#if defined(XF86DRM_MODE)
128    if (info->cs) {
129	int ret;
130	accel_state->vs_mc_addr = vs_offset;
131	accel_state->ps_mc_addr = ps_offset;
132
133	radeon_cs_space_reset_bos(info->cs);
134	radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
135					  RADEON_GEM_DOMAIN_VRAM, 0);
136	if (accel_state->src_obj[0].bo)
137	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
138					      accel_state->src_obj[0].domain, 0);
139	if (accel_state->src_obj[1].bo)
140	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
141					      accel_state->src_obj[1].domain, 0);
142	if (accel_state->dst_obj.bo)
143	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
144					      0, accel_state->dst_obj.domain);
145	ret = radeon_cs_space_check(info->cs);
146	if (ret)
147	    RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
148
149    } else
150#endif
151    {
152	accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
153	    vs_offset;
154	accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
155	    ps_offset;
156    }
157
158    return TRUE;
159}
160
161static void
162R600DoneSolid(PixmapPtr pPix);
163
164static Bool
165R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
166{
167    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
168    RADEONInfoPtr info = RADEONPTR(pScrn);
169    struct radeon_accel_state *accel_state = info->accel_state;
170    cb_config_t     cb_conf;
171    shader_config_t vs_conf, ps_conf;
172    int pmask = 0;
173    uint32_t a, r, g, b;
174    float ps_alu_consts[4];
175    struct r600_accel_object dst;
176
177    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
178	RADEON_FALLBACK(("R600CheckDatatype failed\n"));
179    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
180	RADEON_FALLBACK(("invalid planemask\n"));
181
182#if defined(XF86DRM_MODE)
183    if (info->cs) {
184	dst.offset = 0;
185	dst.bo = radeon_get_pixmap_bo(pPix);
186    } else
187#endif
188    {
189	dst.offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
190	dst.bo = NULL;
191    }
192
193    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
194    dst.width = pPix->drawable.width;
195    dst.height = pPix->drawable.height;
196    dst.bpp = pPix->drawable.bitsPerPixel;
197    dst.domain = RADEON_GEM_DOMAIN_VRAM;
198
199    if (!R600SetAccelState(pScrn,
200			   NULL,
201			   NULL,
202			   &dst,
203			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
204			   alu, pm))
205	return FALSE;
206
207    CLEAR (cb_conf);
208    CLEAR (vs_conf);
209    CLEAR (ps_conf);
210
211    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
212    radeon_cp_start(pScrn);
213
214    r600_set_default_state(pScrn, accel_state->ib);
215
216    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
217    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
218    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
219
220    /* Shader */
221    vs_conf.shader_addr         = accel_state->vs_mc_addr;
222    vs_conf.shader_size         = accel_state->vs_size;
223    vs_conf.num_gprs            = 2;
224    vs_conf.stack_size          = 0;
225    vs_conf.bo                  = accel_state->shaders_bo;
226    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
227
228    ps_conf.shader_addr         = accel_state->ps_mc_addr;
229    ps_conf.shader_size         = accel_state->ps_size;
230    ps_conf.num_gprs            = 1;
231    ps_conf.stack_size          = 0;
232    ps_conf.uncached_first_inst = 1;
233    ps_conf.clamp_consts        = 0;
234    ps_conf.export_mode         = 2;
235    ps_conf.bo                  = accel_state->shaders_bo;
236    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
237
238    cb_conf.id = 0;
239    cb_conf.w = accel_state->dst_obj.pitch;
240    cb_conf.h = accel_state->dst_obj.height;
241    cb_conf.base = accel_state->dst_obj.offset;
242    cb_conf.bo = accel_state->dst_obj.bo;
243
244    if (accel_state->dst_obj.bpp == 8) {
245	cb_conf.format = COLOR_8;
246	cb_conf.comp_swap = 3; /* A */
247    } else if (accel_state->dst_obj.bpp == 16) {
248	cb_conf.format = COLOR_5_6_5;
249	cb_conf.comp_swap = 2; /* RGB */
250    } else {
251	cb_conf.format = COLOR_8_8_8_8;
252	cb_conf.comp_swap = 1; /* ARGB */
253    }
254    cb_conf.source_format = 1;
255    cb_conf.blend_clamp = 1;
256    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
257
258    /* Render setup */
259    if (accel_state->planemask & 0x000000ff)
260	pmask |= 4; /* B */
261    if (accel_state->planemask & 0x0000ff00)
262	pmask |= 2; /* G */
263    if (accel_state->planemask & 0x00ff0000)
264	pmask |= 1; /* R */
265    if (accel_state->planemask & 0xff000000)
266	pmask |= 8; /* A */
267    BEGIN_BATCH(20);
268    EREG(accel_state->ib, CB_TARGET_MASK,                      (pmask << TARGET0_ENABLE_shift));
269    EREG(accel_state->ib, CB_COLOR_CONTROL,                    R600_ROP[accel_state->rop]);
270
271    /* Interpolator setup */
272    /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */
273    EREG(accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift));
274    EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
275    /* color semantic id 0 -> GPR[0] */
276    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 << 2),       ((0    << SEMANTIC_shift)	|
277								  (0x03 << DEFAULT_VAL_shift)	|
278								  FLAT_SHADE_bit		|
279								  SEL_CENTROID_bit));
280
281    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
282     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
283    /* no VS exports as PS input (NUM_INTERP is not zero based, no minus one) */
284    PACK0(accel_state->ib, SPI_PS_IN_CONTROL_0, 3);
285    E32(accel_state->ib, (0 << NUM_INTERP_shift));
286    E32(accel_state->ib, 0);
287    E32(accel_state->ib, FLAT_SHADE_ENA_bit);
288    END_BATCH();
289
290    /* PS alu constants */
291    if (accel_state->dst_obj.bpp == 16) {
292	r = (fg >> 11) & 0x1f;
293	g = (fg >> 5) & 0x3f;
294	b = (fg >> 0) & 0x1f;
295	ps_alu_consts[0] = (float)r / 31; /* R */
296	ps_alu_consts[1] = (float)g / 63; /* G */
297	ps_alu_consts[2] = (float)b / 31; /* B */
298	ps_alu_consts[3] = 1.0; /* A */
299    } else if (accel_state->dst_obj.bpp == 8) {
300	a = (fg >> 0) & 0xff;
301	ps_alu_consts[0] = 0.0; /* R */
302	ps_alu_consts[1] = 0.0; /* G */
303	ps_alu_consts[2] = 0.0; /* B */
304	ps_alu_consts[3] = (float)a / 255; /* A */
305    } else {
306	a = (fg >> 24) & 0xff;
307	r = (fg >> 16) & 0xff;
308	g = (fg >> 8) & 0xff;
309	b = (fg >> 0) & 0xff;
310	ps_alu_consts[0] = (float)r / 255; /* R */
311	ps_alu_consts[1] = (float)g / 255; /* G */
312	ps_alu_consts[2] = (float)b / 255; /* B */
313	ps_alu_consts[3] = (float)a / 255; /* A */
314    }
315    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
316			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
317
318    if (accel_state->vsync)
319	RADEONVlineHelperClear(pScrn);
320
321    return TRUE;
322}
323
324
325static void
326R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
327{
328    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
329    RADEONInfoPtr info = RADEONPTR(pScrn);
330    struct radeon_accel_state *accel_state = info->accel_state;
331    float *vb;
332
333    if (accel_state->vsync)
334	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
335
336    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
337
338    vb[0] = (float)x1;
339    vb[1] = (float)y1;
340
341    vb[2] = (float)x1;
342    vb[3] = (float)y2;
343
344    vb[4] = (float)x2;
345    vb[5] = (float)y2;
346
347    radeon_vbo_commit(pScrn, &accel_state->vbo);
348}
349
350static void
351R600DoneSolid(PixmapPtr pPix)
352{
353    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
354    RADEONInfoPtr info = RADEONPTR(pScrn);
355    struct radeon_accel_state *accel_state = info->accel_state;
356
357    if (accel_state->vsync)
358	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
359				accel_state->vline_crtc,
360				accel_state->vline_y1,
361				accel_state->vline_y2);
362
363    r600_finish_op(pScrn, 8);
364}
365
366static void
367R600DoPrepareCopy(ScrnInfoPtr pScrn)
368{
369    RADEONInfoPtr info = RADEONPTR(pScrn);
370    struct radeon_accel_state *accel_state = info->accel_state;
371    int pmask = 0;
372    cb_config_t     cb_conf;
373    tex_resource_t  tex_res;
374    tex_sampler_t   tex_samp;
375    shader_config_t vs_conf, ps_conf;
376
377    CLEAR (cb_conf);
378    CLEAR (tex_res);
379    CLEAR (tex_samp);
380    CLEAR (vs_conf);
381    CLEAR (ps_conf);
382
383    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
384    radeon_cp_start(pScrn);
385
386    r600_set_default_state(pScrn, accel_state->ib);
387
388    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
389    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
390    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
391
392    /* Shader */
393    vs_conf.shader_addr         = accel_state->vs_mc_addr;
394    vs_conf.shader_size         = accel_state->vs_size;
395    vs_conf.num_gprs            = 2;
396    vs_conf.stack_size          = 0;
397    vs_conf.bo                  = accel_state->shaders_bo;
398    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
399
400    ps_conf.shader_addr         = accel_state->ps_mc_addr;
401    ps_conf.shader_size         = accel_state->ps_size;
402    ps_conf.num_gprs            = 1;
403    ps_conf.stack_size          = 0;
404    ps_conf.uncached_first_inst = 1;
405    ps_conf.clamp_consts        = 0;
406    ps_conf.export_mode         = 2;
407    ps_conf.bo                  = accel_state->shaders_bo;
408    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
409
410    /* Texture */
411    tex_res.id                  = 0;
412    tex_res.w                   = accel_state->src_obj[0].width;
413    tex_res.h                   = accel_state->src_obj[0].height;
414    tex_res.pitch               = accel_state->src_obj[0].pitch;
415    tex_res.depth               = 0;
416    tex_res.dim                 = SQ_TEX_DIM_2D;
417    tex_res.base                = accel_state->src_obj[0].offset;
418    tex_res.mip_base            = accel_state->src_obj[0].offset;
419    tex_res.size                = accel_state->src_size[0];
420    tex_res.bo                  = accel_state->src_obj[0].bo;
421    tex_res.mip_bo              = accel_state->src_obj[0].bo;
422    if (accel_state->src_obj[0].bpp == 8) {
423	tex_res.format              = FMT_8;
424	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
425	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
426	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
427	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
428    } else if (accel_state->src_obj[0].bpp == 16) {
429	tex_res.format              = FMT_5_6_5;
430	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
431	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
432	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
433	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
434    } else {
435	tex_res.format              = FMT_8_8_8_8;
436	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
437	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
438	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
439	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
440    }
441
442    tex_res.request_size        = 1;
443    tex_res.base_level          = 0;
444    tex_res.last_level          = 0;
445    tex_res.perf_modulation     = 0;
446    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
447
448    tex_samp.id                 = 0;
449    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
450    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
451    tex_samp.clamp_z            = SQ_TEX_WRAP;
452    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
453    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
454    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
455    tex_samp.mip_filter         = 0;			/* no mipmap */
456    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
457
458    cb_conf.id = 0;
459    cb_conf.w = accel_state->dst_obj.pitch;
460    cb_conf.h = accel_state->dst_obj.height;
461    cb_conf.base = accel_state->dst_obj.offset;
462    cb_conf.bo = accel_state->dst_obj.bo;
463    if (accel_state->dst_obj.bpp == 8) {
464	cb_conf.format = COLOR_8;
465	cb_conf.comp_swap = 3; /* A */
466    } else if (accel_state->dst_obj.bpp == 16) {
467	cb_conf.format = COLOR_5_6_5;
468	cb_conf.comp_swap = 2; /* RGB */
469    } else {
470	cb_conf.format = COLOR_8_8_8_8;
471	cb_conf.comp_swap = 1; /* ARGB */
472    }
473    cb_conf.source_format = 1;
474    cb_conf.blend_clamp = 1;
475    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
476
477    /* Render setup */
478    if (accel_state->planemask & 0x000000ff)
479	pmask |= 4; /* B */
480    if (accel_state->planemask & 0x0000ff00)
481	pmask |= 2; /* G */
482    if (accel_state->planemask & 0x00ff0000)
483	pmask |= 1; /* R */
484    if (accel_state->planemask & 0xff000000)
485	pmask |= 8; /* A */
486    BEGIN_BATCH(20);
487    EREG(accel_state->ib, CB_TARGET_MASK,                      (pmask << TARGET0_ENABLE_shift));
488    EREG(accel_state->ib, CB_COLOR_CONTROL,                    R600_ROP[accel_state->rop]);
489
490    /* Interpolator setup */
491    /* export tex coord from VS */
492    EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
493    EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
494    /* color semantic id 0 -> GPR[0] */
495    EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 << 2),       ((0    << SEMANTIC_shift)	|
496								(0x01 << DEFAULT_VAL_shift)	|
497								SEL_CENTROID_bit));
498
499    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
500     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
501    /* input tex coord from VS */
502    PACK0(accel_state->ib, SPI_PS_IN_CONTROL_0, 3);
503    E32(accel_state->ib, ((1 << NUM_INTERP_shift)));
504    E32(accel_state->ib, 0);
505    E32(accel_state->ib, 0);
506    END_BATCH();
507
508}
509
510static void
511R600DoCopy(ScrnInfoPtr pScrn)
512{
513    r600_finish_op(pScrn, 16);
514}
515
516static void
517R600DoCopyVline(PixmapPtr pPix)
518{
519    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
520    RADEONInfoPtr info = RADEONPTR(pScrn);
521    struct radeon_accel_state *accel_state = info->accel_state;
522
523    if (accel_state->vsync)
524	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
525				accel_state->vline_crtc,
526				accel_state->vline_y1,
527				accel_state->vline_y2);
528
529    r600_finish_op(pScrn, 16);
530}
531
532static void
533R600AppendCopyVertex(ScrnInfoPtr pScrn,
534		     int srcX, int srcY,
535		     int dstX, int dstY,
536		     int w, int h)
537{
538    RADEONInfoPtr info = RADEONPTR(pScrn);
539    struct radeon_accel_state *accel_state = info->accel_state;
540    float *vb;
541
542    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
543
544    vb[0] = (float)dstX;
545    vb[1] = (float)dstY;
546    vb[2] = (float)srcX;
547    vb[3] = (float)srcY;
548
549    vb[4] = (float)dstX;
550    vb[5] = (float)(dstY + h);
551    vb[6] = (float)srcX;
552    vb[7] = (float)(srcY + h);
553
554    vb[8] = (float)(dstX + w);
555    vb[9] = (float)(dstY + h);
556    vb[10] = (float)(srcX + w);
557    vb[11] = (float)(srcY + h);
558
559    radeon_vbo_commit(pScrn, &accel_state->vbo);
560}
561
562static Bool
563R600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
564		int xdir, int ydir,
565		int rop,
566		Pixel planemask)
567{
568    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
569    RADEONInfoPtr info = RADEONPTR(pScrn);
570    struct radeon_accel_state *accel_state = info->accel_state;
571    struct r600_accel_object src_obj, dst_obj;
572
573    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
574	RADEON_FALLBACK(("R600CheckDatatype src failed\n"));
575    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
576	RADEON_FALLBACK(("R600CheckDatatype dst failed\n"));
577    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
578	RADEON_FALLBACK(("Invalid planemask\n"));
579
580    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
581    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
582
583    accel_state->same_surface = FALSE;
584
585#if defined(XF86DRM_MODE)
586    if (info->cs) {
587	src_obj.offset = 0;
588	dst_obj.offset = 0;
589	src_obj.bo = radeon_get_pixmap_bo(pSrc);
590	dst_obj.bo = radeon_get_pixmap_bo(pDst);
591	if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
592	    accel_state->same_surface = TRUE;
593    } else
594#endif
595    {
596	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
597	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
598	if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst))
599	    accel_state->same_surface = TRUE;
600	src_obj.bo = NULL;
601	dst_obj.bo = NULL;
602    }
603
604    src_obj.width = pSrc->drawable.width;
605    src_obj.height = pSrc->drawable.height;
606    src_obj.bpp = pSrc->drawable.bitsPerPixel;
607    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
608
609    dst_obj.width = pDst->drawable.width;
610    dst_obj.height = pDst->drawable.height;
611    dst_obj.bpp = pDst->drawable.bitsPerPixel;
612    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
613
614    if (!R600SetAccelState(pScrn,
615			   &src_obj,
616			   NULL,
617			   &dst_obj,
618			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
619			   rop, planemask))
620	return FALSE;
621
622    if (accel_state->same_surface == TRUE) {
623	unsigned long size = pDst->drawable.height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
624
625#if defined(XF86DRM_MODE)
626	if (info->cs) {
627	    if (accel_state->copy_area_bo) {
628		radeon_bo_unref(accel_state->copy_area_bo);
629		accel_state->copy_area_bo = NULL;
630	    }
631	    accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
632						       RADEON_GEM_DOMAIN_VRAM,
633						       0);
634	    if (accel_state->copy_area_bo == NULL)
635		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
636
637	    radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
638					      RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_DOMAIN_VRAM);
639	    if (radeon_cs_space_check(info->cs)) {
640		radeon_bo_unref(accel_state->copy_area_bo);
641		accel_state->copy_area_bo = NULL;
642		return FALSE;
643	    }
644	    accel_state->copy_area = (void*)accel_state->copy_area_bo;
645	} else
646#endif
647	{
648	    if (accel_state->copy_area) {
649		exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
650		accel_state->copy_area = NULL;
651	    }
652	    accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL);
653	    if (!accel_state->copy_area)
654		RADEON_FALLBACK(("temp copy surface alloc failed\n"));
655	}
656    } else
657	R600DoPrepareCopy(pScrn);
658
659    if (accel_state->vsync)
660	RADEONVlineHelperClear(pScrn);
661
662    return TRUE;
663}
664
665static void
666R600Copy(PixmapPtr pDst,
667	 int srcX, int srcY,
668	 int dstX, int dstY,
669	 int w, int h)
670{
671    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
672    RADEONInfoPtr info = RADEONPTR(pScrn);
673    struct radeon_accel_state *accel_state = info->accel_state;
674
675    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
676	return;
677
678    if (accel_state->vsync)
679	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
680
681    if (accel_state->same_surface && accel_state->copy_area) {
682	uint32_t orig_offset, tmp_offset;
683	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
684	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
685	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
686
687#if defined(XF86DRM_MODE)
688	if (info->cs) {
689	    tmp_offset = 0;
690	    orig_offset = 0;
691	} else
692#endif
693	{
694	    tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset;
695	    orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
696	}
697
698	/* src to tmp */
699	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
700	accel_state->dst_obj.bo = accel_state->copy_area_bo;
701	accel_state->dst_obj.offset = tmp_offset;
702	R600DoPrepareCopy(pScrn);
703	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
704	R600DoCopy(pScrn);
705
706	/* tmp to dst */
707	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
708	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
709	accel_state->src_obj[0].offset = tmp_offset;
710	accel_state->dst_obj.domain = orig_dst_domain;
711	accel_state->dst_obj.bo = orig_bo;
712	accel_state->dst_obj.offset = orig_offset;
713	R600DoPrepareCopy(pScrn);
714	R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
715	R600DoCopyVline(pDst);
716
717	/* restore state */
718	accel_state->src_obj[0].domain = orig_src_domain;
719	accel_state->src_obj[0].bo = orig_bo;
720	accel_state->src_obj[0].offset = orig_offset;
721    } else
722	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
723
724}
725
726static void
727R600DoneCopy(PixmapPtr pDst)
728{
729    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
730    RADEONInfoPtr info = RADEONPTR(pScrn);
731    struct radeon_accel_state *accel_state = info->accel_state;
732
733    if (!accel_state->same_surface)
734	R600DoCopyVline(pDst);
735
736    if (accel_state->copy_area) {
737	if (!info->cs)
738	    exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area);
739	accel_state->copy_area = NULL;
740    }
741
742}
743
744struct blendinfo {
745    Bool dst_alpha;
746    Bool src_alpha;
747    uint32_t blend_cntl;
748};
749
750static struct blendinfo R600BlendOp[] = {
751    /* Clear */
752    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
753    /* Src */
754    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
755    /* Dst */
756    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
757    /* Over */
758    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
759    /* OverReverse */
760    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
761    /* In */
762    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
763    /* InReverse */
764    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
765    /* Out */
766    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
767    /* OutReverse */
768    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
769    /* Atop */
770    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
771    /* AtopReverse */
772    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
773    /* Xor */
774    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
775    /* Add */
776    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
777};
778
/* Pairs a Render picture format with the texture format used for it. */
struct formatinfo {
    unsigned int fmt;		/* PICT_* picture format */
    uint32_t card_fmt;		/* FMT_* hardware texture format */
};
783
784static struct formatinfo R600TexFormats[] = {
785    {PICT_a8r8g8b8,	FMT_8_8_8_8},
786    {PICT_x8r8g8b8,	FMT_8_8_8_8},
787    {PICT_a8b8g8r8,	FMT_8_8_8_8},
788    {PICT_x8b8g8r8,	FMT_8_8_8_8},
789#ifdef PICT_TYPE_BGRA
790    {PICT_b8g8r8a8,	FMT_8_8_8_8},
791    {PICT_b8g8r8x8,	FMT_8_8_8_8},
792#endif
793    {PICT_r5g6b5,	FMT_5_6_5},
794    {PICT_a1r5g5b5,	FMT_1_5_5_5},
795    {PICT_x1r5g5b5,     FMT_1_5_5_5},
796    {PICT_a8,		FMT_8},
797};
798
799static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
800{
801    uint32_t sblend, dblend;
802
803    sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
804    dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
805
806    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
807     * it as always 1.
808     */
809    if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
810	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
811	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
812	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
813	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
814    }
815
816    /* If the source alpha is being used, then we should only be in a case where
817     * the source blend factor is 0, and the source blend value is the mask
818     * channels multiplied by the source picture's alpha.
819     */
820    if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
821	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
822	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
823	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
824	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
825	}
826    }
827
828    return sblend | dblend;
829}
830
831static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
832{
833    switch (pDstPicture->format) {
834    case PICT_a8r8g8b8:
835    case PICT_x8r8g8b8:
836    case PICT_a8b8g8r8:
837    case PICT_x8b8g8r8:
838#ifdef PICT_TYPE_BGRA
839    case PICT_b8g8r8a8:
840    case PICT_b8g8r8x8:
841#endif
842	*dst_format = COLOR_8_8_8_8;
843	break;
844    case PICT_r5g6b5:
845	*dst_format = COLOR_5_6_5;
846	break;
847    case PICT_a1r5g5b5:
848    case PICT_x1r5g5b5:
849	*dst_format = COLOR_1_5_5_5;
850	break;
851    case PICT_a8:
852	*dst_format = COLOR_8;
853	break;
854    default:
855	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
856	       (int)pDstPicture->format));
857    }
858    return TRUE;
859}
860
861static Bool R600CheckCompositeTexture(PicturePtr pPict,
862				      PicturePtr pDstPict,
863				      int op,
864				      int unit)
865{
866    int w = pPict->pDrawable->width;
867    int h = pPict->pDrawable->height;
868    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
869    unsigned int i;
870    int max_tex_w, max_tex_h;
871
872    max_tex_w = 8192;
873    max_tex_h = 8192;
874
875    if ((w > max_tex_w) || (h > max_tex_h))
876	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
877
878    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
879	if (R600TexFormats[i].fmt == pPict->format)
880	    break;
881    }
882    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
883	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
884			 (int)pPict->format));
885
886    if (pPict->filter != PictFilterNearest &&
887	pPict->filter != PictFilterBilinear)
888	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
889
890    /* for REPEAT_NONE, Render semantics are that sampling outside the source
891     * picture results in alpha=0 pixels. We can implement this with a border color
892     * *if* our source texture has an alpha channel, otherwise we need to fall
893     * back. If we're not transformed then we hope that upper layers have clipped
894     * rendering to the bounds of the source drawable, in which case it doesn't
895     * matter. I have not, however, verified that the X server always does such
896     * clipping.
897     */
898    /* FIXME R6xx */
899    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
900	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
901	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
902    }
903
904    if (!radeon_transform_is_affine(pPict->transform))
905	RADEON_FALLBACK(("non-affine transforms not supported\n"));
906
907    return TRUE;
908}
909
910static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
911					int unit)
912{
913    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
914    RADEONInfoPtr info = RADEONPTR(pScrn);
915    struct radeon_accel_state *accel_state = info->accel_state;
916    int w = pPict->pDrawable->width;
917    int h = pPict->pDrawable->height;
918    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
919    unsigned int i;
920    tex_resource_t  tex_res;
921    tex_sampler_t   tex_samp;
922    int pix_r, pix_g, pix_b, pix_a;
923    float vs_alu_consts[8];
924
925    CLEAR (tex_res);
926    CLEAR (tex_samp);
927
928    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
929	if (R600TexFormats[i].fmt == pPict->format)
930	    break;
931    }
932
933    /* Texture */
934    tex_res.id                  = unit;
935    tex_res.w                   = w;
936    tex_res.h                   = h;
937    tex_res.pitch               = accel_state->src_obj[unit].pitch;
938    tex_res.depth               = 0;
939    tex_res.dim                 = SQ_TEX_DIM_2D;
940    tex_res.base                = accel_state->src_obj[unit].offset;
941    tex_res.mip_base            = accel_state->src_obj[unit].offset;
942    tex_res.size                = accel_state->src_size[unit];
943    tex_res.format              = R600TexFormats[i].card_fmt;
944    tex_res.bo                  = accel_state->src_obj[unit].bo;
945    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
946    tex_res.request_size        = 1;
947
948    /* component swizzles */
949    switch (pPict->format) {
950    case PICT_a1r5g5b5:
951    case PICT_a8r8g8b8:
952	pix_r = SQ_SEL_Z; /* R */
953	pix_g = SQ_SEL_Y; /* G */
954	pix_b = SQ_SEL_X; /* B */
955	pix_a = SQ_SEL_W; /* A */
956	break;
957    case PICT_a8b8g8r8:
958	pix_r = SQ_SEL_X; /* R */
959	pix_g = SQ_SEL_Y; /* G */
960	pix_b = SQ_SEL_Z; /* B */
961	pix_a = SQ_SEL_W; /* A */
962	break;
963    case PICT_x8b8g8r8:
964	pix_r = SQ_SEL_X; /* R */
965	pix_g = SQ_SEL_Y; /* G */
966	pix_b = SQ_SEL_Z; /* B */
967	pix_a = SQ_SEL_1; /* A */
968	break;
969#ifdef PICT_TYPE_BGRA
970    case PICT_b8g8r8a8:
971	pix_r = SQ_SEL_Y; /* R */
972	pix_g = SQ_SEL_Z; /* G */
973	pix_b = SQ_SEL_W; /* B */
974	pix_a = SQ_SEL_X; /* A */
975	break;
976    case PICT_b8g8r8x8:
977	pix_r = SQ_SEL_Y; /* R */
978	pix_g = SQ_SEL_Z; /* G */
979	pix_b = SQ_SEL_W; /* B */
980	pix_a = SQ_SEL_1; /* A */
981	break;
982#endif
983    case PICT_x1r5g5b5:
984    case PICT_x8r8g8b8:
985    case PICT_r5g6b5:
986	pix_r = SQ_SEL_Z; /* R */
987	pix_g = SQ_SEL_Y; /* G */
988	pix_b = SQ_SEL_X; /* B */
989	pix_a = SQ_SEL_1; /* A */
990	break;
991    case PICT_a8:
992	pix_r = SQ_SEL_0; /* R */
993	pix_g = SQ_SEL_0; /* G */
994	pix_b = SQ_SEL_0; /* B */
995	pix_a = SQ_SEL_X; /* A */
996	break;
997    default:
998	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
999    }
1000
1001    if (unit == 0) {
1002	if (!accel_state->msk_pic) {
1003	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
1004		pix_r = SQ_SEL_0;
1005		pix_g = SQ_SEL_0;
1006		pix_b = SQ_SEL_0;
1007	    }
1008
1009	    if (PICT_FORMAT_A(pPict->format) == 0)
1010		pix_a = SQ_SEL_1;
1011	} else {
1012	    if (accel_state->component_alpha) {
1013		if (accel_state->src_alpha) {
1014		    if (PICT_FORMAT_A(pPict->format) == 0) {
1015			pix_r = SQ_SEL_1;
1016			pix_g = SQ_SEL_1;
1017			pix_b = SQ_SEL_1;
1018			pix_a = SQ_SEL_1;
1019		    } else {
1020			pix_r = pix_a;
1021			pix_g = pix_a;
1022			pix_b = pix_a;
1023		    }
1024		} else {
1025		    if (PICT_FORMAT_A(pPict->format) == 0)
1026			pix_a = SQ_SEL_1;
1027		}
1028	    } else {
1029		if (PICT_FORMAT_RGB(pPict->format) == 0) {
1030		    pix_r = SQ_SEL_0;
1031		    pix_g = SQ_SEL_0;
1032		    pix_b = SQ_SEL_0;
1033		}
1034
1035		if (PICT_FORMAT_A(pPict->format) == 0)
1036		    pix_a = SQ_SEL_1;
1037	    }
1038	}
1039    } else {
1040	if (accel_state->component_alpha) {
1041	    if (PICT_FORMAT_A(pPict->format) == 0)
1042		pix_a = SQ_SEL_1;
1043	} else {
1044	    if (PICT_FORMAT_A(pPict->format) == 0) {
1045		pix_r = SQ_SEL_1;
1046		pix_g = SQ_SEL_1;
1047		pix_b = SQ_SEL_1;
1048		pix_a = SQ_SEL_1;
1049	    } else {
1050		pix_r = pix_a;
1051		pix_g = pix_a;
1052		pix_b = pix_a;
1053	    }
1054	}
1055    }
1056
1057    tex_res.dst_sel_x           = pix_r; /* R */
1058    tex_res.dst_sel_y           = pix_g; /* G */
1059    tex_res.dst_sel_z           = pix_b; /* B */
1060    tex_res.dst_sel_w           = pix_a; /* A */
1061
1062    tex_res.base_level          = 0;
1063    tex_res.last_level          = 0;
1064    tex_res.perf_modulation     = 0;
1065    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
1066
1067    tex_samp.id                 = unit;
1068    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1069
1070    switch (repeatType) {
1071    case RepeatNormal:
1072	tex_samp.clamp_x            = SQ_TEX_WRAP;
1073	tex_samp.clamp_y            = SQ_TEX_WRAP;
1074	break;
1075    case RepeatPad:
1076	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1077	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1078	break;
1079    case RepeatReflect:
1080	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1081	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1082	break;
1083    case RepeatNone:
1084	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1085	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1086	break;
1087    default:
1088	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1089    }
1090
1091    switch (pPict->filter) {
1092    case PictFilterNearest:
1093	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1094	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1095	break;
1096    case PictFilterBilinear:
1097	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1098	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1099	break;
1100    default:
1101	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1102    }
1103
1104    tex_samp.clamp_z            = SQ_TEX_WRAP;
1105    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1106    tex_samp.mip_filter         = 0;			/* no mipmap */
1107    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
1108
1109    if (pPict->transform != 0) {
1110	accel_state->is_transform[unit] = TRUE;
1111	accel_state->transform[unit] = pPict->transform;
1112
1113	vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
1114	vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
1115	vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
1116	vs_alu_consts[3] = 1.0 / w;
1117
1118	vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
1119	vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
1120	vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
1121	vs_alu_consts[7] = 1.0 / h;
1122    } else {
1123	accel_state->is_transform[unit] = FALSE;
1124
1125	vs_alu_consts[0] = 1.0;
1126	vs_alu_consts[1] = 0.0;
1127	vs_alu_consts[2] = 0.0;
1128	vs_alu_consts[3] = 1.0 / w;
1129
1130	vs_alu_consts[4] = 0.0;
1131	vs_alu_consts[5] = 1.0;
1132	vs_alu_consts[6] = 0.0;
1133	vs_alu_consts[7] = 1.0 / h;
1134    }
1135
1136    /* VS alu constants */
1137    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
1138			sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
1139
1140    return TRUE;
1141}
1142
1143static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1144			       PicturePtr pDstPicture)
1145{
1146    uint32_t tmp1;
1147    PixmapPtr pSrcPixmap, pDstPixmap;
1148    int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1149
1150    /* Check for unsupported compositing operations. */
1151    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1152	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1153
1154    if (!pSrcPicture->pDrawable)
1155	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1156
1157    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1158
1159    max_tex_w = 8192;
1160    max_tex_h = 8192;
1161    max_dst_w = 8192;
1162    max_dst_h = 8192;
1163
1164    if (pSrcPixmap->drawable.width >= max_tex_w ||
1165	pSrcPixmap->drawable.height >= max_tex_h) {
1166	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1167			 pSrcPixmap->drawable.width,
1168			 pSrcPixmap->drawable.height));
1169    }
1170
1171    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1172
1173    if (pDstPixmap->drawable.width >= max_dst_w ||
1174	pDstPixmap->drawable.height >= max_dst_h) {
1175	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1176			 pDstPixmap->drawable.width,
1177			 pDstPixmap->drawable.height));
1178    }
1179
1180    if (pMaskPicture) {
1181	PixmapPtr pMaskPixmap;
1182
1183	if (!pMaskPicture->pDrawable)
1184	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
1185
1186	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1187
1188	if (pMaskPixmap->drawable.width >= max_tex_w ||
1189	    pMaskPixmap->drawable.height >= max_tex_h) {
1190	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1191			     pMaskPixmap->drawable.width,
1192			     pMaskPixmap->drawable.height));
1193	}
1194
1195	if (pMaskPicture->componentAlpha) {
1196	    /* Check if it's component alpha that relies on a source alpha and
1197	     * on the source value.  We can only get one of those into the
1198	     * single source value that we get to blend with.
1199	     */
1200	    if (R600BlendOp[op].src_alpha &&
1201		(R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1202		(BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1203		RADEON_FALLBACK(("Component alpha not supported with source "
1204				 "alpha and source value blending.\n"));
1205	    }
1206	}
1207
1208	if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1209	    return FALSE;
1210    }
1211
1212    if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1213	return FALSE;
1214
1215    if (!R600GetDestFormat(pDstPicture, &tmp1))
1216	return FALSE;
1217
1218    return TRUE;
1219
1220}
1221
1222static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1223				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1224				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1225{
1226    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1227    RADEONInfoPtr info = RADEONPTR(pScrn);
1228    struct radeon_accel_state *accel_state = info->accel_state;
1229    uint32_t blendcntl, dst_format;
1230    cb_config_t cb_conf;
1231    shader_config_t vs_conf, ps_conf;
1232    struct r600_accel_object src_obj, mask_obj, dst_obj;
1233
1234    if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8)
1235	return FALSE;
1236
1237#if defined(XF86DRM_MODE)
1238    if (info->cs) {
1239	src_obj.offset = 0;
1240	dst_obj.offset = 0;
1241	src_obj.bo = radeon_get_pixmap_bo(pSrc);
1242	dst_obj.bo = radeon_get_pixmap_bo(pDst);
1243    } else
1244#endif
1245    {
1246	src_obj.offset = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1247	dst_obj.offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1248	src_obj.bo = NULL;
1249	dst_obj.bo = NULL;
1250    }
1251    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1252    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1253
1254    src_obj.width = pSrc->drawable.width;
1255    src_obj.height = pSrc->drawable.height;
1256    src_obj.bpp = pSrc->drawable.bitsPerPixel;
1257    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1258
1259    dst_obj.width = pDst->drawable.width;
1260    dst_obj.height = pDst->drawable.height;
1261    dst_obj.bpp = pDst->drawable.bitsPerPixel;
1262    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1263
1264    if (pMask) {
1265#if defined(XF86DRM_MODE)
1266	if (info->cs) {
1267	    mask_obj.offset = 0;
1268	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
1269	} else
1270#endif
1271	{
1272	    mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset;
1273	    mask_obj.bo = NULL;
1274	}
1275	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1276
1277	mask_obj.width = pMask->drawable.width;
1278	mask_obj.height = pMask->drawable.height;
1279	mask_obj.bpp = pMask->drawable.bitsPerPixel;
1280	mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1281
1282	if (!R600SetAccelState(pScrn,
1283			       &src_obj,
1284			       &mask_obj,
1285			       &dst_obj,
1286			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1287			       3, 0xffffffff))
1288	    return FALSE;
1289
1290	accel_state->msk_pic = pMaskPicture;
1291	if (pMaskPicture->componentAlpha) {
1292	    accel_state->component_alpha = TRUE;
1293	    if (R600BlendOp[op].src_alpha)
1294		accel_state->src_alpha = TRUE;
1295	    else
1296		accel_state->src_alpha = FALSE;
1297	} else {
1298	    accel_state->component_alpha = FALSE;
1299	    accel_state->src_alpha = FALSE;
1300	}
1301    } else {
1302	if (!R600SetAccelState(pScrn,
1303			       &src_obj,
1304			       NULL,
1305			       &dst_obj,
1306			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1307			       3, 0xffffffff))
1308	    return FALSE;
1309
1310	accel_state->msk_pic = NULL;
1311	accel_state->component_alpha = FALSE;
1312	accel_state->src_alpha = FALSE;
1313    }
1314
1315    if (!R600GetDestFormat(pDstPicture, &dst_format))
1316	return FALSE;
1317
1318    CLEAR (cb_conf);
1319    CLEAR (vs_conf);
1320    CLEAR (ps_conf);
1321
1322    if (pMask)
1323        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1324    else
1325        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1326
1327    radeon_cp_start(pScrn);
1328
1329    r600_set_default_state(pScrn, accel_state->ib);
1330
1331    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1332    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1333    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1334
1335    if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
1336        R600IBDiscard(pScrn, accel_state->ib);
1337        return FALSE;
1338    }
1339
1340    if (pMask) {
1341        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
1342            R600IBDiscard(pScrn, accel_state->ib);
1343            return FALSE;
1344        }
1345    } else
1346        accel_state->is_transform[1] = FALSE;
1347
1348    if (pMask) {
1349	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
1350	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
1351    } else {
1352	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
1353	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
1354    }
1355
1356    /* Shader */
1357    vs_conf.shader_addr         = accel_state->vs_mc_addr;
1358    vs_conf.shader_size         = accel_state->vs_size;
1359    vs_conf.num_gprs            = 5;
1360    vs_conf.stack_size          = 1;
1361    vs_conf.bo                  = accel_state->shaders_bo;
1362    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1363
1364    ps_conf.shader_addr         = accel_state->ps_mc_addr;
1365    ps_conf.shader_size         = accel_state->ps_size;
1366    ps_conf.num_gprs            = 3;
1367    ps_conf.stack_size          = 1;
1368    ps_conf.uncached_first_inst = 1;
1369    ps_conf.clamp_consts        = 0;
1370    ps_conf.export_mode         = 2;
1371    ps_conf.bo                  = accel_state->shaders_bo;
1372    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1373
1374    cb_conf.id = 0;
1375    cb_conf.w = accel_state->dst_obj.pitch;
1376    cb_conf.h = accel_state->dst_obj.height;
1377    cb_conf.base = accel_state->dst_obj.offset;
1378    cb_conf.format = dst_format;
1379    cb_conf.bo = accel_state->dst_obj.bo;
1380
1381    switch (pDstPicture->format) {
1382    case PICT_a8r8g8b8:
1383    case PICT_x8r8g8b8:
1384    case PICT_a1r5g5b5:
1385    case PICT_x1r5g5b5:
1386    default:
1387	cb_conf.comp_swap = 1; /* ARGB */
1388	break;
1389    case PICT_a8b8g8r8:
1390    case PICT_x8b8g8r8:
1391	cb_conf.comp_swap = 0; /* ABGR */
1392	break;
1393#ifdef PICT_TYPE_BGRA
1394    case PICT_b8g8r8a8:
1395    case PICT_b8g8r8x8:
1396	cb_conf.comp_swap = 3; /* BGRA */
1397	break;
1398#endif
1399    case PICT_r5g6b5:
1400	cb_conf.comp_swap = 2; /* RGB */
1401	break;
1402    case PICT_a8:
1403	cb_conf.comp_swap = 3; /* A */
1404	break;
1405    }
1406    cb_conf.source_format = 1;
1407    cb_conf.blend_clamp = 1;
1408    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
1409
1410    BEGIN_BATCH(24);
1411    EREG(accel_state->ib, CB_TARGET_MASK,                      (0xf << TARGET0_ENABLE_shift));
1412
1413    blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1414
1415    if (info->ChipFamily == CHIP_FAMILY_R600) {
1416	/* no per-MRT blend on R600 */
1417	EREG(accel_state->ib, CB_COLOR_CONTROL,                    R600_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift));
1418	EREG(accel_state->ib, CB_BLEND_CONTROL,                    blendcntl);
1419    } else {
1420	EREG(accel_state->ib, CB_COLOR_CONTROL,                    (R600_ROP[3] |
1421								    (1 << TARGET_BLEND_ENABLE_shift) |
1422								    PER_MRT_BLEND_bit));
1423	EREG(accel_state->ib, CB_BLEND0_CONTROL,                   blendcntl);
1424    }
1425
1426    /* Interpolator setup */
1427    if (pMask) {
1428	/* export 2 tex coords from VS */
1429	EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift));
1430	/* src = semantic id 0; mask = semantic id 1 */
1431	EREG(accel_state->ib, SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
1432						  (1 << SEMANTIC_1_shift)));
1433    } else {
1434	/* export 1 tex coords from VS */
1435	EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
1436	/* src = semantic id 0 */
1437	EREG(accel_state->ib, SPI_VS_OUT_ID_0,   (0 << SEMANTIC_0_shift));
1438    }
1439
1440    PACK0(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
1441    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
1442    E32(accel_state->ib, ((0    << SEMANTIC_shift)	|
1443			  (0x01 << DEFAULT_VAL_shift)	|
1444			  SEL_CENTROID_bit));
1445    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
1446    E32(accel_state->ib, ((1    << SEMANTIC_shift)	|
1447			  (0x01 << DEFAULT_VAL_shift)	|
1448			  SEL_CENTROID_bit));
1449
1450    PACK0(accel_state->ib, SPI_PS_IN_CONTROL_0, 3);
1451    if (pMask) {
1452	/* input 2 tex coords from VS */
1453	E32(accel_state->ib, (2 << NUM_INTERP_shift));
1454    } else {
1455	/* input 1 tex coords from VS */
1456	E32(accel_state->ib, (1 << NUM_INTERP_shift));
1457    }
1458    E32(accel_state->ib, 0);
1459    E32(accel_state->ib, 0);
1460    END_BATCH();
1461
1462    if (accel_state->vsync)
1463	RADEONVlineHelperClear(pScrn);
1464
1465    return TRUE;
1466}
1467
1468static void R600Composite(PixmapPtr pDst,
1469			  int srcX, int srcY,
1470			  int maskX, int maskY,
1471			  int dstX, int dstY,
1472			  int w, int h)
1473{
1474    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1475    RADEONInfoPtr info = RADEONPTR(pScrn);
1476    struct radeon_accel_state *accel_state = info->accel_state;
1477    float *vb;
1478
1479    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1480       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1481
1482    if (accel_state->vsync)
1483	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1484
1485    if (accel_state->msk_pic) {
1486
1487	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
1488
1489	vb[0] = (float)dstX;
1490	vb[1] = (float)dstY;
1491	vb[2] = (float)srcX;
1492	vb[3] = (float)srcY;
1493	vb[4] = (float)maskX;
1494	vb[5] = (float)maskY;
1495
1496	vb[6] = (float)dstX;
1497	vb[7] = (float)(dstY + h);
1498	vb[8] = (float)srcX;
1499	vb[9] = (float)(srcY + h);
1500	vb[10] = (float)maskX;
1501	vb[11] = (float)(maskY + h);
1502
1503	vb[12] = (float)(dstX + w);
1504	vb[13] = (float)(dstY + h);
1505	vb[14] = (float)(srcX + w);
1506	vb[15] = (float)(srcY + h);
1507	vb[16] = (float)(maskX + w);
1508	vb[17] = (float)(maskY + h);
1509
1510	radeon_vbo_commit(pScrn, &accel_state->vbo);
1511
1512    } else {
1513
1514	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
1515
1516	vb[0] = (float)dstX;
1517	vb[1] = (float)dstY;
1518	vb[2] = (float)srcX;
1519	vb[3] = (float)srcY;
1520
1521	vb[4] = (float)dstX;
1522	vb[5] = (float)(dstY + h);
1523	vb[6] = (float)srcX;
1524	vb[7] = (float)(srcY + h);
1525
1526	vb[8] = (float)(dstX + w);
1527	vb[9] = (float)(dstY + h);
1528	vb[10] = (float)(srcX + w);
1529	vb[11] = (float)(srcY + h);
1530
1531	radeon_vbo_commit(pScrn, &accel_state->vbo);
1532    }
1533
1534
1535}
1536
1537static void R600DoneComposite(PixmapPtr pDst)
1538{
1539    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1540    RADEONInfoPtr info = RADEONPTR(pScrn);
1541    struct radeon_accel_state *accel_state = info->accel_state;
1542    int vtx_size;
1543
1544    if (accel_state->vsync)
1545       r600_cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
1546			       accel_state->vline_crtc,
1547			       accel_state->vline_y1,
1548			       accel_state->vline_y2);
1549
1550    vtx_size = accel_state->msk_pic ? 24 : 16;
1551
1552    r600_finish_op(pScrn, vtx_size);
1553}
1554
1555Bool
1556R600CopyToVRAM(ScrnInfoPtr pScrn,
1557	       char *src, int src_pitch,
1558	       uint32_t dst_pitch, uint32_t dst_mc_addr, uint32_t dst_width, uint32_t dst_height, int bpp,
1559	       int x, int y, int w, int h)
1560{
1561    RADEONInfoPtr info = RADEONPTR(pScrn);
1562    struct radeon_accel_state *accel_state = info->accel_state;
1563    uint32_t scratch_mc_addr;
1564    int wpass = w * (bpp/8);
1565    int scratch_pitch_bytes = RADEON_ALIGN(wpass, 256);
1566    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1567    int scratch_offset = 0, hpass, temph;
1568    char *dst;
1569    drmBufPtr scratch;
1570    struct r600_accel_object scratch_obj, dst_obj;
1571
1572    if (dst_pitch & 7)
1573	return FALSE;
1574
1575    if (dst_mc_addr & 0xff)
1576	return FALSE;
1577
1578    scratch = RADEONCPGetBuffer(pScrn);
1579    if (scratch == NULL)
1580	return FALSE;
1581
1582    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1583    temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1584    dst = (char *)scratch->address;
1585
1586    scratch_obj.pitch = scratch_pitch;
1587    scratch_obj.width = w;
1588    scratch_obj.height = hpass;
1589    scratch_obj.offset = scratch_mc_addr;
1590    scratch_obj.bpp = bpp;
1591    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1592    scratch_obj.bo = NULL;
1593
1594    dst_obj.pitch = dst_pitch;
1595    dst_obj.width = dst_width;
1596    dst_obj.height = dst_height;
1597    dst_obj.offset = dst_mc_addr;
1598    dst_obj.bo = NULL;
1599    dst_obj.bpp = bpp;
1600    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1601
1602    if (!R600SetAccelState(pScrn,
1603			   &scratch_obj,
1604			   NULL,
1605			   &dst_obj,
1606			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1607			   3, 0xffffffff))
1608	return FALSE;
1609
1610    /* memcopy from sys to scratch */
1611    while (temph--) {
1612	memcpy (dst, src, wpass);
1613	src += src_pitch;
1614	dst += scratch_pitch_bytes;
1615    }
1616
1617    while (h) {
1618	uint32_t offset = scratch_mc_addr + scratch_offset;
1619	int oldhpass = hpass;
1620	h -= oldhpass;
1621	temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1622
1623	if (hpass) {
1624	    scratch_offset = scratch->total/2 - scratch_offset;
1625	    dst = (char *)scratch->address + scratch_offset;
1626	    /* wait for the engine to be idle */
1627	    RADEONWaitForIdleCP(pScrn);
1628	    //memcopy from sys to scratch
1629	    while (temph--) {
1630		memcpy (dst, src, wpass);
1631		src += src_pitch;
1632		dst += scratch_pitch_bytes;
1633	    }
1634	}
1635	/* blit from scratch to vram */
1636	info->accel_state->src_obj[0].height = oldhpass;
1637	info->accel_state->src_obj[0].offset = offset;
1638	R600DoPrepareCopy(pScrn);
1639	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
1640	R600DoCopy(pScrn);
1641	y += oldhpass;
1642    }
1643
1644    R600IBDiscard(pScrn, scratch);
1645
1646    return TRUE;
1647}
1648
1649static Bool
1650R600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1651		   char *src, int src_pitch)
1652{
1653    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1654    RADEONInfoPtr info = RADEONPTR(pScrn);
1655    uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1656    uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
1657    int bpp = pDst->drawable.bitsPerPixel;
1658
1659    return R600CopyToVRAM(pScrn,
1660			  src, src_pitch,
1661			  dst_pitch, dst_mc_addr, pDst->drawable.width, pDst->drawable.height, bpp,
1662			  x, y, w, h);
1663}
1664
1665static Bool
1666R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
1667		       char *dst, int dst_pitch)
1668{
1669    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1670    RADEONInfoPtr info = RADEONPTR(pScrn);
1671    struct radeon_accel_state *accel_state = info->accel_state;
1672    uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1673    uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
1674    uint32_t src_width = pSrc->drawable.width;
1675    uint32_t src_height = pSrc->drawable.height;
1676    int bpp = pSrc->drawable.bitsPerPixel;
1677    uint32_t scratch_mc_addr;
1678    int scratch_pitch_bytes = RADEON_ALIGN(dst_pitch, 256);
1679    int scratch_offset = 0, hpass;
1680    uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
1681    int wpass = w * (bpp/8);
1682    drmBufPtr scratch;
1683    struct r600_accel_object scratch_obj, src_obj;
1684
1685    /* bad pipe setup in drm prior to 1.32 */
1686    if (info->dri->pKernelDRMVersion->version_minor < 32) {
1687	    if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32))
1688		    return FALSE;
1689    }
1690
1691    if (src_pitch & 7)
1692	return FALSE;
1693
1694    scratch = RADEONCPGetBuffer(pScrn);
1695    if (scratch == NULL)
1696	return FALSE;
1697
1698    scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
1699    hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1700
1701    src_obj.pitch = src_pitch;
1702    src_obj.width = src_width;
1703    src_obj.height = src_height;
1704    src_obj.offset = src_mc_addr;
1705    src_obj.bo = NULL;
1706    src_obj.bpp = bpp;
1707    src_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1708
1709    scratch_obj.pitch = scratch_pitch;
1710    scratch_obj.width = src_width;
1711    scratch_obj.height = hpass;
1712    scratch_obj.offset = scratch_mc_addr;
1713    scratch_obj.bpp = bpp;
1714    scratch_obj.domain = RADEON_GEM_DOMAIN_GTT;
1715    scratch_obj.bo = NULL;
1716
1717    if (!R600SetAccelState(pScrn,
1718			   &src_obj,
1719			   NULL,
1720			   &scratch_obj,
1721			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1722			   3, 0xffffffff))
1723	return FALSE;
1724
1725    /* blit from vram to scratch */
1726    R600DoPrepareCopy(pScrn);
1727    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1728    R600DoCopy(pScrn);
1729
1730    while (h) {
1731	char *src = (char *)scratch->address + scratch_offset;
1732	int oldhpass = hpass;
1733	h -= oldhpass;
1734	y += oldhpass;
1735	hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
1736
1737	if (hpass) {
1738	    scratch_offset = scratch->total/2 - scratch_offset;
1739	    /* blit from vram to scratch */
1740	    info->accel_state->dst_obj.height = hpass;
1741	    info->accel_state->dst_obj.offset = scratch_mc_addr + scratch_offset;
1742	    R600DoPrepareCopy(pScrn);
1743	    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
1744	    R600DoCopy(pScrn);
1745	}
1746
1747	/* wait for the engine to be idle */
1748	RADEONWaitForIdleCP(pScrn);
1749	/* memcopy from scratch to sys */
1750	while (oldhpass--) {
1751	    memcpy (dst, src, wpass);
1752	    dst += dst_pitch;
1753	    src += scratch_pitch_bytes;
1754	}
1755    }
1756
1757    R600IBDiscard(pScrn, scratch);
1758
1759    return TRUE;
1760
1761}
1762
1763#if defined(XF86DRM_MODE)
1764
1765static Bool
1766R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
1767		     char *src, int src_pitch)
1768{
1769    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
1770    RADEONInfoPtr info = RADEONPTR(pScrn);
1771    struct radeon_accel_state *accel_state = info->accel_state;
1772    struct radeon_exa_pixmap_priv *driver_priv;
1773    struct radeon_bo *scratch = NULL;
1774    struct radeon_bo *copy_dst;
1775    unsigned char *dst;
1776    unsigned size;
1777    uint32_t dst_domain;
1778    int bpp = pDst->drawable.bitsPerPixel;
1779    uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
1780    uint32_t copy_pitch;
1781    uint32_t src_pitch_hw = scratch_pitch / (bpp / 8);
1782    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
1783    int ret;
1784    Bool flush = TRUE;
1785    Bool r;
1786    int i;
1787    struct r600_accel_object src_obj, dst_obj;
1788    uint32_t tiling_flags = 0, pitch = 0;
1789
1790    if (bpp < 8)
1791	return FALSE;
1792
1793    driver_priv = exaGetPixmapDriverPrivate(pDst);
1794    if (!driver_priv || !driver_priv->bo)
1795	return FALSE;
1796
1797    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
1798    if (ret)
1799	ErrorF("radeon_bo_get_tiling failed\n");
1800
1801    /* If we know the BO won't be busy, don't bother with a scratch */
1802    copy_dst = driver_priv->bo;
1803    copy_pitch = pDst->devKind;
1804    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1805	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1806	    flush = FALSE;
1807	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
1808		goto copy;
1809	}
1810    }
1811
1812    size = scratch_pitch * h;
1813    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
1814    if (scratch == NULL) {
1815	goto copy;
1816    }
1817
1818    src_obj.pitch = src_pitch_hw;
1819    src_obj.width = w;
1820    src_obj.height = h;
1821    src_obj.offset = 0;
1822    src_obj.bpp = bpp;
1823    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1824    src_obj.bo = scratch;
1825
1826    dst_obj.pitch = dst_pitch_hw;
1827    dst_obj.width = pDst->drawable.width;
1828    dst_obj.height = pDst->drawable.height;
1829    dst_obj.offset = 0;
1830    dst_obj.bpp = bpp;
1831    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1832    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1833
1834    if (!R600SetAccelState(pScrn,
1835			   &src_obj,
1836			   NULL,
1837			   &dst_obj,
1838			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1839			   3, 0xffffffff)) {
1840        goto copy;
1841    }
1842    copy_dst = scratch;
1843    copy_pitch = scratch_pitch;
1844    flush = FALSE;
1845
1846copy:
1847    if (flush)
1848	radeon_cs_flush_indirect(pScrn);
1849
1850    ret = radeon_bo_map(copy_dst, 0);
1851    if (ret) {
1852        r = FALSE;
1853        goto out;
1854    }
1855    r = TRUE;
1856    size = w * bpp / 8;
1857    dst = copy_dst->ptr;
1858    if (copy_dst == driver_priv->bo)
1859	dst += y * copy_pitch + x * bpp / 8;
1860    for (i = 0; i < h; i++) {
1861        memcpy(dst + i * copy_pitch, src, size);
1862        src += src_pitch;
1863    }
1864    radeon_bo_unmap(copy_dst);
1865
1866    if (copy_dst == scratch) {
1867	if (info->accel_state->vsync)
1868	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1869
1870	/* blit from gart to vram */
1871	R600DoPrepareCopy(pScrn);
1872	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
1873	R600DoCopyVline(pDst);
1874    }
1875
1876out:
1877    if (scratch)
1878	radeon_bo_unref(scratch);
1879    return r;
1880}
1881
1882static Bool
1883R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
1884			 int h, char *dst, int dst_pitch)
1885{
1886    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
1887    RADEONInfoPtr info = RADEONPTR(pScrn);
1888    struct radeon_accel_state *accel_state = info->accel_state;
1889    struct radeon_exa_pixmap_priv *driver_priv;
1890    struct radeon_bo *scratch = NULL;
1891    struct radeon_bo *copy_src;
1892    unsigned size;
1893    uint32_t src_domain = 0;
1894    int bpp = pSrc->drawable.bitsPerPixel;
1895    uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
1896    uint32_t copy_pitch;
1897    uint32_t dst_pitch_hw = scratch_pitch / (bpp / 8);
1898    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
1899    int ret;
1900    Bool flush = FALSE;
1901    Bool r;
1902    struct r600_accel_object src_obj, dst_obj;
1903    uint32_t tiling_flags = 0, pitch = 0;
1904
1905    if (bpp < 8)
1906	return FALSE;
1907
1908    driver_priv = exaGetPixmapDriverPrivate(pSrc);
1909    if (!driver_priv || !driver_priv->bo)
1910	return FALSE;
1911
1912    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
1913    if (ret)
1914	ErrorF("radeon_bo_get_tiling failed\n");
1915
1916    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
1917    copy_src = driver_priv->bo;
1918    copy_pitch = pSrc->devKind;
1919    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1920	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1921	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
1922	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
1923		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
1924		src_domain = 0;
1925	    else /* A write may be scheduled */
1926		flush = TRUE;
1927	}
1928
1929	if (!src_domain)
1930	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
1931
1932	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
1933	    goto copy;
1934    }
1935
1936    size = scratch_pitch * h;
1937    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
1938    if (scratch == NULL) {
1939	goto copy;
1940    }
1941    radeon_cs_space_reset_bos(info->cs);
1942    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
1943				      RADEON_GEM_DOMAIN_VRAM, 0);
1944    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
1945    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
1946    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1947    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
1948    ret = radeon_cs_space_check(info->cs);
1949    if (ret) {
1950        goto copy;
1951    }
1952
1953    src_obj.pitch = src_pitch_hw;
1954    src_obj.width = pSrc->drawable.width;
1955    src_obj.height = pSrc->drawable.height;
1956    src_obj.offset = 0;
1957    src_obj.bpp = bpp;
1958    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1959    src_obj.bo = radeon_get_pixmap_bo(pSrc);
1960
1961    dst_obj.pitch = dst_pitch_hw;
1962    dst_obj.width = w;
1963    dst_obj.height = h;
1964    dst_obj.offset = 0;
1965    dst_obj.bo = scratch;
1966    dst_obj.bpp = bpp;
1967    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1968
1969    if (!R600SetAccelState(pScrn,
1970			   &src_obj,
1971			   NULL,
1972			   &dst_obj,
1973			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1974			   3, 0xffffffff)) {
1975        goto copy;
1976    }
1977
1978    /* blit from vram to gart */
1979    R600DoPrepareCopy(pScrn);
1980    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
1981    R600DoCopy(pScrn);
1982    copy_src = scratch;
1983    copy_pitch = scratch_pitch;
1984    flush = TRUE;
1985
1986copy:
1987    if (flush && info->cs)
1988	radeon_cs_flush_indirect(pScrn);
1989
1990    ret = radeon_bo_map(copy_src, 0);
1991    if (ret) {
1992	ErrorF("failed to map pixmap: %d\n", ret);
1993        r = FALSE;
1994        goto out;
1995    }
1996    r = TRUE;
1997    w *= bpp / 8;
1998    if (copy_src == driver_priv->bo)
1999	size = y * copy_pitch + x * bpp / 8;
2000    else
2001	size = 0;
2002    while (h--) {
2003        memcpy(dst, copy_src->ptr + size, w);
2004        size += copy_pitch;
2005        dst += dst_pitch;
2006    }
2007    radeon_bo_unmap(copy_src);
2008out:
2009    if (scratch)
2010	radeon_bo_unref(scratch);
2011    return r;
2012}
2013#endif
2014
2015static int
2016R600MarkSync(ScreenPtr pScreen)
2017{
2018    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
2019    RADEONInfoPtr info = RADEONPTR(pScrn);
2020    struct radeon_accel_state *accel_state = info->accel_state;
2021
2022    return ++accel_state->exaSyncMarker;
2023
2024}
2025
2026static void
2027R600Sync(ScreenPtr pScreen, int marker)
2028{
2029    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
2030    RADEONInfoPtr info = RADEONPTR(pScrn);
2031    struct radeon_accel_state *accel_state = info->accel_state;
2032
2033    if (accel_state->exaMarkerSynced != marker) {
2034#ifdef XF86DRM_MODE
2035#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2036	if (!info->cs)
2037#endif
2038#endif
2039	    RADEONWaitForIdleCP(pScrn);
2040	accel_state->exaMarkerSynced = marker;
2041    }
2042
2043}
2044
2045static Bool
2046R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
2047{
2048    RADEONInfoPtr info = RADEONPTR(pScrn);
2049    struct radeon_accel_state *accel_state = info->accel_state;
2050
2051    /* 512 bytes per shader for now */
2052    int size = 512 * 9;
2053
2054    accel_state->shaders = NULL;
2055
2056#ifdef XF86DRM_MODE
2057#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2058    if (info->cs) {
2059	accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
2060						 RADEON_GEM_DOMAIN_VRAM, 0);
2061	if (accel_state->shaders_bo == NULL) {
2062	    ErrorF("Allocating shader failed\n");
2063	    return FALSE;
2064	}
2065	return TRUE;
2066    } else
2067#endif
2068#endif
2069    {
2070	accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256,
2071						 TRUE, NULL, NULL);
2072
2073	if (accel_state->shaders == NULL)
2074	    return FALSE;
2075    }
2076
2077    return TRUE;
2078}
2079
2080Bool
2081R600LoadShaders(ScrnInfoPtr pScrn)
2082{
2083    RADEONInfoPtr info = RADEONPTR(pScrn);
2084    struct radeon_accel_state *accel_state = info->accel_state;
2085    RADEONChipFamily ChipSet = info->ChipFamily;
2086    uint32_t *shader;
2087#ifdef XF86DRM_MODE
2088#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2089    int ret;
2090
2091    if (info->cs) {
2092	ret = radeon_bo_map(accel_state->shaders_bo, 1);
2093	if (ret) {
2094	    FatalError("failed to map shader %d\n", ret);
2095	    return FALSE;
2096	}
2097	shader = accel_state->shaders_bo->ptr;
2098    } else
2099#endif
2100#endif
2101	shader = (pointer)((char *)info->FB + accel_state->shaders->offset);
2102
2103    /*  solid vs --------------------------------------- */
2104    accel_state->solid_vs_offset = 0;
2105    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
2106
2107    /*  solid ps --------------------------------------- */
2108    accel_state->solid_ps_offset = 512;
2109    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
2110
2111    /*  copy vs --------------------------------------- */
2112    accel_state->copy_vs_offset = 1024;
2113    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
2114
2115    /*  copy ps --------------------------------------- */
2116    accel_state->copy_ps_offset = 1536;
2117    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2118
2119    /*  comp vs --------------------------------------- */
2120    accel_state->comp_vs_offset = 2048;
2121    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2122
2123    /*  comp ps --------------------------------------- */
2124    accel_state->comp_ps_offset = 2560;
2125    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2126
2127    /*  xv vs --------------------------------------- */
2128    accel_state->xv_vs_offset = 3072;
2129    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2130
2131    /*  xv ps --------------------------------------- */
2132    accel_state->xv_ps_offset = 3584;
2133    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2134
2135#ifdef XF86DRM_MODE
2136#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2137    if (info->cs) {
2138	radeon_bo_unmap(accel_state->shaders_bo);
2139    }
2140#endif
2141#endif
2142
2143    return TRUE;
2144}
2145
2146static Bool
2147R600PrepareAccess(PixmapPtr pPix, int index)
2148{
2149    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2150    RADEONInfoPtr info = RADEONPTR(pScrn);
2151    unsigned char *RADEONMMIO = info->MMIO;
2152
2153    /* flush HDP read/write caches */
2154    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2155
2156    return TRUE;
2157}
2158
2159static void
2160R600FinishAccess(PixmapPtr pPix, int index)
2161{
2162    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
2163    RADEONInfoPtr info = RADEONPTR(pScrn);
2164    unsigned char *RADEONMMIO = info->MMIO;
2165
2166    /* flush HDP read/write caches */
2167    OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2168
2169}
2170
2171Bool
2172R600DrawInit(ScreenPtr pScreen)
2173{
2174    ScrnInfoPtr pScrn =  xf86Screens[pScreen->myNum];
2175    RADEONInfoPtr info   = RADEONPTR(pScrn);
2176
2177    if (info->accel_state->exa == NULL) {
2178	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2179	return FALSE;
2180    }
2181
2182    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2183    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2184
2185    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2186    info->accel_state->exa->Solid = R600Solid;
2187    info->accel_state->exa->DoneSolid = R600DoneSolid;
2188
2189    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2190    info->accel_state->exa->Copy = R600Copy;
2191    info->accel_state->exa->DoneCopy = R600DoneCopy;
2192
2193    info->accel_state->exa->MarkSync = R600MarkSync;
2194    info->accel_state->exa->WaitMarker = R600Sync;
2195
2196#ifdef XF86DRM_MODE
2197#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2198    if (info->cs) {
2199	info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
2200	info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
2201	info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
2202	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
2203	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
2204	info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
2205	info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
2206#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5)
2207        info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
2208#endif
2209    } else
2210#endif
2211#endif
2212    {
2213	info->accel_state->exa->PrepareAccess = R600PrepareAccess;
2214	info->accel_state->exa->FinishAccess = R600FinishAccess;
2215
2216	/* AGP seems to have problems with gart transfers */
2217	if (info->accelDFS) {
2218	    info->accel_state->exa->UploadToScreen = R600UploadToScreen;
2219	    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen;
2220	}
2221    }
2222
2223    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
2224#ifdef EXA_SUPPORTS_PREPARE_AUX
2225    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
2226#endif
2227
2228#ifdef XF86DRM_MODE
2229#ifdef EXA_HANDLES_PIXMAPS
2230    if (info->cs) {
2231	info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
2232#ifdef EXA_MIXED_PIXMAPS
2233	info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
2234#endif
2235    }
2236#endif
2237#endif
2238    info->accel_state->exa->pixmapOffsetAlign = 256;
2239    info->accel_state->exa->pixmapPitchAlign = 256;
2240
2241    info->accel_state->exa->CheckComposite = R600CheckComposite;
2242    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2243    info->accel_state->exa->Composite = R600Composite;
2244    info->accel_state->exa->DoneComposite = R600DoneComposite;
2245
2246#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
2247    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
2248
2249    info->accel_state->exa->maxPitchBytes = 32768;
2250    info->accel_state->exa->maxX = 8192;
2251#else
2252    info->accel_state->exa->maxX = 8192;
2253#endif
2254    info->accel_state->exa->maxY = 8192;
2255
2256    /* not supported yet */
2257    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2258	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2259	info->accel_state->vsync = TRUE;
2260    } else
2261	info->accel_state->vsync = FALSE;
2262
2263    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
2264	free(info->accel_state->exa);
2265	return FALSE;
2266    }
2267
2268#ifdef XF86DRM_MODE
2269#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
2270    if (!info->cs)
2271#endif
2272#endif
2273	if (!info->gartLocation)
2274	    return FALSE;
2275
2276    info->accel_state->XInited3D = FALSE;
2277    info->accel_state->copy_area = NULL;
2278    info->accel_state->src_obj[0].bo = NULL;
2279    info->accel_state->src_obj[1].bo = NULL;
2280    info->accel_state->dst_obj.bo = NULL;
2281    info->accel_state->copy_area_bo = NULL;
2282    info->accel_state->vbo.vb_start_op = -1;
2283    info->accel_state->finish_op = r600_finish_op;
2284    info->accel_state->vbo.verts_per_op = 3;
2285    RADEONVlineHelperClear(pScrn);
2286
2287#ifdef XF86DRM_MODE
2288    radeon_vbo_init_lists(pScrn);
2289#endif
2290
2291    if (!R600AllocShaders(pScrn, pScreen))
2292	return FALSE;
2293
2294    if (!R600LoadShaders(pScrn))
2295	return FALSE;
2296
2297    exaMarkSync(pScreen);
2298
2299    return TRUE;
2300
2301}
2302
2303