1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#ifdef XF86DRM_MODE
32
33#include "xf86.h"
34
35#include "exa.h"
36
37#include "radeon.h"
38#include "radeon_macros.h"
39#include "radeon_reg.h"
40#include "evergreen_shader.h"
41#include "evergreen_reg.h"
42#include "evergreen_state.h"
43#include "radeon_exa_shared.h"
44#include "radeon_vbo.h"
45
46extern int cayman_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs);
47extern int cayman_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps);
48
49extern int cayman_copy_vs(RADEONChipFamily ChipSet, uint32_t* vs);
50extern int cayman_copy_ps(RADEONChipFamily ChipSet, uint32_t* ps);
51
52extern int cayman_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader);
53extern int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader);
54
55extern int cayman_comp_vs(RADEONChipFamily ChipSet, uint32_t* vs);
56extern int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* ps);
57
58static Bool
59EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
60{
61    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
62    RADEONInfoPtr info = RADEONPTR(pScrn);
63    struct radeon_accel_state *accel_state = info->accel_state;
64    cb_config_t     cb_conf;
65    shader_config_t vs_conf, ps_conf;
66    uint32_t a, r, g, b;
67    float *ps_alu_consts;
68    const_config_t ps_const_conf;
69    struct r600_accel_object dst;
70
71
72    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
73	RADEON_FALLBACK(("EVERGREENCheckDatatype failed\n"));
74    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
75	RADEON_FALLBACK(("invalid planemask\n"));
76
77    dst.offset = 0;
78    dst.bo = radeon_get_pixmap_bo(pPix);
79    dst.tiling_flags = radeon_get_pixmap_tiling(pPix);
80    dst.surface = radeon_get_pixmap_surface(pPix);
81
82    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
83    dst.width = pPix->drawable.width;
84    dst.height = pPix->drawable.height;
85    dst.bpp = pPix->drawable.bitsPerPixel;
86    dst.domain = RADEON_GEM_DOMAIN_VRAM;
87
88    if (!R600SetAccelState(pScrn,
89			   NULL,
90			   NULL,
91			   &dst,
92			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
93			   alu, pm))
94	return FALSE;
95
96    CLEAR (cb_conf);
97    CLEAR (vs_conf);
98    CLEAR (ps_conf);
99    CLEAR (ps_const_conf);
100
101    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
102    radeon_vbo_check(pScrn, &accel_state->cbuf, 256);
103    radeon_cp_start(pScrn);
104
105    evergreen_set_default_state(pScrn);
106
107    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
108    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
109    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
110
111    /* Shader */
112    vs_conf.shader_addr         = accel_state->vs_mc_addr;
113    vs_conf.shader_size         = accel_state->vs_size;
114    vs_conf.num_gprs            = 2;
115    vs_conf.stack_size          = 0;
116    vs_conf.bo                  = accel_state->shaders_bo;
117    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
118
119    ps_conf.shader_addr         = accel_state->ps_mc_addr;
120    ps_conf.shader_size         = accel_state->ps_size;
121    ps_conf.num_gprs            = 1;
122    ps_conf.stack_size          = 0;
123    ps_conf.clamp_consts        = 0;
124    ps_conf.export_mode         = 2;
125    ps_conf.bo                  = accel_state->shaders_bo;
126    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
127
128    cb_conf.id = 0;
129    cb_conf.w = accel_state->dst_obj.pitch;
130    cb_conf.h = accel_state->dst_obj.height;
131    cb_conf.base = accel_state->dst_obj.offset;
132    cb_conf.bo = accel_state->dst_obj.bo;
133    cb_conf.surface = accel_state->dst_obj.surface;
134
135    if (accel_state->dst_obj.bpp == 8) {
136	cb_conf.format = COLOR_8;
137	cb_conf.comp_swap = 3; /* A */
138    } else if (accel_state->dst_obj.bpp == 16) {
139	cb_conf.format = COLOR_5_6_5;
140	cb_conf.comp_swap = 2; /* RGB */
141#if X_BYTE_ORDER == X_BIG_ENDIAN
142	cb_conf.endian = ENDIAN_8IN16;
143#endif
144    } else {
145	cb_conf.format = COLOR_8_8_8_8;
146	cb_conf.comp_swap = 1; /* ARGB */
147#if X_BYTE_ORDER == X_BIG_ENDIAN
148	cb_conf.endian = ENDIAN_8IN32;
149#endif
150    }
151    cb_conf.source_format = EXPORT_4C_16BPC;
152    cb_conf.blend_clamp = 1;
153    /* Render setup */
154    if (accel_state->planemask & 0x000000ff)
155	cb_conf.pmask |= 4; /* B */
156    if (accel_state->planemask & 0x0000ff00)
157	cb_conf.pmask |= 2; /* G */
158    if (accel_state->planemask & 0x00ff0000)
159	cb_conf.pmask |= 1; /* R */
160    if (accel_state->planemask & 0xff000000)
161	cb_conf.pmask |= 8; /* A */
162    cb_conf.rop = accel_state->rop;
163    if (accel_state->dst_obj.tiling_flags == 0) {
164	cb_conf.array_mode = 0;
165	cb_conf.non_disp_tiling = 1;
166    }
167    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
168
169    evergreen_set_spi(pScrn, 0, 0);
170
171    /* PS alu constants */
172    ps_const_conf.size_bytes = 256;
173    ps_const_conf.type = SHADER_TYPE_PS;
174    ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
175    ps_const_conf.bo = accel_state->cbuf.vb_bo;
176    ps_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_offset;
177    ps_const_conf.cpu_ptr = (uint32_t *)(char *)ps_alu_consts;
178    if (accel_state->dst_obj.bpp == 16) {
179	r = (fg >> 11) & 0x1f;
180	g = (fg >> 5) & 0x3f;
181	b = (fg >> 0) & 0x1f;
182	ps_alu_consts[0] = (float)r / 31; /* R */
183	ps_alu_consts[1] = (float)g / 63; /* G */
184	ps_alu_consts[2] = (float)b / 31; /* B */
185	ps_alu_consts[3] = 1.0; /* A */
186    } else if (accel_state->dst_obj.bpp == 8) {
187	a = (fg >> 0) & 0xff;
188	ps_alu_consts[0] = 0.0; /* R */
189	ps_alu_consts[1] = 0.0; /* G */
190	ps_alu_consts[2] = 0.0; /* B */
191	ps_alu_consts[3] = (float)a / 255; /* A */
192    } else {
193	a = (fg >> 24) & 0xff;
194	r = (fg >> 16) & 0xff;
195	g = (fg >> 8) & 0xff;
196	b = (fg >> 0) & 0xff;
197	ps_alu_consts[0] = (float)r / 255; /* R */
198	ps_alu_consts[1] = (float)g / 255; /* G */
199	ps_alu_consts[2] = (float)b / 255; /* B */
200	ps_alu_consts[3] = (float)a / 255; /* A */
201    }
202    radeon_vbo_commit(pScrn, &accel_state->cbuf);
203    evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);
204
205    if (accel_state->vsync)
206	RADEONVlineHelperClear(pScrn);
207
208    accel_state->dst_pix = pPix;
209    accel_state->fg = fg;
210
211    return TRUE;
212}
213
214static void
215EVERGREENDoneSolid(PixmapPtr pPix)
216{
217    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
218    RADEONInfoPtr info = RADEONPTR(pScrn);
219    struct radeon_accel_state *accel_state = info->accel_state;
220
221    if (accel_state->vsync)
222	evergreen_cp_wait_vline_sync(pScrn, pPix,
223				     accel_state->vline_crtc,
224				     accel_state->vline_y1,
225				     accel_state->vline_y2);
226
227    evergreen_finish_op(pScrn, 8);
228}
229
230static void
231EVERGREENSolid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
232{
233    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
234    RADEONInfoPtr info = RADEONPTR(pScrn);
235    struct radeon_accel_state *accel_state = info->accel_state;
236    float *vb;
237
238    if (CS_FULL(info->cs)) {
239	EVERGREENDoneSolid(info->accel_state->dst_pix);
240	radeon_cs_flush_indirect(pScrn);
241	EVERGREENPrepareSolid(accel_state->dst_pix,
242			      accel_state->rop,
243			      accel_state->planemask,
244			      accel_state->fg);
245    }
246
247    if (accel_state->vsync)
248	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
249
250    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
251
252    vb[0] = (float)x1;
253    vb[1] = (float)y1;
254
255    vb[2] = (float)x1;
256    vb[3] = (float)y2;
257
258    vb[4] = (float)x2;
259    vb[5] = (float)y2;
260
261    radeon_vbo_commit(pScrn, &accel_state->vbo);
262}
263
264static void
265EVERGREENDoPrepareCopy(ScrnInfoPtr pScrn)
266{
267    RADEONInfoPtr info = RADEONPTR(pScrn);
268    struct radeon_accel_state *accel_state = info->accel_state;
269    cb_config_t     cb_conf;
270    tex_resource_t  tex_res;
271    tex_sampler_t   tex_samp;
272    shader_config_t vs_conf, ps_conf;
273
274    CLEAR (cb_conf);
275    CLEAR (tex_res);
276    CLEAR (tex_samp);
277    CLEAR (vs_conf);
278    CLEAR (ps_conf);
279
280    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
281    radeon_cp_start(pScrn);
282
283    evergreen_set_default_state(pScrn);
284
285    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
286    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
287    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
288
289    /* Shader */
290    vs_conf.shader_addr         = accel_state->vs_mc_addr;
291    vs_conf.shader_size         = accel_state->vs_size;
292    vs_conf.num_gprs            = 2;
293    vs_conf.stack_size          = 0;
294    vs_conf.bo                  = accel_state->shaders_bo;
295    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
296
297    ps_conf.shader_addr         = accel_state->ps_mc_addr;
298    ps_conf.shader_size         = accel_state->ps_size;
299    ps_conf.num_gprs            = 1;
300    ps_conf.stack_size          = 0;
301    ps_conf.clamp_consts        = 0;
302    ps_conf.export_mode         = 2;
303    ps_conf.bo                  = accel_state->shaders_bo;
304    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
305
306    /* Texture */
307    tex_res.id                  = 0;
308    tex_res.w                   = accel_state->src_obj[0].width;
309    tex_res.h                   = accel_state->src_obj[0].height;
310    tex_res.pitch               = accel_state->src_obj[0].pitch;
311    tex_res.depth               = 0;
312    tex_res.dim                 = SQ_TEX_DIM_2D;
313    tex_res.base                = accel_state->src_obj[0].offset;
314    tex_res.mip_base            = accel_state->src_obj[0].offset;
315    tex_res.size                = accel_state->src_size[0];
316    tex_res.bo                  = accel_state->src_obj[0].bo;
317    tex_res.mip_bo              = accel_state->src_obj[0].bo;
318    tex_res.surface             = accel_state->src_obj[0].surface;
319    if (accel_state->src_obj[0].bpp == 8) {
320	tex_res.format              = FMT_8;
321	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
322	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
323	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
324	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
325    } else if (accel_state->src_obj[0].bpp == 16) {
326	tex_res.format              = FMT_5_6_5;
327	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
328	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
329	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
330	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
331    } else {
332	tex_res.format              = FMT_8_8_8_8;
333	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
334	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
335	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
336	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
337    }
338
339    tex_res.base_level          = 0;
340    tex_res.last_level          = 0;
341    tex_res.perf_modulation     = 0;
342    if (accel_state->src_obj[0].tiling_flags == 0)
343	tex_res.array_mode          = 0;
344    evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
345
346    tex_samp.id                 = 0;
347    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
348    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
349    tex_samp.clamp_z            = SQ_TEX_WRAP;
350    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
351    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
352    tex_samp.mc_coord_truncate  = 1;
353    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
354    tex_samp.mip_filter         = 0;			/* no mipmap */
355    evergreen_set_tex_sampler   (pScrn, &tex_samp);
356
357    cb_conf.id = 0;
358    cb_conf.w = accel_state->dst_obj.pitch;
359    cb_conf.h = accel_state->dst_obj.height;
360    cb_conf.base = accel_state->dst_obj.offset;
361    cb_conf.bo = accel_state->dst_obj.bo;
362    cb_conf.surface = accel_state->dst_obj.surface;
363    if (accel_state->dst_obj.bpp == 8) {
364	cb_conf.format = COLOR_8;
365	cb_conf.comp_swap = 3; /* A */
366    } else if (accel_state->dst_obj.bpp == 16) {
367	cb_conf.format = COLOR_5_6_5;
368	cb_conf.comp_swap = 2; /* RGB */
369    } else {
370	cb_conf.format = COLOR_8_8_8_8;
371	cb_conf.comp_swap = 1; /* ARGB */
372    }
373    cb_conf.source_format = EXPORT_4C_16BPC;
374    cb_conf.blend_clamp = 1;
375    /* Render setup */
376    if (accel_state->planemask & 0x000000ff)
377	cb_conf.pmask |= 4; /* B */
378    if (accel_state->planemask & 0x0000ff00)
379	cb_conf.pmask |= 2; /* G */
380    if (accel_state->planemask & 0x00ff0000)
381	cb_conf.pmask |= 1; /* R */
382    if (accel_state->planemask & 0xff000000)
383	cb_conf.pmask |= 8; /* A */
384    cb_conf.rop = accel_state->rop;
385    if (accel_state->dst_obj.tiling_flags == 0) {
386	cb_conf.array_mode = 0;
387	cb_conf.non_disp_tiling = 1;
388    }
389    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
390
391    evergreen_set_spi(pScrn, (1 - 1), 1);
392
393}
394
395static void
396EVERGREENDoCopy(ScrnInfoPtr pScrn)
397{
398    evergreen_finish_op(pScrn, 16);
399}
400
401static void
402EVERGREENDoCopyVline(PixmapPtr pPix)
403{
404    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
405    RADEONInfoPtr info = RADEONPTR(pScrn);
406    struct radeon_accel_state *accel_state = info->accel_state;
407
408    if (accel_state->vsync)
409	evergreen_cp_wait_vline_sync(pScrn, pPix,
410				     accel_state->vline_crtc,
411				     accel_state->vline_y1,
412				     accel_state->vline_y2);
413
414    evergreen_finish_op(pScrn, 16);
415}
416
417static void
418EVERGREENAppendCopyVertex(ScrnInfoPtr pScrn,
419			  int srcX, int srcY,
420			  int dstX, int dstY,
421			  int w, int h)
422{
423    RADEONInfoPtr info = RADEONPTR(pScrn);
424    struct radeon_accel_state *accel_state = info->accel_state;
425    float *vb;
426
427    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
428
429    vb[0] = (float)dstX;
430    vb[1] = (float)dstY;
431    vb[2] = (float)srcX;
432    vb[3] = (float)srcY;
433
434    vb[4] = (float)dstX;
435    vb[5] = (float)(dstY + h);
436    vb[6] = (float)srcX;
437    vb[7] = (float)(srcY + h);
438
439    vb[8] = (float)(dstX + w);
440    vb[9] = (float)(dstY + h);
441    vb[10] = (float)(srcX + w);
442    vb[11] = (float)(srcY + h);
443
444    radeon_vbo_commit(pScrn, &accel_state->vbo);
445}
446
447static Bool
448EVERGREENPrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
449		     int xdir, int ydir,
450		     int rop,
451		     Pixel planemask)
452{
453    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
454    RADEONInfoPtr info = RADEONPTR(pScrn);
455    struct radeon_accel_state *accel_state = info->accel_state;
456    struct r600_accel_object src_obj, dst_obj;
457
458    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
459	RADEON_FALLBACK(("EVERGREENCheckDatatype src failed\n"));
460    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
461	RADEON_FALLBACK(("EVERGREENCheckDatatype dst failed\n"));
462    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
463	RADEON_FALLBACK(("Invalid planemask\n"));
464
465    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
466    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
467
468    accel_state->same_surface = FALSE;
469
470    src_obj.offset = 0;
471    dst_obj.offset = 0;
472    src_obj.bo = radeon_get_pixmap_bo(pSrc);
473    dst_obj.bo = radeon_get_pixmap_bo(pDst);
474    dst_obj.surface = radeon_get_pixmap_surface(pDst);
475    src_obj.surface = radeon_get_pixmap_surface(pSrc);
476    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
477    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
478    if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
479	accel_state->same_surface = TRUE;
480
481    src_obj.width = pSrc->drawable.width;
482    src_obj.height = pSrc->drawable.height;
483    src_obj.bpp = pSrc->drawable.bitsPerPixel;
484    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
485
486    dst_obj.width = pDst->drawable.width;
487    dst_obj.height = pDst->drawable.height;
488    dst_obj.bpp = pDst->drawable.bitsPerPixel;
489    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
490
491    if (!R600SetAccelState(pScrn,
492			   &src_obj,
493			   NULL,
494			   &dst_obj,
495			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
496			   rop, planemask))
497	return FALSE;
498
499    if (accel_state->same_surface == TRUE) {
500	unsigned height = RADEON_ALIGN(pDst->drawable.height,
501				       drmmode_get_height_align(pScrn, accel_state->dst_obj.tiling_flags));
502	unsigned long size = height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
503
504	if (accel_state->dst_obj.surface)
505		size = accel_state->dst_obj.surface->bo_size;
506
507	if (accel_state->copy_area_bo) {
508	    radeon_bo_unref(accel_state->copy_area_bo);
509	    accel_state->copy_area_bo = NULL;
510	}
511	accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
512						   RADEON_GEM_DOMAIN_VRAM,
513						   0);
514	if (accel_state->copy_area_bo == NULL)
515	    RADEON_FALLBACK(("temp copy surface alloc failed\n"));
516
517	radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
518					  0, RADEON_GEM_DOMAIN_VRAM);
519	if (radeon_cs_space_check(info->cs)) {
520	    radeon_bo_unref(accel_state->copy_area_bo);
521	    accel_state->copy_area_bo = NULL;
522	    return FALSE;
523	}
524	accel_state->copy_area = (void*)accel_state->copy_area_bo;
525    } else
526	EVERGREENDoPrepareCopy(pScrn);
527
528    if (accel_state->vsync)
529	RADEONVlineHelperClear(pScrn);
530
531    accel_state->dst_pix = pDst;
532    accel_state->src_pix = pSrc;
533    accel_state->xdir = xdir;
534    accel_state->ydir = ydir;
535
536    return TRUE;
537}
538
539static void
540EVERGREENDoneCopy(PixmapPtr pDst)
541{
542    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
543    RADEONInfoPtr info = RADEONPTR(pScrn);
544    struct radeon_accel_state *accel_state = info->accel_state;
545
546    if (!accel_state->same_surface)
547	EVERGREENDoCopyVline(pDst);
548
549    if (accel_state->copy_area)
550	accel_state->copy_area = NULL;
551
552}
553
554static void
555EVERGREENCopy(PixmapPtr pDst,
556	      int srcX, int srcY,
557	      int dstX, int dstY,
558	      int w, int h)
559{
560    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
561    RADEONInfoPtr info = RADEONPTR(pScrn);
562    struct radeon_accel_state *accel_state = info->accel_state;
563
564    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
565	return;
566
567    if (CS_FULL(info->cs)) {
568	EVERGREENDoneCopy(info->accel_state->dst_pix);
569	radeon_cs_flush_indirect(pScrn);
570	EVERGREENPrepareCopy(accel_state->src_pix,
571			     accel_state->dst_pix,
572			     accel_state->xdir,
573			     accel_state->ydir,
574			     accel_state->rop,
575			     accel_state->planemask);
576    }
577
578    if (accel_state->vsync)
579	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
580
581    if (accel_state->same_surface && accel_state->copy_area) {
582	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
583	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
584	uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags;
585	uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags;
586	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
587	int orig_rop = accel_state->rop;
588	struct radeon_surface *orig_dst_surface = accel_state->dst_obj.surface;
589	struct radeon_surface *orig_src_surface = accel_state->src_obj[0].surface;
590
591	/* src to tmp */
592	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
593	accel_state->dst_obj.bo = accel_state->copy_area_bo;
594	accel_state->dst_obj.offset = 0;
595	accel_state->dst_obj.tiling_flags = 0;
596	accel_state->rop = 3;
597	accel_state->dst_obj.surface = NULL;
598	EVERGREENDoPrepareCopy(pScrn);
599	EVERGREENAppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
600	EVERGREENDoCopy(pScrn);
601
602	/* tmp to dst */
603	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
604	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
605	accel_state->src_obj[0].offset = 0;
606	accel_state->src_obj[0].tiling_flags = 0;
607	accel_state->src_obj[0].surface = NULL;
608	accel_state->dst_obj.domain = orig_dst_domain;
609	accel_state->dst_obj.bo = orig_bo;
610	accel_state->dst_obj.offset = 0;
611	accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags;
612	accel_state->rop = orig_rop;
613	accel_state->dst_obj.surface = orig_dst_surface;
614	EVERGREENDoPrepareCopy(pScrn);
615	EVERGREENAppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
616	EVERGREENDoCopyVline(pDst);
617
618	/* restore state */
619	accel_state->src_obj[0].domain = orig_src_domain;
620	accel_state->src_obj[0].bo = orig_bo;
621	accel_state->src_obj[0].offset = 0;
622	accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags;
623	accel_state->src_obj[0].surface = orig_src_surface;
624    } else
625	EVERGREENAppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
626
627}
628
629struct blendinfo {
630    Bool dst_alpha;
631    Bool src_alpha;
632    uint32_t blend_cntl;
633};
634
635static struct blendinfo EVERGREENBlendOp[] = {
636    /* Clear */
637    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
638    /* Src */
639    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
640    /* Dst */
641    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
642    /* Over */
643    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
644    /* OverReverse */
645    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
646    /* In */
647    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
648    /* InReverse */
649    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
650    /* Out */
651    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
652    /* OutReverse */
653    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
654    /* Atop */
655    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
656    /* AtopReverse */
657    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
658    /* Xor */
659    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
660    /* Add */
661    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
662};
663
664struct formatinfo {
665    unsigned int fmt;
666    uint32_t card_fmt;
667};
668
669static struct formatinfo EVERGREENTexFormats[] = {
670    {PICT_a8r8g8b8,	FMT_8_8_8_8},
671    {PICT_x8r8g8b8,	FMT_8_8_8_8},
672    {PICT_a8b8g8r8,	FMT_8_8_8_8},
673    {PICT_x8b8g8r8,	FMT_8_8_8_8},
674#ifdef PICT_TYPE_BGRA
675    {PICT_b8g8r8a8,	FMT_8_8_8_8},
676    {PICT_b8g8r8x8,	FMT_8_8_8_8},
677#endif
678    {PICT_r5g6b5,	FMT_5_6_5},
679    {PICT_a1r5g5b5,	FMT_1_5_5_5},
680    {PICT_x1r5g5b5,     FMT_1_5_5_5},
681    {PICT_a8,		FMT_8},
682};
683
684static uint32_t EVERGREENGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
685{
686    uint32_t sblend, dblend;
687
688    sblend = EVERGREENBlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
689    dblend = EVERGREENBlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
690
691    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
692     * it as always 1.
693     */
694    if (PICT_FORMAT_A(dst_format) == 0 && EVERGREENBlendOp[op].dst_alpha) {
695	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
696	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
697	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
698	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
699    }
700
701    /* If the source alpha is being used, then we should only be in a case where
702     * the source blend factor is 0, and the source blend value is the mask
703     * channels multiplied by the source picture's alpha.
704     */
705    if (pMask && pMask->componentAlpha && EVERGREENBlendOp[op].src_alpha) {
706	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
707	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
708	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
709	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
710	}
711    }
712
713    return sblend | dblend;
714}
715
716static Bool EVERGREENGetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
717{
718    switch (pDstPicture->format) {
719    case PICT_a8r8g8b8:
720    case PICT_x8r8g8b8:
721    case PICT_a8b8g8r8:
722    case PICT_x8b8g8r8:
723#ifdef PICT_TYPE_BGRA
724    case PICT_b8g8r8a8:
725    case PICT_b8g8r8x8:
726#endif
727	*dst_format = COLOR_8_8_8_8;
728	break;
729    case PICT_r5g6b5:
730	*dst_format = COLOR_5_6_5;
731	break;
732    case PICT_a1r5g5b5:
733    case PICT_x1r5g5b5:
734	*dst_format = COLOR_1_5_5_5;
735	break;
736    case PICT_a8:
737	*dst_format = COLOR_8;
738	break;
739    default:
740	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
741	       (int)pDstPicture->format));
742    }
743    return TRUE;
744}
745
746static Bool EVERGREENCheckCompositeTexture(PicturePtr pPict,
747					   PicturePtr pDstPict,
748					   int op,
749					   int unit)
750{
751    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
752    unsigned int i;
753
754    for (i = 0; i < sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0]); i++) {
755	if (EVERGREENTexFormats[i].fmt == pPict->format)
756	    break;
757    }
758    if (i == sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0]))
759	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
760			 (int)pPict->format));
761
762    if (pPict->filter != PictFilterNearest &&
763	pPict->filter != PictFilterBilinear)
764	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
765
766    /* for REPEAT_NONE, Render semantics are that sampling outside the source
767     * picture results in alpha=0 pixels. We can implement this with a border color
768     * *if* our source texture has an alpha channel, otherwise we need to fall
769     * back. If we're not transformed then we hope that upper layers have clipped
770     * rendering to the bounds of the source drawable, in which case it doesn't
771     * matter. I have not, however, verified that the X server always does such
772     * clipping.
773     */
774    /* FIXME evergreen */
775    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
776	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
777	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
778    }
779
780    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
781	RADEON_FALLBACK(("non-affine transforms not supported\n"));
782
783    return TRUE;
784}
785
786static void EVERGREENXFormSetup(PicturePtr pPict, PixmapPtr pPix,
787				int unit, float *vs_alu_consts)
788{
789    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
790    RADEONInfoPtr info = RADEONPTR(pScrn);
791    struct radeon_accel_state *accel_state = info->accel_state;
792    int const_offset = unit * 8;
793    int w, h;
794
795    if (pPict->pDrawable) {
796	w = pPict->pDrawable->width;
797	h = pPict->pDrawable->height;
798    } else {
799	w = 1;
800	h = 1;
801    }
802
803    if (pPict->transform != 0) {
804	accel_state->is_transform[unit] = TRUE;
805	accel_state->transform[unit] = pPict->transform;
806
807	vs_alu_consts[0 + const_offset] = xFixedToFloat(pPict->transform->matrix[0][0]);
808	vs_alu_consts[1 + const_offset] = xFixedToFloat(pPict->transform->matrix[0][1]);
809	vs_alu_consts[2 + const_offset] = xFixedToFloat(pPict->transform->matrix[0][2]);
810	vs_alu_consts[3 + const_offset] = 1.0 / w;
811
812	vs_alu_consts[4 + const_offset] = xFixedToFloat(pPict->transform->matrix[1][0]);
813	vs_alu_consts[5 + const_offset] = xFixedToFloat(pPict->transform->matrix[1][1]);
814	vs_alu_consts[6 + const_offset] = xFixedToFloat(pPict->transform->matrix[1][2]);
815	vs_alu_consts[7 + const_offset] = 1.0 / h;
816    } else {
817	accel_state->is_transform[unit] = FALSE;
818
819	vs_alu_consts[0 + const_offset] = 1.0;
820	vs_alu_consts[1 + const_offset] = 0.0;
821	vs_alu_consts[2 + const_offset] = 0.0;
822	vs_alu_consts[3 + const_offset] = 1.0 / w;
823
824	vs_alu_consts[4 + const_offset] = 0.0;
825	vs_alu_consts[5 + const_offset] = 1.0;
826	vs_alu_consts[6 + const_offset] = 0.0;
827	vs_alu_consts[7 + const_offset] = 1.0 / h;
828    }
829
830}
831
832static Bool EVERGREENTextureSetup(PicturePtr pPict, PixmapPtr pPix,
833				  int unit)
834{
835    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
836    RADEONInfoPtr info = RADEONPTR(pScrn);
837    struct radeon_accel_state *accel_state = info->accel_state;
838    unsigned int repeatType;
839    unsigned int i;
840    tex_resource_t  tex_res;
841    tex_sampler_t   tex_samp;
842    int pix_r, pix_g, pix_b, pix_a;
843
844    CLEAR (tex_res);
845    CLEAR (tex_samp);
846
847    for (i = 0; i < sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0]); i++) {
848	if (EVERGREENTexFormats[i].fmt == pPict->format)
849	    break;
850    }
851
852    /* Texture */
853    if (pPict->pDrawable) {
854	tex_res.w               = pPict->pDrawable->width;
855	tex_res.h               = pPict->pDrawable->height;
856	repeatType              = pPict->repeat ? pPict->repeatType : RepeatNone;
857    } else {
858	tex_res.w               = 1;
859	tex_res.h               = 1;
860	repeatType              = RepeatNormal;
861    }
862
863    tex_res.id                  = unit;
864    tex_res.pitch               = accel_state->src_obj[unit].pitch;
865    tex_res.depth               = 0;
866    tex_res.dim                 = SQ_TEX_DIM_2D;
867    tex_res.base                = accel_state->src_obj[unit].offset;
868    tex_res.mip_base            = accel_state->src_obj[unit].offset;
869    tex_res.size                = accel_state->src_size[unit];
870    tex_res.format              = EVERGREENTexFormats[i].card_fmt;
871    tex_res.bo                  = accel_state->src_obj[unit].bo;
872    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
873    tex_res.surface             = accel_state->src_obj[unit].surface;
874
875#if X_BYTE_ORDER == X_BIG_ENDIAN
876    switch (accel_state->src_obj[unit].bpp) {
877    case 16:
878	tex_res.endian = SQ_ENDIAN_8IN16;
879	break;
880    case 32:
881	tex_res.endian = SQ_ENDIAN_8IN32;
882	break;
883    default :
884	break;
885    }
886#endif
887
888    /* component swizzles */
889    switch (pPict->format) {
890    case PICT_a1r5g5b5:
891    case PICT_a8r8g8b8:
892	pix_r = SQ_SEL_Z; /* R */
893	pix_g = SQ_SEL_Y; /* G */
894	pix_b = SQ_SEL_X; /* B */
895	pix_a = SQ_SEL_W; /* A */
896	break;
897    case PICT_a8b8g8r8:
898	pix_r = SQ_SEL_X; /* R */
899	pix_g = SQ_SEL_Y; /* G */
900	pix_b = SQ_SEL_Z; /* B */
901	pix_a = SQ_SEL_W; /* A */
902	break;
903    case PICT_x8b8g8r8:
904	pix_r = SQ_SEL_X; /* R */
905	pix_g = SQ_SEL_Y; /* G */
906	pix_b = SQ_SEL_Z; /* B */
907	pix_a = SQ_SEL_1; /* A */
908	break;
909#ifdef PICT_TYPE_BGRA
910    case PICT_b8g8r8a8:
911	pix_r = SQ_SEL_Y; /* R */
912	pix_g = SQ_SEL_Z; /* G */
913	pix_b = SQ_SEL_W; /* B */
914	pix_a = SQ_SEL_X; /* A */
915	break;
916    case PICT_b8g8r8x8:
917	pix_r = SQ_SEL_Y; /* R */
918	pix_g = SQ_SEL_Z; /* G */
919	pix_b = SQ_SEL_W; /* B */
920	pix_a = SQ_SEL_1; /* A */
921	break;
922#endif
923    case PICT_x1r5g5b5:
924    case PICT_x8r8g8b8:
925    case PICT_r5g6b5:
926	pix_r = SQ_SEL_Z; /* R */
927	pix_g = SQ_SEL_Y; /* G */
928	pix_b = SQ_SEL_X; /* B */
929	pix_a = SQ_SEL_1; /* A */
930	break;
931    case PICT_a8:
932	pix_r = SQ_SEL_0; /* R */
933	pix_g = SQ_SEL_0; /* G */
934	pix_b = SQ_SEL_0; /* B */
935	pix_a = SQ_SEL_X; /* A */
936	break;
937    default:
938	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
939    }
940
941    if (unit == 0) {
942	if (!accel_state->msk_pic) {
943	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
944		pix_r = SQ_SEL_0;
945		pix_g = SQ_SEL_0;
946		pix_b = SQ_SEL_0;
947	    }
948
949	    if (PICT_FORMAT_A(pPict->format) == 0)
950		pix_a = SQ_SEL_1;
951	} else {
952	    if (accel_state->component_alpha) {
953		if (accel_state->src_alpha) {
954		    if (PICT_FORMAT_A(pPict->format) == 0) {
955			pix_r = SQ_SEL_1;
956			pix_g = SQ_SEL_1;
957			pix_b = SQ_SEL_1;
958			pix_a = SQ_SEL_1;
959		    } else {
960			pix_r = pix_a;
961			pix_g = pix_a;
962			pix_b = pix_a;
963		    }
964		} else {
965		    if (PICT_FORMAT_A(pPict->format) == 0)
966			pix_a = SQ_SEL_1;
967		}
968	    } else {
969		if (PICT_FORMAT_RGB(pPict->format) == 0) {
970		    pix_r = SQ_SEL_0;
971		    pix_g = SQ_SEL_0;
972		    pix_b = SQ_SEL_0;
973		}
974
975		if (PICT_FORMAT_A(pPict->format) == 0)
976		    pix_a = SQ_SEL_1;
977	    }
978	}
979    } else {
980	if (accel_state->component_alpha) {
981	    if (PICT_FORMAT_A(pPict->format) == 0)
982		pix_a = SQ_SEL_1;
983	} else {
984	    if (PICT_FORMAT_A(pPict->format) == 0) {
985		pix_r = SQ_SEL_1;
986		pix_g = SQ_SEL_1;
987		pix_b = SQ_SEL_1;
988		pix_a = SQ_SEL_1;
989	    } else {
990		pix_r = pix_a;
991		pix_g = pix_a;
992		pix_b = pix_a;
993	    }
994	}
995    }
996
997    tex_res.dst_sel_x           = pix_r; /* R */
998    tex_res.dst_sel_y           = pix_g; /* G */
999    tex_res.dst_sel_z           = pix_b; /* B */
1000    tex_res.dst_sel_w           = pix_a; /* A */
1001
1002    tex_res.base_level          = 0;
1003    tex_res.last_level          = 0;
1004    tex_res.perf_modulation     = 0;
1005    if (accel_state->src_obj[unit].tiling_flags == 0)
1006	tex_res.array_mode          = 0;
1007    evergreen_set_tex_resource  (pScrn, &tex_res, accel_state->src_obj[unit].domain);
1008
1009    tex_samp.id                 = unit;
1010    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1011
1012    switch (repeatType) {
1013    case RepeatNormal:
1014	tex_samp.clamp_x            = SQ_TEX_WRAP;
1015	tex_samp.clamp_y            = SQ_TEX_WRAP;
1016	break;
1017    case RepeatPad:
1018	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1019	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1020	break;
1021    case RepeatReflect:
1022	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1023	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1024	break;
1025    case RepeatNone:
1026	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1027	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1028	break;
1029    default:
1030	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1031    }
1032
1033    switch (pPict->filter) {
1034    case PictFilterNearest:
1035	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1036	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1037	tex_samp.mc_coord_truncate  = 1;
1038	break;
1039    case PictFilterBilinear:
1040	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1041	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1042	break;
1043    default:
1044	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1045    }
1046
1047    tex_samp.clamp_z            = SQ_TEX_WRAP;
1048    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1049    tex_samp.mip_filter         = 0;			/* no mipmap */
1050    evergreen_set_tex_sampler   (pScrn, &tex_samp);
1051
1052    return TRUE;
1053}
1054
1055static Bool EVERGREENCheckComposite(int op, PicturePtr pSrcPicture,
1056				    PicturePtr pMaskPicture,
1057				    PicturePtr pDstPicture)
1058{
1059    uint32_t tmp1;
1060    PixmapPtr pSrcPixmap, pDstPixmap;
1061
1062    /* Check for unsupported compositing operations. */
1063    if (op >= (int) (sizeof(EVERGREENBlendOp) / sizeof(EVERGREENBlendOp[0])))
1064	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1065
1066    if (pSrcPicture->pDrawable) {
1067	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1068
1069	if (pSrcPixmap->drawable.width >= 16384 ||
1070	    pSrcPixmap->drawable.height >= 16384) {
1071	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1072			     pSrcPixmap->drawable.width,
1073			     pSrcPixmap->drawable.height));
1074	}
1075
1076	if (!EVERGREENCheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1077	    return FALSE;
1078    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
1079	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1080
1081    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1082
1083    if (pDstPixmap->drawable.width >= 16384 ||
1084	pDstPixmap->drawable.height >= 16384) {
1085	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1086			 pDstPixmap->drawable.width,
1087			 pDstPixmap->drawable.height));
1088    }
1089
1090    if (pMaskPicture) {
1091	PixmapPtr pMaskPixmap;
1092
1093	if (pMaskPicture->pDrawable) {
1094	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1095
1096	    if (pMaskPixmap->drawable.width >= 16384 ||
1097		pMaskPixmap->drawable.height >= 16384) {
1098	      RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1099			       pMaskPixmap->drawable.width,
1100			       pMaskPixmap->drawable.height));
1101	    }
1102
1103	    if (pMaskPicture->componentAlpha) {
1104		/* Check if it's component alpha that relies on a source alpha and
1105		 * on the source value.  We can only get one of those into the
1106		 * single source value that we get to blend with.
1107		 */
1108		if (EVERGREENBlendOp[op].src_alpha &&
1109		    (EVERGREENBlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1110		    (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1111		    RADEON_FALLBACK(("Component alpha not supported with source "
1112				     "alpha and source value blending.\n"));
1113		}
1114	    }
1115
1116	    if (!EVERGREENCheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1117		return FALSE;
1118	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
1119	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1120    }
1121
1122    if (!EVERGREENGetDestFormat(pDstPicture, &tmp1))
1123	return FALSE;
1124
1125    return TRUE;
1126
1127}
1128
1129static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
1130				      PicturePtr pMaskPicture, PicturePtr pDstPicture,
1131				      PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1132{
1133    ScreenPtr pScreen = pDst->drawable.pScreen;
1134    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1135    RADEONInfoPtr info = RADEONPTR(pScrn);
1136    struct radeon_accel_state *accel_state = info->accel_state;
1137    uint32_t dst_format;
1138    cb_config_t cb_conf;
1139    shader_config_t vs_conf, ps_conf;
1140    const_config_t vs_const_conf;
1141    struct r600_accel_object src_obj, mask_obj, dst_obj;
1142    float *cbuf;
1143
1144    if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8))
1145	return FALSE;
1146
1147    if (!pSrc) {
1148	pSrc = RADEONSolidPixmap(pScreen, pSrcPicture->pSourcePict->solidFill.color);
1149	if (!pSrc)
1150	    RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
1151    }
1152
1153    src_obj.offset = 0;
1154    dst_obj.offset = 0;
1155    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1156    src_obj.bo = radeon_get_pixmap_bo(pSrc);
1157    dst_obj.surface = radeon_get_pixmap_surface(pDst);
1158    src_obj.surface = radeon_get_pixmap_surface(pSrc);
1159    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1160    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1161    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1162    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1163
1164    src_obj.width = pSrc->drawable.width;
1165    src_obj.height = pSrc->drawable.height;
1166    src_obj.bpp = pSrc->drawable.bitsPerPixel;
1167    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1168
1169    dst_obj.width = pDst->drawable.width;
1170    dst_obj.height = pDst->drawable.height;
1171    dst_obj.bpp = pDst->drawable.bitsPerPixel;
1172    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1173
1174    if (pMaskPicture) {
1175	if (!pMask) {
1176	    pMask = RADEONSolidPixmap(pScreen, pMaskPicture->pSourcePict->solidFill.color);
1177	    if (!pMask) {
1178		if (!pSrcPicture->pDrawable)
1179		    pScreen->DestroyPixmap(pSrc);
1180		RADEON_FALLBACK("Failed to create solid scratch pixmap\n");
1181	    }
1182	}
1183	mask_obj.offset = 0;
1184	mask_obj.bo = radeon_get_pixmap_bo(pMask);
1185	mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
1186	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1187	mask_obj.surface = radeon_get_pixmap_surface(pMask);
1188	mask_obj.width = pMask->drawable.width;
1189	mask_obj.height = pMask->drawable.height;
1190	mask_obj.bpp = pMask->drawable.bitsPerPixel;
1191	mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1192
1193	if (!R600SetAccelState(pScrn,
1194			       &src_obj,
1195			       &mask_obj,
1196			       &dst_obj,
1197			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1198			       3, 0xffffffff))
1199	    return FALSE;
1200
1201	accel_state->msk_pic = pMaskPicture;
1202	if (pMaskPicture->componentAlpha) {
1203	    accel_state->component_alpha = TRUE;
1204	    if (EVERGREENBlendOp[op].src_alpha)
1205		accel_state->src_alpha = TRUE;
1206	    else
1207		accel_state->src_alpha = FALSE;
1208	} else {
1209	    accel_state->component_alpha = FALSE;
1210	    accel_state->src_alpha = FALSE;
1211	}
1212    } else {
1213	if (!R600SetAccelState(pScrn,
1214			       &src_obj,
1215			       NULL,
1216			       &dst_obj,
1217			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1218			       3, 0xffffffff))
1219	    return FALSE;
1220
1221	accel_state->msk_pic = NULL;
1222	accel_state->component_alpha = FALSE;
1223	accel_state->src_alpha = FALSE;
1224    }
1225
1226    if (!EVERGREENGetDestFormat(pDstPicture, &dst_format))
1227	return FALSE;
1228
1229    CLEAR (cb_conf);
1230    CLEAR (vs_conf);
1231    CLEAR (ps_conf);
1232    CLEAR (vs_const_conf);
1233
1234    if (pMask)
1235        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1236    else
1237        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1238
1239    radeon_vbo_check(pScrn, &accel_state->cbuf, 256);
1240
1241    radeon_cp_start(pScrn);
1242
1243    evergreen_set_default_state(pScrn);
1244
1245    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1246    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1247    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1248
1249    if (!EVERGREENTextureSetup(pSrcPicture, pSrc, 0)) {
1250        radeon_ib_discard(pScrn);
1251        radeon_cs_flush_indirect(pScrn);
1252        return FALSE;
1253    }
1254
1255    if (pMask) {
1256        if (!EVERGREENTextureSetup(pMaskPicture, pMask, 1)) {
1257	    radeon_ib_discard(pScrn);
1258	    radeon_cs_flush_indirect(pScrn);
1259            return FALSE;
1260        }
1261    } else
1262        accel_state->is_transform[1] = FALSE;
1263
1264    if (pMask) {
1265	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0));
1266	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
1267    } else {
1268	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0));
1269	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
1270    }
1271
1272    /* Shader */
1273    vs_conf.shader_addr         = accel_state->vs_mc_addr;
1274    vs_conf.shader_size         = accel_state->vs_size;
1275    vs_conf.num_gprs            = 5;
1276    vs_conf.stack_size          = 1;
1277    vs_conf.bo                  = accel_state->shaders_bo;
1278    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1279
1280    ps_conf.shader_addr         = accel_state->ps_mc_addr;
1281    ps_conf.shader_size         = accel_state->ps_size;
1282    ps_conf.num_gprs            = 3;
1283    ps_conf.stack_size          = 1;
1284    ps_conf.clamp_consts        = 0;
1285    ps_conf.export_mode         = 2;
1286    ps_conf.bo                  = accel_state->shaders_bo;
1287    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1288
1289    cb_conf.id = 0;
1290    cb_conf.w = accel_state->dst_obj.pitch;
1291    cb_conf.h = accel_state->dst_obj.height;
1292    cb_conf.base = accel_state->dst_obj.offset;
1293    cb_conf.format = dst_format;
1294    cb_conf.bo = accel_state->dst_obj.bo;
1295    cb_conf.surface = accel_state->dst_obj.surface;
1296
1297    switch (pDstPicture->format) {
1298    case PICT_a8r8g8b8:
1299    case PICT_x8r8g8b8:
1300    case PICT_a1r5g5b5:
1301    case PICT_x1r5g5b5:
1302    default:
1303	cb_conf.comp_swap = 1; /* ARGB */
1304	break;
1305    case PICT_a8b8g8r8:
1306    case PICT_x8b8g8r8:
1307	cb_conf.comp_swap = 0; /* ABGR */
1308	break;
1309#ifdef PICT_TYPE_BGRA
1310    case PICT_b8g8r8a8:
1311    case PICT_b8g8r8x8:
1312	cb_conf.comp_swap = 3; /* BGRA */
1313	break;
1314#endif
1315    case PICT_r5g6b5:
1316	cb_conf.comp_swap = 2; /* RGB */
1317	break;
1318    case PICT_a8:
1319	cb_conf.comp_swap = 3; /* A */
1320	break;
1321    }
1322    cb_conf.source_format = EXPORT_4C_16BPC;
1323    cb_conf.blend_clamp = 1;
1324    cb_conf.blendcntl = EVERGREENGetBlendCntl(op, pMaskPicture, pDstPicture->format);
1325    cb_conf.blendcntl |= CB_BLEND0_CONTROL__ENABLE_bit;
1326    cb_conf.rop = 3;
1327    cb_conf.pmask = 0xf;
1328    if (accel_state->dst_obj.tiling_flags == 0) {
1329	cb_conf.array_mode = 0;
1330	cb_conf.non_disp_tiling = 1;
1331    }
1332#if X_BYTE_ORDER == X_BIG_ENDIAN
1333    switch (dst_obj.bpp) {
1334    case 16:
1335	cb_conf.endian = ENDIAN_8IN16;
1336	break;
1337    case 32:
1338	cb_conf.endian = ENDIAN_8IN32;
1339	break;
1340    default:
1341	break;
1342    }
1343#endif
1344    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
1345
1346    if (pMask)
1347	evergreen_set_spi(pScrn, (2 - 1), 2);
1348    else
1349	evergreen_set_spi(pScrn, (1 - 1), 1);
1350
1351    /* VS alu constants */
1352    vs_const_conf.size_bytes = 256;
1353    vs_const_conf.type = SHADER_TYPE_VS;
1354    cbuf = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
1355    vs_const_conf.bo = accel_state->cbuf.vb_bo;
1356    vs_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_offset;
1357
1358    vs_const_conf.cpu_ptr = (uint32_t *)(char *)cbuf;
1359    EVERGREENXFormSetup(pSrcPicture, pSrc, 0, cbuf);
1360    if (pMask)
1361        EVERGREENXFormSetup(pMaskPicture, pMask, 1, cbuf);
1362
1363    radeon_vbo_commit(pScrn, &accel_state->cbuf);
1364    evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);
1365
1366    if (accel_state->vsync)
1367	RADEONVlineHelperClear(pScrn);
1368
1369    accel_state->composite_op = op;
1370    accel_state->dst_pic = pDstPicture;
1371    accel_state->src_pic = pSrcPicture;
1372    accel_state->dst_pix = pDst;
1373    accel_state->msk_pix = pMask;
1374    accel_state->src_pix = pSrc;
1375
1376    return TRUE;
1377}
1378
1379static void EVERGREENFinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst,
1380				     struct radeon_accel_state *accel_state)
1381{
1382    int vtx_size;
1383
1384    if (accel_state->vsync)
1385       evergreen_cp_wait_vline_sync(pScrn, pDst,
1386				    accel_state->vline_crtc,
1387				    accel_state->vline_y1,
1388				    accel_state->vline_y2);
1389
1390    vtx_size = accel_state->msk_pic ? 24 : 16;
1391
1392    evergreen_finish_op(pScrn, vtx_size);
1393}
1394
1395static void EVERGREENDoneComposite(PixmapPtr pDst)
1396{
1397    ScreenPtr pScreen = pDst->drawable.pScreen;
1398    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1399    RADEONInfoPtr info = RADEONPTR(pScrn);
1400    struct radeon_accel_state *accel_state = info->accel_state;
1401
1402    EVERGREENFinishComposite(pScrn, pDst, accel_state);
1403
1404    if (!accel_state->src_pic->pDrawable)
1405	pScreen->DestroyPixmap(accel_state->src_pix);
1406
1407    if (accel_state->msk_pic && !accel_state->msk_pic->pDrawable)
1408	pScreen->DestroyPixmap(accel_state->msk_pix);
1409}
1410
1411static void EVERGREENComposite(PixmapPtr pDst,
1412			       int srcX, int srcY,
1413			       int maskX, int maskY,
1414			       int dstX, int dstY,
1415			       int w, int h)
1416{
1417    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1418    RADEONInfoPtr info = RADEONPTR(pScrn);
1419    struct radeon_accel_state *accel_state = info->accel_state;
1420    float *vb;
1421
1422    if (CS_FULL(info->cs)) {
1423	EVERGREENFinishComposite(pScrn, pDst, info->accel_state);
1424	radeon_cs_flush_indirect(pScrn);
1425	EVERGREENPrepareComposite(info->accel_state->composite_op,
1426				  info->accel_state->src_pic,
1427				  info->accel_state->msk_pic,
1428				  info->accel_state->dst_pic,
1429				  info->accel_state->src_pix,
1430				  info->accel_state->msk_pix,
1431				  info->accel_state->dst_pix);
1432    }
1433
1434    if (accel_state->vsync)
1435	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1436
1437    if (accel_state->msk_pic) {
1438
1439	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
1440
1441	vb[0] = (float)dstX;
1442	vb[1] = (float)dstY;
1443	vb[2] = (float)srcX;
1444	vb[3] = (float)srcY;
1445	vb[4] = (float)maskX;
1446	vb[5] = (float)maskY;
1447
1448	vb[6] = (float)dstX;
1449	vb[7] = (float)(dstY + h);
1450	vb[8] = (float)srcX;
1451	vb[9] = (float)(srcY + h);
1452	vb[10] = (float)maskX;
1453	vb[11] = (float)(maskY + h);
1454
1455	vb[12] = (float)(dstX + w);
1456	vb[13] = (float)(dstY + h);
1457	vb[14] = (float)(srcX + w);
1458	vb[15] = (float)(srcY + h);
1459	vb[16] = (float)(maskX + w);
1460	vb[17] = (float)(maskY + h);
1461
1462	radeon_vbo_commit(pScrn, &accel_state->vbo);
1463
1464    } else {
1465
1466	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
1467
1468	vb[0] = (float)dstX;
1469	vb[1] = (float)dstY;
1470	vb[2] = (float)srcX;
1471	vb[3] = (float)srcY;
1472
1473	vb[4] = (float)dstX;
1474	vb[5] = (float)(dstY + h);
1475	vb[6] = (float)srcX;
1476	vb[7] = (float)(srcY + h);
1477
1478	vb[8] = (float)(dstX + w);
1479	vb[9] = (float)(dstY + h);
1480	vb[10] = (float)(srcX + w);
1481	vb[11] = (float)(srcY + h);
1482
1483	radeon_vbo_commit(pScrn, &accel_state->vbo);
1484    }
1485
1486
1487}
1488
1489static Bool
1490EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1491			char *src, int src_pitch)
1492{
1493    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1494    RADEONInfoPtr info = RADEONPTR(pScrn);
1495    struct radeon_accel_state *accel_state = info->accel_state;
1496    struct radeon_exa_pixmap_priv *driver_priv;
1497    struct radeon_bo *scratch = NULL;
1498    struct radeon_bo *copy_dst;
1499    unsigned char *dst;
1500    unsigned size;
1501    uint32_t dst_domain;
1502    int bpp = pDst->drawable.bitsPerPixel;
1503    uint32_t scratch_pitch;
1504    uint32_t copy_pitch;
1505    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
1506    int ret;
1507    Bool flush = TRUE;
1508    Bool r;
1509    int i;
1510    struct r600_accel_object src_obj, dst_obj;
1511    uint32_t height, base_align;
1512
1513    if (bpp < 8)
1514	return FALSE;
1515
1516    driver_priv = exaGetPixmapDriverPrivate(pDst);
1517    if (!driver_priv || !driver_priv->bo)
1518	return FALSE;
1519
1520    /* If we know the BO won't be busy, don't bother with a scratch */
1521    copy_dst = driver_priv->bo;
1522    copy_pitch = pDst->devKind;
1523    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1524	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1525	    flush = FALSE;
1526	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
1527		goto copy;
1528	}
1529    }
1530
1531    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1532    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1533    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1534    size = scratch_pitch * height * (bpp / 8);
1535    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1536    if (scratch == NULL) {
1537	goto copy;
1538    }
1539
1540    src_obj.pitch = scratch_pitch;
1541    src_obj.width = w;
1542    src_obj.height = h;
1543    src_obj.offset = 0;
1544    src_obj.bpp = bpp;
1545    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1546    src_obj.bo = scratch;
1547    src_obj.tiling_flags = 0;
1548    src_obj.surface = NULL;
1549
1550    dst_obj.pitch = dst_pitch_hw;
1551    dst_obj.width = pDst->drawable.width;
1552    dst_obj.height = pDst->drawable.height;
1553    dst_obj.offset = 0;
1554    dst_obj.bpp = bpp;
1555    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1556    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1557    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1558    dst_obj.surface = radeon_get_pixmap_surface(pDst);
1559
1560    if (!R600SetAccelState(pScrn,
1561			   &src_obj,
1562			   NULL,
1563			   &dst_obj,
1564			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1565			   3, 0xffffffff)) {
1566        goto copy;
1567    }
1568    copy_dst = scratch;
1569    copy_pitch = scratch_pitch * (bpp / 8);
1570    flush = FALSE;
1571
1572copy:
1573    if (flush)
1574	radeon_cs_flush_indirect(pScrn);
1575
1576    ret = radeon_bo_map(copy_dst, 0);
1577    if (ret) {
1578        r = FALSE;
1579        goto out;
1580    }
1581    r = TRUE;
1582    size = w * bpp / 8;
1583    dst = copy_dst->ptr;
1584    if (copy_dst == driver_priv->bo)
1585	dst += y * copy_pitch + x * bpp / 8;
1586    for (i = 0; i < h; i++) {
1587	memcpy(dst + i * copy_pitch, src, size);
1588        src += src_pitch;
1589    }
1590    radeon_bo_unmap(copy_dst);
1591
1592    if (copy_dst == scratch) {
1593	if (info->accel_state->vsync)
1594	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1595
1596	/* blit from gart to vram */
1597	EVERGREENDoPrepareCopy(pScrn);
1598	EVERGREENAppendCopyVertex(pScrn, 0, 0, x, y, w, h);
1599	EVERGREENDoCopyVline(pDst);
1600    }
1601
1602out:
1603    if (scratch)
1604	radeon_bo_unref(scratch);
1605    return r;
1606}
1607
1608static Bool
1609EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w,
1610			    int h, char *dst, int dst_pitch)
1611{
1612    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
1613    RADEONInfoPtr info = RADEONPTR(pScrn);
1614    struct radeon_accel_state *accel_state = info->accel_state;
1615    struct radeon_exa_pixmap_priv *driver_priv;
1616    struct radeon_bo *scratch = NULL;
1617    struct radeon_bo *copy_src;
1618    unsigned size;
1619    uint32_t src_domain = 0;
1620    int bpp = pSrc->drawable.bitsPerPixel;
1621    uint32_t scratch_pitch;
1622    uint32_t copy_pitch;
1623    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
1624    int ret;
1625    Bool flush = FALSE;
1626    Bool r;
1627    struct r600_accel_object src_obj, dst_obj;
1628    uint32_t height, base_align;
1629
1630    if (bpp < 8)
1631	return FALSE;
1632
1633    driver_priv = exaGetPixmapDriverPrivate(pSrc);
1634    if (!driver_priv || !driver_priv->bo)
1635	return FALSE;
1636
1637    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
1638    copy_src = driver_priv->bo;
1639    copy_pitch = pSrc->devKind;
1640    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1641	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1642	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
1643	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
1644		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
1645		src_domain = 0;
1646	    else /* A write may be scheduled */
1647		flush = TRUE;
1648	}
1649
1650	if (!src_domain)
1651	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
1652
1653	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
1654	    goto copy;
1655
1656    }
1657
1658    if (!accel_state->allowHWDFS)
1659	goto copy;
1660
1661    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1662    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1663    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1664    size = scratch_pitch * height * (bpp / 8);
1665    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1666    if (scratch == NULL) {
1667	goto copy;
1668    }
1669    radeon_cs_space_reset_bos(info->cs);
1670    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
1671				      RADEON_GEM_DOMAIN_VRAM, 0);
1672    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
1673    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
1674    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1675    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
1676    ret = radeon_cs_space_check(info->cs);
1677    if (ret) {
1678	goto copy;
1679    }
1680
1681    src_obj.pitch = src_pitch_hw;
1682    src_obj.width = pSrc->drawable.width;
1683    src_obj.height = pSrc->drawable.height;
1684    src_obj.offset = 0;
1685    src_obj.bpp = bpp;
1686    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1687    src_obj.bo = radeon_get_pixmap_bo(pSrc);
1688    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1689    src_obj.surface = radeon_get_pixmap_surface(pSrc);
1690
1691    dst_obj.pitch = scratch_pitch;
1692    dst_obj.width = w;
1693    dst_obj.height = h;
1694    dst_obj.offset = 0;
1695    dst_obj.bo = scratch;
1696    dst_obj.bpp = bpp;
1697    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1698    dst_obj.tiling_flags = 0;
1699    dst_obj.surface = NULL;
1700
1701    if (!R600SetAccelState(pScrn,
1702			   &src_obj,
1703			   NULL,
1704			   &dst_obj,
1705			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1706			   3, 0xffffffff)) {
1707	goto copy;
1708    }
1709
1710    /* blit from vram to gart */
1711    EVERGREENDoPrepareCopy(pScrn);
1712    EVERGREENAppendCopyVertex(pScrn, x, y, 0, 0, w, h);
1713    EVERGREENDoCopy(pScrn);
1714    copy_src = scratch;
1715    copy_pitch = scratch_pitch * (bpp / 8);
1716    flush = TRUE;
1717
1718copy:
1719    if (flush)
1720	radeon_cs_flush_indirect(pScrn);
1721
1722    ret = radeon_bo_map(copy_src, 0);
1723    if (ret) {
1724	ErrorF("failed to map pixmap: %d\n", ret);
1725        r = FALSE;
1726        goto out;
1727    }
1728    r = TRUE;
1729    w *= bpp / 8;
1730    if (copy_src == driver_priv->bo)
1731	size = y * copy_pitch + x * bpp / 8;
1732    else
1733	size = 0;
1734    while (h--) {
1735	memcpy(dst, copy_src->ptr + size, w);
1736	size += copy_pitch;
1737        dst += dst_pitch;
1738    }
1739    radeon_bo_unmap(copy_src);
1740out:
1741    if (scratch)
1742	radeon_bo_unref(scratch);
1743    return r;
1744}
1745
1746static int
1747EVERGREENMarkSync(ScreenPtr pScreen)
1748{
1749    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1750    RADEONInfoPtr info = RADEONPTR(pScrn);
1751    struct radeon_accel_state *accel_state = info->accel_state;
1752
1753    return ++accel_state->exaSyncMarker;
1754
1755}
1756
1757static void
1758EVERGREENSync(ScreenPtr pScreen, int marker)
1759{
1760    return;
1761}
1762
1763static Bool
1764EVERGREENAllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
1765{
1766    RADEONInfoPtr info = RADEONPTR(pScrn);
1767    struct radeon_accel_state *accel_state = info->accel_state;
1768
1769    /* 512 bytes per shader for now */
1770    int size = 512 * 9;
1771
1772    accel_state->shaders = NULL;
1773
1774    accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
1775					     RADEON_GEM_DOMAIN_VRAM, 0);
1776    if (accel_state->shaders_bo == NULL) {
1777	ErrorF("Allocating shader failed\n");
1778	return FALSE;
1779    }
1780    return TRUE;
1781}
1782
1783static Bool
1784EVERGREENLoadShaders(ScrnInfoPtr pScrn)
1785{
1786    RADEONInfoPtr info = RADEONPTR(pScrn);
1787    struct radeon_accel_state *accel_state = info->accel_state;
1788    RADEONChipFamily ChipSet = info->ChipFamily;
1789    uint32_t *shader;
1790    int ret;
1791
1792    ret = radeon_bo_map(accel_state->shaders_bo, 1);
1793    if (ret) {
1794	FatalError("failed to map shader %d\n", ret);
1795	return FALSE;
1796    }
1797    shader = accel_state->shaders_bo->ptr;
1798
1799    /*  solid vs --------------------------------------- */
1800    accel_state->solid_vs_offset = 0;
1801    evergreen_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
1802
1803    /*  solid ps --------------------------------------- */
1804    accel_state->solid_ps_offset = 512;
1805    evergreen_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
1806
1807    /*  copy vs --------------------------------------- */
1808    accel_state->copy_vs_offset = 1024;
1809    evergreen_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
1810
1811    /*  copy ps --------------------------------------- */
1812    accel_state->copy_ps_offset = 1536;
1813    evergreen_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
1814
1815    /*  comp vs --------------------------------------- */
1816    accel_state->comp_vs_offset = 2048;
1817    evergreen_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
1818
1819    /*  comp ps --------------------------------------- */
1820    accel_state->comp_ps_offset = 2560;
1821    evergreen_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
1822
1823    /*  xv vs --------------------------------------- */
1824    accel_state->xv_vs_offset = 3072;
1825    evergreen_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
1826
1827    /*  xv ps --------------------------------------- */
1828    accel_state->xv_ps_offset = 3584;
1829    evergreen_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
1830
1831    radeon_bo_unmap(accel_state->shaders_bo);
1832
1833    return TRUE;
1834}
1835
1836static Bool
1837CAYMANLoadShaders(ScrnInfoPtr pScrn)
1838{
1839    RADEONInfoPtr info = RADEONPTR(pScrn);
1840    struct radeon_accel_state *accel_state = info->accel_state;
1841    RADEONChipFamily ChipSet = info->ChipFamily;
1842    uint32_t *shader;
1843    int ret;
1844
1845    ret = radeon_bo_map(accel_state->shaders_bo, 1);
1846    if (ret) {
1847	FatalError("failed to map shader %d\n", ret);
1848	return FALSE;
1849    }
1850    shader = accel_state->shaders_bo->ptr;
1851
1852    /*  solid vs --------------------------------------- */
1853    accel_state->solid_vs_offset = 0;
1854    cayman_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
1855
1856    /*  solid ps --------------------------------------- */
1857    accel_state->solid_ps_offset = 512;
1858    cayman_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
1859
1860    /*  copy vs --------------------------------------- */
1861    accel_state->copy_vs_offset = 1024;
1862    cayman_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
1863
1864    /*  copy ps --------------------------------------- */
1865    accel_state->copy_ps_offset = 1536;
1866    cayman_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
1867
1868    /*  comp vs --------------------------------------- */
1869    accel_state->comp_vs_offset = 2048;
1870    cayman_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
1871
1872    /*  comp ps --------------------------------------- */
1873    accel_state->comp_ps_offset = 2560;
1874    cayman_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
1875
1876    /*  xv vs --------------------------------------- */
1877    accel_state->xv_vs_offset = 3072;
1878    cayman_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
1879
1880    /*  xv ps --------------------------------------- */
1881    accel_state->xv_ps_offset = 3584;
1882    cayman_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
1883
1884    radeon_bo_unmap(accel_state->shaders_bo);
1885
1886    return TRUE;
1887}
1888
1889Bool
1890EVERGREENDrawInit(ScreenPtr pScreen)
1891{
1892    ScrnInfoPtr pScrn =  xf86ScreenToScrn(pScreen);
1893    RADEONInfoPtr info   = RADEONPTR(pScrn);
1894
1895    if (info->accel_state->exa == NULL) {
1896	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
1897	return FALSE;
1898    }
1899
1900    /* accel requires kms */
1901    if (!info->cs)
1902	return FALSE;
1903
1904    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
1905    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
1906
1907    info->accel_state->exa->PrepareSolid = EVERGREENPrepareSolid;
1908    info->accel_state->exa->Solid = EVERGREENSolid;
1909    info->accel_state->exa->DoneSolid = EVERGREENDoneSolid;
1910
1911    info->accel_state->exa->PrepareCopy = EVERGREENPrepareCopy;
1912    info->accel_state->exa->Copy = EVERGREENCopy;
1913    info->accel_state->exa->DoneCopy = EVERGREENDoneCopy;
1914
1915    info->accel_state->exa->MarkSync = EVERGREENMarkSync;
1916    info->accel_state->exa->WaitMarker = EVERGREENSync;
1917
1918    info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
1919    info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
1920    info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
1921    info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
1922    info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
1923    info->accel_state->exa->UploadToScreen = EVERGREENUploadToScreen;
1924    info->accel_state->exa->DownloadFromScreen = EVERGREENDownloadFromScreen;
1925#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5)
1926    info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
1927#endif
1928
1929    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
1930#ifdef EXA_SUPPORTS_PREPARE_AUX
1931    info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
1932#endif
1933
1934#ifdef EXA_HANDLES_PIXMAPS
1935    info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
1936#ifdef EXA_MIXED_PIXMAPS
1937    info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
1938#endif
1939#endif
1940    info->accel_state->exa->pixmapOffsetAlign = 256;
1941    info->accel_state->exa->pixmapPitchAlign = 256;
1942
1943    info->accel_state->exa->CheckComposite = EVERGREENCheckComposite;
1944    info->accel_state->exa->PrepareComposite = EVERGREENPrepareComposite;
1945    info->accel_state->exa->Composite = EVERGREENComposite;
1946    info->accel_state->exa->DoneComposite = EVERGREENDoneComposite;
1947
1948#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
1949    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
1950
1951    info->accel_state->exa->maxPitchBytes = 32768;
1952    info->accel_state->exa->maxX = 8192;
1953#else
1954    info->accel_state->exa->maxX = 8192;
1955#endif
1956    info->accel_state->exa->maxY = 8192;
1957
1958    /* not supported yet */
1959    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
1960	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
1961	info->accel_state->vsync = TRUE;
1962    } else
1963	info->accel_state->vsync = FALSE;
1964
1965    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
1966	free(info->accel_state->exa);
1967	return FALSE;
1968    }
1969
1970    info->accel_state->XInited3D = FALSE;
1971    info->accel_state->copy_area = NULL;
1972    info->accel_state->src_obj[0].bo = NULL;
1973    info->accel_state->src_obj[1].bo = NULL;
1974    info->accel_state->dst_obj.bo = NULL;
1975    info->accel_state->copy_area_bo = NULL;
1976    info->accel_state->vbo.vb_start_op = -1;
1977    info->accel_state->cbuf.vb_start_op = -1;
1978    info->accel_state->finish_op = evergreen_finish_op;
1979    info->accel_state->vbo.verts_per_op = 3;
1980    info->accel_state->cbuf.verts_per_op = 1;
1981    RADEONVlineHelperClear(pScrn);
1982
1983    radeon_vbo_init_lists(pScrn);
1984
1985    if (!EVERGREENAllocShaders(pScrn, pScreen))
1986	return FALSE;
1987
1988    if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) {
1989	if (!CAYMANLoadShaders(pScrn))
1990	    return FALSE;
1991    } else {
1992	if (!EVERGREENLoadShaders(pScrn))
1993	    return FALSE;
1994    }
1995
1996    exaMarkSync(pScreen);
1997
1998    return TRUE;
1999
2000}
2001
2002#endif
2003