evergreen_exa.c revision 8bf5c682
1/*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27#ifdef HAVE_CONFIG_H
28#include "config.h"
29#endif
30
31#include "xf86.h"
32
33#include "exa.h"
34
35#include "radeon.h"
36#include "radeon_reg.h"
37#include "evergreen_shader.h"
38#include "evergreen_reg.h"
39#include "evergreen_state.h"
40#include "radeon_exa_shared.h"
41#include "radeon_vbo.h"
42
43extern int cayman_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs);
44extern int cayman_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps);
45
46extern int cayman_copy_vs(RADEONChipFamily ChipSet, uint32_t* vs);
47extern int cayman_copy_ps(RADEONChipFamily ChipSet, uint32_t* ps);
48
49extern int cayman_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader);
50extern int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader);
51
52extern int cayman_comp_vs(RADEONChipFamily ChipSet, uint32_t* vs);
53extern int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* ps);
54
55static Bool
56EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
57{
58    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
59    RADEONInfoPtr info = RADEONPTR(pScrn);
60    struct radeon_accel_state *accel_state = info->accel_state;
61    cb_config_t     cb_conf;
62    shader_config_t vs_conf, ps_conf;
63    uint32_t a, r, g, b;
64    float *ps_alu_consts;
65    const_config_t ps_const_conf;
66    struct r600_accel_object dst;
67
68
69    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
70	RADEON_FALLBACK(("EVERGREENCheckDatatype failed\n"));
71    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
72	RADEON_FALLBACK(("invalid planemask\n"));
73
74    dst.bo = radeon_get_pixmap_bo(pPix);
75    dst.tiling_flags = radeon_get_pixmap_tiling(pPix);
76    dst.surface = radeon_get_pixmap_surface(pPix);
77
78    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
79    dst.width = pPix->drawable.width;
80    dst.height = pPix->drawable.height;
81    dst.bpp = pPix->drawable.bitsPerPixel;
82    dst.domain = RADEON_GEM_DOMAIN_VRAM;
83
84    if (!R600SetAccelState(pScrn,
85			   NULL,
86			   NULL,
87			   &dst,
88			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
89			   alu, pm))
90	return FALSE;
91
92    CLEAR (cb_conf);
93    CLEAR (vs_conf);
94    CLEAR (ps_conf);
95    CLEAR (ps_const_conf);
96
97    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
98    radeon_vbo_check(pScrn, &accel_state->cbuf, 256);
99    radeon_cp_start(pScrn);
100
101    evergreen_set_default_state(pScrn);
102
103    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
104    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
105    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
106
107    /* Shader */
108    vs_conf.shader_addr         = accel_state->vs_mc_addr;
109    vs_conf.shader_size         = accel_state->vs_size;
110    vs_conf.num_gprs            = 2;
111    vs_conf.stack_size          = 0;
112    vs_conf.bo                  = accel_state->shaders_bo;
113    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
114
115    ps_conf.shader_addr         = accel_state->ps_mc_addr;
116    ps_conf.shader_size         = accel_state->ps_size;
117    ps_conf.num_gprs            = 1;
118    ps_conf.stack_size          = 0;
119    ps_conf.clamp_consts        = 0;
120    ps_conf.export_mode         = 2;
121    ps_conf.bo                  = accel_state->shaders_bo;
122    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
123
124    cb_conf.id = 0;
125    cb_conf.w = accel_state->dst_obj.pitch;
126    cb_conf.h = accel_state->dst_obj.height;
127    cb_conf.base = 0;
128    cb_conf.bo = accel_state->dst_obj.bo;
129    cb_conf.surface = accel_state->dst_obj.surface;
130
131    if (accel_state->dst_obj.bpp == 8) {
132	cb_conf.format = COLOR_8;
133	cb_conf.comp_swap = 3; /* A */
134    } else if (accel_state->dst_obj.bpp == 16) {
135	cb_conf.format = COLOR_5_6_5;
136	cb_conf.comp_swap = 2; /* RGB */
137#if X_BYTE_ORDER == X_BIG_ENDIAN
138	cb_conf.endian = ENDIAN_8IN16;
139#endif
140    } else {
141	cb_conf.format = COLOR_8_8_8_8;
142	cb_conf.comp_swap = 1; /* ARGB */
143#if X_BYTE_ORDER == X_BIG_ENDIAN
144	cb_conf.endian = ENDIAN_8IN32;
145#endif
146    }
147    cb_conf.source_format = EXPORT_4C_16BPC;
148    cb_conf.blend_clamp = 1;
149    /* Render setup */
150    if (accel_state->planemask & 0x000000ff)
151	cb_conf.pmask |= 4; /* B */
152    if (accel_state->planemask & 0x0000ff00)
153	cb_conf.pmask |= 2; /* G */
154    if (accel_state->planemask & 0x00ff0000)
155	cb_conf.pmask |= 1; /* R */
156    if (accel_state->planemask & 0xff000000)
157	cb_conf.pmask |= 8; /* A */
158    cb_conf.rop = accel_state->rop;
159    if ((accel_state->dst_obj.tiling_flags & RADEON_TILING_MASK) ==
160	RADEON_TILING_LINEAR) {
161	cb_conf.array_mode = 0;
162	cb_conf.non_disp_tiling = 1;
163    }
164    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
165
166    evergreen_set_spi(pScrn, 0, 0);
167
168    /* PS alu constants */
169    ps_const_conf.size_bytes = 256;
170    ps_const_conf.type = SHADER_TYPE_PS;
171    ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
172    ps_const_conf.bo = accel_state->cbuf.vb_bo;
173    ps_const_conf.const_addr = accel_state->cbuf.vb_offset;
174    ps_const_conf.cpu_ptr = (uint32_t *)(char *)ps_alu_consts;
175    if (accel_state->dst_obj.bpp == 16) {
176	r = (fg >> 11) & 0x1f;
177	g = (fg >> 5) & 0x3f;
178	b = (fg >> 0) & 0x1f;
179	ps_alu_consts[0] = (float)r / 31; /* R */
180	ps_alu_consts[1] = (float)g / 63; /* G */
181	ps_alu_consts[2] = (float)b / 31; /* B */
182	ps_alu_consts[3] = 1.0; /* A */
183    } else if (accel_state->dst_obj.bpp == 8) {
184	a = (fg >> 0) & 0xff;
185	ps_alu_consts[0] = 0.0; /* R */
186	ps_alu_consts[1] = 0.0; /* G */
187	ps_alu_consts[2] = 0.0; /* B */
188	ps_alu_consts[3] = (float)a / 255; /* A */
189    } else {
190	a = (fg >> 24) & 0xff;
191	r = (fg >> 16) & 0xff;
192	g = (fg >> 8) & 0xff;
193	b = (fg >> 0) & 0xff;
194	ps_alu_consts[0] = (float)r / 255; /* R */
195	ps_alu_consts[1] = (float)g / 255; /* G */
196	ps_alu_consts[2] = (float)b / 255; /* B */
197	ps_alu_consts[3] = (float)a / 255; /* A */
198    }
199    radeon_vbo_commit(pScrn, &accel_state->cbuf);
200    evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);
201
202    if (accel_state->vsync)
203	RADEONVlineHelperClear(pScrn);
204
205    accel_state->dst_pix = pPix;
206    accel_state->fg = fg;
207
208    return TRUE;
209}
210
211static void
212EVERGREENDoneSolid(PixmapPtr pPix)
213{
214    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
215    RADEONInfoPtr info = RADEONPTR(pScrn);
216    struct radeon_accel_state *accel_state = info->accel_state;
217
218    if (accel_state->vsync)
219	evergreen_cp_wait_vline_sync(pScrn, pPix,
220				     accel_state->vline_crtc,
221				     accel_state->vline_y1,
222				     accel_state->vline_y2);
223
224    evergreen_finish_op(pScrn, 8);
225}
226
227static void
228EVERGREENSolid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
229{
230    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
231    RADEONInfoPtr info = RADEONPTR(pScrn);
232    struct radeon_accel_state *accel_state = info->accel_state;
233    float *vb;
234
235    if (CS_FULL(info->cs)) {
236	EVERGREENDoneSolid(info->accel_state->dst_pix);
237	radeon_cs_flush_indirect(pScrn);
238	EVERGREENPrepareSolid(accel_state->dst_pix,
239			      accel_state->rop,
240			      accel_state->planemask,
241			      accel_state->fg);
242    }
243
244    if (accel_state->vsync)
245	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
246
247    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
248
249    vb[0] = (float)x1;
250    vb[1] = (float)y1;
251
252    vb[2] = (float)x1;
253    vb[3] = (float)y2;
254
255    vb[4] = (float)x2;
256    vb[5] = (float)y2;
257
258    radeon_vbo_commit(pScrn, &accel_state->vbo);
259}
260
261static void
262EVERGREENDoPrepareCopy(ScrnInfoPtr pScrn)
263{
264    RADEONInfoPtr info = RADEONPTR(pScrn);
265    struct radeon_accel_state *accel_state = info->accel_state;
266    cb_config_t     cb_conf;
267    tex_resource_t  tex_res;
268    tex_sampler_t   tex_samp;
269    shader_config_t vs_conf, ps_conf;
270
271    CLEAR (cb_conf);
272    CLEAR (tex_res);
273    CLEAR (tex_samp);
274    CLEAR (vs_conf);
275    CLEAR (ps_conf);
276
277    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
278    radeon_cp_start(pScrn);
279
280    evergreen_set_default_state(pScrn);
281
282    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
283    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
284    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
285
286    /* Shader */
287    vs_conf.shader_addr         = accel_state->vs_mc_addr;
288    vs_conf.shader_size         = accel_state->vs_size;
289    vs_conf.num_gprs            = 2;
290    vs_conf.stack_size          = 0;
291    vs_conf.bo                  = accel_state->shaders_bo;
292    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
293
294    ps_conf.shader_addr         = accel_state->ps_mc_addr;
295    ps_conf.shader_size         = accel_state->ps_size;
296    ps_conf.num_gprs            = 1;
297    ps_conf.stack_size          = 0;
298    ps_conf.clamp_consts        = 0;
299    ps_conf.export_mode         = 2;
300    ps_conf.bo                  = accel_state->shaders_bo;
301    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
302
303    /* Texture */
304    tex_res.id                  = 0;
305    tex_res.w                   = accel_state->src_obj[0].width;
306    tex_res.h                   = accel_state->src_obj[0].height;
307    tex_res.pitch               = accel_state->src_obj[0].pitch;
308    tex_res.depth               = 0;
309    tex_res.dim                 = SQ_TEX_DIM_2D;
310    tex_res.base                = 0;
311    tex_res.mip_base            = 0;
312    tex_res.size                = accel_state->src_size[0];
313    tex_res.bo                  = accel_state->src_obj[0].bo;
314    tex_res.mip_bo              = accel_state->src_obj[0].bo;
315    tex_res.surface             = accel_state->src_obj[0].surface;
316    if (accel_state->src_obj[0].bpp == 8) {
317	tex_res.format              = FMT_8;
318	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
319	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
320	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
321	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
322    } else if (accel_state->src_obj[0].bpp == 16) {
323	tex_res.format              = FMT_5_6_5;
324	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
325	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
326	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
327	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
328    } else {
329	tex_res.format              = FMT_8_8_8_8;
330	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
331	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
332	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
333	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
334    }
335
336    tex_res.base_level          = 0;
337    tex_res.last_level          = 0;
338    tex_res.perf_modulation     = 0;
339    if ((accel_state->src_obj[0].tiling_flags & RADEON_TILING_MASK) ==
340	RADEON_TILING_LINEAR)
341	tex_res.array_mode          = 0;
342    evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
343
344    tex_samp.id                 = 0;
345    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
346    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
347    tex_samp.clamp_z            = SQ_TEX_WRAP;
348    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
349    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
350    tex_samp.mc_coord_truncate  = 1;
351    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
352    tex_samp.mip_filter         = 0;			/* no mipmap */
353    evergreen_set_tex_sampler   (pScrn, &tex_samp);
354
355    cb_conf.id = 0;
356    cb_conf.w = accel_state->dst_obj.pitch;
357    cb_conf.h = accel_state->dst_obj.height;
358    cb_conf.base = 0;
359    cb_conf.bo = accel_state->dst_obj.bo;
360    cb_conf.surface = accel_state->dst_obj.surface;
361    if (accel_state->dst_obj.bpp == 8) {
362	cb_conf.format = COLOR_8;
363	cb_conf.comp_swap = 3; /* A */
364    } else if (accel_state->dst_obj.bpp == 16) {
365	cb_conf.format = COLOR_5_6_5;
366	cb_conf.comp_swap = 2; /* RGB */
367    } else {
368	cb_conf.format = COLOR_8_8_8_8;
369	cb_conf.comp_swap = 1; /* ARGB */
370    }
371    cb_conf.source_format = EXPORT_4C_16BPC;
372    cb_conf.blend_clamp = 1;
373    /* Render setup */
374    if (accel_state->planemask & 0x000000ff)
375	cb_conf.pmask |= 4; /* B */
376    if (accel_state->planemask & 0x0000ff00)
377	cb_conf.pmask |= 2; /* G */
378    if (accel_state->planemask & 0x00ff0000)
379	cb_conf.pmask |= 1; /* R */
380    if (accel_state->planemask & 0xff000000)
381	cb_conf.pmask |= 8; /* A */
382    cb_conf.rop = accel_state->rop;
383    if ((accel_state->dst_obj.tiling_flags & RADEON_TILING_MASK) ==
384	RADEON_TILING_LINEAR) {
385	cb_conf.array_mode = 0;
386	cb_conf.non_disp_tiling = 1;
387    }
388    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
389
390    evergreen_set_spi(pScrn, (1 - 1), 1);
391
392}
393
394static void
395EVERGREENDoCopy(ScrnInfoPtr pScrn)
396{
397    evergreen_finish_op(pScrn, 16);
398}
399
400static void
401EVERGREENDoCopyVline(PixmapPtr pPix)
402{
403    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
404    RADEONInfoPtr info = RADEONPTR(pScrn);
405    struct radeon_accel_state *accel_state = info->accel_state;
406
407    if (accel_state->vsync)
408	evergreen_cp_wait_vline_sync(pScrn, pPix,
409				     accel_state->vline_crtc,
410				     accel_state->vline_y1,
411				     accel_state->vline_y2);
412
413    evergreen_finish_op(pScrn, 16);
414}
415
416static void
417EVERGREENAppendCopyVertex(ScrnInfoPtr pScrn,
418			  int srcX, int srcY,
419			  int dstX, int dstY,
420			  int w, int h)
421{
422    RADEONInfoPtr info = RADEONPTR(pScrn);
423    struct radeon_accel_state *accel_state = info->accel_state;
424    float *vb;
425
426    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
427
428    vb[0] = (float)dstX;
429    vb[1] = (float)dstY;
430    vb[2] = (float)srcX;
431    vb[3] = (float)srcY;
432
433    vb[4] = (float)dstX;
434    vb[5] = (float)(dstY + h);
435    vb[6] = (float)srcX;
436    vb[7] = (float)(srcY + h);
437
438    vb[8] = (float)(dstX + w);
439    vb[9] = (float)(dstY + h);
440    vb[10] = (float)(srcX + w);
441    vb[11] = (float)(srcY + h);
442
443    radeon_vbo_commit(pScrn, &accel_state->vbo);
444}
445
446static Bool
447EVERGREENPrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
448		     int xdir, int ydir,
449		     int rop,
450		     Pixel planemask)
451{
452    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
453    RADEONInfoPtr info = RADEONPTR(pScrn);
454    struct radeon_accel_state *accel_state = info->accel_state;
455    struct r600_accel_object src_obj, dst_obj;
456
457    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
458	RADEON_FALLBACK(("EVERGREENCheckDatatype src failed\n"));
459    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
460	RADEON_FALLBACK(("EVERGREENCheckDatatype dst failed\n"));
461    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
462	RADEON_FALLBACK(("Invalid planemask\n"));
463
464    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
465    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
466
467    accel_state->same_surface = FALSE;
468
469    src_obj.bo = radeon_get_pixmap_bo(pSrc);
470    dst_obj.bo = radeon_get_pixmap_bo(pDst);
471    dst_obj.surface = radeon_get_pixmap_surface(pDst);
472    src_obj.surface = radeon_get_pixmap_surface(pSrc);
473    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
474    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
475    if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
476	accel_state->same_surface = TRUE;
477
478    src_obj.width = pSrc->drawable.width;
479    src_obj.height = pSrc->drawable.height;
480    src_obj.bpp = pSrc->drawable.bitsPerPixel;
481    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
482
483    dst_obj.width = pDst->drawable.width;
484    dst_obj.height = pDst->drawable.height;
485    dst_obj.bpp = pDst->drawable.bitsPerPixel;
486    if (radeon_get_pixmap_shared(pDst) == TRUE)
487	dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
488    else
489	dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
490
491    if (!R600SetAccelState(pScrn,
492			   &src_obj,
493			   NULL,
494			   &dst_obj,
495			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
496			   rop, planemask))
497	return FALSE;
498
499    if (accel_state->same_surface == TRUE) {
500	unsigned height = RADEON_ALIGN(pDst->drawable.height,
501				       drmmode_get_height_align(pScrn, accel_state->dst_obj.tiling_flags));
502	unsigned long size = height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
503
504	if (accel_state->dst_obj.surface)
505		size = accel_state->dst_obj.surface->bo_size;
506
507	if (accel_state->copy_area_bo) {
508	    radeon_bo_unref(accel_state->copy_area_bo);
509	    accel_state->copy_area_bo = NULL;
510	}
511	accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
512						   RADEON_GEM_DOMAIN_VRAM,
513						   0);
514	if (accel_state->copy_area_bo == NULL)
515	    RADEON_FALLBACK(("temp copy surface alloc failed\n"));
516
517	radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
518					  0, RADEON_GEM_DOMAIN_VRAM);
519	if (radeon_cs_space_check(info->cs)) {
520	    radeon_bo_unref(accel_state->copy_area_bo);
521	    accel_state->copy_area_bo = NULL;
522	    return FALSE;
523	}
524	accel_state->copy_area = (void*)accel_state->copy_area_bo;
525    } else
526	EVERGREENDoPrepareCopy(pScrn);
527
528    if (accel_state->vsync)
529	RADEONVlineHelperClear(pScrn);
530
531    accel_state->dst_pix = pDst;
532    accel_state->src_pix = pSrc;
533    accel_state->xdir = xdir;
534    accel_state->ydir = ydir;
535
536    return TRUE;
537}
538
539static void
540EVERGREENDoneCopy(PixmapPtr pDst)
541{
542    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
543    RADEONInfoPtr info = RADEONPTR(pScrn);
544    struct radeon_accel_state *accel_state = info->accel_state;
545
546    if (!accel_state->same_surface)
547	EVERGREENDoCopyVline(pDst);
548
549    if (accel_state->copy_area)
550	accel_state->copy_area = NULL;
551
552}
553
554static void
555EVERGREENCopy(PixmapPtr pDst,
556	      int srcX, int srcY,
557	      int dstX, int dstY,
558	      int w, int h)
559{
560    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
561    RADEONInfoPtr info = RADEONPTR(pScrn);
562    struct radeon_accel_state *accel_state = info->accel_state;
563
564    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
565	return;
566
567    if (CS_FULL(info->cs)) {
568	EVERGREENDoneCopy(info->accel_state->dst_pix);
569	radeon_cs_flush_indirect(pScrn);
570	EVERGREENPrepareCopy(accel_state->src_pix,
571			     accel_state->dst_pix,
572			     accel_state->xdir,
573			     accel_state->ydir,
574			     accel_state->rop,
575			     accel_state->planemask);
576    }
577
578    if (accel_state->vsync)
579	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
580
581    if (accel_state->same_surface &&
582	    (srcX + w <= dstX || dstX + w <= srcX || srcY + h <= dstY || dstY + h <= srcY)) {
583	EVERGREENDoPrepareCopy(pScrn);
584	EVERGREENAppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
585	EVERGREENDoCopyVline(pDst);
586    } else if (accel_state->same_surface && accel_state->copy_area) {
587	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
588	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
589	uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags;
590	uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags;
591	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
592	int orig_rop = accel_state->rop;
593	struct radeon_surface *orig_dst_surface = accel_state->dst_obj.surface;
594	struct radeon_surface *orig_src_surface = accel_state->src_obj[0].surface;
595
596	/* src to tmp */
597	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
598	accel_state->dst_obj.bo = accel_state->copy_area_bo;
599	accel_state->dst_obj.tiling_flags = 0;
600	accel_state->rop = 3;
601	accel_state->dst_obj.surface = NULL;
602	EVERGREENDoPrepareCopy(pScrn);
603	EVERGREENAppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
604	EVERGREENDoCopy(pScrn);
605
606	/* tmp to dst */
607	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
608	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
609	accel_state->src_obj[0].tiling_flags = 0;
610	accel_state->src_obj[0].surface = NULL;
611	accel_state->dst_obj.domain = orig_dst_domain;
612	accel_state->dst_obj.bo = orig_bo;
613	accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags;
614	accel_state->rop = orig_rop;
615	accel_state->dst_obj.surface = orig_dst_surface;
616	EVERGREENDoPrepareCopy(pScrn);
617	EVERGREENAppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
618	EVERGREENDoCopyVline(pDst);
619
620	/* restore state */
621	accel_state->src_obj[0].domain = orig_src_domain;
622	accel_state->src_obj[0].bo = orig_bo;
623	accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags;
624	accel_state->src_obj[0].surface = orig_src_surface;
625    } else
626	EVERGREENAppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
627
628}
629
630struct blendinfo {
631    Bool dst_alpha;
632    Bool src_alpha;
633    uint32_t blend_cntl;
634};
635
636static struct blendinfo EVERGREENBlendOp[] = {
637    /* Clear */
638    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
639    /* Src */
640    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
641    /* Dst */
642    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
643    /* Over */
644    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
645    /* OverReverse */
646    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
647    /* In */
648    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
649    /* InReverse */
650    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
651    /* Out */
652    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
653    /* OutReverse */
654    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
655    /* Atop */
656    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
657    /* AtopReverse */
658    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
659    /* Xor */
660    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
661    /* Add */
662    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
663};
664
665struct formatinfo {
666    unsigned int fmt;
667    uint32_t card_fmt;
668};
669
670static struct formatinfo EVERGREENTexFormats[] = {
671    {PICT_a2r10g10b10,	FMT_2_10_10_10},
672    {PICT_x2r10g10b10,	FMT_2_10_10_10},
673    {PICT_a2b10g10r10,	FMT_2_10_10_10},
674    {PICT_x2b10g10r10,	FMT_2_10_10_10},
675    {PICT_a8r8g8b8,	FMT_8_8_8_8},
676    {PICT_x8r8g8b8,	FMT_8_8_8_8},
677    {PICT_a8b8g8r8,	FMT_8_8_8_8},
678    {PICT_x8b8g8r8,	FMT_8_8_8_8},
679    {PICT_b8g8r8a8,	FMT_8_8_8_8},
680    {PICT_b8g8r8x8,	FMT_8_8_8_8},
681    {PICT_r5g6b5,	FMT_5_6_5},
682    {PICT_a1r5g5b5,	FMT_1_5_5_5},
683    {PICT_x1r5g5b5,     FMT_1_5_5_5},
684    {PICT_a8,		FMT_8},
685};
686
687static uint32_t EVERGREENGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
688{
689    uint32_t sblend, dblend;
690
691    sblend = EVERGREENBlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
692    dblend = EVERGREENBlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
693
694    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
695     * it as always 1.
696     */
697    if (PICT_FORMAT_A(dst_format) == 0 && EVERGREENBlendOp[op].dst_alpha) {
698	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
699	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
700	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
701	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
702    }
703
704    /* If the source alpha is being used, then we should only be in a case where
705     * the source blend factor is 0, and the source blend value is the mask
706     * channels multiplied by the source picture's alpha.
707     */
708    if (pMask && pMask->componentAlpha && EVERGREENBlendOp[op].src_alpha) {
709	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
710	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
711	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
712	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
713	}
714
715	/* With some tricks, we can still accelerate PictOpOver with solid src.
716	 * This is commonly used for text rendering, so it's worth the extra
717	 * effort.
718	 */
719	if (sblend == (BLEND_ONE << COLOR_SRCBLEND_shift)) {
720	    sblend = (BLEND_CONSTANT_COLOR << COLOR_SRCBLEND_shift);
721	}
722    }
723
724    return sblend | dblend;
725}
726
727static Bool EVERGREENGetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
728{
729    switch (pDstPicture->format) {
730    case PICT_a2r10g10b10:
731    case PICT_x2r10g10b10:
732    case PICT_a2b10g10r10:
733    case PICT_x2b10g10r10:
734	*dst_format = COLOR_2_10_10_10;
735	break;
736    case PICT_a8r8g8b8:
737    case PICT_x8r8g8b8:
738    case PICT_a8b8g8r8:
739    case PICT_x8b8g8r8:
740    case PICT_b8g8r8a8:
741    case PICT_b8g8r8x8:
742	*dst_format = COLOR_8_8_8_8;
743	break;
744    case PICT_r5g6b5:
745	*dst_format = COLOR_5_6_5;
746	break;
747    case PICT_a1r5g5b5:
748    case PICT_x1r5g5b5:
749	*dst_format = COLOR_1_5_5_5;
750	break;
751    case PICT_a8:
752	*dst_format = COLOR_8;
753	break;
754    default:
755	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
756	       (int)pDstPicture->format));
757    }
758    return TRUE;
759}
760
761static Bool EVERGREENCheckCompositeTexture(PicturePtr pPict,
762					   PicturePtr pDstPict,
763					   int op,
764					   int unit)
765{
766    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
767    unsigned int i;
768
769    for (i = 0; i < sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0]); i++) {
770	if (EVERGREENTexFormats[i].fmt == pPict->format)
771	    break;
772    }
773    if (i == sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0]))
774	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
775			 (int)pPict->format));
776
777    if (pPict->filter != PictFilterNearest &&
778	pPict->filter != PictFilterBilinear)
779	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
780
781    /* for REPEAT_NONE, Render semantics are that sampling outside the source
782     * picture results in alpha=0 pixels. We can implement this with a border color
783     * *if* our source texture has an alpha channel, otherwise we need to fall
784     * back. If we're not transformed then we hope that upper layers have clipped
785     * rendering to the bounds of the source drawable, in which case it doesn't
786     * matter. I have not, however, verified that the X server always does such
787     * clipping.
788     */
789    /* FIXME evergreen */
790    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
791	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
792	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
793    }
794
795    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
796	RADEON_FALLBACK(("non-affine transforms not supported\n"));
797
798    return TRUE;
799}
800
801static void EVERGREENXFormSetup(PicturePtr pPict, ScrnInfoPtr pScrn,
802				int unit, float *vs_alu_consts)
803{
804    RADEONInfoPtr info = RADEONPTR(pScrn);
805    struct radeon_accel_state *accel_state = info->accel_state;
806    int const_offset = unit * 8;
807    int w, h;
808
809    if (pPict->pDrawable) {
810	w = pPict->pDrawable->width;
811	h = pPict->pDrawable->height;
812    } else {
813	w = 1;
814	h = 1;
815    }
816
817    if (pPict->transform != 0) {
818	accel_state->is_transform[unit] = TRUE;
819	accel_state->transform[unit] = pPict->transform;
820
821	vs_alu_consts[0 + const_offset] = xFixedToFloat(pPict->transform->matrix[0][0]);
822	vs_alu_consts[1 + const_offset] = xFixedToFloat(pPict->transform->matrix[0][1]);
823	vs_alu_consts[2 + const_offset] = xFixedToFloat(pPict->transform->matrix[0][2]);
824	vs_alu_consts[3 + const_offset] = 1.0 / w;
825
826	vs_alu_consts[4 + const_offset] = xFixedToFloat(pPict->transform->matrix[1][0]);
827	vs_alu_consts[5 + const_offset] = xFixedToFloat(pPict->transform->matrix[1][1]);
828	vs_alu_consts[6 + const_offset] = xFixedToFloat(pPict->transform->matrix[1][2]);
829	vs_alu_consts[7 + const_offset] = 1.0 / h;
830    } else {
831	accel_state->is_transform[unit] = FALSE;
832
833	vs_alu_consts[0 + const_offset] = 1.0;
834	vs_alu_consts[1 + const_offset] = 0.0;
835	vs_alu_consts[2 + const_offset] = 0.0;
836	vs_alu_consts[3 + const_offset] = 1.0 / w;
837
838	vs_alu_consts[4 + const_offset] = 0.0;
839	vs_alu_consts[5 + const_offset] = 1.0;
840	vs_alu_consts[6 + const_offset] = 0.0;
841	vs_alu_consts[7 + const_offset] = 1.0 / h;
842    }
843
844}
845
846static Bool EVERGREENTextureSetup(PicturePtr pPict, PixmapPtr pPix,
847				  int unit)
848{
849    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
850    RADEONInfoPtr info = RADEONPTR(pScrn);
851    struct radeon_accel_state *accel_state = info->accel_state;
852    unsigned int repeatType;
853    unsigned int i;
854    tex_resource_t  tex_res;
855    tex_sampler_t   tex_samp;
856    int pix_r, pix_g, pix_b, pix_a;
857
858    CLEAR (tex_res);
859    CLEAR (tex_samp);
860
861    for (i = 0; i < sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0]); i++) {
862	if (EVERGREENTexFormats[i].fmt == pPict->format)
863	    break;
864    }
865
866    /* Texture */
867    if (pPict->pDrawable) {
868	tex_res.w               = pPict->pDrawable->width;
869	tex_res.h               = pPict->pDrawable->height;
870	repeatType              = pPict->repeat ? pPict->repeatType : RepeatNone;
871    } else {
872	tex_res.w               = 1;
873	tex_res.h               = 1;
874	repeatType              = RepeatNormal;
875    }
876
877    tex_res.id                  = unit;
878    tex_res.pitch               = accel_state->src_obj[unit].pitch;
879    tex_res.depth               = 0;
880    tex_res.dim                 = SQ_TEX_DIM_2D;
881    tex_res.base                = 0;
882    tex_res.mip_base            = 0;
883    tex_res.size                = accel_state->src_size[unit];
884    tex_res.format              = EVERGREENTexFormats[i].card_fmt;
885    tex_res.bo                  = accel_state->src_obj[unit].bo;
886    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
887    tex_res.surface             = accel_state->src_obj[unit].surface;
888
889#if X_BYTE_ORDER == X_BIG_ENDIAN
890    switch (accel_state->src_obj[unit].bpp) {
891    case 16:
892	tex_res.endian = SQ_ENDIAN_8IN16;
893	break;
894    case 32:
895	tex_res.endian = SQ_ENDIAN_8IN32;
896	break;
897    default :
898	break;
899    }
900#endif
901
902    /* component swizzles */
903    switch (pPict->format) {
904    case PICT_a2r10g10b10:
905    case PICT_a1r5g5b5:
906    case PICT_a8r8g8b8:
907	pix_r = SQ_SEL_Z; /* R */
908	pix_g = SQ_SEL_Y; /* G */
909	pix_b = SQ_SEL_X; /* B */
910	pix_a = SQ_SEL_W; /* A */
911	break;
912    case PICT_a2b10g10r10:
913    case PICT_a8b8g8r8:
914	pix_r = SQ_SEL_X; /* R */
915	pix_g = SQ_SEL_Y; /* G */
916	pix_b = SQ_SEL_Z; /* B */
917	pix_a = SQ_SEL_W; /* A */
918	break;
919    case PICT_x2b10g10r10:
920    case PICT_x8b8g8r8:
921	pix_r = SQ_SEL_X; /* R */
922	pix_g = SQ_SEL_Y; /* G */
923	pix_b = SQ_SEL_Z; /* B */
924	pix_a = SQ_SEL_1; /* A */
925	break;
926    case PICT_b8g8r8a8:
927	pix_r = SQ_SEL_Y; /* R */
928	pix_g = SQ_SEL_Z; /* G */
929	pix_b = SQ_SEL_W; /* B */
930	pix_a = SQ_SEL_X; /* A */
931	break;
932    case PICT_b8g8r8x8:
933	pix_r = SQ_SEL_Y; /* R */
934	pix_g = SQ_SEL_Z; /* G */
935	pix_b = SQ_SEL_W; /* B */
936	pix_a = SQ_SEL_1; /* A */
937	break;
938    case PICT_x2r10g10b10:
939    case PICT_x1r5g5b5:
940    case PICT_x8r8g8b8:
941    case PICT_r5g6b5:
942	pix_r = SQ_SEL_Z; /* R */
943	pix_g = SQ_SEL_Y; /* G */
944	pix_b = SQ_SEL_X; /* B */
945	pix_a = SQ_SEL_1; /* A */
946	break;
947    case PICT_a8:
948	pix_r = SQ_SEL_0; /* R */
949	pix_g = SQ_SEL_0; /* G */
950	pix_b = SQ_SEL_0; /* B */
951	pix_a = SQ_SEL_X; /* A */
952	break;
953    default:
954	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
955    }
956
957    if (unit == 0) {
958	if (!accel_state->msk_pic) {
959	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
960		pix_r = SQ_SEL_0;
961		pix_g = SQ_SEL_0;
962		pix_b = SQ_SEL_0;
963	    }
964
965	    if (PICT_FORMAT_A(pPict->format) == 0)
966		pix_a = SQ_SEL_1;
967	} else {
968	    if (accel_state->component_alpha) {
969		if (accel_state->src_alpha) {
970		    if (PICT_FORMAT_A(pPict->format) == 0) {
971			pix_r = SQ_SEL_1;
972			pix_g = SQ_SEL_1;
973			pix_b = SQ_SEL_1;
974			pix_a = SQ_SEL_1;
975		    } else {
976			pix_r = pix_a;
977			pix_g = pix_a;
978			pix_b = pix_a;
979		    }
980		} else {
981		    if (PICT_FORMAT_A(pPict->format) == 0)
982			pix_a = SQ_SEL_1;
983		}
984	    } else {
985		if (PICT_FORMAT_RGB(pPict->format) == 0) {
986		    pix_r = SQ_SEL_0;
987		    pix_g = SQ_SEL_0;
988		    pix_b = SQ_SEL_0;
989		}
990
991		if (PICT_FORMAT_A(pPict->format) == 0)
992		    pix_a = SQ_SEL_1;
993	    }
994	}
995    } else {
996	if (accel_state->component_alpha) {
997	    if (PICT_FORMAT_A(pPict->format) == 0)
998		pix_a = SQ_SEL_1;
999	} else {
1000	    if (PICT_FORMAT_A(pPict->format) == 0) {
1001		pix_r = SQ_SEL_1;
1002		pix_g = SQ_SEL_1;
1003		pix_b = SQ_SEL_1;
1004		pix_a = SQ_SEL_1;
1005	    } else {
1006		pix_r = pix_a;
1007		pix_g = pix_a;
1008		pix_b = pix_a;
1009	    }
1010	}
1011    }
1012
1013    tex_res.dst_sel_x           = pix_r; /* R */
1014    tex_res.dst_sel_y           = pix_g; /* G */
1015    tex_res.dst_sel_z           = pix_b; /* B */
1016    tex_res.dst_sel_w           = pix_a; /* A */
1017
1018    tex_res.base_level          = 0;
1019    tex_res.last_level          = 0;
1020    tex_res.perf_modulation     = 0;
1021    if ((accel_state->src_obj[unit].tiling_flags & RADEON_TILING_MASK) ==
1022	RADEON_TILING_LINEAR)
1023	tex_res.array_mode          = 0;
1024    evergreen_set_tex_resource  (pScrn, &tex_res, accel_state->src_obj[unit].domain);
1025
1026    tex_samp.id                 = unit;
1027    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1028
1029    switch (repeatType) {
1030    case RepeatNormal:
1031	tex_samp.clamp_x            = SQ_TEX_WRAP;
1032	tex_samp.clamp_y            = SQ_TEX_WRAP;
1033	break;
1034    case RepeatPad:
1035	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1036	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1037	break;
1038    case RepeatReflect:
1039	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1040	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1041	break;
1042    case RepeatNone:
1043	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1044	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1045	break;
1046    default:
1047	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1048    }
1049
1050    switch (pPict->filter) {
1051    case PictFilterNearest:
1052	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1053	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1054	tex_samp.mc_coord_truncate  = 1;
1055	break;
1056    case PictFilterBilinear:
1057	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1058	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1059	break;
1060    default:
1061	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1062    }
1063
1064    tex_samp.clamp_z            = SQ_TEX_WRAP;
1065    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1066    tex_samp.mip_filter         = 0;			/* no mipmap */
1067    evergreen_set_tex_sampler   (pScrn, &tex_samp);
1068
1069    return TRUE;
1070}
1071
1072static Bool EVERGREENCheckComposite(int op, PicturePtr pSrcPicture,
1073				    PicturePtr pMaskPicture,
1074				    PicturePtr pDstPicture)
1075{
1076    uint32_t tmp1;
1077    PixmapPtr pSrcPixmap, pDstPixmap;
1078
1079    /* Check for unsupported compositing operations. */
1080    if (op >= (int) (sizeof(EVERGREENBlendOp) / sizeof(EVERGREENBlendOp[0])))
1081	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1082
1083    if (pSrcPicture->pDrawable) {
1084	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1085
1086	if (pSrcPixmap->drawable.width >= 16384 ||
1087	    pSrcPixmap->drawable.height >= 16384) {
1088	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1089			     pSrcPixmap->drawable.width,
1090			     pSrcPixmap->drawable.height));
1091	}
1092
1093	if (!EVERGREENCheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1094	    return FALSE;
1095    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
1096	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1097
1098    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1099
1100    if (pDstPixmap->drawable.width >= 16384 ||
1101	pDstPixmap->drawable.height >= 16384) {
1102	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1103			 pDstPixmap->drawable.width,
1104			 pDstPixmap->drawable.height));
1105    }
1106
1107    if (pMaskPicture) {
1108	PixmapPtr pMaskPixmap;
1109
1110	if (pMaskPicture->pDrawable) {
1111	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1112
1113	    if (pMaskPixmap->drawable.width >= 16384 ||
1114		pMaskPixmap->drawable.height >= 16384) {
1115	      RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1116			       pMaskPixmap->drawable.width,
1117			       pMaskPixmap->drawable.height));
1118	    }
1119
1120	    if (pMaskPicture->componentAlpha) {
1121		/* Check if it's component alpha that relies on a source alpha and
1122		 * on the source value.  We can only get one of those into the
1123		 * single source value that we get to blend with.
1124		 *
1125		 * We can cheat a bit if the src is solid, though. PictOpOver
1126		 * can use the constant blend color to sneak a second blend
1127		 * source in.
1128		 */
1129		if (EVERGREENBlendOp[op].src_alpha &&
1130		    (EVERGREENBlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1131		    (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1132		    if (pSrcPicture->pDrawable || op != PictOpOver)
1133			RADEON_FALLBACK(("Component alpha not supported with source "
1134					 "alpha and source value blending.\n"));
1135		}
1136	    }
1137
1138	    if (!EVERGREENCheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1139		return FALSE;
1140	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
1141	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1142    }
1143
1144    if (!EVERGREENGetDestFormat(pDstPicture, &tmp1))
1145	return FALSE;
1146
1147    return TRUE;
1148
1149}
1150
1151static void EVERGREENSetSolidConsts(ScrnInfoPtr pScrn, float *buf, int format, uint32_t fg, int unit)
1152{
1153    RADEONInfoPtr info = RADEONPTR(pScrn);
1154    struct radeon_accel_state *accel_state = info->accel_state;
1155    float pix_r = 0, pix_g = 0, pix_b = 0, pix_a = 0;
1156
1157    uint32_t w = (fg >> 24) & 0xff;
1158    uint32_t z = (fg >> 16) & 0xff;
1159    uint32_t y = (fg >> 8) & 0xff;
1160    uint32_t x = (fg >> 0) & 0xff;
1161    float xf = (float)x / 255; /* R */
1162    float yf = (float)y / 255; /* G */
1163    float zf = (float)z / 255; /* B */
1164    float wf = (float)w / 255; /* A */
1165
1166    /* component swizzles */
1167    switch (format) {
1168	case PICT_a1r5g5b5:
1169	case PICT_a8r8g8b8:
1170	    pix_r = zf; /* R */
1171	    pix_g = yf; /* G */
1172	    pix_b = xf; /* B */
1173	    pix_a = wf; /* A */
1174	    break;
1175	case PICT_a8b8g8r8:
1176	    pix_r = xf; /* R */
1177	    pix_g = yf; /* G */
1178	    pix_b = zf; /* B */
1179	    pix_a = wf; /* A */
1180	    break;
1181	case PICT_x8b8g8r8:
1182	    pix_r = xf; /* R */
1183	    pix_g = yf; /* G */
1184	    pix_b = zf; /* B */
1185	    pix_a = 1.0; /* A */
1186	    break;
1187	case PICT_b8g8r8a8:
1188	    pix_r = yf; /* R */
1189	    pix_g = zf; /* G */
1190	    pix_b = wf; /* B */
1191	    pix_a = xf; /* A */
1192	    break;
1193	case PICT_b8g8r8x8:
1194	    pix_r = yf; /* R */
1195	    pix_g = zf; /* G */
1196	    pix_b = wf; /* B */
1197	    pix_a = 1.0; /* A */
1198	    break;
1199	case PICT_x1r5g5b5:
1200	case PICT_x8r8g8b8:
1201	case PICT_r5g6b5:
1202	    pix_r = zf; /* R */
1203	    pix_g = yf; /* G */
1204	    pix_b = xf; /* B */
1205	    pix_a = 1.0; /* A */
1206	    break;
1207	case PICT_a8:
1208	    pix_r = 0.0; /* R */
1209	    pix_g = 0.0; /* G */
1210	    pix_b = 0.0; /* B */
1211	    pix_a = xf; /* A */
1212	    break;
1213	default:
1214	    ErrorF("Bad format 0x%x\n", format);
1215    }
1216
1217    if (unit == 0) {
1218	if (!accel_state->msk_pic) {
1219	    if (PICT_FORMAT_RGB(format) == 0) {
1220		pix_r = 0.0;
1221		pix_g = 0.0;
1222		pix_b = 0.0;
1223	    }
1224
1225	    if (PICT_FORMAT_A(format) == 0)
1226		pix_a = 1.0;
1227	} else {
1228	    if (accel_state->component_alpha) {
1229		if (accel_state->src_alpha) {
1230		    /* required for PictOpOver */
1231		    float cblend[4] = { pix_r / pix_a, pix_g / pix_a,
1232					pix_b / pix_a, pix_a / pix_a };
1233		    evergreen_set_blend_color(pScrn, cblend);
1234
1235		    if (PICT_FORMAT_A(format) == 0) {
1236			pix_r = 1.0;
1237			pix_g = 1.0;
1238			pix_b = 1.0;
1239			pix_a = 1.0;
1240		    } else {
1241			pix_r = pix_a;
1242			pix_g = pix_a;
1243			pix_b = pix_a;
1244		    }
1245		} else {
1246		    if (PICT_FORMAT_A(format) == 0)
1247			pix_a = 1.0;
1248		}
1249	    } else {
1250		if (PICT_FORMAT_RGB(format) == 0) {
1251		    pix_r = 0;
1252		    pix_g = 0;
1253		    pix_b = 0;
1254		}
1255
1256		if (PICT_FORMAT_A(format) == 0)
1257		    pix_a = 1.0;
1258	    }
1259	}
1260    } else {
1261	if (accel_state->component_alpha) {
1262	    if (PICT_FORMAT_A(format) == 0)
1263		pix_a = 1.0;
1264	} else {
1265	    if (PICT_FORMAT_A(format) == 0) {
1266		pix_r = 1.0;
1267		pix_g = 1.0;
1268		pix_b = 1.0;
1269		pix_a = 1.0;
1270	    } else {
1271		pix_r = pix_a;
1272		pix_g = pix_a;
1273		pix_b = pix_a;
1274	    }
1275	}
1276    }
1277
1278    buf[0] = pix_r;
1279    buf[1] = pix_g;
1280    buf[2] = pix_b;
1281    buf[3] = pix_a;
1282}
1283
1284static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
1285				      PicturePtr pMaskPicture, PicturePtr pDstPicture,
1286				      PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1287{
1288    ScreenPtr pScreen = pDst->drawable.pScreen;
1289    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1290    RADEONInfoPtr info = RADEONPTR(pScrn);
1291    struct radeon_accel_state *accel_state = info->accel_state;
1292    uint32_t dst_format;
1293    cb_config_t cb_conf;
1294    shader_config_t vs_conf, ps_conf;
1295    const_config_t vs_const_conf;
1296    struct r600_accel_object src_obj, mask_obj, dst_obj;
1297    float *cbuf;
1298    uint32_t ps_bool_consts = 0;
1299
1300    if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8))
1301	return FALSE;
1302
1303    if (pSrc) {
1304	src_obj.bo = radeon_get_pixmap_bo(pSrc);
1305	src_obj.surface = radeon_get_pixmap_surface(pSrc);
1306	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1307	src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1308	src_obj.width = pSrc->drawable.width;
1309	src_obj.height = pSrc->drawable.height;
1310	src_obj.bpp = pSrc->drawable.bitsPerPixel;
1311	src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1312    }
1313
1314    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1315    dst_obj.surface = radeon_get_pixmap_surface(pDst);
1316    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1317    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1318    dst_obj.width = pDst->drawable.width;
1319    dst_obj.height = pDst->drawable.height;
1320    dst_obj.bpp = pDst->drawable.bitsPerPixel;
1321    if (radeon_get_pixmap_shared(pDst) == TRUE)
1322	dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1323    else
1324	dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1325
1326    if (pMaskPicture) {
1327	if (pMask) {
1328	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
1329	    mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
1330	    mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1331	    mask_obj.surface = radeon_get_pixmap_surface(pMask);
1332	    mask_obj.width = pMask->drawable.width;
1333	    mask_obj.height = pMask->drawable.height;
1334	    mask_obj.bpp = pMask->drawable.bitsPerPixel;
1335	    mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1336	}
1337
1338	accel_state->msk_pic = pMaskPicture;
1339	if (pMaskPicture->componentAlpha) {
1340	    accel_state->component_alpha = TRUE;
1341	    if (EVERGREENBlendOp[op].src_alpha)
1342		accel_state->src_alpha = TRUE;
1343	    else
1344		accel_state->src_alpha = FALSE;
1345	} else {
1346	    accel_state->component_alpha = FALSE;
1347	    accel_state->src_alpha = FALSE;
1348	}
1349    } else {
1350	accel_state->msk_pic = NULL;
1351	accel_state->component_alpha = FALSE;
1352	accel_state->src_alpha = FALSE;
1353    }
1354
1355    if (!R600SetAccelState(pScrn,
1356		pSrc ? &src_obj : NULL,
1357		(pMaskPicture && pMask) ? &mask_obj : NULL,
1358		&dst_obj,
1359		accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1360		3, 0xffffffff))
1361	return FALSE;
1362
1363    if (!EVERGREENGetDestFormat(pDstPicture, &dst_format))
1364	return FALSE;
1365
1366    CLEAR (cb_conf);
1367    CLEAR (vs_conf);
1368    CLEAR (ps_conf);
1369    CLEAR (vs_const_conf);
1370
1371    if (pMask)
1372        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1373    else
1374        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1375
1376    radeon_vbo_check(pScrn, &accel_state->cbuf, 256);
1377
1378    radeon_cp_start(pScrn);
1379
1380    evergreen_set_default_state(pScrn);
1381
1382    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1383    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1384    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1385
1386    if (pSrc) {
1387	if (!EVERGREENTextureSetup(pSrcPicture, pSrc, 0)) {
1388	    radeon_ib_discard(pScrn);
1389	    radeon_cs_flush_indirect(pScrn);
1390	    return FALSE;
1391	}
1392    } else
1393	accel_state->is_transform[0] = FALSE;
1394
1395    if (pMask) {
1396        if (!EVERGREENTextureSetup(pMaskPicture, pMask, 1)) {
1397	    radeon_ib_discard(pScrn);
1398	    radeon_cs_flush_indirect(pScrn);
1399            return FALSE;
1400        }
1401    } else
1402        accel_state->is_transform[1] = FALSE;
1403
1404    if (pSrc)
1405	ps_bool_consts |= (1 << 0);
1406    if (pMask)
1407	ps_bool_consts |= (1 << 1);
1408    evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, ps_bool_consts);
1409
1410    if (pMask) {
1411	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0));
1412    } else {
1413	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0));
1414    }
1415
1416    /* Shader */
1417    vs_conf.shader_addr         = accel_state->vs_mc_addr;
1418    vs_conf.shader_size         = accel_state->vs_size;
1419    vs_conf.num_gprs            = 5;
1420    vs_conf.stack_size          = 1;
1421    vs_conf.bo                  = accel_state->shaders_bo;
1422    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1423
1424    ps_conf.shader_addr         = accel_state->ps_mc_addr;
1425    ps_conf.shader_size         = accel_state->ps_size;
1426    ps_conf.num_gprs            = 2;
1427    ps_conf.stack_size          = 1;
1428    ps_conf.clamp_consts        = 0;
1429    ps_conf.export_mode         = 2;
1430    ps_conf.bo                  = accel_state->shaders_bo;
1431    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1432
1433    cb_conf.id = 0;
1434    cb_conf.w = accel_state->dst_obj.pitch;
1435    cb_conf.h = accel_state->dst_obj.height;
1436    cb_conf.base = 0;
1437    cb_conf.format = dst_format;
1438    cb_conf.bo = accel_state->dst_obj.bo;
1439    cb_conf.surface = accel_state->dst_obj.surface;
1440
1441    switch (pDstPicture->format) {
1442    case PICT_a2r10g10b10:
1443    case PICT_x2r10g10b10:
1444    case PICT_a8r8g8b8:
1445    case PICT_x8r8g8b8:
1446    case PICT_a1r5g5b5:
1447    case PICT_x1r5g5b5:
1448    default:
1449	cb_conf.comp_swap = 1; /* ARGB */
1450	break;
1451    case PICT_a2b10g10r10:
1452    case PICT_x2b10g10r10:
1453    case PICT_a8b8g8r8:
1454    case PICT_x8b8g8r8:
1455	cb_conf.comp_swap = 0; /* ABGR */
1456	break;
1457    case PICT_b8g8r8a8:
1458    case PICT_b8g8r8x8:
1459	cb_conf.comp_swap = 3; /* BGRA */
1460	break;
1461    case PICT_r5g6b5:
1462	cb_conf.comp_swap = 2; /* RGB */
1463	break;
1464    case PICT_a8:
1465	cb_conf.comp_swap = 3; /* A */
1466	break;
1467    }
1468    cb_conf.source_format = EXPORT_4C_16BPC;
1469    cb_conf.blend_clamp = 1;
1470    cb_conf.blendcntl = EVERGREENGetBlendCntl(op, pMaskPicture, pDstPicture->format);
1471    cb_conf.blendcntl |= CB_BLEND0_CONTROL__ENABLE_bit;
1472    cb_conf.rop = 3;
1473    cb_conf.pmask = 0xf;
1474    if ((accel_state->dst_obj.tiling_flags & RADEON_TILING_MASK) ==
1475	RADEON_TILING_LINEAR) {
1476	cb_conf.array_mode = 0;
1477	cb_conf.non_disp_tiling = 1;
1478    }
1479#if X_BYTE_ORDER == X_BIG_ENDIAN
1480    switch (dst_obj.bpp) {
1481    case 16:
1482	cb_conf.endian = ENDIAN_8IN16;
1483	break;
1484    case 32:
1485	cb_conf.endian = ENDIAN_8IN32;
1486	break;
1487    default:
1488	break;
1489    }
1490#endif
1491    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
1492
1493    if (pMask)
1494	evergreen_set_spi(pScrn, (2 - 1), 2);
1495    else
1496	evergreen_set_spi(pScrn, (1 - 1), 1);
1497
1498    /* VS alu constants */
1499    vs_const_conf.size_bytes = 256;
1500    vs_const_conf.type = SHADER_TYPE_VS;
1501    cbuf = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
1502    vs_const_conf.bo = accel_state->cbuf.vb_bo;
1503    vs_const_conf.const_addr = accel_state->cbuf.vb_offset;
1504
1505    vs_const_conf.cpu_ptr = (uint32_t *)(char *)cbuf;
1506    EVERGREENXFormSetup(pSrcPicture, pScrn, 0, cbuf);
1507    if (pMask)
1508        EVERGREENXFormSetup(pMaskPicture, pScrn, 1, cbuf);
1509
1510    if (!pSrc) {
1511	/* solid src color */
1512	EVERGREENSetSolidConsts(pScrn, &cbuf[16], pSrcPicture->format,
1513		pSrcPicture->pSourcePict->solidFill.color, 0);
1514    }
1515
1516    if (!pMaskPicture) {
1517	/* use identity constant if there is no mask */
1518	cbuf[20] = 1.0;
1519	cbuf[21] = 1.0;
1520	cbuf[22] = 1.0;
1521	cbuf[23] = 1.0;
1522    } else if (!pMask) {
1523	/* solid mask color */
1524	EVERGREENSetSolidConsts(pScrn, &cbuf[20], pMaskPicture->format,
1525		pMaskPicture->pSourcePict->solidFill.color, 1);
1526    }
1527
1528    radeon_vbo_commit(pScrn, &accel_state->cbuf);
1529    evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);
1530
1531    if (accel_state->vsync)
1532	RADEONVlineHelperClear(pScrn);
1533
1534    accel_state->composite_op = op;
1535    accel_state->dst_pic = pDstPicture;
1536    accel_state->src_pic = pSrcPicture;
1537    accel_state->dst_pix = pDst;
1538    accel_state->msk_pix = pMask;
1539    accel_state->src_pix = pSrc;
1540
1541    return TRUE;
1542}
1543
1544static void EVERGREENFinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst,
1545				     struct radeon_accel_state *accel_state)
1546{
1547    int vtx_size;
1548
1549    if (accel_state->vsync)
1550       evergreen_cp_wait_vline_sync(pScrn, pDst,
1551				    accel_state->vline_crtc,
1552				    accel_state->vline_y1,
1553				    accel_state->vline_y2);
1554
1555    vtx_size = accel_state->msk_pix ? 24 : 16;
1556
1557    evergreen_finish_op(pScrn, vtx_size);
1558}
1559
1560static void EVERGREENDoneComposite(PixmapPtr pDst)
1561{
1562    ScreenPtr pScreen = pDst->drawable.pScreen;
1563    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1564    RADEONInfoPtr info = RADEONPTR(pScrn);
1565    struct radeon_accel_state *accel_state = info->accel_state;
1566
1567    EVERGREENFinishComposite(pScrn, pDst, accel_state);
1568}
1569
1570static void EVERGREENComposite(PixmapPtr pDst,
1571			       int srcX, int srcY,
1572			       int maskX, int maskY,
1573			       int dstX, int dstY,
1574			       int w, int h)
1575{
1576    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1577    RADEONInfoPtr info = RADEONPTR(pScrn);
1578    struct radeon_accel_state *accel_state = info->accel_state;
1579    float *vb;
1580
1581    if (CS_FULL(info->cs)) {
1582	EVERGREENFinishComposite(pScrn, pDst, info->accel_state);
1583	radeon_cs_flush_indirect(pScrn);
1584	EVERGREENPrepareComposite(info->accel_state->composite_op,
1585				  info->accel_state->src_pic,
1586				  info->accel_state->msk_pic,
1587				  info->accel_state->dst_pic,
1588				  info->accel_state->src_pix,
1589				  info->accel_state->msk_pix,
1590				  info->accel_state->dst_pix);
1591    }
1592
1593    if (accel_state->vsync)
1594	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1595
1596    if (accel_state->msk_pix) {
1597
1598	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
1599
1600	vb[0] = (float)dstX;
1601	vb[1] = (float)dstY;
1602	vb[2] = (float)srcX;
1603	vb[3] = (float)srcY;
1604	vb[4] = (float)maskX;
1605	vb[5] = (float)maskY;
1606
1607	vb[6] = (float)dstX;
1608	vb[7] = (float)(dstY + h);
1609	vb[8] = (float)srcX;
1610	vb[9] = (float)(srcY + h);
1611	vb[10] = (float)maskX;
1612	vb[11] = (float)(maskY + h);
1613
1614	vb[12] = (float)(dstX + w);
1615	vb[13] = (float)(dstY + h);
1616	vb[14] = (float)(srcX + w);
1617	vb[15] = (float)(srcY + h);
1618	vb[16] = (float)(maskX + w);
1619	vb[17] = (float)(maskY + h);
1620
1621	radeon_vbo_commit(pScrn, &accel_state->vbo);
1622
1623    } else {
1624
1625	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
1626
1627	vb[0] = (float)dstX;
1628	vb[1] = (float)dstY;
1629	vb[2] = (float)srcX;
1630	vb[3] = (float)srcY;
1631
1632	vb[4] = (float)dstX;
1633	vb[5] = (float)(dstY + h);
1634	vb[6] = (float)srcX;
1635	vb[7] = (float)(srcY + h);
1636
1637	vb[8] = (float)(dstX + w);
1638	vb[9] = (float)(dstY + h);
1639	vb[10] = (float)(srcX + w);
1640	vb[11] = (float)(srcY + h);
1641
1642	radeon_vbo_commit(pScrn, &accel_state->vbo);
1643    }
1644
1645
1646}
1647
1648static Bool
1649EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
1650			char *src, int src_pitch)
1651{
1652    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1653    RADEONInfoPtr info = RADEONPTR(pScrn);
1654    struct radeon_accel_state *accel_state = info->accel_state;
1655    struct radeon_exa_pixmap_priv *driver_priv;
1656    struct radeon_bo *scratch = NULL;
1657    struct radeon_bo *copy_dst;
1658    unsigned char *dst;
1659    unsigned size;
1660    uint32_t dst_domain;
1661    int bpp = pDst->drawable.bitsPerPixel;
1662    uint32_t scratch_pitch;
1663    uint32_t copy_pitch;
1664    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
1665    int ret;
1666    Bool flush = TRUE;
1667    Bool r;
1668    int i;
1669    struct r600_accel_object src_obj, dst_obj;
1670    uint32_t height, base_align;
1671
1672    if (bpp < 8)
1673	return FALSE;
1674
1675    driver_priv = exaGetPixmapDriverPrivate(pDst);
1676    if (!driver_priv || !driver_priv->bo)
1677	return FALSE;
1678
1679    /* If we know the BO won't be busy / in VRAM, don't bother with a scratch */
1680    copy_dst = driver_priv->bo;
1681    copy_pitch = pDst->devKind;
1682    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1683	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1684	    flush = FALSE;
1685	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain) &&
1686		!(dst_domain & RADEON_GEM_DOMAIN_VRAM))
1687		goto copy;
1688	}
1689    }
1690
1691    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1692    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1693    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1694    size = scratch_pitch * height * (bpp / 8);
1695    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1696    if (scratch == NULL) {
1697	goto copy;
1698    }
1699
1700    src_obj.pitch = scratch_pitch;
1701    src_obj.width = w;
1702    src_obj.height = h;
1703    src_obj.bpp = bpp;
1704    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1705    src_obj.bo = scratch;
1706    src_obj.tiling_flags = 0;
1707    src_obj.surface = NULL;
1708
1709    dst_obj.pitch = dst_pitch_hw;
1710    dst_obj.width = pDst->drawable.width;
1711    dst_obj.height = pDst->drawable.height;
1712    dst_obj.bpp = bpp;
1713    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1714    dst_obj.bo = radeon_get_pixmap_bo(pDst);
1715    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1716    dst_obj.surface = radeon_get_pixmap_surface(pDst);
1717
1718    if (!R600SetAccelState(pScrn,
1719			   &src_obj,
1720			   NULL,
1721			   &dst_obj,
1722			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1723			   3, 0xffffffff)) {
1724        goto copy;
1725    }
1726    copy_dst = scratch;
1727    copy_pitch = scratch_pitch * (bpp / 8);
1728    flush = FALSE;
1729
1730copy:
1731    if (flush)
1732	radeon_cs_flush_indirect(pScrn);
1733
1734    ret = radeon_bo_map(copy_dst, 0);
1735    if (ret) {
1736        r = FALSE;
1737        goto out;
1738    }
1739    r = TRUE;
1740    size = w * bpp / 8;
1741    dst = copy_dst->ptr;
1742    if (copy_dst == driver_priv->bo)
1743	dst += y * copy_pitch + x * bpp / 8;
1744    for (i = 0; i < h; i++) {
1745	memcpy(dst + i * copy_pitch, src, size);
1746        src += src_pitch;
1747    }
1748    radeon_bo_unmap(copy_dst);
1749
1750    if (copy_dst == scratch) {
1751	if (info->accel_state->vsync)
1752	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1753
1754	/* blit from gart to vram */
1755	EVERGREENDoPrepareCopy(pScrn);
1756	EVERGREENAppendCopyVertex(pScrn, 0, 0, x, y, w, h);
1757	EVERGREENDoCopyVline(pDst);
1758    }
1759
1760out:
1761    if (scratch)
1762	radeon_bo_unref(scratch);
1763    return r;
1764}
1765
1766static Bool
1767EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w,
1768			    int h, char *dst, int dst_pitch)
1769{
1770    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
1771    RADEONInfoPtr info = RADEONPTR(pScrn);
1772    struct radeon_accel_state *accel_state = info->accel_state;
1773    struct radeon_exa_pixmap_priv *driver_priv;
1774    struct radeon_bo *scratch = NULL;
1775    struct radeon_bo *copy_src;
1776    unsigned size;
1777    uint32_t src_domain = 0;
1778    int bpp = pSrc->drawable.bitsPerPixel;
1779    uint32_t scratch_pitch;
1780    uint32_t copy_pitch;
1781    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
1782    int ret;
1783    Bool flush = FALSE;
1784    Bool r;
1785    struct r600_accel_object src_obj, dst_obj;
1786    uint32_t height, base_align;
1787
1788    if (bpp < 8)
1789	return FALSE;
1790
1791    driver_priv = exaGetPixmapDriverPrivate(pSrc);
1792    if (!driver_priv || !driver_priv->bo)
1793	return FALSE;
1794
1795    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
1796    copy_src = driver_priv->bo;
1797    copy_pitch = pSrc->devKind;
1798    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1799	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
1800	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
1801	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
1802		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
1803		src_domain = 0;
1804	    else /* A write may be scheduled */
1805		flush = TRUE;
1806	}
1807
1808	if (!src_domain)
1809	    radeon_bo_is_busy(driver_priv->bo, &src_domain);
1810
1811	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
1812	    goto copy;
1813
1814    }
1815
1816    if (!accel_state->allowHWDFS)
1817	goto copy;
1818
1819    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1820    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1821    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1822    size = scratch_pitch * height * (bpp / 8);
1823    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1824    if (scratch == NULL) {
1825	goto copy;
1826    }
1827    radeon_cs_space_reset_bos(info->cs);
1828    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
1829				      RADEON_GEM_DOMAIN_VRAM, 0);
1830    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
1831    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
1832    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1833    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
1834    ret = radeon_cs_space_check(info->cs);
1835    if (ret) {
1836	goto copy;
1837    }
1838
1839    src_obj.pitch = src_pitch_hw;
1840    src_obj.width = pSrc->drawable.width;
1841    src_obj.height = pSrc->drawable.height;
1842    src_obj.bpp = bpp;
1843    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1844    src_obj.bo = radeon_get_pixmap_bo(pSrc);
1845    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1846    src_obj.surface = radeon_get_pixmap_surface(pSrc);
1847
1848    dst_obj.pitch = scratch_pitch;
1849    dst_obj.width = w;
1850    dst_obj.height = h;
1851    dst_obj.bo = scratch;
1852    dst_obj.bpp = bpp;
1853    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1854    dst_obj.tiling_flags = 0;
1855    dst_obj.surface = NULL;
1856
1857    if (!R600SetAccelState(pScrn,
1858			   &src_obj,
1859			   NULL,
1860			   &dst_obj,
1861			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1862			   3, 0xffffffff)) {
1863	goto copy;
1864    }
1865
1866    /* blit from vram to gart */
1867    EVERGREENDoPrepareCopy(pScrn);
1868    EVERGREENAppendCopyVertex(pScrn, x, y, 0, 0, w, h);
1869    EVERGREENDoCopy(pScrn);
1870    copy_src = scratch;
1871    copy_pitch = scratch_pitch * (bpp / 8);
1872    flush = TRUE;
1873
1874copy:
1875    if (flush)
1876	radeon_cs_flush_indirect(pScrn);
1877
1878    ret = radeon_bo_map(copy_src, 0);
1879    if (ret) {
1880	ErrorF("failed to map pixmap: %d\n", ret);
1881        r = FALSE;
1882        goto out;
1883    }
1884    r = TRUE;
1885    w *= bpp / 8;
1886    if (copy_src == driver_priv->bo)
1887	size = y * copy_pitch + x * bpp / 8;
1888    else
1889	size = 0;
1890    while (h--) {
1891	memcpy(dst, copy_src->ptr + size, w);
1892	size += copy_pitch;
1893        dst += dst_pitch;
1894    }
1895    radeon_bo_unmap(copy_src);
1896out:
1897    if (scratch)
1898	radeon_bo_unref(scratch);
1899    return r;
1900}
1901
1902static int
1903EVERGREENMarkSync(ScreenPtr pScreen)
1904{
1905    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1906    RADEONInfoPtr info = RADEONPTR(pScrn);
1907    struct radeon_accel_state *accel_state = info->accel_state;
1908
1909    return ++accel_state->exaSyncMarker;
1910
1911}
1912
1913static void
1914EVERGREENSync(ScreenPtr pScreen, int marker)
1915{
1916    return;
1917}
1918
1919static Bool
1920EVERGREENAllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
1921{
1922    RADEONInfoPtr info = RADEONPTR(pScrn);
1923    struct radeon_accel_state *accel_state = info->accel_state;
1924
1925    /* 512 bytes per shader for now */
1926    int size = 512 * 9;
1927
1928    accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
1929					     RADEON_GEM_DOMAIN_VRAM, 0);
1930    if (accel_state->shaders_bo == NULL) {
1931	ErrorF("Allocating shader failed\n");
1932	return FALSE;
1933    }
1934    return TRUE;
1935}
1936
1937static Bool
1938EVERGREENLoadShaders(ScrnInfoPtr pScrn)
1939{
1940    RADEONInfoPtr info = RADEONPTR(pScrn);
1941    struct radeon_accel_state *accel_state = info->accel_state;
1942    RADEONChipFamily ChipSet = info->ChipFamily;
1943    uint32_t *shader;
1944    int ret;
1945
1946    ret = radeon_bo_map(accel_state->shaders_bo, 1);
1947    if (ret) {
1948	FatalError("failed to map shader %d\n", ret);
1949	return FALSE;
1950    }
1951    shader = accel_state->shaders_bo->ptr;
1952
1953    /*  solid vs --------------------------------------- */
1954    accel_state->solid_vs_offset = 0;
1955    evergreen_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
1956
1957    /*  solid ps --------------------------------------- */
1958    accel_state->solid_ps_offset = 512;
1959    evergreen_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
1960
1961    /*  copy vs --------------------------------------- */
1962    accel_state->copy_vs_offset = 1024;
1963    evergreen_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
1964
1965    /*  copy ps --------------------------------------- */
1966    accel_state->copy_ps_offset = 1536;
1967    evergreen_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
1968
1969    /*  comp vs --------------------------------------- */
1970    accel_state->comp_vs_offset = 2048;
1971    evergreen_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
1972
1973    /*  comp ps --------------------------------------- */
1974    accel_state->comp_ps_offset = 2560;
1975    evergreen_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
1976
1977    /*  xv vs --------------------------------------- */
1978    accel_state->xv_vs_offset = 3072;
1979    evergreen_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
1980
1981    /*  xv ps --------------------------------------- */
1982    accel_state->xv_ps_offset = 3584;
1983    evergreen_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
1984
1985    radeon_bo_unmap(accel_state->shaders_bo);
1986
1987    return TRUE;
1988}
1989
1990static Bool
1991CAYMANLoadShaders(ScrnInfoPtr pScrn)
1992{
1993    RADEONInfoPtr info = RADEONPTR(pScrn);
1994    struct radeon_accel_state *accel_state = info->accel_state;
1995    RADEONChipFamily ChipSet = info->ChipFamily;
1996    uint32_t *shader;
1997    int ret;
1998
1999    ret = radeon_bo_map(accel_state->shaders_bo, 1);
2000    if (ret) {
2001	FatalError("failed to map shader %d\n", ret);
2002	return FALSE;
2003    }
2004    shader = accel_state->shaders_bo->ptr;
2005
2006    /*  solid vs --------------------------------------- */
2007    accel_state->solid_vs_offset = 0;
2008    cayman_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
2009
2010    /*  solid ps --------------------------------------- */
2011    accel_state->solid_ps_offset = 512;
2012    cayman_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
2013
2014    /*  copy vs --------------------------------------- */
2015    accel_state->copy_vs_offset = 1024;
2016    cayman_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
2017
2018    /*  copy ps --------------------------------------- */
2019    accel_state->copy_ps_offset = 1536;
2020    cayman_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2021
2022    /*  comp vs --------------------------------------- */
2023    accel_state->comp_vs_offset = 2048;
2024    cayman_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2025
2026    /*  comp ps --------------------------------------- */
2027    accel_state->comp_ps_offset = 2560;
2028    cayman_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2029
2030    /*  xv vs --------------------------------------- */
2031    accel_state->xv_vs_offset = 3072;
2032    cayman_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2033
2034    /*  xv ps --------------------------------------- */
2035    accel_state->xv_ps_offset = 3584;
2036    cayman_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2037
2038    radeon_bo_unmap(accel_state->shaders_bo);
2039
2040    return TRUE;
2041}
2042
2043Bool
2044EVERGREENDrawInit(ScreenPtr pScreen)
2045{
2046    ScrnInfoPtr pScrn =  xf86ScreenToScrn(pScreen);
2047    RADEONInfoPtr info   = RADEONPTR(pScrn);
2048
2049    if (info->accel_state->exa == NULL) {
2050	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2051	return FALSE;
2052    }
2053
2054    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2055    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2056
2057    info->accel_state->exa->PrepareSolid = EVERGREENPrepareSolid;
2058    info->accel_state->exa->Solid = EVERGREENSolid;
2059    info->accel_state->exa->DoneSolid = EVERGREENDoneSolid;
2060
2061    info->accel_state->exa->PrepareCopy = EVERGREENPrepareCopy;
2062    info->accel_state->exa->Copy = EVERGREENCopy;
2063    info->accel_state->exa->DoneCopy = EVERGREENDoneCopy;
2064
2065    info->accel_state->exa->MarkSync = EVERGREENMarkSync;
2066    info->accel_state->exa->WaitMarker = EVERGREENSync;
2067
2068    info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
2069    info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
2070    info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
2071    info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
2072    info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
2073    info->accel_state->exa->UploadToScreen = EVERGREENUploadToScreen;
2074    info->accel_state->exa->DownloadFromScreen = EVERGREENDownloadFromScreen;
2075    info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
2076    info->accel_state->exa->SharePixmapBacking = RADEONEXASharePixmapBacking;
2077    info->accel_state->exa->SetSharedPixmapBacking = RADEONEXASetSharedPixmapBacking;
2078    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS | EXA_SUPPORTS_PREPARE_AUX |
2079	EXA_HANDLES_PIXMAPS | EXA_MIXED_PIXMAPS;
2080    info->accel_state->exa->pixmapOffsetAlign = 256;
2081    info->accel_state->exa->pixmapPitchAlign = 256;
2082
2083    info->accel_state->exa->CheckComposite = EVERGREENCheckComposite;
2084    info->accel_state->exa->PrepareComposite = EVERGREENPrepareComposite;
2085    info->accel_state->exa->Composite = EVERGREENComposite;
2086    info->accel_state->exa->DoneComposite = EVERGREENDoneComposite;
2087
2088    info->accel_state->exa->maxPitchBytes = 32768;
2089    info->accel_state->exa->maxX = 8192;
2090    info->accel_state->exa->maxY = 8192;
2091
2092    /* not supported yet */
2093    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2094	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2095	info->accel_state->vsync = TRUE;
2096    } else
2097	info->accel_state->vsync = FALSE;
2098
2099    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
2100	free(info->accel_state->exa);
2101	return FALSE;
2102    }
2103
2104    info->accel_state->XInited3D = FALSE;
2105    info->accel_state->copy_area = NULL;
2106    info->accel_state->src_obj[0].bo = NULL;
2107    info->accel_state->src_obj[1].bo = NULL;
2108    info->accel_state->dst_obj.bo = NULL;
2109    info->accel_state->copy_area_bo = NULL;
2110    info->accel_state->vbo.vb_start_op = -1;
2111    info->accel_state->cbuf.vb_start_op = -1;
2112    info->accel_state->finish_op = evergreen_finish_op;
2113    info->accel_state->vbo.verts_per_op = 3;
2114    info->accel_state->cbuf.verts_per_op = 1;
2115    RADEONVlineHelperClear(pScrn);
2116
2117    radeon_vbo_init_lists(pScrn);
2118
2119    if (!EVERGREENAllocShaders(pScrn, pScreen))
2120	return FALSE;
2121
2122    if (info->ChipFamily >= CHIP_FAMILY_CAYMAN) {
2123	if (!CAYMANLoadShaders(pScrn))
2124	    return FALSE;
2125    } else {
2126	if (!EVERGREENLoadShaders(pScrn))
2127	    return FALSE;
2128    }
2129
2130    exaMarkSync(pScreen);
2131
2132    return TRUE;
2133
2134}
2135