1de2362d3Smrg/*
2de2362d3Smrg * Copyright 2008 Advanced Micro Devices, Inc.
3de2362d3Smrg *
4de2362d3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5de2362d3Smrg * copy of this software and associated documentation files (the "Software"),
6de2362d3Smrg * to deal in the Software without restriction, including without limitation
7de2362d3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8de2362d3Smrg * and/or sell copies of the Software, and to permit persons to whom the
9de2362d3Smrg * Software is furnished to do so, subject to the following conditions:
10de2362d3Smrg *
11de2362d3Smrg * The above copyright notice and this permission notice (including the next
12de2362d3Smrg * paragraph) shall be included in all copies or substantial portions of the
13de2362d3Smrg * Software.
14de2362d3Smrg *
15de2362d3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16de2362d3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17de2362d3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18de2362d3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19de2362d3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20de2362d3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21de2362d3Smrg * SOFTWARE.
22de2362d3Smrg *
23de2362d3Smrg * Author: Alex Deucher <alexander.deucher@amd.com>
24de2362d3Smrg *
25de2362d3Smrg */
26de2362d3Smrg
27de2362d3Smrg#ifdef HAVE_CONFIG_H
28de2362d3Smrg#include "config.h"
29de2362d3Smrg#endif
30de2362d3Smrg
31de2362d3Smrg#include "xf86.h"
32de2362d3Smrg
33de2362d3Smrg#include "exa.h"
34de2362d3Smrg
35de2362d3Smrg#include "radeon.h"
36de2362d3Smrg#include "radeon_reg.h"
37de2362d3Smrg#include "r600_shader.h"
38de2362d3Smrg#include "r600_reg.h"
39de2362d3Smrg#include "r600_state.h"
40de2362d3Smrg#include "radeon_exa_shared.h"
41de2362d3Smrg#include "radeon_vbo.h"
42de2362d3Smrg
43de2362d3Smrg/* #define SHOW_VERTEXES */
44de2362d3Smrg
45de2362d3SmrgBool
46de2362d3SmrgR600SetAccelState(ScrnInfoPtr pScrn,
47de2362d3Smrg		  struct r600_accel_object *src0,
48de2362d3Smrg		  struct r600_accel_object *src1,
49de2362d3Smrg		  struct r600_accel_object *dst,
50de2362d3Smrg		  uint32_t vs_offset, uint32_t ps_offset,
51de2362d3Smrg		  int rop, Pixel planemask)
52de2362d3Smrg{
53de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
54de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
5518781e08Smrg    uint32_t pitch_align = 0x7;
56de2362d3Smrg    int ret;
57de2362d3Smrg
58de2362d3Smrg    if (src0) {
59de2362d3Smrg	memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object));
60de2362d3Smrg	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
6118781e08Smrg	if (src0->surface)
62de2362d3Smrg		accel_state->src_size[0] = src0->surface->bo_size;
63de2362d3Smrg
64de2362d3Smrg	/* bad pitch */
65de2362d3Smrg	if (accel_state->src_obj[0].pitch & pitch_align)
66de2362d3Smrg	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
67de2362d3Smrg
68de2362d3Smrg    } else {
69de2362d3Smrg	memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
70de2362d3Smrg	accel_state->src_size[0] = 0;
71de2362d3Smrg    }
72de2362d3Smrg
73de2362d3Smrg    if (src1) {
74de2362d3Smrg	memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object));
75de2362d3Smrg	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
7618781e08Smrg	if (src1->surface) {
77de2362d3Smrg		accel_state->src_size[1] = src1->surface->bo_size;
78de2362d3Smrg	}
79de2362d3Smrg
80de2362d3Smrg	/* bad pitch */
81de2362d3Smrg	if (accel_state->src_obj[1].pitch & pitch_align)
82de2362d3Smrg	    RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
83de2362d3Smrg
84de2362d3Smrg    } else {
85de2362d3Smrg	memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
86de2362d3Smrg	accel_state->src_size[1] = 0;
87de2362d3Smrg    }
88de2362d3Smrg
89de2362d3Smrg    if (dst) {
90de2362d3Smrg	memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object));
91de2362d3Smrg	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
9218781e08Smrg	if (dst->surface) {
93de2362d3Smrg		accel_state->dst_size = dst->surface->bo_size;
94de2362d3Smrg	} else
95de2362d3Smrg	{
96de2362d3Smrg		accel_state->dst_obj.tiling_flags = 0;
97de2362d3Smrg	}
98de2362d3Smrg	if (accel_state->dst_obj.pitch & pitch_align)
99de2362d3Smrg	    RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
100de2362d3Smrg
101de2362d3Smrg    } else {
102de2362d3Smrg	memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
103de2362d3Smrg	accel_state->dst_size = 0;
104de2362d3Smrg    }
105de2362d3Smrg
10618781e08Smrg    if (CS_FULL(info->cs))
107de2362d3Smrg	radeon_cs_flush_indirect(pScrn);
108de2362d3Smrg
109de2362d3Smrg    accel_state->rop = rop;
110de2362d3Smrg    accel_state->planemask = planemask;
111de2362d3Smrg
112de2362d3Smrg    accel_state->vs_size = 512;
113de2362d3Smrg    accel_state->ps_size = 512;
11418781e08Smrg    accel_state->vs_mc_addr = vs_offset;
11518781e08Smrg    accel_state->ps_mc_addr = ps_offset;
116de2362d3Smrg
11718781e08Smrg    radeon_cs_space_reset_bos(info->cs);
11818781e08Smrg    radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
11918781e08Smrg				      RADEON_GEM_DOMAIN_VRAM, 0);
12018781e08Smrg    if (accel_state->src_obj[0].bo)
12118781e08Smrg	radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
12218781e08Smrg					  accel_state->src_obj[0].domain, 0);
12318781e08Smrg    if (accel_state->src_obj[1].bo)
12418781e08Smrg	radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
12518781e08Smrg					  accel_state->src_obj[1].domain, 0);
12618781e08Smrg    if (accel_state->dst_obj.bo)
12718781e08Smrg	radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
12818781e08Smrg					  0, accel_state->dst_obj.domain);
12918781e08Smrg    ret = radeon_cs_space_check(info->cs);
13018781e08Smrg    if (ret)
13118781e08Smrg	RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
132de2362d3Smrg
133de2362d3Smrg    return TRUE;
134de2362d3Smrg}
135de2362d3Smrg
136de2362d3Smrgstatic Bool
137de2362d3SmrgR600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
138de2362d3Smrg{
139de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
140de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
141de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
142de2362d3Smrg    cb_config_t     cb_conf;
143de2362d3Smrg    shader_config_t vs_conf, ps_conf;
144de2362d3Smrg    uint32_t a, r, g, b;
145de2362d3Smrg    float ps_alu_consts[4];
146de2362d3Smrg    struct r600_accel_object dst;
147de2362d3Smrg
148de2362d3Smrg    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
149de2362d3Smrg	RADEON_FALLBACK(("R600CheckDatatype failed\n"));
150de2362d3Smrg    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
151de2362d3Smrg	RADEON_FALLBACK(("invalid planemask\n"));
152de2362d3Smrg
15339413783Smrg    dst.bo = radeon_get_pixmap_bo(pPix)->bo.radeon;
15418781e08Smrg    dst.tiling_flags = radeon_get_pixmap_tiling(pPix);
15518781e08Smrg    dst.surface = radeon_get_pixmap_surface(pPix);
156de2362d3Smrg
157de2362d3Smrg    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
158de2362d3Smrg    dst.width = pPix->drawable.width;
159de2362d3Smrg    dst.height = pPix->drawable.height;
160de2362d3Smrg    dst.bpp = pPix->drawable.bitsPerPixel;
161de2362d3Smrg    dst.domain = RADEON_GEM_DOMAIN_VRAM;
162de2362d3Smrg
163de2362d3Smrg    if (!R600SetAccelState(pScrn,
164de2362d3Smrg			   NULL,
165de2362d3Smrg			   NULL,
166de2362d3Smrg			   &dst,
167de2362d3Smrg			   accel_state->solid_vs_offset, accel_state->solid_ps_offset,
168de2362d3Smrg			   alu, pm))
169de2362d3Smrg	return FALSE;
170de2362d3Smrg
171de2362d3Smrg    CLEAR (cb_conf);
172de2362d3Smrg    CLEAR (vs_conf);
173de2362d3Smrg    CLEAR (ps_conf);
174de2362d3Smrg
175de2362d3Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
176de2362d3Smrg    radeon_cp_start(pScrn);
177de2362d3Smrg
17818781e08Smrg    r600_set_default_state(pScrn);
179de2362d3Smrg
18018781e08Smrg    r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
18118781e08Smrg    r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
18218781e08Smrg    r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
183de2362d3Smrg
184de2362d3Smrg    /* Shader */
185de2362d3Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
186de2362d3Smrg    vs_conf.shader_size         = accel_state->vs_size;
187de2362d3Smrg    vs_conf.num_gprs            = 2;
188de2362d3Smrg    vs_conf.stack_size          = 0;
189de2362d3Smrg    vs_conf.bo                  = accel_state->shaders_bo;
19018781e08Smrg    r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
191de2362d3Smrg
192de2362d3Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
193de2362d3Smrg    ps_conf.shader_size         = accel_state->ps_size;
194de2362d3Smrg    ps_conf.num_gprs            = 1;
195de2362d3Smrg    ps_conf.stack_size          = 0;
196de2362d3Smrg    ps_conf.uncached_first_inst = 1;
197de2362d3Smrg    ps_conf.clamp_consts        = 0;
198de2362d3Smrg    ps_conf.export_mode         = 2;
199de2362d3Smrg    ps_conf.bo                  = accel_state->shaders_bo;
20018781e08Smrg    r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
201de2362d3Smrg
202de2362d3Smrg    cb_conf.id = 0;
203de2362d3Smrg    cb_conf.w = accel_state->dst_obj.pitch;
204de2362d3Smrg    cb_conf.h = accel_state->dst_obj.height;
20518781e08Smrg    cb_conf.base = 0;
206de2362d3Smrg    cb_conf.bo = accel_state->dst_obj.bo;
207de2362d3Smrg    cb_conf.surface = accel_state->dst_obj.surface;
208de2362d3Smrg
209de2362d3Smrg    if (accel_state->dst_obj.bpp == 8) {
210de2362d3Smrg	cb_conf.format = COLOR_8;
211de2362d3Smrg	cb_conf.comp_swap = 3; /* A */
212de2362d3Smrg    } else if (accel_state->dst_obj.bpp == 16) {
213de2362d3Smrg	cb_conf.format = COLOR_5_6_5;
214de2362d3Smrg	cb_conf.comp_swap = 2; /* RGB */
215de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
216de2362d3Smrg	cb_conf.endian = ENDIAN_8IN16;
217de2362d3Smrg#endif
218de2362d3Smrg    } else {
219de2362d3Smrg	cb_conf.format = COLOR_8_8_8_8;
220de2362d3Smrg	cb_conf.comp_swap = 1; /* ARGB */
221de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
222de2362d3Smrg	cb_conf.endian = ENDIAN_8IN32;
223de2362d3Smrg#endif
224de2362d3Smrg    }
225de2362d3Smrg    cb_conf.source_format = 1;
226de2362d3Smrg    cb_conf.blend_clamp = 1;
227de2362d3Smrg    /* Render setup */
228de2362d3Smrg    if (accel_state->planemask & 0x000000ff)
229de2362d3Smrg	cb_conf.pmask |= 4; /* B */
230de2362d3Smrg    if (accel_state->planemask & 0x0000ff00)
231de2362d3Smrg	cb_conf.pmask |= 2; /* G */
232de2362d3Smrg    if (accel_state->planemask & 0x00ff0000)
233de2362d3Smrg	cb_conf.pmask |= 1; /* R */
234de2362d3Smrg    if (accel_state->planemask & 0xff000000)
235de2362d3Smrg	cb_conf.pmask |= 8; /* A */
236de2362d3Smrg    cb_conf.rop = accel_state->rop;
237de2362d3Smrg    if (accel_state->dst_obj.tiling_flags == 0)
238de2362d3Smrg	cb_conf.array_mode = 0;
23918781e08Smrg    r600_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
240de2362d3Smrg
24118781e08Smrg    r600_set_spi(pScrn, 0, 0);
242de2362d3Smrg
243de2362d3Smrg    /* PS alu constants */
244de2362d3Smrg    if (accel_state->dst_obj.bpp == 16) {
245de2362d3Smrg	r = (fg >> 11) & 0x1f;
246de2362d3Smrg	g = (fg >> 5) & 0x3f;
247de2362d3Smrg	b = (fg >> 0) & 0x1f;
248de2362d3Smrg	ps_alu_consts[0] = (float)r / 31; /* R */
249de2362d3Smrg	ps_alu_consts[1] = (float)g / 63; /* G */
250de2362d3Smrg	ps_alu_consts[2] = (float)b / 31; /* B */
251de2362d3Smrg	ps_alu_consts[3] = 1.0; /* A */
252de2362d3Smrg    } else if (accel_state->dst_obj.bpp == 8) {
253de2362d3Smrg	a = (fg >> 0) & 0xff;
254de2362d3Smrg	ps_alu_consts[0] = 0.0; /* R */
255de2362d3Smrg	ps_alu_consts[1] = 0.0; /* G */
256de2362d3Smrg	ps_alu_consts[2] = 0.0; /* B */
257de2362d3Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
258de2362d3Smrg    } else {
259de2362d3Smrg	a = (fg >> 24) & 0xff;
260de2362d3Smrg	r = (fg >> 16) & 0xff;
261de2362d3Smrg	g = (fg >> 8) & 0xff;
262de2362d3Smrg	b = (fg >> 0) & 0xff;
263de2362d3Smrg	ps_alu_consts[0] = (float)r / 255; /* R */
264de2362d3Smrg	ps_alu_consts[1] = (float)g / 255; /* G */
265de2362d3Smrg	ps_alu_consts[2] = (float)b / 255; /* B */
266de2362d3Smrg	ps_alu_consts[3] = (float)a / 255; /* A */
267de2362d3Smrg    }
26818781e08Smrg    r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_ps,
269de2362d3Smrg			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
270de2362d3Smrg
271de2362d3Smrg    if (accel_state->vsync)
272de2362d3Smrg	RADEONVlineHelperClear(pScrn);
273de2362d3Smrg
274de2362d3Smrg    accel_state->dst_pix = pPix;
275de2362d3Smrg    accel_state->fg = fg;
276de2362d3Smrg
277de2362d3Smrg    return TRUE;
278de2362d3Smrg}
279de2362d3Smrg
280de2362d3Smrgstatic void
281de2362d3SmrgR600DoneSolid(PixmapPtr pPix)
282de2362d3Smrg{
283de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
284de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
285de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
286de2362d3Smrg
287de2362d3Smrg    if (accel_state->vsync)
28818781e08Smrg	r600_cp_wait_vline_sync(pScrn, pPix,
289de2362d3Smrg				accel_state->vline_crtc,
290de2362d3Smrg				accel_state->vline_y1,
291de2362d3Smrg				accel_state->vline_y2);
292de2362d3Smrg
293de2362d3Smrg    r600_finish_op(pScrn, 8);
294de2362d3Smrg}
295de2362d3Smrg
296de2362d3Smrgstatic void
297de2362d3SmrgR600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
298de2362d3Smrg{
299de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
300de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
301de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
302de2362d3Smrg    float *vb;
303de2362d3Smrg
30418781e08Smrg    if (CS_FULL(info->cs)) {
305de2362d3Smrg	R600DoneSolid(info->accel_state->dst_pix);
306de2362d3Smrg	radeon_cs_flush_indirect(pScrn);
307de2362d3Smrg	R600PrepareSolid(accel_state->dst_pix,
308de2362d3Smrg			 accel_state->rop,
309de2362d3Smrg			 accel_state->planemask,
310de2362d3Smrg			 accel_state->fg);
311de2362d3Smrg    }
312de2362d3Smrg
313de2362d3Smrg    if (accel_state->vsync)
314de2362d3Smrg	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
315de2362d3Smrg
316de2362d3Smrg    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
317de2362d3Smrg
318de2362d3Smrg    vb[0] = (float)x1;
319de2362d3Smrg    vb[1] = (float)y1;
320de2362d3Smrg
321de2362d3Smrg    vb[2] = (float)x1;
322de2362d3Smrg    vb[3] = (float)y2;
323de2362d3Smrg
324de2362d3Smrg    vb[4] = (float)x2;
325de2362d3Smrg    vb[5] = (float)y2;
326de2362d3Smrg
327de2362d3Smrg    radeon_vbo_commit(pScrn, &accel_state->vbo);
328de2362d3Smrg}
329de2362d3Smrg
330de2362d3Smrgstatic void
331de2362d3SmrgR600DoPrepareCopy(ScrnInfoPtr pScrn)
332de2362d3Smrg{
333de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
334de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
335de2362d3Smrg    cb_config_t     cb_conf;
336de2362d3Smrg    tex_resource_t  tex_res;
337de2362d3Smrg    tex_sampler_t   tex_samp;
338de2362d3Smrg    shader_config_t vs_conf, ps_conf;
339de2362d3Smrg
340de2362d3Smrg    CLEAR (cb_conf);
341de2362d3Smrg    CLEAR (tex_res);
342de2362d3Smrg    CLEAR (tex_samp);
343de2362d3Smrg    CLEAR (vs_conf);
344de2362d3Smrg    CLEAR (ps_conf);
345de2362d3Smrg
346de2362d3Smrg    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
347de2362d3Smrg    radeon_cp_start(pScrn);
348de2362d3Smrg
34918781e08Smrg    r600_set_default_state(pScrn);
350de2362d3Smrg
35118781e08Smrg    r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
35218781e08Smrg    r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
35318781e08Smrg    r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
354de2362d3Smrg
355de2362d3Smrg    /* Shader */
356de2362d3Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
357de2362d3Smrg    vs_conf.shader_size         = accel_state->vs_size;
358de2362d3Smrg    vs_conf.num_gprs            = 2;
359de2362d3Smrg    vs_conf.stack_size          = 0;
360de2362d3Smrg    vs_conf.bo                  = accel_state->shaders_bo;
36118781e08Smrg    r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
362de2362d3Smrg
363de2362d3Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
364de2362d3Smrg    ps_conf.shader_size         = accel_state->ps_size;
365de2362d3Smrg    ps_conf.num_gprs            = 1;
366de2362d3Smrg    ps_conf.stack_size          = 0;
367de2362d3Smrg    ps_conf.uncached_first_inst = 1;
368de2362d3Smrg    ps_conf.clamp_consts        = 0;
369de2362d3Smrg    ps_conf.export_mode         = 2;
370de2362d3Smrg    ps_conf.bo                  = accel_state->shaders_bo;
37118781e08Smrg    r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
372de2362d3Smrg
373de2362d3Smrg    /* Texture */
374de2362d3Smrg    tex_res.id                  = 0;
375de2362d3Smrg    tex_res.w                   = accel_state->src_obj[0].width;
376de2362d3Smrg    tex_res.h                   = accel_state->src_obj[0].height;
377de2362d3Smrg    tex_res.pitch               = accel_state->src_obj[0].pitch;
378de2362d3Smrg    tex_res.depth               = 0;
379de2362d3Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
38018781e08Smrg    tex_res.base                = 0;
38118781e08Smrg    tex_res.mip_base            = 0;
382de2362d3Smrg    tex_res.size                = accel_state->src_size[0];
383de2362d3Smrg    tex_res.bo                  = accel_state->src_obj[0].bo;
384de2362d3Smrg    tex_res.mip_bo              = accel_state->src_obj[0].bo;
385de2362d3Smrg    tex_res.surface             = accel_state->src_obj[0].surface;
386de2362d3Smrg    if (accel_state->src_obj[0].bpp == 8) {
387de2362d3Smrg	tex_res.format              = FMT_8;
388de2362d3Smrg	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
389de2362d3Smrg	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
390de2362d3Smrg	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
391de2362d3Smrg	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
392de2362d3Smrg    } else if (accel_state->src_obj[0].bpp == 16) {
393de2362d3Smrg	tex_res.format              = FMT_5_6_5;
394de2362d3Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
395de2362d3Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
396de2362d3Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
397de2362d3Smrg	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
398de2362d3Smrg    } else {
399de2362d3Smrg	tex_res.format              = FMT_8_8_8_8;
400de2362d3Smrg	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
401de2362d3Smrg	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
402de2362d3Smrg	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
403de2362d3Smrg	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
404de2362d3Smrg    }
405de2362d3Smrg
406de2362d3Smrg    tex_res.request_size        = 1;
407de2362d3Smrg    tex_res.base_level          = 0;
408de2362d3Smrg    tex_res.last_level          = 0;
409de2362d3Smrg    tex_res.perf_modulation     = 0;
410de2362d3Smrg    if (accel_state->src_obj[0].tiling_flags == 0)
411de2362d3Smrg	tex_res.tile_mode           = 1;
41218781e08Smrg    r600_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
413de2362d3Smrg
414de2362d3Smrg    tex_samp.id                 = 0;
415de2362d3Smrg    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
416de2362d3Smrg    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
417de2362d3Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
418de2362d3Smrg    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
419de2362d3Smrg    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
420de2362d3Smrg    tex_samp.mc_coord_truncate  = 1;
421de2362d3Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
422de2362d3Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
42318781e08Smrg    r600_set_tex_sampler(pScrn, &tex_samp);
424de2362d3Smrg
425de2362d3Smrg    cb_conf.id = 0;
426de2362d3Smrg    cb_conf.w = accel_state->dst_obj.pitch;
427de2362d3Smrg    cb_conf.h = accel_state->dst_obj.height;
42818781e08Smrg    cb_conf.base = 0;
429de2362d3Smrg    cb_conf.bo = accel_state->dst_obj.bo;
430de2362d3Smrg    cb_conf.surface = accel_state->dst_obj.surface;
431de2362d3Smrg    if (accel_state->dst_obj.bpp == 8) {
432de2362d3Smrg	cb_conf.format = COLOR_8;
433de2362d3Smrg	cb_conf.comp_swap = 3; /* A */
434de2362d3Smrg    } else if (accel_state->dst_obj.bpp == 16) {
435de2362d3Smrg	cb_conf.format = COLOR_5_6_5;
436de2362d3Smrg	cb_conf.comp_swap = 2; /* RGB */
437de2362d3Smrg    } else {
438de2362d3Smrg	cb_conf.format = COLOR_8_8_8_8;
439de2362d3Smrg	cb_conf.comp_swap = 1; /* ARGB */
440de2362d3Smrg    }
441de2362d3Smrg    cb_conf.source_format = 1;
442de2362d3Smrg    cb_conf.blend_clamp = 1;
443de2362d3Smrg
444de2362d3Smrg    /* Render setup */
445de2362d3Smrg    if (accel_state->planemask & 0x000000ff)
446de2362d3Smrg	cb_conf.pmask |= 4; /* B */
447de2362d3Smrg    if (accel_state->planemask & 0x0000ff00)
448de2362d3Smrg	cb_conf.pmask |= 2; /* G */
449de2362d3Smrg    if (accel_state->planemask & 0x00ff0000)
450de2362d3Smrg	cb_conf.pmask |= 1; /* R */
451de2362d3Smrg    if (accel_state->planemask & 0xff000000)
452de2362d3Smrg	cb_conf.pmask |= 8; /* A */
453de2362d3Smrg    cb_conf.rop = accel_state->rop;
454de2362d3Smrg    if (accel_state->dst_obj.tiling_flags == 0)
455de2362d3Smrg	cb_conf.array_mode = 0;
45618781e08Smrg    r600_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
457de2362d3Smrg
45818781e08Smrg    r600_set_spi(pScrn, (1 - 1), 1);
459de2362d3Smrg
460de2362d3Smrg}
461de2362d3Smrg
462de2362d3Smrgstatic void
463de2362d3SmrgR600DoCopy(ScrnInfoPtr pScrn)
464de2362d3Smrg{
465de2362d3Smrg    r600_finish_op(pScrn, 16);
466de2362d3Smrg}
467de2362d3Smrg
468de2362d3Smrgstatic void
469de2362d3SmrgR600DoCopyVline(PixmapPtr pPix)
470de2362d3Smrg{
471de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
472de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
473de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
474de2362d3Smrg
475de2362d3Smrg    if (accel_state->vsync)
47618781e08Smrg	r600_cp_wait_vline_sync(pScrn, pPix,
477de2362d3Smrg				accel_state->vline_crtc,
478de2362d3Smrg				accel_state->vline_y1,
479de2362d3Smrg				accel_state->vline_y2);
480de2362d3Smrg
481de2362d3Smrg    r600_finish_op(pScrn, 16);
482de2362d3Smrg}
483de2362d3Smrg
484de2362d3Smrgstatic void
485de2362d3SmrgR600AppendCopyVertex(ScrnInfoPtr pScrn,
486de2362d3Smrg		     int srcX, int srcY,
487de2362d3Smrg		     int dstX, int dstY,
488de2362d3Smrg		     int w, int h)
489de2362d3Smrg{
490de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
491de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
492de2362d3Smrg    float *vb;
493de2362d3Smrg
494de2362d3Smrg    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
495de2362d3Smrg
496de2362d3Smrg    vb[0] = (float)dstX;
497de2362d3Smrg    vb[1] = (float)dstY;
498de2362d3Smrg    vb[2] = (float)srcX;
499de2362d3Smrg    vb[3] = (float)srcY;
500de2362d3Smrg
501de2362d3Smrg    vb[4] = (float)dstX;
502de2362d3Smrg    vb[5] = (float)(dstY + h);
503de2362d3Smrg    vb[6] = (float)srcX;
504de2362d3Smrg    vb[7] = (float)(srcY + h);
505de2362d3Smrg
506de2362d3Smrg    vb[8] = (float)(dstX + w);
507de2362d3Smrg    vb[9] = (float)(dstY + h);
508de2362d3Smrg    vb[10] = (float)(srcX + w);
509de2362d3Smrg    vb[11] = (float)(srcY + h);
510de2362d3Smrg
511de2362d3Smrg    radeon_vbo_commit(pScrn, &accel_state->vbo);
512de2362d3Smrg}
513de2362d3Smrg
514de2362d3Smrgstatic Bool
515de2362d3SmrgR600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
516de2362d3Smrg		int xdir, int ydir,
517de2362d3Smrg		int rop,
518de2362d3Smrg		Pixel planemask)
519de2362d3Smrg{
520de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
521de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
522de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
523de2362d3Smrg    struct r600_accel_object src_obj, dst_obj;
524de2362d3Smrg
525de2362d3Smrg    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
526de2362d3Smrg	RADEON_FALLBACK(("R600CheckDatatype src failed\n"));
527de2362d3Smrg    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
528de2362d3Smrg	RADEON_FALLBACK(("R600CheckDatatype dst failed\n"));
529de2362d3Smrg    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
530de2362d3Smrg	RADEON_FALLBACK(("Invalid planemask\n"));
531de2362d3Smrg
532de2362d3Smrg    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
533de2362d3Smrg    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
534de2362d3Smrg
535de2362d3Smrg    accel_state->same_surface = FALSE;
536de2362d3Smrg
53739413783Smrg    src_obj.bo = radeon_get_pixmap_bo(pSrc)->bo.radeon;
53839413783Smrg    dst_obj.bo = radeon_get_pixmap_bo(pDst)->bo.radeon;
53918781e08Smrg    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
54018781e08Smrg    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
54118781e08Smrg    src_obj.surface = radeon_get_pixmap_surface(pSrc);
54218781e08Smrg    dst_obj.surface = radeon_get_pixmap_surface(pDst);
54339413783Smrg    if (src_obj.bo == dst_obj.bo)
54418781e08Smrg	accel_state->same_surface = TRUE;
545de2362d3Smrg
546de2362d3Smrg    src_obj.width = pSrc->drawable.width;
547de2362d3Smrg    src_obj.height = pSrc->drawable.height;
548de2362d3Smrg    src_obj.bpp = pSrc->drawable.bitsPerPixel;
549de2362d3Smrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
550de2362d3Smrg
551de2362d3Smrg    dst_obj.width = pDst->drawable.width;
552de2362d3Smrg    dst_obj.height = pDst->drawable.height;
553de2362d3Smrg    dst_obj.bpp = pDst->drawable.bitsPerPixel;
55418781e08Smrg    if (radeon_get_pixmap_shared(pDst) == TRUE) {
55518781e08Smrg	dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
55618781e08Smrg    } else
55718781e08Smrg	dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
558de2362d3Smrg
559de2362d3Smrg    if (!R600SetAccelState(pScrn,
560de2362d3Smrg			   &src_obj,
561de2362d3Smrg			   NULL,
562de2362d3Smrg			   &dst_obj,
563de2362d3Smrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
564de2362d3Smrg			   rop, planemask))
565de2362d3Smrg	return FALSE;
566de2362d3Smrg
567de2362d3Smrg    if (accel_state->same_surface == TRUE) {
568de2362d3Smrg	unsigned long size = accel_state->dst_obj.surface->bo_size;
569de2362d3Smrg	unsigned long align = accel_state->dst_obj.surface->bo_alignment;
570de2362d3Smrg
57118781e08Smrg	if (accel_state->copy_area_bo) {
57218781e08Smrg	    radeon_bo_unref(accel_state->copy_area_bo);
57318781e08Smrg	    accel_state->copy_area_bo = NULL;
57418781e08Smrg	}
57518781e08Smrg	accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, align,
57618781e08Smrg						   RADEON_GEM_DOMAIN_VRAM,
57718781e08Smrg						   0);
57839413783Smrg	if (!accel_state->copy_area_bo)
57918781e08Smrg	    RADEON_FALLBACK(("temp copy surface alloc failed\n"));
58018781e08Smrg
58118781e08Smrg	radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
58218781e08Smrg					  0, RADEON_GEM_DOMAIN_VRAM);
58318781e08Smrg	if (radeon_cs_space_check(info->cs)) {
58418781e08Smrg	    radeon_bo_unref(accel_state->copy_area_bo);
58518781e08Smrg	    accel_state->copy_area_bo = NULL;
58618781e08Smrg	    return FALSE;
587de2362d3Smrg	}
58818781e08Smrg	accel_state->copy_area = (void*)accel_state->copy_area_bo;
589de2362d3Smrg    } else
590de2362d3Smrg	R600DoPrepareCopy(pScrn);
591de2362d3Smrg
592de2362d3Smrg    if (accel_state->vsync)
593de2362d3Smrg	RADEONVlineHelperClear(pScrn);
594de2362d3Smrg
595de2362d3Smrg    accel_state->dst_pix = pDst;
596de2362d3Smrg    accel_state->src_pix = pSrc;
597de2362d3Smrg    accel_state->xdir = xdir;
598de2362d3Smrg    accel_state->ydir = ydir;
599de2362d3Smrg
600de2362d3Smrg    return TRUE;
601de2362d3Smrg}
602de2362d3Smrg
603de2362d3Smrgstatic void
604de2362d3SmrgR600DoneCopy(PixmapPtr pDst)
605de2362d3Smrg{
606de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
607de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
608de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
609de2362d3Smrg
610de2362d3Smrg    if (!accel_state->same_surface)
611de2362d3Smrg	R600DoCopyVline(pDst);
612de2362d3Smrg
613de2362d3Smrg    if (accel_state->copy_area) {
614de2362d3Smrg	accel_state->copy_area = NULL;
615de2362d3Smrg    }
616de2362d3Smrg
617de2362d3Smrg}
618de2362d3Smrg
619de2362d3Smrgstatic void
620de2362d3SmrgR600Copy(PixmapPtr pDst,
621de2362d3Smrg	 int srcX, int srcY,
622de2362d3Smrg	 int dstX, int dstY,
623de2362d3Smrg	 int w, int h)
624de2362d3Smrg{
625de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
626de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
627de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
628de2362d3Smrg
629de2362d3Smrg    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
630de2362d3Smrg	return;
631de2362d3Smrg
63218781e08Smrg    if (CS_FULL(info->cs)) {
633de2362d3Smrg	R600DoneCopy(info->accel_state->dst_pix);
634de2362d3Smrg	radeon_cs_flush_indirect(pScrn);
635de2362d3Smrg	R600PrepareCopy(accel_state->src_pix,
636de2362d3Smrg			accel_state->dst_pix,
637de2362d3Smrg			accel_state->xdir,
638de2362d3Smrg			accel_state->ydir,
639de2362d3Smrg			accel_state->rop,
640de2362d3Smrg			accel_state->planemask);
641de2362d3Smrg    }
642de2362d3Smrg
643de2362d3Smrg    if (accel_state->vsync)
644de2362d3Smrg	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
645de2362d3Smrg
64618781e08Smrg    if (accel_state->same_surface &&
64718781e08Smrg	    (srcX + w <= dstX || dstX + w <= srcX || srcY + h <= dstY || dstY + h <= srcY)) {
64818781e08Smrg	R600DoPrepareCopy(pScrn);
64918781e08Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
65018781e08Smrg	R600DoCopyVline(pDst);
65118781e08Smrg    } else if (accel_state->same_surface && accel_state->copy_area) {
652de2362d3Smrg	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
653de2362d3Smrg	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
654de2362d3Smrg	uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags;
655de2362d3Smrg	uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags;
656de2362d3Smrg	struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
657de2362d3Smrg	int orig_rop = accel_state->rop;
658de2362d3Smrg
659de2362d3Smrg	/* src to tmp */
660de2362d3Smrg	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
661de2362d3Smrg	accel_state->dst_obj.bo = accel_state->copy_area_bo;
662de2362d3Smrg	accel_state->dst_obj.tiling_flags = 0;
663de2362d3Smrg	accel_state->rop = 3;
664de2362d3Smrg	R600DoPrepareCopy(pScrn);
665de2362d3Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
666de2362d3Smrg	R600DoCopy(pScrn);
667de2362d3Smrg
668de2362d3Smrg	/* tmp to dst */
669de2362d3Smrg	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
670de2362d3Smrg	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
671de2362d3Smrg	accel_state->src_obj[0].tiling_flags = 0;
672de2362d3Smrg	accel_state->dst_obj.domain = orig_dst_domain;
673de2362d3Smrg	accel_state->dst_obj.bo = orig_bo;
674de2362d3Smrg	accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags;
675de2362d3Smrg	accel_state->rop = orig_rop;
676de2362d3Smrg	R600DoPrepareCopy(pScrn);
677de2362d3Smrg	R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
678de2362d3Smrg	R600DoCopyVline(pDst);
679de2362d3Smrg
680de2362d3Smrg	/* restore state */
681de2362d3Smrg	accel_state->src_obj[0].domain = orig_src_domain;
682de2362d3Smrg	accel_state->src_obj[0].bo = orig_bo;
683de2362d3Smrg	accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags;
684de2362d3Smrg    } else
685de2362d3Smrg	R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
686de2362d3Smrg
687de2362d3Smrg}
688de2362d3Smrg
689de2362d3Smrgstruct blendinfo {
690de2362d3Smrg    Bool dst_alpha;
691de2362d3Smrg    Bool src_alpha;
692de2362d3Smrg    uint32_t blend_cntl;
693de2362d3Smrg};
694de2362d3Smrg
695de2362d3Smrgstatic struct blendinfo R600BlendOp[] = {
696de2362d3Smrg    /* Clear */
697de2362d3Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
698de2362d3Smrg    /* Src */
699de2362d3Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
700de2362d3Smrg    /* Dst */
701de2362d3Smrg    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
702de2362d3Smrg    /* Over */
703de2362d3Smrg    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
704de2362d3Smrg    /* OverReverse */
705de2362d3Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
706de2362d3Smrg    /* In */
707de2362d3Smrg    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
708de2362d3Smrg    /* InReverse */
709de2362d3Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
710de2362d3Smrg    /* Out */
711de2362d3Smrg    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
712de2362d3Smrg    /* OutReverse */
713de2362d3Smrg    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
714de2362d3Smrg    /* Atop */
715de2362d3Smrg    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
716de2362d3Smrg    /* AtopReverse */
717de2362d3Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
718de2362d3Smrg    /* Xor */
719de2362d3Smrg    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
720de2362d3Smrg    /* Add */
721de2362d3Smrg    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
722de2362d3Smrg};
723de2362d3Smrg
724de2362d3Smrgstruct formatinfo {
725de2362d3Smrg    unsigned int fmt;
726de2362d3Smrg    uint32_t card_fmt;
727de2362d3Smrg};
728de2362d3Smrg
729de2362d3Smrgstatic struct formatinfo R600TexFormats[] = {
7308bf5c682Smrg    {PICT_a2r10g10b10,	FMT_2_10_10_10},
7318bf5c682Smrg    {PICT_x2r10g10b10,	FMT_2_10_10_10},
7328bf5c682Smrg    {PICT_a2b10g10r10,	FMT_2_10_10_10},
7338bf5c682Smrg    {PICT_x2b10g10r10,	FMT_2_10_10_10},
734de2362d3Smrg    {PICT_a8r8g8b8,	FMT_8_8_8_8},
735de2362d3Smrg    {PICT_x8r8g8b8,	FMT_8_8_8_8},
736de2362d3Smrg    {PICT_a8b8g8r8,	FMT_8_8_8_8},
737de2362d3Smrg    {PICT_x8b8g8r8,	FMT_8_8_8_8},
738de2362d3Smrg    {PICT_b8g8r8a8,	FMT_8_8_8_8},
739de2362d3Smrg    {PICT_b8g8r8x8,	FMT_8_8_8_8},
740de2362d3Smrg    {PICT_r5g6b5,	FMT_5_6_5},
741de2362d3Smrg    {PICT_a1r5g5b5,	FMT_1_5_5_5},
742de2362d3Smrg    {PICT_x1r5g5b5,     FMT_1_5_5_5},
743de2362d3Smrg    {PICT_a8,		FMT_8},
744de2362d3Smrg};
745de2362d3Smrg
746de2362d3Smrgstatic uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
747de2362d3Smrg{
748de2362d3Smrg    uint32_t sblend, dblend;
749de2362d3Smrg
750de2362d3Smrg    sblend = R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask;
751de2362d3Smrg    dblend = R600BlendOp[op].blend_cntl & COLOR_DESTBLEND_mask;
752de2362d3Smrg
753de2362d3Smrg    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
754de2362d3Smrg     * it as always 1.
755de2362d3Smrg     */
756de2362d3Smrg    if (PICT_FORMAT_A(dst_format) == 0 && R600BlendOp[op].dst_alpha) {
757de2362d3Smrg	if (sblend == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
758de2362d3Smrg	    sblend = (BLEND_ONE << COLOR_SRCBLEND_shift);
759de2362d3Smrg	else if (sblend == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
760de2362d3Smrg	    sblend = (BLEND_ZERO << COLOR_SRCBLEND_shift);
761de2362d3Smrg    }
762de2362d3Smrg
763de2362d3Smrg    /* If the source alpha is being used, then we should only be in a case where
764de2362d3Smrg     * the source blend factor is 0, and the source blend value is the mask
765de2362d3Smrg     * channels multiplied by the source picture's alpha.
766de2362d3Smrg     */
767de2362d3Smrg    if (pMask && pMask->componentAlpha && R600BlendOp[op].src_alpha) {
768de2362d3Smrg	if (dblend == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
769de2362d3Smrg	    dblend = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
770de2362d3Smrg	} else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) {
771de2362d3Smrg	    dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
772de2362d3Smrg	}
77318781e08Smrg
77418781e08Smrg	/* With some tricks, we can still accelerate PictOpOver with solid src.
77518781e08Smrg	 * This is commonly used for text rendering, so it's worth the extra
77618781e08Smrg	 * effort.
77718781e08Smrg	 */
77818781e08Smrg	if (sblend == (BLEND_ONE << COLOR_SRCBLEND_shift)) {
77918781e08Smrg	    sblend = (BLEND_CONSTANT_COLOR << COLOR_SRCBLEND_shift);
78018781e08Smrg	}
781de2362d3Smrg    }
782de2362d3Smrg
783de2362d3Smrg    return sblend | dblend;
784de2362d3Smrg}
785de2362d3Smrg
786de2362d3Smrgstatic Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
787de2362d3Smrg{
788de2362d3Smrg    switch (pDstPicture->format) {
7898bf5c682Smrg    case PICT_a2r10g10b10:
7908bf5c682Smrg    case PICT_x2r10g10b10:
7918bf5c682Smrg    case PICT_a2b10g10r10:
7928bf5c682Smrg    case PICT_x2b10g10r10:
7938bf5c682Smrg	*dst_format = COLOR_2_10_10_10;
7948bf5c682Smrg	break;
795de2362d3Smrg    case PICT_a8r8g8b8:
796de2362d3Smrg    case PICT_x8r8g8b8:
797de2362d3Smrg    case PICT_a8b8g8r8:
798de2362d3Smrg    case PICT_x8b8g8r8:
799de2362d3Smrg    case PICT_b8g8r8a8:
800de2362d3Smrg    case PICT_b8g8r8x8:
801de2362d3Smrg	*dst_format = COLOR_8_8_8_8;
802de2362d3Smrg	break;
803de2362d3Smrg    case PICT_r5g6b5:
804de2362d3Smrg	*dst_format = COLOR_5_6_5;
805de2362d3Smrg	break;
806de2362d3Smrg    case PICT_a1r5g5b5:
807de2362d3Smrg    case PICT_x1r5g5b5:
808de2362d3Smrg	*dst_format = COLOR_1_5_5_5;
809de2362d3Smrg	break;
810de2362d3Smrg    case PICT_a8:
811de2362d3Smrg	*dst_format = COLOR_8;
812de2362d3Smrg	break;
813de2362d3Smrg    default:
814de2362d3Smrg	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
815de2362d3Smrg	       (int)pDstPicture->format));
816de2362d3Smrg    }
817de2362d3Smrg    return TRUE;
818de2362d3Smrg}
819de2362d3Smrg
820de2362d3Smrgstatic Bool R600CheckCompositeTexture(PicturePtr pPict,
821de2362d3Smrg				      PicturePtr pDstPict,
822de2362d3Smrg				      int op,
823de2362d3Smrg				      int unit)
824de2362d3Smrg{
825de2362d3Smrg    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
826de2362d3Smrg    unsigned int i;
827de2362d3Smrg
828de2362d3Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
829de2362d3Smrg	if (R600TexFormats[i].fmt == pPict->format)
830de2362d3Smrg	    break;
831de2362d3Smrg    }
832de2362d3Smrg    if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
833de2362d3Smrg	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
834de2362d3Smrg			 (int)pPict->format));
835de2362d3Smrg
836de2362d3Smrg    if (pPict->filter != PictFilterNearest &&
837de2362d3Smrg	pPict->filter != PictFilterBilinear)
838de2362d3Smrg	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
839de2362d3Smrg
840de2362d3Smrg    /* for REPEAT_NONE, Render semantics are that sampling outside the source
841de2362d3Smrg     * picture results in alpha=0 pixels. We can implement this with a border color
842de2362d3Smrg     * *if* our source texture has an alpha channel, otherwise we need to fall
843de2362d3Smrg     * back. If we're not transformed then we hope that upper layers have clipped
844de2362d3Smrg     * rendering to the bounds of the source drawable, in which case it doesn't
845de2362d3Smrg     * matter. I have not, however, verified that the X server always does such
846de2362d3Smrg     * clipping.
847de2362d3Smrg     */
848de2362d3Smrg    /* FIXME R6xx */
849de2362d3Smrg    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
850de2362d3Smrg	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
851de2362d3Smrg	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
852de2362d3Smrg    }
853de2362d3Smrg
854de2362d3Smrg    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
855de2362d3Smrg	RADEON_FALLBACK(("non-affine transforms not supported\n"));
856de2362d3Smrg
857de2362d3Smrg    return TRUE;
858de2362d3Smrg}
859de2362d3Smrg
860de2362d3Smrgstatic Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
861de2362d3Smrg					int unit)
862de2362d3Smrg{
863de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
864de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
865de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
866de2362d3Smrg    unsigned int repeatType;
867de2362d3Smrg    unsigned int i;
868de2362d3Smrg    tex_resource_t  tex_res;
869de2362d3Smrg    tex_sampler_t   tex_samp;
870de2362d3Smrg    int pix_r, pix_g, pix_b, pix_a;
871de2362d3Smrg    float vs_alu_consts[8];
872de2362d3Smrg
873de2362d3Smrg    CLEAR (tex_res);
874de2362d3Smrg    CLEAR (tex_samp);
875de2362d3Smrg
876de2362d3Smrg    for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
877de2362d3Smrg	if (R600TexFormats[i].fmt == pPict->format)
878de2362d3Smrg	    break;
879de2362d3Smrg    }
880de2362d3Smrg
881de2362d3Smrg    /* Texture */
882de2362d3Smrg    if (pPict->pDrawable) {
883de2362d3Smrg	tex_res.w               = pPict->pDrawable->width;
884de2362d3Smrg	tex_res.h               = pPict->pDrawable->height;
885de2362d3Smrg	repeatType              = pPict->repeat ? pPict->repeatType : RepeatNone;
886de2362d3Smrg    } else {
887de2362d3Smrg	tex_res.w               = 1;
888de2362d3Smrg	tex_res.h               = 1;
889de2362d3Smrg	repeatType              = RepeatNormal;
890de2362d3Smrg    }
891de2362d3Smrg    tex_res.id                  = unit;
892de2362d3Smrg    tex_res.pitch               = accel_state->src_obj[unit].pitch;
893de2362d3Smrg    tex_res.depth               = 0;
894de2362d3Smrg    tex_res.dim                 = SQ_TEX_DIM_2D;
89518781e08Smrg    tex_res.base                = 0;
89618781e08Smrg    tex_res.mip_base            = 0;
897de2362d3Smrg    tex_res.size                = accel_state->src_size[unit];
898de2362d3Smrg    tex_res.format              = R600TexFormats[i].card_fmt;
899de2362d3Smrg    tex_res.bo                  = accel_state->src_obj[unit].bo;
900de2362d3Smrg    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
901de2362d3Smrg    tex_res.surface             = accel_state->src_obj[unit].surface;
902de2362d3Smrg    tex_res.request_size        = 1;
903de2362d3Smrg
904de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
905de2362d3Smrg    switch (accel_state->src_obj[unit].bpp) {
906de2362d3Smrg    case 16:
907de2362d3Smrg	tex_res.endian = SQ_ENDIAN_8IN16;
908de2362d3Smrg	break;
909de2362d3Smrg    case 32:
910de2362d3Smrg	tex_res.endian = SQ_ENDIAN_8IN32;
911de2362d3Smrg	break;
912de2362d3Smrg    default :
913de2362d3Smrg	break;
914de2362d3Smrg    }
915de2362d3Smrg#endif
916de2362d3Smrg
917de2362d3Smrg    /* component swizzles */
918de2362d3Smrg    switch (pPict->format) {
9198bf5c682Smrg    case PICT_a2r10g10b10:
920de2362d3Smrg    case PICT_a1r5g5b5:
921de2362d3Smrg    case PICT_a8r8g8b8:
922de2362d3Smrg	pix_r = SQ_SEL_Z; /* R */
923de2362d3Smrg	pix_g = SQ_SEL_Y; /* G */
924de2362d3Smrg	pix_b = SQ_SEL_X; /* B */
925de2362d3Smrg	pix_a = SQ_SEL_W; /* A */
926de2362d3Smrg	break;
9278bf5c682Smrg    case PICT_a2b10g10r10:
928de2362d3Smrg    case PICT_a8b8g8r8:
929de2362d3Smrg	pix_r = SQ_SEL_X; /* R */
930de2362d3Smrg	pix_g = SQ_SEL_Y; /* G */
931de2362d3Smrg	pix_b = SQ_SEL_Z; /* B */
932de2362d3Smrg	pix_a = SQ_SEL_W; /* A */
933de2362d3Smrg	break;
9348bf5c682Smrg    case PICT_x2b10g10r10:
935de2362d3Smrg    case PICT_x8b8g8r8:
936de2362d3Smrg	pix_r = SQ_SEL_X; /* R */
937de2362d3Smrg	pix_g = SQ_SEL_Y; /* G */
938de2362d3Smrg	pix_b = SQ_SEL_Z; /* B */
939de2362d3Smrg	pix_a = SQ_SEL_1; /* A */
940de2362d3Smrg	break;
941de2362d3Smrg    case PICT_b8g8r8a8:
942de2362d3Smrg	pix_r = SQ_SEL_Y; /* R */
943de2362d3Smrg	pix_g = SQ_SEL_Z; /* G */
944de2362d3Smrg	pix_b = SQ_SEL_W; /* B */
945de2362d3Smrg	pix_a = SQ_SEL_X; /* A */
946de2362d3Smrg	break;
947de2362d3Smrg    case PICT_b8g8r8x8:
948de2362d3Smrg	pix_r = SQ_SEL_Y; /* R */
949de2362d3Smrg	pix_g = SQ_SEL_Z; /* G */
950de2362d3Smrg	pix_b = SQ_SEL_W; /* B */
951de2362d3Smrg	pix_a = SQ_SEL_1; /* A */
952de2362d3Smrg	break;
9538bf5c682Smrg    case PICT_x2r10g10b10:
954de2362d3Smrg    case PICT_x1r5g5b5:
955de2362d3Smrg    case PICT_x8r8g8b8:
956de2362d3Smrg    case PICT_r5g6b5:
957de2362d3Smrg	pix_r = SQ_SEL_Z; /* R */
958de2362d3Smrg	pix_g = SQ_SEL_Y; /* G */
959de2362d3Smrg	pix_b = SQ_SEL_X; /* B */
960de2362d3Smrg	pix_a = SQ_SEL_1; /* A */
961de2362d3Smrg	break;
962de2362d3Smrg    case PICT_a8:
963de2362d3Smrg	pix_r = SQ_SEL_0; /* R */
964de2362d3Smrg	pix_g = SQ_SEL_0; /* G */
965de2362d3Smrg	pix_b = SQ_SEL_0; /* B */
966de2362d3Smrg	pix_a = SQ_SEL_X; /* A */
967de2362d3Smrg	break;
968de2362d3Smrg    default:
969de2362d3Smrg	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
970de2362d3Smrg    }
971de2362d3Smrg
972de2362d3Smrg    if (unit == 0) {
973de2362d3Smrg	if (!accel_state->msk_pic) {
974de2362d3Smrg	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
975de2362d3Smrg		pix_r = SQ_SEL_0;
976de2362d3Smrg		pix_g = SQ_SEL_0;
977de2362d3Smrg		pix_b = SQ_SEL_0;
978de2362d3Smrg	    }
979de2362d3Smrg
980de2362d3Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
981de2362d3Smrg		pix_a = SQ_SEL_1;
982de2362d3Smrg	} else {
983de2362d3Smrg	    if (accel_state->component_alpha) {
984de2362d3Smrg		if (accel_state->src_alpha) {
985de2362d3Smrg		    if (PICT_FORMAT_A(pPict->format) == 0) {
986de2362d3Smrg			pix_r = SQ_SEL_1;
987de2362d3Smrg			pix_g = SQ_SEL_1;
988de2362d3Smrg			pix_b = SQ_SEL_1;
989de2362d3Smrg			pix_a = SQ_SEL_1;
990de2362d3Smrg		    } else {
991de2362d3Smrg			pix_r = pix_a;
992de2362d3Smrg			pix_g = pix_a;
993de2362d3Smrg			pix_b = pix_a;
994de2362d3Smrg		    }
995de2362d3Smrg		} else {
996de2362d3Smrg		    if (PICT_FORMAT_A(pPict->format) == 0)
997de2362d3Smrg			pix_a = SQ_SEL_1;
998de2362d3Smrg		}
999de2362d3Smrg	    } else {
1000de2362d3Smrg		if (PICT_FORMAT_RGB(pPict->format) == 0) {
1001de2362d3Smrg		    pix_r = SQ_SEL_0;
1002de2362d3Smrg		    pix_g = SQ_SEL_0;
1003de2362d3Smrg		    pix_b = SQ_SEL_0;
1004de2362d3Smrg		}
1005de2362d3Smrg
1006de2362d3Smrg		if (PICT_FORMAT_A(pPict->format) == 0)
1007de2362d3Smrg		    pix_a = SQ_SEL_1;
1008de2362d3Smrg	    }
1009de2362d3Smrg	}
1010de2362d3Smrg    } else {
1011de2362d3Smrg	if (accel_state->component_alpha) {
1012de2362d3Smrg	    if (PICT_FORMAT_A(pPict->format) == 0)
1013de2362d3Smrg		pix_a = SQ_SEL_1;
1014de2362d3Smrg	} else {
1015de2362d3Smrg	    if (PICT_FORMAT_A(pPict->format) == 0) {
1016de2362d3Smrg		pix_r = SQ_SEL_1;
1017de2362d3Smrg		pix_g = SQ_SEL_1;
1018de2362d3Smrg		pix_b = SQ_SEL_1;
1019de2362d3Smrg		pix_a = SQ_SEL_1;
1020de2362d3Smrg	    } else {
1021de2362d3Smrg		pix_r = pix_a;
1022de2362d3Smrg		pix_g = pix_a;
1023de2362d3Smrg		pix_b = pix_a;
1024de2362d3Smrg	    }
1025de2362d3Smrg	}
1026de2362d3Smrg    }
1027de2362d3Smrg
1028de2362d3Smrg    tex_res.dst_sel_x           = pix_r; /* R */
1029de2362d3Smrg    tex_res.dst_sel_y           = pix_g; /* G */
1030de2362d3Smrg    tex_res.dst_sel_z           = pix_b; /* B */
1031de2362d3Smrg    tex_res.dst_sel_w           = pix_a; /* A */
1032de2362d3Smrg
1033de2362d3Smrg    tex_res.base_level          = 0;
1034de2362d3Smrg    tex_res.last_level          = 0;
1035de2362d3Smrg    tex_res.perf_modulation     = 0;
1036de2362d3Smrg    if (accel_state->src_obj[unit].tiling_flags == 0)
1037de2362d3Smrg	tex_res.tile_mode           = 1;
103818781e08Smrg    r600_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[unit].domain);
1039de2362d3Smrg
1040de2362d3Smrg    tex_samp.id                 = unit;
1041de2362d3Smrg    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1042de2362d3Smrg
1043de2362d3Smrg    switch (repeatType) {
1044de2362d3Smrg    case RepeatNormal:
1045de2362d3Smrg	tex_samp.clamp_x            = SQ_TEX_WRAP;
1046de2362d3Smrg	tex_samp.clamp_y            = SQ_TEX_WRAP;
1047de2362d3Smrg	break;
1048de2362d3Smrg    case RepeatPad:
1049de2362d3Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
1050de2362d3Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
1051de2362d3Smrg	break;
1052de2362d3Smrg    case RepeatReflect:
1053de2362d3Smrg	tex_samp.clamp_x            = SQ_TEX_MIRROR;
1054de2362d3Smrg	tex_samp.clamp_y            = SQ_TEX_MIRROR;
1055de2362d3Smrg	break;
1056de2362d3Smrg    case RepeatNone:
1057de2362d3Smrg	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
1058de2362d3Smrg	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
1059de2362d3Smrg	break;
1060de2362d3Smrg    default:
1061de2362d3Smrg	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1062de2362d3Smrg    }
1063de2362d3Smrg
1064de2362d3Smrg    switch (pPict->filter) {
1065de2362d3Smrg    case PictFilterNearest:
1066de2362d3Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
1067de2362d3Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
1068de2362d3Smrg	tex_samp.mc_coord_truncate  = 1;
1069de2362d3Smrg	break;
1070de2362d3Smrg    case PictFilterBilinear:
1071de2362d3Smrg	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1072de2362d3Smrg	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
1073de2362d3Smrg	break;
1074de2362d3Smrg    default:
1075de2362d3Smrg	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1076de2362d3Smrg    }
1077de2362d3Smrg
1078de2362d3Smrg    tex_samp.clamp_z            = SQ_TEX_WRAP;
1079de2362d3Smrg    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
1080de2362d3Smrg    tex_samp.mip_filter         = 0;			/* no mipmap */
108118781e08Smrg    r600_set_tex_sampler(pScrn, &tex_samp);
1082de2362d3Smrg
1083de2362d3Smrg    if (pPict->transform != 0) {
1084de2362d3Smrg	accel_state->is_transform[unit] = TRUE;
1085de2362d3Smrg	accel_state->transform[unit] = pPict->transform;
1086de2362d3Smrg
1087de2362d3Smrg	vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
1088de2362d3Smrg	vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
1089de2362d3Smrg	vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
1090de2362d3Smrg	vs_alu_consts[3] = 1.0 / tex_res.w;
1091de2362d3Smrg
1092de2362d3Smrg	vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
1093de2362d3Smrg	vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
1094de2362d3Smrg	vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
1095de2362d3Smrg	vs_alu_consts[7] = 1.0 / tex_res.h;
1096de2362d3Smrg    } else {
1097de2362d3Smrg	accel_state->is_transform[unit] = FALSE;
1098de2362d3Smrg
1099de2362d3Smrg	vs_alu_consts[0] = 1.0;
1100de2362d3Smrg	vs_alu_consts[1] = 0.0;
1101de2362d3Smrg	vs_alu_consts[2] = 0.0;
1102de2362d3Smrg	vs_alu_consts[3] = 1.0 / tex_res.w;
1103de2362d3Smrg
1104de2362d3Smrg	vs_alu_consts[4] = 0.0;
1105de2362d3Smrg	vs_alu_consts[5] = 1.0;
1106de2362d3Smrg	vs_alu_consts[6] = 0.0;
1107de2362d3Smrg	vs_alu_consts[7] = 1.0 / tex_res.h;
1108de2362d3Smrg    }
1109de2362d3Smrg
1110de2362d3Smrg    /* VS alu constants */
111118781e08Smrg    r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_vs + (unit * 2),
1112de2362d3Smrg			sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
1113de2362d3Smrg
1114de2362d3Smrg    return TRUE;
1115de2362d3Smrg}
1116de2362d3Smrg
1117de2362d3Smrgstatic Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1118de2362d3Smrg			       PicturePtr pDstPicture)
1119de2362d3Smrg{
1120de2362d3Smrg    uint32_t tmp1;
1121de2362d3Smrg    PixmapPtr pSrcPixmap, pDstPixmap;
1122de2362d3Smrg
1123de2362d3Smrg    /* Check for unsupported compositing operations. */
1124de2362d3Smrg    if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1125de2362d3Smrg	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1126de2362d3Smrg
1127de2362d3Smrg    if (pSrcPicture->pDrawable) {
1128de2362d3Smrg	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1129de2362d3Smrg
1130de2362d3Smrg	if (pSrcPixmap->drawable.width >= 8192 ||
1131de2362d3Smrg	    pSrcPixmap->drawable.height >= 8192) {
1132de2362d3Smrg	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1133de2362d3Smrg			     pSrcPixmap->drawable.width,
1134de2362d3Smrg			     pSrcPixmap->drawable.height));
1135de2362d3Smrg	}
1136de2362d3Smrg
1137de2362d3Smrg	if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1138de2362d3Smrg	    return FALSE;
1139de2362d3Smrg    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
1140de2362d3Smrg	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1141de2362d3Smrg
1142de2362d3Smrg    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1143de2362d3Smrg
1144de2362d3Smrg    if (pDstPixmap->drawable.width >= 8192 ||
1145de2362d3Smrg	pDstPixmap->drawable.height >= 8192) {
1146de2362d3Smrg	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1147de2362d3Smrg			 pDstPixmap->drawable.width,
1148de2362d3Smrg			 pDstPixmap->drawable.height));
1149de2362d3Smrg    }
1150de2362d3Smrg
1151de2362d3Smrg    if (pMaskPicture) {
1152de2362d3Smrg	PixmapPtr pMaskPixmap;
1153de2362d3Smrg
1154de2362d3Smrg	if (pMaskPicture->pDrawable) {
1155de2362d3Smrg	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1156de2362d3Smrg
1157de2362d3Smrg	    if (pMaskPixmap->drawable.width >= 8192 ||
1158de2362d3Smrg		pMaskPixmap->drawable.height >= 8192) {
1159de2362d3Smrg	      RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1160de2362d3Smrg			       pMaskPixmap->drawable.width,
1161de2362d3Smrg			       pMaskPixmap->drawable.height));
1162de2362d3Smrg	    }
1163de2362d3Smrg
1164de2362d3Smrg	    if (pMaskPicture->componentAlpha) {
1165de2362d3Smrg		/* Check if it's component alpha that relies on a source alpha and
1166de2362d3Smrg		 * on the source value.  We can only get one of those into the
1167de2362d3Smrg		 * single source value that we get to blend with.
116818781e08Smrg		 *
116918781e08Smrg		 * We can cheat a bit if the src is solid, though. PictOpOver
117018781e08Smrg		 * can use the constant blend color to sneak a second blend
117118781e08Smrg		 * source in.
1172de2362d3Smrg		 */
1173de2362d3Smrg		if (R600BlendOp[op].src_alpha &&
1174de2362d3Smrg		    (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1175de2362d3Smrg		    (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
117618781e08Smrg		    if (pSrcPicture->pDrawable || op != PictOpOver)
117718781e08Smrg			RADEON_FALLBACK(("Component alpha not supported with source "
117818781e08Smrg					 "alpha and source value blending.\n"));
1179de2362d3Smrg		}
1180de2362d3Smrg	    }
1181de2362d3Smrg
1182de2362d3Smrg	    if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1183de2362d3Smrg		return FALSE;
1184de2362d3Smrg	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
1185de2362d3Smrg	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1186de2362d3Smrg    }
1187de2362d3Smrg
1188de2362d3Smrg    if (!R600GetDestFormat(pDstPicture, &tmp1))
1189de2362d3Smrg	return FALSE;
1190de2362d3Smrg
1191de2362d3Smrg    return TRUE;
1192de2362d3Smrg
1193de2362d3Smrg}
1194de2362d3Smrg
119518781e08Smrgstatic void R600SetSolidConsts(ScrnInfoPtr pScrn, float *buf, int format, uint32_t fg, int unit)
119618781e08Smrg{
119718781e08Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
119818781e08Smrg    struct radeon_accel_state *accel_state = info->accel_state;
119918781e08Smrg    float pix_r = 0, pix_g = 0, pix_b = 0, pix_a = 0;
120018781e08Smrg
120118781e08Smrg    uint32_t w = (fg >> 24) & 0xff;
120218781e08Smrg    uint32_t z = (fg >> 16) & 0xff;
120318781e08Smrg    uint32_t y = (fg >> 8) & 0xff;
120418781e08Smrg    uint32_t x = (fg >> 0) & 0xff;
120518781e08Smrg    float xf = (float)x / 255; /* R */
120618781e08Smrg    float yf = (float)y / 255; /* G */
120718781e08Smrg    float zf = (float)z / 255; /* B */
120818781e08Smrg    float wf = (float)w / 255; /* A */
120918781e08Smrg
121018781e08Smrg    /* component swizzles */
121118781e08Smrg    switch (format) {
121218781e08Smrg	case PICT_a1r5g5b5:
121318781e08Smrg	case PICT_a8r8g8b8:
121418781e08Smrg	    pix_r = zf; /* R */
121518781e08Smrg	    pix_g = yf; /* G */
121618781e08Smrg	    pix_b = xf; /* B */
121718781e08Smrg	    pix_a = wf; /* A */
121818781e08Smrg	    break;
121918781e08Smrg	case PICT_a8b8g8r8:
122018781e08Smrg	    pix_r = xf; /* R */
122118781e08Smrg	    pix_g = yf; /* G */
122218781e08Smrg	    pix_b = zf; /* B */
122318781e08Smrg	    pix_a = wf; /* A */
122418781e08Smrg	    break;
122518781e08Smrg	case PICT_x8b8g8r8:
122618781e08Smrg	    pix_r = xf; /* R */
122718781e08Smrg	    pix_g = yf; /* G */
122818781e08Smrg	    pix_b = zf; /* B */
122918781e08Smrg	    pix_a = 1.0; /* A */
123018781e08Smrg	    break;
123118781e08Smrg	case PICT_b8g8r8a8:
123218781e08Smrg	    pix_r = yf; /* R */
123318781e08Smrg	    pix_g = zf; /* G */
123418781e08Smrg	    pix_b = wf; /* B */
123518781e08Smrg	    pix_a = xf; /* A */
123618781e08Smrg	    break;
123718781e08Smrg	case PICT_b8g8r8x8:
123818781e08Smrg	    pix_r = yf; /* R */
123918781e08Smrg	    pix_g = zf; /* G */
124018781e08Smrg	    pix_b = wf; /* B */
124118781e08Smrg	    pix_a = 1.0; /* A */
124218781e08Smrg	    break;
124318781e08Smrg	case PICT_x1r5g5b5:
124418781e08Smrg	case PICT_x8r8g8b8:
124518781e08Smrg	case PICT_r5g6b5:
124618781e08Smrg	    pix_r = zf; /* R */
124718781e08Smrg	    pix_g = yf; /* G */
124818781e08Smrg	    pix_b = xf; /* B */
124918781e08Smrg	    pix_a = 1.0; /* A */
125018781e08Smrg	    break;
125118781e08Smrg	case PICT_a8:
125218781e08Smrg	    pix_r = 0.0; /* R */
125318781e08Smrg	    pix_g = 0.0; /* G */
125418781e08Smrg	    pix_b = 0.0; /* B */
125518781e08Smrg	    pix_a = xf; /* A */
125618781e08Smrg	    break;
125718781e08Smrg	default:
125818781e08Smrg	    ErrorF("Bad format 0x%x\n", format);
125918781e08Smrg    }
126018781e08Smrg
126118781e08Smrg    if (unit == 0) {
126218781e08Smrg	if (!accel_state->msk_pic) {
126318781e08Smrg	    if (PICT_FORMAT_RGB(format) == 0) {
126418781e08Smrg		pix_r = 0.0;
126518781e08Smrg		pix_g = 0.0;
126618781e08Smrg		pix_b = 0.0;
126718781e08Smrg	    }
126818781e08Smrg
126918781e08Smrg	    if (PICT_FORMAT_A(format) == 0)
127018781e08Smrg		pix_a = 1.0;
127118781e08Smrg	} else {
127218781e08Smrg	    if (accel_state->component_alpha) {
127318781e08Smrg		if (accel_state->src_alpha) {
127418781e08Smrg		    /* required for PictOpOver */
127518781e08Smrg		    float cblend[4] = { pix_r / pix_a, pix_g / pix_a,
127618781e08Smrg					pix_b / pix_a, pix_a / pix_a };
127718781e08Smrg		    r600_set_blend_color(pScrn, cblend);
127818781e08Smrg
127918781e08Smrg		    if (PICT_FORMAT_A(format) == 0) {
128018781e08Smrg			pix_r = 1.0;
128118781e08Smrg			pix_g = 1.0;
128218781e08Smrg			pix_b = 1.0;
128318781e08Smrg			pix_a = 1.0;
128418781e08Smrg		    } else {
128518781e08Smrg			pix_r = pix_a;
128618781e08Smrg			pix_g = pix_a;
128718781e08Smrg			pix_b = pix_a;
128818781e08Smrg		    }
128918781e08Smrg		} else {
129018781e08Smrg		    if (PICT_FORMAT_A(format) == 0)
129118781e08Smrg			pix_a = 1.0;
129218781e08Smrg		}
129318781e08Smrg	    } else {
129418781e08Smrg		if (PICT_FORMAT_RGB(format) == 0) {
129518781e08Smrg		    pix_r = 0;
129618781e08Smrg		    pix_g = 0;
129718781e08Smrg		    pix_b = 0;
129818781e08Smrg		}
129918781e08Smrg
130018781e08Smrg		if (PICT_FORMAT_A(format) == 0)
130118781e08Smrg		    pix_a = 1.0;
130218781e08Smrg	    }
130318781e08Smrg	}
130418781e08Smrg    } else {
130518781e08Smrg	if (accel_state->component_alpha) {
130618781e08Smrg	    if (PICT_FORMAT_A(format) == 0)
130718781e08Smrg		pix_a = 1.0;
130818781e08Smrg	} else {
130918781e08Smrg	    if (PICT_FORMAT_A(format) == 0) {
131018781e08Smrg		pix_r = 1.0;
131118781e08Smrg		pix_g = 1.0;
131218781e08Smrg		pix_b = 1.0;
131318781e08Smrg		pix_a = 1.0;
131418781e08Smrg	    } else {
131518781e08Smrg		pix_r = pix_a;
131618781e08Smrg		pix_g = pix_a;
131718781e08Smrg		pix_b = pix_a;
131818781e08Smrg	    }
131918781e08Smrg	}
132018781e08Smrg    }
132118781e08Smrg
132218781e08Smrg    buf[0] = pix_r;
132318781e08Smrg    buf[1] = pix_g;
132418781e08Smrg    buf[2] = pix_b;
132518781e08Smrg    buf[3] = pix_a;
132618781e08Smrg}
132718781e08Smrg
1328de2362d3Smrgstatic Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1329de2362d3Smrg				 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1330de2362d3Smrg				 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1331de2362d3Smrg{
1332de2362d3Smrg    ScreenPtr pScreen = pDst->drawable.pScreen;
1333de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1334de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1335de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1336de2362d3Smrg    uint32_t dst_format;
1337de2362d3Smrg    cb_config_t cb_conf;
1338de2362d3Smrg    shader_config_t vs_conf, ps_conf;
1339de2362d3Smrg    struct r600_accel_object src_obj, mask_obj, dst_obj;
134018781e08Smrg    uint32_t ps_bool_consts = 0;
134118781e08Smrg    float ps_alu_consts[8];
1342de2362d3Smrg
1343de2362d3Smrg    if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8))
1344de2362d3Smrg	return FALSE;
1345de2362d3Smrg
134618781e08Smrg    if (pSrc) {
134739413783Smrg	src_obj.bo = radeon_get_pixmap_bo(pSrc)->bo.radeon;
13487314432eSmrg	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
13497314432eSmrg	src_obj.surface = radeon_get_pixmap_surface(pSrc);
135018781e08Smrg	src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
135118781e08Smrg	src_obj.width = pSrc->drawable.width;
135218781e08Smrg	src_obj.height = pSrc->drawable.height;
135318781e08Smrg	src_obj.bpp = pSrc->drawable.bitsPerPixel;
135418781e08Smrg	src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1355de2362d3Smrg    }
13567821949aSmrg
135739413783Smrg    dst_obj.bo = radeon_get_pixmap_bo(pDst)->bo.radeon;
135818781e08Smrg    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
135918781e08Smrg    dst_obj.surface = radeon_get_pixmap_surface(pDst);
136018781e08Smrg    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1361de2362d3Smrg    dst_obj.width = pDst->drawable.width;
1362de2362d3Smrg    dst_obj.height = pDst->drawable.height;
1363de2362d3Smrg    dst_obj.bpp = pDst->drawable.bitsPerPixel;
136418781e08Smrg    if (radeon_get_pixmap_shared(pDst) == TRUE)
136518781e08Smrg	dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
136618781e08Smrg    else
136718781e08Smrg	dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1368de2362d3Smrg
1369de2362d3Smrg    if (pMaskPicture) {
137018781e08Smrg	if (pMask) {
137139413783Smrg	    mask_obj.bo = radeon_get_pixmap_bo(pMask)->bo.radeon;
13727314432eSmrg	    mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
13737314432eSmrg	    mask_obj.surface = radeon_get_pixmap_surface(pMask);
137418781e08Smrg	    mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
137518781e08Smrg	    mask_obj.width = pMask->drawable.width;
137618781e08Smrg	    mask_obj.height = pMask->drawable.height;
137718781e08Smrg	    mask_obj.bpp = pMask->drawable.bitsPerPixel;
137818781e08Smrg	    mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1379de2362d3Smrg	}
1380de2362d3Smrg
1381de2362d3Smrg	accel_state->msk_pic = pMaskPicture;
1382de2362d3Smrg	if (pMaskPicture->componentAlpha) {
1383de2362d3Smrg	    accel_state->component_alpha = TRUE;
1384de2362d3Smrg	    if (R600BlendOp[op].src_alpha)
1385de2362d3Smrg		accel_state->src_alpha = TRUE;
1386de2362d3Smrg	    else
1387de2362d3Smrg		accel_state->src_alpha = FALSE;
1388de2362d3Smrg	} else {
1389de2362d3Smrg	    accel_state->component_alpha = FALSE;
1390de2362d3Smrg	    accel_state->src_alpha = FALSE;
1391de2362d3Smrg	}
1392de2362d3Smrg    } else {
1393de2362d3Smrg	accel_state->msk_pic = NULL;
1394de2362d3Smrg	accel_state->component_alpha = FALSE;
1395de2362d3Smrg	accel_state->src_alpha = FALSE;
1396de2362d3Smrg    }
1397de2362d3Smrg
139818781e08Smrg    if (!R600SetAccelState(pScrn,
139918781e08Smrg			   pSrc ? &src_obj : NULL,
140018781e08Smrg			   (pMaskPicture && pMask) ? &mask_obj : NULL,
140118781e08Smrg			   &dst_obj,
140218781e08Smrg			   accel_state->comp_vs_offset, accel_state->comp_ps_offset,
140318781e08Smrg			   3, 0xffffffff))
140418781e08Smrg	return FALSE;
140518781e08Smrg
1406de2362d3Smrg    if (!R600GetDestFormat(pDstPicture, &dst_format))
1407de2362d3Smrg	return FALSE;
1408de2362d3Smrg
1409de2362d3Smrg    CLEAR (cb_conf);
1410de2362d3Smrg    CLEAR (vs_conf);
1411de2362d3Smrg    CLEAR (ps_conf);
1412de2362d3Smrg
1413de2362d3Smrg    if (pMask)
1414de2362d3Smrg        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1415de2362d3Smrg    else
1416de2362d3Smrg        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1417de2362d3Smrg
1418de2362d3Smrg    radeon_cp_start(pScrn);
1419de2362d3Smrg
142018781e08Smrg    r600_set_default_state(pScrn);
1421de2362d3Smrg
142218781e08Smrg    r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
142318781e08Smrg    r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
142418781e08Smrg    r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1425de2362d3Smrg
142618781e08Smrg    if (pSrc) {
142718781e08Smrg        if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
142818781e08Smrg            R600IBDiscard(pScrn);
142918781e08Smrg            return FALSE;
143018781e08Smrg        }
143118781e08Smrg    } else
143218781e08Smrg        accel_state->is_transform[0] = FALSE;
1433de2362d3Smrg
1434de2362d3Smrg    if (pMask) {
1435de2362d3Smrg        if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
143618781e08Smrg            R600IBDiscard(pScrn);
1437de2362d3Smrg            return FALSE;
1438de2362d3Smrg        }
1439de2362d3Smrg    } else
1440de2362d3Smrg        accel_state->is_transform[1] = FALSE;
1441de2362d3Smrg
144218781e08Smrg    if (pSrc)
144318781e08Smrg	ps_bool_consts |= (1 << 0);
144418781e08Smrg    if (pMask)
144518781e08Smrg	ps_bool_consts |= (1 << 1);
144618781e08Smrg    r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, ps_bool_consts);
144718781e08Smrg
1448de2362d3Smrg    if (pMask) {
144918781e08Smrg	r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0));
1450de2362d3Smrg    } else {
145118781e08Smrg	r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0));
1452de2362d3Smrg    }
1453de2362d3Smrg
1454de2362d3Smrg    /* Shader */
1455de2362d3Smrg    vs_conf.shader_addr         = accel_state->vs_mc_addr;
1456de2362d3Smrg    vs_conf.shader_size         = accel_state->vs_size;
1457de2362d3Smrg    vs_conf.num_gprs            = 5;
1458de2362d3Smrg    vs_conf.stack_size          = 1;
1459de2362d3Smrg    vs_conf.bo                  = accel_state->shaders_bo;
146018781e08Smrg    r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1461de2362d3Smrg
1462de2362d3Smrg    ps_conf.shader_addr         = accel_state->ps_mc_addr;
1463de2362d3Smrg    ps_conf.shader_size         = accel_state->ps_size;
146418781e08Smrg    ps_conf.num_gprs            = 2;
1465de2362d3Smrg    ps_conf.stack_size          = 1;
1466de2362d3Smrg    ps_conf.uncached_first_inst = 1;
1467de2362d3Smrg    ps_conf.clamp_consts        = 0;
1468de2362d3Smrg    ps_conf.export_mode         = 2;
1469de2362d3Smrg    ps_conf.bo                  = accel_state->shaders_bo;
147018781e08Smrg    r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1471de2362d3Smrg
1472de2362d3Smrg    cb_conf.id = 0;
1473de2362d3Smrg    cb_conf.w = accel_state->dst_obj.pitch;
1474de2362d3Smrg    cb_conf.h = accel_state->dst_obj.height;
147518781e08Smrg    cb_conf.base = 0;
1476de2362d3Smrg    cb_conf.format = dst_format;
1477de2362d3Smrg    cb_conf.bo = accel_state->dst_obj.bo;
1478de2362d3Smrg    cb_conf.surface = accel_state->dst_obj.surface;
1479de2362d3Smrg
1480de2362d3Smrg    switch (pDstPicture->format) {
14818bf5c682Smrg    case PICT_a2r10g10b10:
14828bf5c682Smrg    case PICT_x2r10g10b10:
1483de2362d3Smrg    case PICT_a8r8g8b8:
1484de2362d3Smrg    case PICT_x8r8g8b8:
1485de2362d3Smrg    case PICT_a1r5g5b5:
1486de2362d3Smrg    case PICT_x1r5g5b5:
1487de2362d3Smrg    default:
1488de2362d3Smrg	cb_conf.comp_swap = 1; /* ARGB */
1489de2362d3Smrg	break;
14908bf5c682Smrg    case PICT_a2b10g10r10:
14918bf5c682Smrg    case PICT_x2b10g10r10:
1492de2362d3Smrg    case PICT_a8b8g8r8:
1493de2362d3Smrg    case PICT_x8b8g8r8:
1494de2362d3Smrg	cb_conf.comp_swap = 0; /* ABGR */
1495de2362d3Smrg	break;
1496de2362d3Smrg    case PICT_b8g8r8a8:
1497de2362d3Smrg    case PICT_b8g8r8x8:
1498de2362d3Smrg	cb_conf.comp_swap = 3; /* BGRA */
1499de2362d3Smrg	break;
1500de2362d3Smrg    case PICT_r5g6b5:
1501de2362d3Smrg	cb_conf.comp_swap = 2; /* RGB */
1502de2362d3Smrg	break;
1503de2362d3Smrg    case PICT_a8:
1504de2362d3Smrg	cb_conf.comp_swap = 3; /* A */
1505de2362d3Smrg	break;
1506de2362d3Smrg    }
1507de2362d3Smrg    cb_conf.source_format = 1;
1508de2362d3Smrg    cb_conf.blend_clamp = 1;
1509de2362d3Smrg    cb_conf.blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1510de2362d3Smrg    cb_conf.blend_enable = 1;
1511de2362d3Smrg    cb_conf.pmask = 0xf;
1512de2362d3Smrg    cb_conf.rop = 3;
1513de2362d3Smrg    if (accel_state->dst_obj.tiling_flags == 0)
1514de2362d3Smrg	cb_conf.array_mode = 0;
1515de2362d3Smrg#if X_BYTE_ORDER == X_BIG_ENDIAN
1516de2362d3Smrg    switch (dst_obj.bpp) {
1517de2362d3Smrg    case 16:
1518de2362d3Smrg	cb_conf.endian = ENDIAN_8IN16;
1519de2362d3Smrg	break;
1520de2362d3Smrg    case 32:
1521de2362d3Smrg	cb_conf.endian = ENDIAN_8IN32;
1522de2362d3Smrg	break;
1523de2362d3Smrg    default:
1524de2362d3Smrg	break;
1525de2362d3Smrg    }
1526de2362d3Smrg#endif
152718781e08Smrg    r600_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
1528de2362d3Smrg
1529de2362d3Smrg    if (pMask)
153018781e08Smrg	r600_set_spi(pScrn, (2 - 1), 2);
1531de2362d3Smrg    else
153218781e08Smrg	r600_set_spi(pScrn, (1 - 1), 1);
153318781e08Smrg
153418781e08Smrg    if (!pSrc) {
153518781e08Smrg	/* solid src color */
153618781e08Smrg	R600SetSolidConsts(pScrn, &ps_alu_consts[0], pSrcPicture->format,
153718781e08Smrg			   pSrcPicture->pSourcePict->solidFill.color, 0);
153818781e08Smrg    }
153918781e08Smrg
154018781e08Smrg    if (!pMaskPicture) {
154118781e08Smrg	/* use identity constant if there is no mask */
154218781e08Smrg	ps_alu_consts[4] = 1.0;
154318781e08Smrg	ps_alu_consts[5] = 1.0;
154418781e08Smrg	ps_alu_consts[6] = 1.0;
154518781e08Smrg	ps_alu_consts[7] = 1.0;
154618781e08Smrg    } else if (!pMask) {
154718781e08Smrg	/* solid mask color */
154818781e08Smrg	R600SetSolidConsts(pScrn, &ps_alu_consts[4], pMaskPicture->format,
154918781e08Smrg			   pMaskPicture->pSourcePict->solidFill.color, 1);
155018781e08Smrg    }
155118781e08Smrg
155218781e08Smrg    r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_ps,
155318781e08Smrg			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
15547314432eSmrg
1555de2362d3Smrg    if (accel_state->vsync)
1556de2362d3Smrg	RADEONVlineHelperClear(pScrn);
1557de2362d3Smrg
1558de2362d3Smrg    accel_state->composite_op = op;
1559de2362d3Smrg    accel_state->dst_pic = pDstPicture;
1560de2362d3Smrg    accel_state->src_pic = pSrcPicture;
1561de2362d3Smrg    accel_state->dst_pix = pDst;
1562de2362d3Smrg    accel_state->msk_pix = pMask;
1563de2362d3Smrg    accel_state->src_pix = pSrc;
1564de2362d3Smrg
1565de2362d3Smrg    return TRUE;
1566de2362d3Smrg}
1567de2362d3Smrg
1568de2362d3Smrgstatic void R600FinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst,
1569de2362d3Smrg				struct radeon_accel_state *accel_state)
1570de2362d3Smrg{
1571de2362d3Smrg    int vtx_size;
1572de2362d3Smrg
1573de2362d3Smrg    if (accel_state->vsync)
157418781e08Smrg       r600_cp_wait_vline_sync(pScrn, pDst,
1575de2362d3Smrg			       accel_state->vline_crtc,
1576de2362d3Smrg			       accel_state->vline_y1,
1577de2362d3Smrg			       accel_state->vline_y2);
1578de2362d3Smrg
157918781e08Smrg    vtx_size = accel_state->msk_pix ? 24 : 16;
1580de2362d3Smrg
1581de2362d3Smrg    r600_finish_op(pScrn, vtx_size);
1582de2362d3Smrg}
1583de2362d3Smrg
1584de2362d3Smrgstatic void R600DoneComposite(PixmapPtr pDst)
1585de2362d3Smrg{
1586de2362d3Smrg    ScreenPtr pScreen = pDst->drawable.pScreen;
1587de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1588de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1589de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1590de2362d3Smrg
1591de2362d3Smrg    R600FinishComposite(pScrn, pDst, accel_state);
1592de2362d3Smrg}
1593de2362d3Smrg
1594de2362d3Smrgstatic void R600Composite(PixmapPtr pDst,
1595de2362d3Smrg			  int srcX, int srcY,
1596de2362d3Smrg			  int maskX, int maskY,
1597de2362d3Smrg			  int dstX, int dstY,
1598de2362d3Smrg			  int w, int h)
1599de2362d3Smrg{
1600de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1601de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1602de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1603de2362d3Smrg    float *vb;
1604de2362d3Smrg
1605de2362d3Smrg    /* ErrorF("R600Composite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
1606de2362d3Smrg       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
1607de2362d3Smrg
160818781e08Smrg    if (CS_FULL(info->cs)) {
1609de2362d3Smrg	R600FinishComposite(pScrn, pDst, info->accel_state);
1610de2362d3Smrg	radeon_cs_flush_indirect(pScrn);
1611de2362d3Smrg	R600PrepareComposite(info->accel_state->composite_op,
1612de2362d3Smrg			     info->accel_state->src_pic,
1613de2362d3Smrg			     info->accel_state->msk_pic,
1614de2362d3Smrg			     info->accel_state->dst_pic,
1615de2362d3Smrg			     info->accel_state->src_pix,
1616de2362d3Smrg			     info->accel_state->msk_pix,
1617de2362d3Smrg			     info->accel_state->dst_pix);
1618de2362d3Smrg    }
1619de2362d3Smrg
1620de2362d3Smrg    if (accel_state->vsync)
1621de2362d3Smrg	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
1622de2362d3Smrg
162318781e08Smrg    if (accel_state->msk_pix) {
1624de2362d3Smrg
1625de2362d3Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
1626de2362d3Smrg
1627de2362d3Smrg	vb[0] = (float)dstX;
1628de2362d3Smrg	vb[1] = (float)dstY;
1629de2362d3Smrg	vb[2] = (float)srcX;
1630de2362d3Smrg	vb[3] = (float)srcY;
1631de2362d3Smrg	vb[4] = (float)maskX;
1632de2362d3Smrg	vb[5] = (float)maskY;
1633de2362d3Smrg
1634de2362d3Smrg	vb[6] = (float)dstX;
1635de2362d3Smrg	vb[7] = (float)(dstY + h);
1636de2362d3Smrg	vb[8] = (float)srcX;
1637de2362d3Smrg	vb[9] = (float)(srcY + h);
1638de2362d3Smrg	vb[10] = (float)maskX;
1639de2362d3Smrg	vb[11] = (float)(maskY + h);
1640de2362d3Smrg
1641de2362d3Smrg	vb[12] = (float)(dstX + w);
1642de2362d3Smrg	vb[13] = (float)(dstY + h);
1643de2362d3Smrg	vb[14] = (float)(srcX + w);
1644de2362d3Smrg	vb[15] = (float)(srcY + h);
1645de2362d3Smrg	vb[16] = (float)(maskX + w);
1646de2362d3Smrg	vb[17] = (float)(maskY + h);
1647de2362d3Smrg
1648de2362d3Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
1649de2362d3Smrg
1650de2362d3Smrg    } else {
1651de2362d3Smrg
1652de2362d3Smrg	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
1653de2362d3Smrg
1654de2362d3Smrg	vb[0] = (float)dstX;
1655de2362d3Smrg	vb[1] = (float)dstY;
1656de2362d3Smrg	vb[2] = (float)srcX;
1657de2362d3Smrg	vb[3] = (float)srcY;
1658de2362d3Smrg
1659de2362d3Smrg	vb[4] = (float)dstX;
1660de2362d3Smrg	vb[5] = (float)(dstY + h);
1661de2362d3Smrg	vb[6] = (float)srcX;
1662de2362d3Smrg	vb[7] = (float)(srcY + h);
1663de2362d3Smrg
1664de2362d3Smrg	vb[8] = (float)(dstX + w);
1665de2362d3Smrg	vb[9] = (float)(dstY + h);
1666de2362d3Smrg	vb[10] = (float)(srcX + w);
1667de2362d3Smrg	vb[11] = (float)(srcY + h);
1668de2362d3Smrg
1669de2362d3Smrg	radeon_vbo_commit(pScrn, &accel_state->vbo);
1670de2362d3Smrg    }
1671de2362d3Smrg
1672de2362d3Smrg
1673de2362d3Smrg}
1674de2362d3Smrg
1675de2362d3Smrgstatic Bool
1676de2362d3SmrgR600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
1677de2362d3Smrg		     char *src, int src_pitch)
1678de2362d3Smrg{
1679de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1680de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1681de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1682de2362d3Smrg    struct radeon_exa_pixmap_priv *driver_priv;
1683de2362d3Smrg    struct radeon_bo *scratch = NULL;
1684de2362d3Smrg    struct radeon_bo *copy_dst;
1685de2362d3Smrg    unsigned char *dst;
1686de2362d3Smrg    unsigned size;
1687de2362d3Smrg    uint32_t dst_domain;
1688de2362d3Smrg    int bpp = pDst->drawable.bitsPerPixel;
1689de2362d3Smrg    uint32_t scratch_pitch;
1690de2362d3Smrg    uint32_t copy_pitch;
1691de2362d3Smrg    uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
1692de2362d3Smrg    int ret;
1693de2362d3Smrg    Bool flush = TRUE;
1694de2362d3Smrg    Bool r;
1695de2362d3Smrg    int i;
1696de2362d3Smrg    struct r600_accel_object src_obj, dst_obj;
1697de2362d3Smrg    uint32_t height, base_align;
1698de2362d3Smrg
1699de2362d3Smrg    if (bpp < 8)
1700de2362d3Smrg	return FALSE;
1701de2362d3Smrg
1702de2362d3Smrg    driver_priv = exaGetPixmapDriverPrivate(pDst);
170339413783Smrg    if (!driver_priv || !driver_priv->bo->bo.radeon)
1704de2362d3Smrg	return FALSE;
1705de2362d3Smrg
170618781e08Smrg    /* If we know the BO won't be busy / in VRAM, don't bother with a scratch */
170739413783Smrg    copy_dst = driver_priv->bo->bo.radeon;
1708de2362d3Smrg    copy_pitch = pDst->devKind;
1709de2362d3Smrg    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
171039413783Smrg	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo->bo.radeon, info->cs)) {
1711de2362d3Smrg	    flush = FALSE;
171239413783Smrg	    if (!radeon_bo_is_busy(driver_priv->bo->bo.radeon, &dst_domain) &&
171318781e08Smrg		!(dst_domain & RADEON_GEM_DOMAIN_VRAM))
1714de2362d3Smrg		goto copy;
1715de2362d3Smrg	}
171618781e08Smrg	/* use cpu copy for fast fb access */
171718781e08Smrg	if (info->is_fast_fb)
171818781e08Smrg	    goto copy;
1719de2362d3Smrg    }
1720de2362d3Smrg
1721de2362d3Smrg    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1722de2362d3Smrg    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1723de2362d3Smrg    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1724de2362d3Smrg    size = scratch_pitch * height * (bpp / 8);
1725de2362d3Smrg    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
172639413783Smrg    if (!scratch) {
1727de2362d3Smrg	goto copy;
1728de2362d3Smrg    }
1729de2362d3Smrg
1730de2362d3Smrg    src_obj.pitch = scratch_pitch;
1731de2362d3Smrg    src_obj.width = w;
1732de2362d3Smrg    src_obj.height = h;
1733de2362d3Smrg    src_obj.bpp = bpp;
1734de2362d3Smrg    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1735de2362d3Smrg    src_obj.bo = scratch;
1736de2362d3Smrg    src_obj.tiling_flags = 0;
1737de2362d3Smrg    src_obj.surface = NULL;
1738de2362d3Smrg
1739de2362d3Smrg    dst_obj.pitch = dst_pitch_hw;
1740de2362d3Smrg    dst_obj.width = pDst->drawable.width;
1741de2362d3Smrg    dst_obj.height = pDst->drawable.height;
1742de2362d3Smrg    dst_obj.bpp = bpp;
1743de2362d3Smrg    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
174439413783Smrg    dst_obj.bo = radeon_get_pixmap_bo(pDst)->bo.radeon;
1745de2362d3Smrg    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1746de2362d3Smrg    dst_obj.surface = radeon_get_pixmap_surface(pDst);
1747de2362d3Smrg
1748de2362d3Smrg    if (!R600SetAccelState(pScrn,
1749de2362d3Smrg			   &src_obj,
1750de2362d3Smrg			   NULL,
1751de2362d3Smrg			   &dst_obj,
1752de2362d3Smrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1753de2362d3Smrg			   3, 0xffffffff)) {
1754de2362d3Smrg        goto copy;
1755de2362d3Smrg    }
1756de2362d3Smrg    copy_dst = scratch;
1757de2362d3Smrg    copy_pitch = scratch_pitch * (bpp / 8);
1758de2362d3Smrg    flush = FALSE;
1759de2362d3Smrg
1760de2362d3Smrgcopy:
1761de2362d3Smrg    if (flush)
1762de2362d3Smrg	radeon_cs_flush_indirect(pScrn);
1763de2362d3Smrg
1764de2362d3Smrg    ret = radeon_bo_map(copy_dst, 0);
1765de2362d3Smrg    if (ret) {
1766de2362d3Smrg        r = FALSE;
1767de2362d3Smrg        goto out;
1768de2362d3Smrg    }
1769de2362d3Smrg    r = TRUE;
1770de2362d3Smrg    size = w * bpp / 8;
1771de2362d3Smrg    dst = copy_dst->ptr;
177239413783Smrg    if (copy_dst == driver_priv->bo->bo.radeon)
1773de2362d3Smrg	dst += y * copy_pitch + x * bpp / 8;
1774de2362d3Smrg    for (i = 0; i < h; i++) {
1775de2362d3Smrg        memcpy(dst + i * copy_pitch, src, size);
1776de2362d3Smrg        src += src_pitch;
1777de2362d3Smrg    }
1778de2362d3Smrg    radeon_bo_unmap(copy_dst);
1779de2362d3Smrg
1780de2362d3Smrg    if (copy_dst == scratch) {
1781de2362d3Smrg	if (info->accel_state->vsync)
1782de2362d3Smrg	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1783de2362d3Smrg
1784de2362d3Smrg	/* blit from gart to vram */
1785de2362d3Smrg	R600DoPrepareCopy(pScrn);
1786de2362d3Smrg	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
1787de2362d3Smrg	R600DoCopyVline(pDst);
1788de2362d3Smrg    }
1789de2362d3Smrg
1790de2362d3Smrgout:
1791de2362d3Smrg    if (scratch)
1792de2362d3Smrg	radeon_bo_unref(scratch);
1793de2362d3Smrg    return r;
1794de2362d3Smrg}
1795de2362d3Smrg
1796de2362d3Smrgstatic Bool
1797de2362d3SmrgR600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
1798de2362d3Smrg			 int h, char *dst, int dst_pitch)
1799de2362d3Smrg{
1800de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
1801de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1802de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1803de2362d3Smrg    struct radeon_exa_pixmap_priv *driver_priv;
1804de2362d3Smrg    struct radeon_bo *scratch = NULL;
1805de2362d3Smrg    struct radeon_bo *copy_src;
1806de2362d3Smrg    unsigned size;
1807de2362d3Smrg    uint32_t src_domain = 0;
1808de2362d3Smrg    int bpp = pSrc->drawable.bitsPerPixel;
1809de2362d3Smrg    uint32_t scratch_pitch;
1810de2362d3Smrg    uint32_t copy_pitch;
1811de2362d3Smrg    uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
1812de2362d3Smrg    int ret;
1813de2362d3Smrg    Bool flush = FALSE;
1814de2362d3Smrg    Bool r;
1815de2362d3Smrg    struct r600_accel_object src_obj, dst_obj;
1816de2362d3Smrg    uint32_t height, base_align;
1817de2362d3Smrg
1818de2362d3Smrg    if (bpp < 8)
1819de2362d3Smrg	return FALSE;
1820de2362d3Smrg
1821de2362d3Smrg    driver_priv = exaGetPixmapDriverPrivate(pSrc);
182239413783Smrg    if (!driver_priv || !driver_priv->bo->bo.radeon)
1823de2362d3Smrg	return FALSE;
1824de2362d3Smrg
1825de2362d3Smrg    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
182639413783Smrg    copy_src = driver_priv->bo->bo.radeon;
1827de2362d3Smrg    copy_pitch = pSrc->devKind;
1828de2362d3Smrg    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
182939413783Smrg	if (radeon_bo_is_referenced_by_cs(driver_priv->bo->bo.radeon, info->cs)) {
183039413783Smrg	    src_domain = radeon_bo_get_src_domain(driver_priv->bo->bo.radeon);
1831de2362d3Smrg	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
1832de2362d3Smrg		(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
1833de2362d3Smrg		src_domain = 0;
1834de2362d3Smrg	    else /* A write may be scheduled */
1835de2362d3Smrg		flush = TRUE;
1836de2362d3Smrg	}
1837de2362d3Smrg
1838de2362d3Smrg	if (!src_domain)
183939413783Smrg	    radeon_bo_is_busy(driver_priv->bo->bo.radeon, &src_domain);
1840de2362d3Smrg
1841de2362d3Smrg	if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
1842de2362d3Smrg	    goto copy;
1843de2362d3Smrg    }
1844de2362d3Smrg
1845de2362d3Smrg    scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1846de2362d3Smrg    height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1847de2362d3Smrg    base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1848de2362d3Smrg    size = scratch_pitch * height * (bpp / 8);
1849de2362d3Smrg    scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
185039413783Smrg    if (!scratch) {
1851de2362d3Smrg	goto copy;
1852de2362d3Smrg    }
1853de2362d3Smrg    radeon_cs_space_reset_bos(info->cs);
1854de2362d3Smrg    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
1855de2362d3Smrg				      RADEON_GEM_DOMAIN_VRAM, 0);
1856de2362d3Smrg    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
1857de2362d3Smrg    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
1858de2362d3Smrg    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1859de2362d3Smrg    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
1860de2362d3Smrg    ret = radeon_cs_space_check(info->cs);
1861de2362d3Smrg    if (ret) {
1862de2362d3Smrg        goto copy;
1863de2362d3Smrg    }
1864de2362d3Smrg
1865de2362d3Smrg    src_obj.pitch = src_pitch_hw;
1866de2362d3Smrg    src_obj.width = pSrc->drawable.width;
1867de2362d3Smrg    src_obj.height = pSrc->drawable.height;
1868de2362d3Smrg    src_obj.bpp = bpp;
1869de2362d3Smrg    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
187039413783Smrg    src_obj.bo = radeon_get_pixmap_bo(pSrc)->bo.radeon;
1871de2362d3Smrg    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1872de2362d3Smrg    src_obj.surface = radeon_get_pixmap_surface(pSrc);
1873de2362d3Smrg
1874de2362d3Smrg    dst_obj.pitch = scratch_pitch;
1875de2362d3Smrg    dst_obj.width = w;
1876de2362d3Smrg    dst_obj.height = h;
1877de2362d3Smrg    dst_obj.bo = scratch;
1878de2362d3Smrg    dst_obj.bpp = bpp;
1879de2362d3Smrg    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1880de2362d3Smrg    dst_obj.tiling_flags = 0;
1881de2362d3Smrg    dst_obj.surface = NULL;
1882de2362d3Smrg
1883de2362d3Smrg    if (!R600SetAccelState(pScrn,
1884de2362d3Smrg			   &src_obj,
1885de2362d3Smrg			   NULL,
1886de2362d3Smrg			   &dst_obj,
1887de2362d3Smrg			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1888de2362d3Smrg			   3, 0xffffffff)) {
1889de2362d3Smrg        goto copy;
1890de2362d3Smrg    }
1891de2362d3Smrg
1892de2362d3Smrg    /* blit from vram to gart */
1893de2362d3Smrg    R600DoPrepareCopy(pScrn);
1894de2362d3Smrg    R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
1895de2362d3Smrg    R600DoCopy(pScrn);
1896de2362d3Smrg    copy_src = scratch;
1897de2362d3Smrg    copy_pitch = scratch_pitch * (bpp / 8);
1898de2362d3Smrg    flush = TRUE;
1899de2362d3Smrg
1900de2362d3Smrgcopy:
190118781e08Smrg    if (flush)
1902de2362d3Smrg	radeon_cs_flush_indirect(pScrn);
1903de2362d3Smrg
1904de2362d3Smrg    ret = radeon_bo_map(copy_src, 0);
1905de2362d3Smrg    if (ret) {
1906de2362d3Smrg	ErrorF("failed to map pixmap: %d\n", ret);
1907de2362d3Smrg        r = FALSE;
1908de2362d3Smrg        goto out;
1909de2362d3Smrg    }
1910de2362d3Smrg    r = TRUE;
1911de2362d3Smrg    w *= bpp / 8;
191239413783Smrg    if (copy_src == driver_priv->bo->bo.radeon)
1913de2362d3Smrg	size = y * copy_pitch + x * bpp / 8;
1914de2362d3Smrg    else
1915de2362d3Smrg	size = 0;
1916de2362d3Smrg    while (h--) {
1917de2362d3Smrg        memcpy(dst, copy_src->ptr + size, w);
1918de2362d3Smrg        size += copy_pitch;
1919de2362d3Smrg        dst += dst_pitch;
1920de2362d3Smrg    }
1921de2362d3Smrg    radeon_bo_unmap(copy_src);
1922de2362d3Smrgout:
1923de2362d3Smrg    if (scratch)
1924de2362d3Smrg	radeon_bo_unref(scratch);
1925de2362d3Smrg    return r;
1926de2362d3Smrg}
1927de2362d3Smrg
1928de2362d3Smrgstatic int
1929de2362d3SmrgR600MarkSync(ScreenPtr pScreen)
1930de2362d3Smrg{
1931de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1932de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1933de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1934de2362d3Smrg
1935de2362d3Smrg    return ++accel_state->exaSyncMarker;
1936de2362d3Smrg
1937de2362d3Smrg}
1938de2362d3Smrg
1939de2362d3Smrgstatic void
1940de2362d3SmrgR600Sync(ScreenPtr pScreen, int marker)
1941de2362d3Smrg{
1942de2362d3Smrg    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1943de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1944de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1945de2362d3Smrg
1946de2362d3Smrg    if (accel_state->exaMarkerSynced != marker) {
1947de2362d3Smrg	accel_state->exaMarkerSynced = marker;
1948de2362d3Smrg    }
1949de2362d3Smrg
1950de2362d3Smrg}
1951de2362d3Smrg
1952de2362d3Smrgstatic Bool
1953de2362d3SmrgR600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
1954de2362d3Smrg{
1955de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1956de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1957de2362d3Smrg
1958de2362d3Smrg    /* 512 bytes per shader for now */
1959de2362d3Smrg    int size = 512 * 9;
1960de2362d3Smrg
196118781e08Smrg    accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
196218781e08Smrg					     RADEON_GEM_DOMAIN_VRAM, 0);
196339413783Smrg    if (!accel_state->shaders_bo) {
196418781e08Smrg        ErrorF("Allocating shader failed\n");
196518781e08Smrg	return FALSE;
1966de2362d3Smrg    }
1967de2362d3Smrg    return TRUE;
1968de2362d3Smrg}
1969de2362d3Smrg
1970de2362d3SmrgBool
1971de2362d3SmrgR600LoadShaders(ScrnInfoPtr pScrn)
1972de2362d3Smrg{
1973de2362d3Smrg    RADEONInfoPtr info = RADEONPTR(pScrn);
1974de2362d3Smrg    struct radeon_accel_state *accel_state = info->accel_state;
1975de2362d3Smrg    RADEONChipFamily ChipSet = info->ChipFamily;
1976de2362d3Smrg    uint32_t *shader;
1977de2362d3Smrg    int ret;
1978de2362d3Smrg
197918781e08Smrg    ret = radeon_bo_map(accel_state->shaders_bo, 1);
198018781e08Smrg    if (ret) {
198118781e08Smrg        FatalError("failed to map shader %d\n", ret);
198218781e08Smrg	return FALSE;
198318781e08Smrg    }
198418781e08Smrg    shader = accel_state->shaders_bo->ptr;
1985de2362d3Smrg
1986de2362d3Smrg    /*  solid vs --------------------------------------- */
1987de2362d3Smrg    accel_state->solid_vs_offset = 0;
1988de2362d3Smrg    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
1989de2362d3Smrg
1990de2362d3Smrg    /*  solid ps --------------------------------------- */
1991de2362d3Smrg    accel_state->solid_ps_offset = 512;
1992de2362d3Smrg    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
1993de2362d3Smrg
1994de2362d3Smrg    /*  copy vs --------------------------------------- */
1995de2362d3Smrg    accel_state->copy_vs_offset = 1024;
1996de2362d3Smrg    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
1997de2362d3Smrg
1998de2362d3Smrg    /*  copy ps --------------------------------------- */
1999de2362d3Smrg    accel_state->copy_ps_offset = 1536;
2000de2362d3Smrg    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2001de2362d3Smrg
2002de2362d3Smrg    /*  comp vs --------------------------------------- */
2003de2362d3Smrg    accel_state->comp_vs_offset = 2048;
2004de2362d3Smrg    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2005de2362d3Smrg
2006de2362d3Smrg    /*  comp ps --------------------------------------- */
2007de2362d3Smrg    accel_state->comp_ps_offset = 2560;
2008de2362d3Smrg    R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2009de2362d3Smrg
2010de2362d3Smrg    /*  xv vs --------------------------------------- */
2011de2362d3Smrg    accel_state->xv_vs_offset = 3072;
2012de2362d3Smrg    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2013de2362d3Smrg
2014de2362d3Smrg    /*  xv ps --------------------------------------- */
2015de2362d3Smrg    accel_state->xv_ps_offset = 3584;
2016de2362d3Smrg    R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2017de2362d3Smrg
201818781e08Smrg    radeon_bo_unmap(accel_state->shaders_bo);
2019de2362d3Smrg    return TRUE;
2020de2362d3Smrg}
2021de2362d3Smrg
2022de2362d3SmrgBool
2023de2362d3SmrgR600DrawInit(ScreenPtr pScreen)
2024de2362d3Smrg{
2025de2362d3Smrg    ScrnInfoPtr pScrn =  xf86ScreenToScrn(pScreen);
2026de2362d3Smrg    RADEONInfoPtr info   = RADEONPTR(pScrn);
2027de2362d3Smrg
202839413783Smrg    if (!info->accel_state->exa) {
2029de2362d3Smrg	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2030de2362d3Smrg	return FALSE;
2031de2362d3Smrg    }
2032de2362d3Smrg
2033de2362d3Smrg    info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2034de2362d3Smrg    info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2035de2362d3Smrg
2036de2362d3Smrg    info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2037de2362d3Smrg    info->accel_state->exa->Solid = R600Solid;
2038de2362d3Smrg    info->accel_state->exa->DoneSolid = R600DoneSolid;
2039de2362d3Smrg
2040de2362d3Smrg    info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2041de2362d3Smrg    info->accel_state->exa->Copy = R600Copy;
2042de2362d3Smrg    info->accel_state->exa->DoneCopy = R600DoneCopy;
2043de2362d3Smrg
2044de2362d3Smrg    info->accel_state->exa->MarkSync = R600MarkSync;
2045de2362d3Smrg    info->accel_state->exa->WaitMarker = R600Sync;
2046de2362d3Smrg
204718781e08Smrg    info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
204818781e08Smrg    info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
204918781e08Smrg    info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
205018781e08Smrg    info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
205118781e08Smrg    info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
205218781e08Smrg    info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
205318781e08Smrg    info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
205418781e08Smrg    info->accel_state->exa->SharePixmapBacking = RADEONEXASharePixmapBacking;
205518781e08Smrg    info->accel_state->exa->SetSharedPixmapBacking = RADEONEXASetSharedPixmapBacking;
205618781e08Smrg    info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS | EXA_SUPPORTS_PREPARE_AUX |
205718781e08Smrg	EXA_HANDLES_PIXMAPS | EXA_MIXED_PIXMAPS;
2058de2362d3Smrg    info->accel_state->exa->pixmapOffsetAlign = 256;
2059de2362d3Smrg    info->accel_state->exa->pixmapPitchAlign = 256;
2060de2362d3Smrg
2061de2362d3Smrg    info->accel_state->exa->CheckComposite = R600CheckComposite;
2062de2362d3Smrg    info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2063de2362d3Smrg    info->accel_state->exa->Composite = R600Composite;
2064de2362d3Smrg    info->accel_state->exa->DoneComposite = R600DoneComposite;
2065de2362d3Smrg
2066de2362d3Smrg    info->accel_state->exa->maxPitchBytes = 32768;
2067de2362d3Smrg    info->accel_state->exa->maxX = 8192;
2068de2362d3Smrg    info->accel_state->exa->maxY = 8192;
2069de2362d3Smrg
2070de2362d3Smrg    /* not supported yet */
2071de2362d3Smrg    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2072de2362d3Smrg	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2073de2362d3Smrg	info->accel_state->vsync = TRUE;
2074de2362d3Smrg    } else
2075de2362d3Smrg	info->accel_state->vsync = FALSE;
2076de2362d3Smrg
2077de2362d3Smrg    if (!exaDriverInit(pScreen, info->accel_state->exa)) {
2078de2362d3Smrg	free(info->accel_state->exa);
2079de2362d3Smrg	return FALSE;
2080de2362d3Smrg    }
2081de2362d3Smrg
2082de2362d3Smrg    info->accel_state->XInited3D = FALSE;
2083de2362d3Smrg    info->accel_state->src_obj[0].bo = NULL;
2084de2362d3Smrg    info->accel_state->src_obj[1].bo = NULL;
2085de2362d3Smrg    info->accel_state->dst_obj.bo = NULL;
2086de2362d3Smrg    info->accel_state->copy_area_bo = NULL;
2087de2362d3Smrg    info->accel_state->vbo.vb_start_op = -1;
2088de2362d3Smrg    info->accel_state->finish_op = r600_finish_op;
2089de2362d3Smrg    info->accel_state->vbo.verts_per_op = 3;
2090de2362d3Smrg    RADEONVlineHelperClear(pScrn);
2091de2362d3Smrg
2092de2362d3Smrg    radeon_vbo_init_lists(pScrn);
2093de2362d3Smrg
2094de2362d3Smrg    if (!R600AllocShaders(pScrn, pScreen))
2095de2362d3Smrg	return FALSE;
2096de2362d3Smrg
2097de2362d3Smrg    if (!R600LoadShaders(pScrn))
2098de2362d3Smrg	return FALSE;
2099de2362d3Smrg
2100de2362d3Smrg    exaMarkSync(pScreen);
2101de2362d3Smrg
2102de2362d3Smrg    return TRUE;
2103de2362d3Smrg
2104de2362d3Smrg}
2105de2362d3Smrg
2106