r6xx_accel.c revision b7e1c893
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Alex Deucher <alexander.deucher@amd.com>
24 *          Matthias Hopf <mhopf@suse.de>
25 */
26#ifdef HAVE_CONFIG_H
27#include "config.h"
28#endif
29
30#include "xf86.h"
31
32#include <errno.h>
33
34#include "radeon.h"
35#include "r600_shader.h"
36#include "radeon_reg.h"
37#include "r600_reg.h"
38#include "r600_state.h"
39
40#include "radeon_drm.h"
41
42/* Flush the indirect buffer to the kernel for submission to the card */
43void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib)
44{
45    RADEONInfoPtr  info = RADEONPTR(pScrn);
46    drmBufPtr          buffer = ib;
47    int                start  = 0;
48    drm_radeon_indirect_t  indirect;
49
50    if (!buffer) return;
51
52    //xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n",
53    //       buffer->idx);
54
55    while (buffer->used & 0x3c){
56        E32(buffer, CP_PACKET2()); /* fill up to multiple of 16 dwords */
57    }
58
59    //ErrorF("buffer bytes: %d\n", buffer->used);
60
61    indirect.idx     = buffer->idx;
62    indirect.start   = start;
63    indirect.end     = buffer->used;
64    indirect.discard = 1;
65
66    drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INDIRECT,
67			&indirect, sizeof(drm_radeon_indirect_t));
68
69}
70
71void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib)
72{
73    if (!ib) return;
74
75    ib->used = 0;
76    R600CPFlushIndirect(pScrn, ib);
77}
78
79void
80wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib)
81{
82
83    //flush caches, don't generate timestamp
84    PACK3(ib, IT_EVENT_WRITE, 1);
85    E32(ib, CACHE_FLUSH_AND_INV_EVENT);
86    // wait for 3D idle clean
87    EREG(ib, WAIT_UNTIL,                          (WAIT_3D_IDLE_bit |
88						   WAIT_3D_IDLECLEAN_bit));
89}
90
91void
92wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib)
93{
94
95    EREG(ib, WAIT_UNTIL,                          WAIT_3D_IDLE_bit);
96
97}
98
99static void
100reset_cb(ScrnInfoPtr pScrn, drmBufPtr ib)
101{
102    int i;
103
104    PACK0(ib, CB_COLOR0_INFO, 8);
105    for (i = 0; i < 8; i++)
106	E32(ib, 0);
107}
108
109static void
110reset_td_samplers(ScrnInfoPtr pScrn, drmBufPtr ib)
111{
112    int i;
113
114    wait_3d_idle(pScrn, ib);
115
116    PACK0(ib, TD_PS_SAMPLER0_BORDER_RED, 4*TD_PS_SAMPLER0_BORDER_RED_num);
117    for (i = 0; i < 4*TD_PS_SAMPLER0_BORDER_RED_num; i++)
118	E32(ib, 0);
119    PACK0(ib, TD_VS_SAMPLER0_BORDER_RED, 4*TD_VS_SAMPLER0_BORDER_RED_num);
120    for (i = 0; i < 4*TD_VS_SAMPLER0_BORDER_RED_num; i++)
121	E32(ib, 0);
122
123    wait_3d_idle(pScrn, ib);
124}
125
126static void
127reset_sampler_const (ScrnInfoPtr pScrn, drmBufPtr ib)
128{
129    int i;
130
131    for (i = 0; i < SQ_TEX_SAMPLER_WORD_all_num; i++) {
132	PACK0(ib, SQ_TEX_SAMPLER_WORD + i * SQ_TEX_SAMPLER_WORD_offset, 3);
133	E32(ib, SQ_TEX_DEPTH_COMPARE_LESSEQUAL << DEPTH_COMPARE_FUNCTION_shift);
134	E32(ib, MAX_LOD_mask);
135	E32(ib, 0);
136    }
137}
138
139static void
140reset_dx9_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib)
141{
142    int i;
143
144    const int count = SQ_ALU_CONSTANT_all_num * (SQ_ALU_CONSTANT_offset >> 2);
145
146    PACK0(ib, SQ_ALU_CONSTANT, count);
147    for (i = 0; i < count; i++)
148	EFLOAT(ib, 0.0);
149}
150
151static void
152reset_bool_loop_const(ScrnInfoPtr pScrn, drmBufPtr ib)
153{
154    int i;
155
156
157    PACK0(ib, SQ_BOOL_CONST, SQ_BOOL_CONST_all_num);
158    for (i = 0; i < SQ_BOOL_CONST_all_num; i++)
159	E32(ib, 0);
160
161    PACK0(ib, SQ_LOOP_CONST, SQ_LOOP_CONST_all_num);
162
163    for (i = 0; i < SQ_LOOP_CONST_all_num; i++)
164	E32(ib, 0);
165
166}
167
168void
169start_3d(ScrnInfoPtr pScrn, drmBufPtr ib)
170{
171    RADEONInfoPtr info = RADEONPTR(pScrn);
172
173    if (info->ChipFamily < CHIP_FAMILY_RV770) {
174	PACK3(ib, IT_START_3D_CMDBUF, 1);
175	E32(ib, 0);
176    }
177
178    PACK3(ib, IT_CONTEXT_CONTROL, 2);
179    E32(ib, 0x80000000);
180    E32(ib, 0x80000000);
181
182    wait_3d_idle_clean (pScrn, ib);
183}
184
185/*
186 * Setup of functional groups
187 */
188
189// asic stack/thread/gpr limits - need to query the drm
190static void
191sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf)
192{
193    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
194    uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
195    RADEONInfoPtr info = RADEONPTR(pScrn);
196
197    if ((info->ChipFamily == CHIP_FAMILY_RV610) ||
198	(info->ChipFamily == CHIP_FAMILY_RV620) ||
199	(info->ChipFamily == CHIP_FAMILY_RS780) ||
200	(info->ChipFamily == CHIP_FAMILY_RV710))
201	sq_config = 0;						// no VC
202    else
203	sq_config = VC_ENABLE_bit;
204
205    sq_config |= (DX9_CONSTS_bit |
206		  ALU_INST_PREFER_VECTOR_bit |
207		  (sq_conf->ps_prio << PS_PRIO_shift) |
208		  (sq_conf->vs_prio << VS_PRIO_shift) |
209		  (sq_conf->gs_prio << GS_PRIO_shift) |
210		  (sq_conf->es_prio << ES_PRIO_shift));
211
212    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
213			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
214			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
215    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
216			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
217
218    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
219			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
220			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
221			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
222
223    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
224				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
225
226    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
227				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
228
229    PACK0(ib, SQ_CONFIG, 6);
230    E32(ib, sq_config);
231    E32(ib, sq_gpr_resource_mgmt_1);
232    E32(ib, sq_gpr_resource_mgmt_2);
233    E32(ib, sq_thread_resource_mgmt);
234    E32(ib, sq_stack_resource_mgmt_1);
235    E32(ib, sq_stack_resource_mgmt_2);
236
237}
238
239void
240set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf)
241{
242    uint32_t cb_color_info;
243    int pitch, slice, h;
244    RADEONInfoPtr info = RADEONPTR(pScrn);
245
246    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
247		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
248		     (cb_conf->array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
249		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
250		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
251		     (cb_conf->tile_mode   << CB_COLOR0_INFO__TILE_MODE_shift));
252    if (cb_conf->read_size)
253	cb_color_info |= CB_COLOR0_INFO__READ_SIZE_bit;
254    if (cb_conf->blend_clamp)
255	cb_color_info |= BLEND_CLAMP_bit;
256    if (cb_conf->clear_color)
257	cb_color_info |= CLEAR_COLOR_bit;
258    if (cb_conf->blend_bypass)
259	cb_color_info |= BLEND_BYPASS_bit;
260    if (cb_conf->blend_float32)
261	cb_color_info |= BLEND_FLOAT32_bit;
262    if (cb_conf->simple_float)
263	cb_color_info |= SIMPLE_FLOAT_bit;
264    if (cb_conf->round_mode)
265	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
266    if (cb_conf->tile_compact)
267	cb_color_info |= TILE_COMPACT_bit;
268    if (cb_conf->source_format)
269	cb_color_info |= SOURCE_FORMAT_bit;
270
271    pitch = (cb_conf->w / 8) - 1;
272    h = (cb_conf->h + 7) & ~7;
273    slice = ((cb_conf->w * h) / 64) - 1;
274
275    EREG(ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8));
276
277    // rv6xx workaround
278    if ((info->ChipFamily > CHIP_FAMILY_R600) &&
279	(info->ChipFamily < CHIP_FAMILY_RV770)) {
280	PACK3(ib, IT_SURFACE_BASE_UPDATE, 1);
281	E32(ib, (2 << cb_conf->id));
282    }
283
284    // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib
285    EREG(ib, (CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift)	|
286						    (slice << SLICE_TILE_MAX_shift)));
287    EREG(ib, (CB_COLOR0_VIEW + (4 * cb_conf->id)), ((0    << SLICE_START_shift)		|
288						    (0    << SLICE_MAX_shift)));
289    EREG(ib, (CB_COLOR0_INFO + (4 * cb_conf->id)), cb_color_info);
290    EREG(ib, (CB_COLOR0_TILE + (4 * cb_conf->id)), (0     >> 8));	// CMASK per-tile data base/256
291    EREG(ib, (CB_COLOR0_FRAG + (4 * cb_conf->id)), (0     >> 8));	// FMASK per-tile data base/256
292    EREG(ib, (CB_COLOR0_MASK + (4 * cb_conf->id)), ((0    << CMASK_BLOCK_MAX_shift)	|
293						    (0    << FMASK_TILE_MAX_shift)));
294}
295
296void
297cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr)
298{
299    uint32_t cp_coher_size;
300    if (size == 0xffffffff)
301	cp_coher_size = 0xffffffff;
302    else
303	cp_coher_size = ((size + 255) >> 8);
304
305    PACK3(ib, IT_SURFACE_SYNC, 4);
306    E32(ib, sync_type);
307    E32(ib, cp_coher_size);
308    E32(ib, (mc_addr >> 8));
309    E32(ib, 10); /* poll interval */
310}
311
312/* inserts a wait for vline in the command stream */
313void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix,
314	int crtc, int start, int stop)
315{
316    RADEONInfoPtr  info = RADEONPTR(pScrn);
317    xf86CrtcConfigPtr  xf86_config = XF86_CRTC_CONFIG_PTR(pScrn);
318    uint32_t offset;
319    RADEONCrtcPrivatePtr radeon_crtc;
320
321    if ((crtc < 0) || (crtc > 1))
322        return;
323
324    if (stop < start)
325        return;
326
327    if (!xf86_config->crtc[crtc]->enabled)
328        return;
329
330#ifdef USE_EXA
331    if (info->useEXA)
332        offset = exaGetPixmapOffset(pPix);
333    else
334#endif
335        offset = pPix->devPrivate.ptr - info->FB;
336
337    /* if drawing to front buffer */
338    if (offset != 0)
339        return;
340
341    start = max(start, 0);
342    stop = min(stop, xf86_config->crtc[crtc]->mode.VDisplay);
343
344    if (start > xf86_config->crtc[crtc]->mode.VDisplay)
345        return;
346
347    radeon_crtc = xf86_config->crtc[crtc]->driver_private;
348
349    /* set the VLINE range */
350    EREG(ib, AVIVO_D1MODE_VLINE_START_END + radeon_crtc->crtc_offset,
351         (start << AVIVO_D1MODE_VLINE_START_SHIFT) |
352         (stop << AVIVO_D1MODE_VLINE_END_SHIFT));
353
354    /* tell the CP to poll the VLINE state register */
355    PACK3(ib, IT_WAIT_REG_MEM, 6);
356    E32(ib, IT_WAIT_REG | IT_WAIT_EQ);
357    E32(ib, IT_WAIT_ADDR(AVIVO_D1MODE_VLINE_STATUS + radeon_crtc->crtc_offset));
358    E32(ib, 0);
359    E32(ib, 0);                          // Ref value
360    E32(ib, AVIVO_D1MODE_VLINE_STAT);    // Mask
361    E32(ib, 10);                         // Wait interval
362}
363
364void
365fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf)
366{
367    uint32_t sq_pgm_resources;
368
369    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
370			(fs_conf->stack_size << STACK_SIZE_shift));
371
372    if (fs_conf->dx10_clamp)
373	sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit;
374
375    EREG(ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
376    EREG(ib, SQ_PGM_RESOURCES_FS, sq_pgm_resources);
377    EREG(ib, SQ_PGM_CF_OFFSET_FS, 0);
378}
379
380void
381vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf)
382{
383    uint32_t sq_pgm_resources;
384
385    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
386			(vs_conf->stack_size << STACK_SIZE_shift));
387
388    if (vs_conf->dx10_clamp)
389	sq_pgm_resources |= SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit;
390    if (vs_conf->fetch_cache_lines)
391	sq_pgm_resources |= (vs_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
392    if (vs_conf->uncached_first_inst)
393	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
394
395    EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
396    EREG(ib, SQ_PGM_RESOURCES_VS, sq_pgm_resources);
397    EREG(ib, SQ_PGM_CF_OFFSET_VS, 0);
398}
399
400void
401ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf)
402{
403    uint32_t sq_pgm_resources;
404
405    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
406			(ps_conf->stack_size << STACK_SIZE_shift));
407
408    if (ps_conf->dx10_clamp)
409	sq_pgm_resources |= SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit;
410    if (ps_conf->fetch_cache_lines)
411	sq_pgm_resources |= (ps_conf->fetch_cache_lines << FETCH_CACHE_LINES_shift);
412    if (ps_conf->uncached_first_inst)
413	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
414    if (ps_conf->clamp_consts)
415	sq_pgm_resources |= CLAMP_CONSTS_bit;
416
417    EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
418    EREG(ib, SQ_PGM_RESOURCES_PS, sq_pgm_resources);
419    EREG(ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode);
420    EREG(ib, SQ_PGM_CF_OFFSET_PS, 0);
421}
422
423void
424set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf)
425{
426    int i;
427    const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2);
428
429    PACK0(ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg);
430    for (i = 0; i < countreg; i++)
431	EFLOAT(ib, const_buf[i]);
432}
433
434void
435set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val)
436{
437    /* bool register order is: ps, vs, gs; one register each
438     * 1 bits per bool; 32 bools each for ps, vs, gs.
439     */
440    EREG(ib, SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
441}
442
443void
444set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res)
445{
446    uint32_t sq_vtx_constant_word2;
447
448    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
449			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
450			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
451			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
452			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
453    if (res->clamp_x)
454	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
455
456    if (res->format_comp_all)
457	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
458
459    if (res->srf_mode_all)
460	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
461
462    PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7);
463    E32(ib, res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
464    E32(ib, (res->vtx_num_entries << 2) - 1);			// 1: SIZE
465    E32(ib, sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
466    E32(ib, res->mem_req_size << MEM_REQUEST_SIZE_shift);		// 3: MEM_REQUEST_SIZE ?!?
467    E32(ib, 0);							// 4: n/a
468    E32(ib, 0);							// 5: n/a
469    E32(ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift);	// 6: TYPE
470}
471
472void
473set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res)
474{
475    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
476    uint32_t sq_tex_resource_word5, sq_tex_resource_word6;
477
478    sq_tex_resource_word0 = ((tex_res->dim << DIM_shift) |
479			     (tex_res->tile_mode << SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift));
480
481    if (tex_res->w)
482	sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) |
483				  ((tex_res->w - 1) << TEX_WIDTH_shift));
484
485    if (tex_res->tile_type)
486	sq_tex_resource_word0 |= TILE_TYPE_bit;
487
488    sq_tex_resource_word1 = (tex_res->format << SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift);
489
490    if (tex_res->h)
491	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
492    if (tex_res->depth)
493	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
494
495    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
496			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
497			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
498			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
499			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
500			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
501			     (tex_res->request_size << REQUEST_SIZE_shift) |
502			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
503			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
504			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
505			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
506			     (tex_res->base_level << BASE_LEVEL_shift));
507
508    if (tex_res->srf_mode_all)
509	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
510    if (tex_res->force_degamma)
511	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
512
513    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
514			     (tex_res->base_array << BASE_ARRAY_shift) |
515			     (tex_res->last_array << LAST_ARRAY_shift));
516
517    sq_tex_resource_word6 = ((tex_res->mpeg_clamp << MPEG_CLAMP_shift) |
518			     (tex_res->perf_modulation << PERF_MODULATION_shift) |
519			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift));
520
521    if (tex_res->interlaced)
522	sq_tex_resource_word6 |= INTERLACED_bit;
523
524    PACK0(ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7);
525    E32(ib, sq_tex_resource_word0);
526    E32(ib, sq_tex_resource_word1);
527    E32(ib, ((tex_res->base) >> 8));
528    E32(ib, ((tex_res->mip_base) >> 8));
529    E32(ib, sq_tex_resource_word4);
530    E32(ib, sq_tex_resource_word5);
531    E32(ib, sq_tex_resource_word6);
532}
533
534void
535set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s)
536{
537    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
538
539    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
540			    (s->clamp_y       << CLAMP_Y_shift)					|
541			    (s->clamp_z       << CLAMP_Z_shift)					|
542			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
543			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
544			    (s->z_filter      << Z_FILTER_shift)	|
545			    (s->mip_filter    << MIP_FILTER_shift)				|
546			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
547			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
548			    (s->chroma_key    << CHROMA_KEY_shift));
549    if (s->point_sampling_clamp)
550	sq_tex_sampler_word0 |= POINT_SAMPLING_CLAMP_bit;
551    if (s->tex_array_override)
552	sq_tex_sampler_word0 |= TEX_ARRAY_OVERRIDE_bit;
553    if (s->lod_uses_minor_axis)
554	sq_tex_sampler_word0 |= LOD_USES_MINOR_AXIS_bit;
555
556    sq_tex_sampler_word1 = ((s->min_lod       << MIN_LOD_shift)					|
557			    (s->max_lod       << MAX_LOD_shift)					|
558			    (s->lod_bias      << SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift));
559
560    sq_tex_sampler_word2 = ((s->lod_bias2     << LOD_BIAS_SEC_shift)	|
561			    (s->perf_mip      << PERF_MIP_shift)	|
562			    (s->perf_z        << PERF_Z_shift));
563    if (s->mc_coord_truncate)
564	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
565    if (s->force_degamma)
566	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
567    if (s->high_precision_filter)
568	sq_tex_sampler_word2 |= HIGH_PRECISION_FILTER_bit;
569    if (s->fetch_4)
570	sq_tex_sampler_word2 |= FETCH_4_bit;
571    if (s->sample_is_pcf)
572	sq_tex_sampler_word2 |= SAMPLE_IS_PCF_bit;
573    if (s->type)
574	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
575
576    PACK0(ib, SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
577    E32(ib, sq_tex_sampler_word0);
578    E32(ib, sq_tex_sampler_word1);
579    E32(ib, sq_tex_sampler_word2);
580}
581
582//XXX deal with clip offsets in clip setup
583void
584set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
585{
586
587    EREG(ib, PA_SC_SCREEN_SCISSOR_TL,              ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
588						    (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
589    EREG(ib, PA_SC_SCREEN_SCISSOR_BR,              ((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
590						    (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
591}
592
593void
594set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
595{
596
597    EREG(ib, PA_SC_VPORT_SCISSOR_0_TL +
598	 id * PA_SC_VPORT_SCISSOR_0_TL_offset, ((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
599						(y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
600						WINDOW_OFFSET_DISABLE_bit));
601    EREG(ib, PA_SC_VPORT_SCISSOR_0_BR +
602	 id * PA_SC_VPORT_SCISSOR_0_BR_offset, ((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
603						(y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
604}
605
606void
607set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
608{
609
610    EREG(ib, PA_SC_GENERIC_SCISSOR_TL,            ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
611						   (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
612						   WINDOW_OFFSET_DISABLE_bit));
613    EREG(ib, PA_SC_GENERIC_SCISSOR_BR,            ((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
614						   (y2 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift)));
615}
616
617void
618set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
619{
620
621    EREG(ib, PA_SC_WINDOW_SCISSOR_TL,             ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
622						   (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
623						   WINDOW_OFFSET_DISABLE_bit));
624    EREG(ib, PA_SC_WINDOW_SCISSOR_BR,             ((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
625						   (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
626}
627
628void
629set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
630{
631
632    EREG(ib, PA_SC_CLIPRECT_0_TL +
633	 id * PA_SC_CLIPRECT_0_TL_offset,     ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
634					       (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
635    EREG(ib, PA_SC_CLIPRECT_0_BR +
636	 id * PA_SC_CLIPRECT_0_BR_offset,     ((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
637					       (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
638}
639
640/*
641 * Setup of default state
642 */
643
644void
645set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
646{
647    tex_resource_t tex_res;
648    shader_config_t fs_conf;
649    sq_config_t sq_conf;
650    int i;
651    RADEONInfoPtr info = RADEONPTR(pScrn);
652    struct radeon_accel_state *accel_state = info->accel_state;
653
654    memset(&tex_res, 0, sizeof(tex_resource_t));
655    memset(&fs_conf, 0, sizeof(shader_config_t));
656
657#if 1
658    if (accel_state->XInited3D)
659	return;
660#endif
661
662    accel_state->XInited3D = TRUE;
663
664    wait_3d_idle(pScrn, ib);
665
666    // ASIC specific setup, see drm
667    if (info->ChipFamily < CHIP_FAMILY_RV770) {
668	EREG(ib, TA_CNTL_AUX,                     (( 3 << GRADIENT_CREDIT_shift)		|
669						   (28 << TD_FIFO_CREDIT_shift)));
670	EREG(ib, VC_ENHANCE,                      0);
671	EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
672	EREG(ib, DB_DEBUG,                        0x82000000); /* ? */
673	EREG(ib, DB_WATERMARKS,		        ((4  << DEPTH_FREE_shift)		|
674						 (16 << DEPTH_FLUSH_shift)		|
675						 (0  << FORCE_SUMMARIZE_shift)		|
676						 (4  << DEPTH_PENDING_FREE_shift)	|
677						 (16 << DEPTH_CACHELINE_FREE_shift)	|
678						 0));
679    } else {
680	EREG(ib, TA_CNTL_AUX,                      (( 2 << GRADIENT_CREDIT_shift)		|
681						    (28 << TD_FIFO_CREDIT_shift)));
682	EREG(ib, VC_ENHANCE,                       0);
683	EREG(ib, R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, VS_PC_LIMIT_ENABLE_bit);
684	EREG(ib, DB_DEBUG,                         0);
685	EREG(ib, DB_WATERMARKS,                    ((4  << DEPTH_FREE_shift)		|
686						    (16 << DEPTH_FLUSH_shift)		|
687						    (0  << FORCE_SUMMARIZE_shift)		|
688						    (4  << DEPTH_PENDING_FREE_shift)	|
689						    (4  << DEPTH_CACHELINE_FREE_shift)	|
690						    0));
691    }
692
693    reset_td_samplers(pScrn, ib);
694    reset_dx9_alu_consts(pScrn, ib);
695    reset_bool_loop_const (pScrn, ib);
696    reset_sampler_const (pScrn, ib);
697
698    // SQ
699    sq_conf.ps_prio = 0;
700    sq_conf.vs_prio = 1;
701    sq_conf.gs_prio = 2;
702    sq_conf.es_prio = 3;
703    // need to set stack/thread/gpr limits based on the asic
704    // for now just set them low enough so any card will work
705    // see r600_cp.c in the drm
706    switch (info->ChipFamily) {
707    case CHIP_FAMILY_R600:
708	sq_conf.num_ps_gprs = 192;
709	sq_conf.num_vs_gprs = 56;
710	sq_conf.num_temp_gprs = 4;
711	sq_conf.num_gs_gprs = 0;
712	sq_conf.num_es_gprs = 0;
713	sq_conf.num_ps_threads = 136;
714	sq_conf.num_vs_threads = 48;
715	sq_conf.num_gs_threads = 4;
716	sq_conf.num_es_threads = 4;
717	sq_conf.num_ps_stack_entries = 128;
718	sq_conf.num_vs_stack_entries = 128;
719	sq_conf.num_gs_stack_entries = 0;
720	sq_conf.num_es_stack_entries = 0;
721	break;
722    case CHIP_FAMILY_RV630:
723    case CHIP_FAMILY_RV635:
724	sq_conf.num_ps_gprs = 84;
725	sq_conf.num_vs_gprs = 36;
726	sq_conf.num_temp_gprs = 4;
727	sq_conf.num_gs_gprs = 0;
728	sq_conf.num_es_gprs = 0;
729	sq_conf.num_ps_threads = 144;
730	sq_conf.num_vs_threads = 40;
731	sq_conf.num_gs_threads = 4;
732	sq_conf.num_es_threads = 4;
733	sq_conf.num_ps_stack_entries = 40;
734	sq_conf.num_vs_stack_entries = 40;
735	sq_conf.num_gs_stack_entries = 32;
736	sq_conf.num_es_stack_entries = 16;
737	break;
738    case CHIP_FAMILY_RV610:
739    case CHIP_FAMILY_RV620:
740    case CHIP_FAMILY_RS780:
741    default:
742	sq_conf.num_ps_gprs = 84;
743	sq_conf.num_vs_gprs = 36;
744	sq_conf.num_temp_gprs = 4;
745	sq_conf.num_gs_gprs = 0;
746	sq_conf.num_es_gprs = 0;
747	sq_conf.num_ps_threads = 136;
748	sq_conf.num_vs_threads = 48;
749	sq_conf.num_gs_threads = 4;
750	sq_conf.num_es_threads = 4;
751	sq_conf.num_ps_stack_entries = 40;
752	sq_conf.num_vs_stack_entries = 40;
753	sq_conf.num_gs_stack_entries = 32;
754	sq_conf.num_es_stack_entries = 16;
755	break;
756    case CHIP_FAMILY_RV670:
757	sq_conf.num_ps_gprs = 144;
758	sq_conf.num_vs_gprs = 40;
759	sq_conf.num_temp_gprs = 4;
760	sq_conf.num_gs_gprs = 0;
761	sq_conf.num_es_gprs = 0;
762	sq_conf.num_ps_threads = 136;
763	sq_conf.num_vs_threads = 48;
764	sq_conf.num_gs_threads = 4;
765	sq_conf.num_es_threads = 4;
766	sq_conf.num_ps_stack_entries = 40;
767	sq_conf.num_vs_stack_entries = 40;
768	sq_conf.num_gs_stack_entries = 32;
769	sq_conf.num_es_stack_entries = 16;
770	break;
771    case CHIP_FAMILY_RV770:
772	sq_conf.num_ps_gprs = 192;
773	sq_conf.num_vs_gprs = 56;
774	sq_conf.num_temp_gprs = 4;
775	sq_conf.num_gs_gprs = 0;
776	sq_conf.num_es_gprs = 0;
777	sq_conf.num_ps_threads = 188;
778	sq_conf.num_vs_threads = 60;
779	sq_conf.num_gs_threads = 0;
780	sq_conf.num_es_threads = 0;
781	sq_conf.num_ps_stack_entries = 256;
782	sq_conf.num_vs_stack_entries = 256;
783	sq_conf.num_gs_stack_entries = 0;
784	sq_conf.num_es_stack_entries = 0;
785	break;
786    case CHIP_FAMILY_RV730:
787	sq_conf.num_ps_gprs = 84;
788	sq_conf.num_vs_gprs = 36;
789	sq_conf.num_temp_gprs = 4;
790	sq_conf.num_gs_gprs = 0;
791	sq_conf.num_es_gprs = 0;
792	sq_conf.num_ps_threads = 188;
793	sq_conf.num_vs_threads = 60;
794	sq_conf.num_gs_threads = 0;
795	sq_conf.num_es_threads = 0;
796	sq_conf.num_ps_stack_entries = 128;
797	sq_conf.num_vs_stack_entries = 128;
798	sq_conf.num_gs_stack_entries = 0;
799	sq_conf.num_es_stack_entries = 0;
800	break;
801    case CHIP_FAMILY_RV710:
802	sq_conf.num_ps_gprs = 192;
803	sq_conf.num_vs_gprs = 56;
804	sq_conf.num_temp_gprs = 4;
805	sq_conf.num_gs_gprs = 0;
806	sq_conf.num_es_gprs = 0;
807	sq_conf.num_ps_threads = 144;
808	sq_conf.num_vs_threads = 48;
809	sq_conf.num_gs_threads = 0;
810	sq_conf.num_es_threads = 0;
811	sq_conf.num_ps_stack_entries = 128;
812	sq_conf.num_vs_stack_entries = 128;
813	sq_conf.num_gs_stack_entries = 0;
814	sq_conf.num_es_stack_entries = 0;
815	break;
816    }
817
818    sq_setup(pScrn, ib, &sq_conf);
819
820    EREG(ib, SQ_VTX_BASE_VTX_LOC,                 0);
821    EREG(ib, SQ_VTX_START_INST_LOC,               0);
822
823    PACK0(ib, SQ_ESGS_RING_ITEMSIZE, 9);
824    E32(ib, 0);							// SQ_ESGS_RING_ITEMSIZE
825    E32(ib, 0);							// SQ_GSVS_RING_ITEMSIZE
826    E32(ib, 0);							// SQ_ESTMP_RING_ITEMSIZE
827    E32(ib, 0);							// SQ_GSTMP_RING_ITEMSIZE
828    E32(ib, 0);							// SQ_VSTMP_RING_ITEMSIZE
829    E32(ib, 0);							// SQ_PSTMP_RING_ITEMSIZE
830    E32(ib, 0);							// SQ_FBUF_RING_ITEMSIZE
831    E32(ib, 0);							// SQ_REDUC_RING_ITEMSIZE
832    E32(ib, 0);							// SQ_GS_VERT_ITEMSIZE
833
834    // DB
835    EREG(ib, DB_DEPTH_INFO,                       0);
836    EREG(ib, DB_STENCIL_CLEAR,                    0);
837    EREG(ib, DB_DEPTH_CLEAR,                      0);
838    EREG(ib, DB_STENCILREFMASK,                   0);
839    EREG(ib, DB_STENCILREFMASK_BF,                0);
840    EREG(ib, DB_DEPTH_CONTROL,                    0);
841    EREG(ib, DB_RENDER_CONTROL,                   STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit);
842    if (info->ChipFamily < CHIP_FAMILY_RV770)
843	EREG(ib, DB_RENDER_OVERRIDE,              FORCE_SHADER_Z_ORDER_bit);
844    else
845	EREG(ib, DB_RENDER_OVERRIDE,              0);
846    EREG(ib, DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
847						   (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
848						   (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
849						   (2 << ALPHA_TO_MASK_OFFSET3_shift)));
850
851    // SX
852    EREG(ib, SX_ALPHA_TEST_CONTROL,               0);
853    EREG(ib, SX_ALPHA_REF,                        0);
854
855    // CB
856    reset_cb(pScrn, ib);
857
858    PACK0(ib, CB_BLEND_RED, 4);
859    E32(ib, 0x00000000);
860    E32(ib, 0x00000000);
861    E32(ib, 0x00000000);
862    E32(ib, 0x00000000);
863
864    /* CB_COLOR_CONTROL.PER_MRT_BLEND is off */
865    // RV6xx+ have per-MRT blend
866    if (info->ChipFamily > CHIP_FAMILY_R600) {
867	PACK0(ib, CB_BLEND0_CONTROL, CB_BLEND0_CONTROL_num);
868	for (i = 0; i < CB_BLEND0_CONTROL_num; i++)
869	    E32(ib, 0);
870    }
871
872    EREG(ib, CB_BLEND_CONTROL,                    0);
873
874    if (info->ChipFamily < CHIP_FAMILY_RV770) {
875	PACK0(ib, CB_FOG_RED, 3);
876	E32(ib, 0x00000000);
877	E32(ib, 0x00000000);
878	E32(ib, 0x00000000);
879    }
880
881    EREG(ib, CB_COLOR_CONTROL,                    0);
882    PACK0(ib, CB_CLRCMP_CONTROL, 4);
883    E32(ib, 1 << CLRCMP_FCN_SEL_shift);				// CB_CLRCMP_CONTROL: use CLRCMP_FCN_SRC
884    E32(ib, 0);							// CB_CLRCMP_SRC
885    E32(ib, 0);							// CB_CLRCMP_DST
886    E32(ib, 0);							// CB_CLRCMP_MSK
887
888
889    if (info->ChipFamily < CHIP_FAMILY_RV770) {
890	PACK0(ib, CB_CLEAR_RED, 4);
891	EFLOAT(ib, 1.0);						/* WTF? */
892	EFLOAT(ib, 0.0);
893	EFLOAT(ib, 1.0);
894	EFLOAT(ib, 1.0);
895    }
896    EREG(ib, CB_TARGET_MASK,                      (0x0f << TARGET0_ENABLE_shift));
897
898    // SC
899    set_generic_scissor(pScrn, ib, 0, 0, 8192, 8192);
900    set_screen_scissor(pScrn, ib, 0, 0, 8192, 8192);
901    EREG(ib, PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
902						   (0 << WINDOW_Y_OFFSET_shift)));
903    set_window_scissor(pScrn, ib, 0, 0, 8192, 8192);
904
905    EREG(ib, PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
906
907    /* clip boolean is set to always visible -> doesn't matter */
908    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
909	set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192);
910
911    if (info->ChipFamily < CHIP_FAMILY_RV770)
912	EREG(ib, R7xx_PA_SC_EDGERULE,             0x00000000);
913    else
914	EREG(ib, R7xx_PA_SC_EDGERULE,             0xAAAAAAAA);
915
916    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++) {
917	set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192);
918	PACK0(ib, PA_SC_VPORT_ZMIN_0 + i * PA_SC_VPORT_ZMIN_0_offset, 2);
919	EFLOAT(ib, 0.0);
920	EFLOAT(ib, 1.0);
921    }
922
923    if (info->ChipFamily < CHIP_FAMILY_RV770)
924	EREG(ib, PA_SC_MODE_CNTL,                 (WALK_ORDER_ENABLE_bit | FORCE_EOV_CNTDWN_ENABLE_bit));
925    else
926	EREG(ib, PA_SC_MODE_CNTL,                 (FORCE_EOV_CNTDWN_ENABLE_bit | FORCE_EOV_REZ_ENABLE_bit |
927						   0x00500000)); /* ? */
928
929    EREG(ib, PA_SC_LINE_CNTL,                     0);
930    EREG(ib, PA_SC_AA_CONFIG,                     0);
931    EREG(ib, PA_SC_AA_MASK,                       0xFFFFFFFF);
932
933    //XXX: double check this
934    if (info->ChipFamily > CHIP_FAMILY_R600) {
935	EREG(ib, PA_SC_AA_SAMPLE_LOCS_MCTX,       0);
936	EREG(ib, PA_SC_AA_SAMPLE_LOCS_8S_WD1_M,   0);
937    }
938
939    EREG(ib, PA_SC_LINE_STIPPLE,                  0);
940    EREG(ib, PA_SC_MPASS_PS_CNTL,                 0);
941
942    // CL
943    PACK0(ib, PA_CL_VPORT_XSCALE_0, 6);
944    EFLOAT(ib, 0.0f);						// PA_CL_VPORT_XSCALE
945    EFLOAT(ib, 0.0f);						// PA_CL_VPORT_XOFFSET
946    EFLOAT(ib, 0.0f);						// PA_CL_VPORT_YSCALE
947    EFLOAT(ib, 0.0f);						// PA_CL_VPORT_YOFFSET
948    EFLOAT(ib, 0.0f);						// PA_CL_VPORT_ZSCALE
949    EFLOAT(ib, 0.0f);						// PA_CL_VPORT_ZOFFSET
950    EREG(ib, PA_CL_CLIP_CNTL,                     (CLIP_DISABLE_bit | DX_CLIP_SPACE_DEF_bit));
951    EREG(ib, PA_CL_VTE_CNTL,                      0);
952    EREG(ib, PA_CL_VS_OUT_CNTL,                   0);
953    EREG(ib, PA_CL_NANINF_CNTL,                   0);
954    PACK0(ib, PA_CL_GB_VERT_CLIP_ADJ, 4);
955    EFLOAT(ib, 1.0);						// PA_CL_GB_VERT_CLIP_ADJ
956    EFLOAT(ib, 1.0);						// PA_CL_GB_VERT_DISC_ADJ
957    EFLOAT(ib, 1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
958    EFLOAT(ib, 1.0);						// PA_CL_GB_HORZ_DISC_ADJ
959
960    /* user clipping planes are disabled by default */
961    PACK0(ib, PA_CL_UCP_0_X, 24);
962    for (i = 0; i < 24; i++)
963	EFLOAT(ib, 0.0);
964
965    // SU
966    EREG(ib, PA_SU_SC_MODE_CNTL,                  FACE_bit);
967    EREG(ib, PA_SU_POINT_SIZE,                    0);
968    EREG(ib, PA_SU_POINT_MINMAX,                  0);
969    EREG(ib, PA_SU_POLY_OFFSET_DB_FMT_CNTL,       0);
970    EREG(ib, PA_SU_POLY_OFFSET_BACK_SCALE,        0);
971    EREG(ib, PA_SU_POLY_OFFSET_FRONT_SCALE,       0);
972    EREG(ib, PA_SU_POLY_OFFSET_BACK_OFFSET,       0);
973    EREG(ib, PA_SU_POLY_OFFSET_FRONT_OFFSET,      0);
974
975    EREG(ib, PA_SU_LINE_CNTL,                     (8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */
976    EREG(ib, PA_SU_VTX_CNTL,                      ((2 << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
977						   (5 << QUANT_MODE_shift))); /* Round to Even, fixed point 1/256 */
978    EREG(ib, PA_SU_POLY_OFFSET_CLAMP,             0);
979
980    // SPI
981    if (info->ChipFamily < CHIP_FAMILY_RV770)
982	EREG(ib, R7xx_SPI_THREAD_GROUPING,        0);
983    else
984	EREG(ib, R7xx_SPI_THREAD_GROUPING,        (1 << PS_GROUPING_shift));
985
986    EREG(ib, SPI_INTERP_CONTROL_0,                ((2 << PNT_SPRITE_OVRD_X_shift)		|
987						   (3 << PNT_SPRITE_OVRD_Y_shift)		|
988						   (0 << PNT_SPRITE_OVRD_Z_shift)		|
989						   (1 << PNT_SPRITE_OVRD_W_shift))); /* s,t,0,1 */
990    EREG(ib, SPI_INPUT_Z,                         0);
991    EREG(ib, SPI_FOG_CNTL,                        0);
992    EREG(ib, SPI_FOG_FUNC_SCALE,                  0);
993    EREG(ib, SPI_FOG_FUNC_BIAS,                   0);
994
995    PACK0(ib, SPI_VS_OUT_ID_0, SPI_VS_OUT_ID_0_num);
996    for (i = 0; i < SPI_VS_OUT_ID_0_num; i++)		/* identity mapping */
997	E32(ib, 0x03020100 + i*0x04040404);
998    EREG(ib, SPI_VS_OUT_CONFIG,                   0);
999
1000    // clear FS
1001    fs_setup(pScrn, ib, &fs_conf);
1002
1003    // VGT
1004    EREG(ib, VGT_MAX_VTX_INDX,                    2048); /* XXX set to a reasonably large number of indices */
1005    EREG(ib, VGT_MIN_VTX_INDX,                    0);
1006    EREG(ib, VGT_INDX_OFFSET,                     0);
1007    EREG(ib, VGT_INSTANCE_STEP_RATE_0,            0);
1008    EREG(ib, VGT_INSTANCE_STEP_RATE_1,            0);
1009
1010    EREG(ib, VGT_MULTI_PRIM_IB_RESET_INDX,        0);
1011    EREG(ib, VGT_OUTPUT_PATH_CNTL,                0);
1012    EREG(ib, VGT_GS_MODE,                         0);
1013    EREG(ib, VGT_HOS_CNTL,                        0);
1014    EREG(ib, VGT_HOS_MAX_TESS_LEVEL,              0);
1015    EREG(ib, VGT_HOS_MIN_TESS_LEVEL,              0);
1016    EREG(ib, VGT_HOS_REUSE_DEPTH,                 0);
1017    EREG(ib, VGT_GROUP_PRIM_TYPE,                 0);
1018    EREG(ib, VGT_GROUP_FIRST_DECR,                0);
1019    EREG(ib, VGT_GROUP_DECR,                      0);
1020    EREG(ib, VGT_GROUP_VECT_0_CNTL,               0);
1021    EREG(ib, VGT_GROUP_VECT_1_CNTL,               0);
1022    EREG(ib, VGT_GROUP_VECT_0_FMT_CNTL,           0);
1023    EREG(ib, VGT_GROUP_VECT_1_FMT_CNTL,           0);
1024    EREG(ib, VGT_PRIMITIVEID_EN,                  0);
1025    EREG(ib, VGT_MULTI_PRIM_IB_RESET_EN,          0);
1026    EREG(ib, VGT_STRMOUT_EN,                      0);
1027    EREG(ib, VGT_REUSE_OFF,                       0);
1028    EREG(ib, VGT_VTX_CNT_EN,                      0);
1029    EREG(ib, VGT_STRMOUT_BUFFER_EN,               0);
1030
1031    // clear tex resources - PS
1032    for (i = 0; i < 16; i++) {
1033	tex_res.id = i;
1034	set_tex_resource(pScrn, ib, &tex_res);
1035    }
1036
1037    // clear tex resources - VS
1038    for (i = 160; i < 164; i++) {
1039	tex_res.id = i;
1040	set_tex_resource(pScrn, ib, &tex_res);
1041    }
1042
1043    // clear tex resources - FS
1044    for (i = 320; i < 335; i++) {
1045	tex_res.id = i;
1046	set_tex_resource(pScrn, ib, &tex_res);
1047    }
1048
1049}
1050
1051
1052/*
1053 * Commands
1054 */
1055
1056void
1057draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices)
1058{
1059    uint32_t i, count;
1060
1061    EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1062    PACK3(ib, IT_INDEX_TYPE, 1);
1063    E32(ib, draw_conf->index_type);
1064    PACK3(ib, IT_NUM_INSTANCES, 1);
1065    E32(ib, draw_conf->num_instances);
1066
1067    // calculate num of packets
1068    count = 2;
1069    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT)
1070	count += (draw_conf->num_indices + 1) / 2;
1071    else
1072	count += draw_conf->num_indices;
1073
1074    PACK3(ib, IT_DRAW_INDEX_IMMD, count);
1075    E32(ib, draw_conf->num_indices);
1076    E32(ib, draw_conf->vgt_draw_initiator);
1077
1078    if (draw_conf->index_type == DI_INDEX_SIZE_16_BIT) {
1079	for (i = 0; i < draw_conf->num_indices; i += 2) {
1080	    if ((i + 1) == draw_conf->num_indices)
1081		E32(ib, indices[i]);
1082	    else
1083		E32(ib, (indices[i] | (indices[i + 1] << 16)));
1084	}
1085    } else {
1086	for (i = 0; i < draw_conf->num_indices; i++)
1087	    E32(ib, indices[i]);
1088    }
1089}
1090
1091void
1092draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf)
1093{
1094
1095    EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
1096    PACK3(ib, IT_INDEX_TYPE, 1);
1097    E32(ib, draw_conf->index_type);
1098    PACK3(ib, IT_NUM_INSTANCES, 1);
1099    E32(ib, draw_conf->num_instances);
1100    PACK3(ib, IT_DRAW_INDEX_AUTO, 2);
1101    E32(ib, draw_conf->num_indices);
1102    E32(ib, draw_conf->vgt_draw_initiator);
1103}
1104