1/**************************************************************************
2
3Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
4                     VMware, Inc.
5
6All Rights Reserved.
7
8Permission is hereby granted, free of charge, to any person obtaining
9a copy of this software and associated documentation files (the
10"Software"), to deal in the Software without restriction, including
11without limitation the rights to use, copy, modify, merge, publish,
12distribute, sublicense, and/or sell copies of the Software, and to
13permit persons to whom the Software is furnished to do so, subject to
14the following conditions:
15
16The above copyright notice and this permission notice (including the
17next paragraph) shall be included in all copies or substantial
18portions of the Software.
19
20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28**************************************************************************/
29
30/*
31 * Authors:
32 *   Keith Whitwell <keithw@vmware.com>
33 */
34
35#include "main/glheader.h"
36#include "main/imports.h"
37#include "main/mtypes.h"
38#include "main/light.h"
39#include "main/enums.h"
40#include "main/state.h"
41
42#include "util/macros.h"
43
44#include "vbo/vbo.h"
45#include "tnl/tnl.h"
46#include "tnl/t_pipeline.h"
47
48#include "radeon_common.h"
49#include "radeon_context.h"
50#include "radeon_state.h"
51#include "radeon_ioctl.h"
52#include "radeon_tcl.h"
53#include "radeon_swtcl.h"
54#include "radeon_maos.h"
55#include "radeon_common_context.h"
56
57
58
/*
 * Render unclipped vertex buffers by emitting vertices directly to
 * dma buffers.  Use strip/fan hardware primitives where possible.
 * Try to simulate missing primitives with indexed vertices.
 *
 * The HAVE_* switches state which primitive types are treated as
 * available (1) or unavailable (0).  Line loops, quads and quad strips
 * are the ones marked unavailable.  They are presumably read by the
 * rendering template included further down (tnl_dd/t_dd_dmatmp2.h);
 * confirm against that header.
 */
#define HAVE_POINTS      1
#define HAVE_LINES       1
#define HAVE_LINE_LOOP   0
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES   1
#define HAVE_TRI_STRIPS  1
#define HAVE_TRI_FANS    1
#define HAVE_QUADS       0
#define HAVE_QUAD_STRIPS 0
#define HAVE_POLYGONS    1
#define HAVE_ELTS        1


/*
 * Hardware primitive type code used for each GL primitive.  Primitives
 * without a code of their own are defined as 0; polygons reuse the
 * triangle-fan code.
 */
#define HW_POINTS           RADEON_CP_VC_CNTL_PRIM_TYPE_POINT
#define HW_LINES            RADEON_CP_VC_CNTL_PRIM_TYPE_LINE
#define HW_LINE_LOOP        0
#define HW_LINE_STRIP       RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP
#define HW_TRIANGLES        RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
#define HW_TRIANGLE_STRIP_0 RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP
#define HW_TRIANGLE_STRIP_1 0
#define HW_TRIANGLE_FAN     RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
#define HW_QUADS            0
#define HW_QUAD_STRIP       0
#define HW_POLYGON          RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
88
89
90static GLboolean discrete_prim[0x10] = {
91   0,				/* 0 none */
92   1,				/* 1 points */
93   1,				/* 2 lines */
94   0,				/* 3 line_strip */
95   1,				/* 4 tri_list */
96   0,				/* 5 tri_fan */
97   0,				/* 6 tri_type2 */
98   1,				/* 7 rect list (unused) */
99   1,				/* 8 3vert point */
100   1,				/* 9 3vert line */
101   0,
102   0,
103   0,
104   0,
105   0,
106   0,
107};
108
109
/* Declares the driver context `rmesa`, derived from a `ctx` that must be
 * in scope at the point of use.
 */
#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
/* Type of one element index as written to the hardware. */
#define ELT_TYPE  GLushort

/* Select the hardware primitive for indexed rendering: the primitive
 * code is combined with the indexed-walk flag before being handed to
 * radeonTclPrimitive().
 */
#define ELT_INIT(prim, hw_prim) \
   radeonTclPrimitive( ctx, prim, hw_prim | RADEON_CP_VC_CNTL_PRIM_WALK_IND )

/* Element list of the vertex buffer being rendered; the pointer is
 * stored in rmesa->tcl.Elts by radeon_run_tcl_render().
 */
#define GET_MESA_ELTS() rmesa->tcl.Elts


/* Don't really know how many elts will fit in what's left of cmdbuf,
 * as there is state to emit, etc:
 */

/* Testing on isosurf shows a maximum around here.  Don't know if it's
 * the card or driver or kernel module that is causing the behaviour.
 */
#define GET_MAX_HW_ELTS() 300


/* Flag the `lin` state atom as changed and emit state right away. */
#define RESET_STIPPLE() do {			\
   RADEON_STATECHANGE( rmesa, lin );		\
   radeonEmitState(&rmesa->radeon);			\
} while (0)

/* Set (mode != 0) or clear (mode == 0) RADEON_LINE_PATTERN_AUTO_RESET in
 * the line pattern word, then emit state right away.
 */
#define AUTO_STIPPLE( mode )  do {		\
   RADEON_STATECHANGE( rmesa, lin );		\
   if (mode)					\
      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |=	\
	 RADEON_LINE_PATTERN_AUTO_RESET;	\
   else						\
      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=	\
	 ~RADEON_LINE_PATTERN_AUTO_RESET;	\
   radeonEmitState(&rmesa->radeon);		\
} while (0)
144
145
146
147#define ALLOC_ELTS(nr)	radeonAllocElts( rmesa, nr )
148
149static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr )
150{
151      if (rmesa->radeon.dma.flush)
152	 rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
153
154      radeonEmitAOS( rmesa,
155		     rmesa->radeon.tcl.aos_count, 0 );
156
157      return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format,
158				       rmesa->tcl.hw_primitive, nr );
159}
160
/*
 * Close an indexed primitive.  The RADEON_NEWPRIM call is disabled by the
 * constant-false condition, but it still references `rmesa`.
 *
 * The body is wrapped in do/while(0) so the macro acts as one statement:
 * a bare "if (0) ..." would capture an "else" that follows the macro.
 */
#define CLOSE_ELTS() do { if (0) RADEON_NEWPRIM( rmesa ); } while (0)
162
163
164
165/* TODO: Try to extend existing primitive if both are identical,
166 * discrete and there are no intervening state changes.  (Somewhat
167 * duplicates changes to DrawArrays code)
168 */
169static void radeonEmitPrim( struct gl_context *ctx,
170		       GLenum prim,
171		       GLuint hwprim,
172		       GLuint start,
173		       GLuint count)
174{
175   r100ContextPtr rmesa = R100_CONTEXT( ctx );
176   radeonTclPrimitive( ctx, prim, hwprim );
177
178   radeonEmitAOS( rmesa,
179		  rmesa->radeon.tcl.aos_count,
180		  start );
181
182   /* Why couldn't this packet have taken an offset param?
183    */
184   radeonEmitVbufPrim( rmesa,
185		       rmesa->tcl.vertex_format,
186		       rmesa->tcl.hw_primitive,
187		       count - start );
188}
189
190#define EMIT_PRIM( ctx, prim, hwprim, start, count ) do {       \
191   radeonEmitPrim( ctx, prim, hwprim, start, count );           \
192   (void) rmesa; } while (0)
193
/* Primitive length threshold used by radeonEnsureEmitSize(): non-indexed
 * primitives shorter than this get the pessimistic (larger) size estimate
 * because the rendering code may convert them to elts.
 */
#define MAX_CONVERSION_SIZE 40

/* Try to join small primitives.
 *
 * The enabled variant is true when NR is below 20, or when NR is below 40
 * and the current hardware primitive already equals PRIM combined with
 * the indexed-walk and TCL-enable flags.  The "#if 0" variant would
 * switch the preference off entirely.
 */
#if 0
#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0
#else
#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM )			\
  ((NR) < 20 ||							\
   ((NR) < 40 &&						\
    rmesa->tcl.hw_primitive == (PRIM|				\
			    RADEON_CP_VC_CNTL_PRIM_WALK_IND|	\
			    RADEON_CP_VC_CNTL_TCL_ENABLE)))
#endif
208
/*
 * EMIT_ELT(dest, offset, x): store element index x, truncated to
 * GLushort, at position `offset` relative to `dest`.
 *
 * Big-endian variant: `dest` is rounded down to a 4-byte boundary and the
 * offset is bumped by one if `dest` sat on the odd 16-bit half.  The
 * index is then written to the other half of its 32-bit word, so even
 * positions land on odd slots and vice versa.
 *
 * All macro arguments are parenthesised so compound expressions such as
 * "p + 1" or "a + b" bind as the caller intends.
 */
#ifdef MESA_BIG_ENDIAN
/* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
#define EMIT_ELT(dest, offset, x) do {				\
	int off = (offset) + ( ( (uintptr_t)(dest) & 0x2 ) >> 1 );	\
	GLushort *des = (GLushort *)( (uintptr_t)(dest) & ~0x2 );	\
	(des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); 	\
	(void)rmesa; } while (0)
#else
#define EMIT_ELT(dest, offset, x) do {				\
	(dest)[(offset)] = (GLushort) (x);			\
	(void)rmesa; } while (0)
#endif

/*
 * EMIT_TWO_ELTS(dest, offset, x, y): write two indices with one 32-bit
 * store at dest + offset, with x in the low 16 bits and y in the high 16
 * bits.  The trailing semicolon of the original definition is kept so
 * existing uses continue to expand identically.
 */
#define EMIT_TWO_ELTS(dest, offset, x, y)  *(GLuint *)((dest)+(offset)) = ((y)<<16)|(x);
223
224
225
/* Instantiate the rendering template with a "tcl_" name prefix.  The
 * tcl_render_tab_verts and tcl_render_tab_elts tables used by the entry
 * points below are not defined in this file, so they presumably come
 * from this include.
 */
#define TAG(x) tcl_##x
#include "tnl_dd/t_dd_dmatmp2.h"
228
229/**********************************************************************/
230/*                          External entrypoints                     */
231/**********************************************************************/
232
233void radeonEmitPrimitive( struct gl_context *ctx,
234			  GLuint first,
235			  GLuint last,
236			  GLuint flags )
237{
238   tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
239}
240
241void radeonEmitEltPrimitive( struct gl_context *ctx,
242			     GLuint first,
243			     GLuint last,
244			     GLuint flags )
245{
246   tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
247}
248
249void radeonTclPrimitive( struct gl_context *ctx,
250			 GLenum prim,
251			 int hw_prim )
252{
253   r100ContextPtr rmesa = R100_CONTEXT(ctx);
254   GLuint se_cntl;
255   GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
256
257   radeon_prepare_render(&rmesa->radeon);
258   if (rmesa->radeon.NewGLState)
259      radeonValidateState( ctx );
260
261   if (newprim != rmesa->tcl.hw_primitive ||
262       !discrete_prim[hw_prim&0xf]) {
263      RADEON_NEWPRIM( rmesa );
264      rmesa->tcl.hw_primitive = newprim;
265   }
266
267   se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
268   se_cntl &= ~RADEON_FLAT_SHADE_VTX_LAST;
269
270   if (prim == GL_POLYGON && ctx->Light.ShadeModel == GL_FLAT)
271      se_cntl |= RADEON_FLAT_SHADE_VTX_0;
272   else
273      se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
274
275   if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
276      RADEON_STATECHANGE( rmesa, set );
277      rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
278   }
279}
280
281/**
282 * Predict total emit size for next rendering operation so there is no flush in middle of rendering
283 * Prediction has to aim towards the best possible value that is worse than worst case scenario
284 */
285static GLuint radeonEnsureEmitSize( struct gl_context * ctx , GLuint inputs )
286{
287  r100ContextPtr rmesa = R100_CONTEXT(ctx);
288  TNLcontext *tnl = TNL_CONTEXT(ctx);
289  struct vertex_buffer *VB = &tnl->vb;
290  GLuint space_required;
291  GLuint state_size;
292  GLuint nr_aos = 1; /* radeonEmitArrays does always emit one */
293  int i;
294  /* list of flags that are allocating aos object */
295  const GLuint flags_to_check[] = {
296    VERT_BIT_NORMAL,
297    VERT_BIT_COLOR0,
298    VERT_BIT_COLOR1,
299    VERT_BIT_FOG
300  };
301  /* predict number of aos to emit */
302  for (i=0; i < ARRAY_SIZE(flags_to_check); ++i)
303  {
304    if (inputs & flags_to_check[i])
305      ++nr_aos;
306  }
307  for (i = 0; i < ctx->Const.MaxTextureUnits; ++i)
308  {
309    if (inputs & VERT_BIT_TEX(i))
310      ++nr_aos;
311  }
312
313  {
314    /* count the prediction for state size */
315    space_required = 0;
316    state_size = radeonCountStateEmitSize( &rmesa->radeon );
317    /* tcl may be changed in radeonEmitArrays so account for it if not dirty */
318    if (!rmesa->hw.tcl.dirty)
319      state_size += rmesa->hw.tcl.check( &rmesa->radeon.glCtx, &rmesa->hw.tcl );
320    /* predict size for elements */
321    for (i = 0; i < VB->PrimitiveCount; ++i)
322    {
323      /* If primitive.count is less than MAX_CONVERSION_SIZE
324	 rendering code may decide convert to elts.
325	 In that case we have to make pessimistic prediction.
326	 and use larger of 2 paths. */
327      const GLuint elts = ELTS_BUFSZ(nr_aos);
328      const GLuint index = INDEX_BUFSZ;
329      const GLuint vbuf = VBUF_BUFSZ;
330      if (!VB->Primitive[i].count)
331	continue;
332      if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
333	  || vbuf > index + elts)
334	space_required += vbuf;
335      else
336	space_required += index + elts;
337      space_required += VB->Primitive[i].count * 3;
338      space_required += AOS_BUFSZ(nr_aos);
339    }
340    space_required += SCISSOR_BUFSZ;
341  }
342  /* flush the buffer in case we need more than is left. */
343  if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __func__))
344    return space_required + radeonCountStateEmitSize( &rmesa->radeon );
345  else
346    return space_required + state_size;
347}
348
349/**********************************************************************/
350/*                          Render pipeline stage                     */
351/**********************************************************************/
352
353
354/* TCL render.
355 */
356static GLboolean radeon_run_tcl_render( struct gl_context *ctx,
357					struct tnl_pipeline_stage *stage )
358{
359   r100ContextPtr rmesa = R100_CONTEXT(ctx);
360   TNLcontext *tnl = TNL_CONTEXT(ctx);
361   struct vertex_buffer *VB = &tnl->vb;
362   GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
363   GLuint i;
364   GLuint emit_end;
365
366   /* TODO: separate this from the swtnl pipeline
367    */
368   if (rmesa->radeon.TclFallback)
369      return GL_TRUE;	/* fallback to software t&l */
370
371   if (VB->Count == 0)
372      return GL_FALSE;
373
374   /* NOTE: inputs != tnl->render_inputs - these are the untransformed
375    * inputs.
376    */
377   if (ctx->Light.Enabled) {
378      inputs |= VERT_BIT_NORMAL;
379   }
380
381   if (_mesa_need_secondary_color(ctx)) {
382      inputs |= VERT_BIT_COLOR1;
383   }
384
385   if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) {
386      inputs |= VERT_BIT_FOG;
387   }
388
389   for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) {
390      if (ctx->Texture.Unit[i]._Current) {
391      /* TODO: probably should not emit texture coords when texgen is enabled */
392	 if (rmesa->TexGenNeedNormals[i]) {
393	    inputs |= VERT_BIT_NORMAL;
394	 }
395	 inputs |= VERT_BIT_TEX(i);
396      }
397   }
398
399   radeonReleaseArrays( ctx, ~0 );
400   emit_end = radeonEnsureEmitSize( ctx, inputs )
401     + rmesa->radeon.cmdbuf.cs->cdw;
402   radeonEmitArrays( ctx, inputs );
403
404   rmesa->tcl.Elts = VB->Elts;
405
406   for (i = 0 ; i < VB->PrimitiveCount ; i++)
407   {
408      GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
409      GLuint start = VB->Primitive[i].start;
410      GLuint length = VB->Primitive[i].count;
411
412      if (!length)
413	 continue;
414
415      if (rmesa->tcl.Elts)
416	 radeonEmitEltPrimitive( ctx, start, start+length, prim );
417      else
418	 radeonEmitPrimitive( ctx, start, start+length, prim );
419   }
420
421   if (emit_end < rmesa->radeon.cmdbuf.cs->cdw)
422      WARN_ONCE("Rendering was %d commands larger than predicted size."
423	  " We might overflow  command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
424
425   return GL_FALSE;		/* finished the pipe */
426}
427
428
429
/* Initial state for tcl stage.
 *
 * Only the descriptive string and the run callback are populated; every
 * member in between is left NULL.
 */
const struct tnl_pipeline_stage _radeon_tcl_stage =
{
   "radeon render",
   NULL,
   NULL,
   NULL,
   NULL,
   radeon_run_tcl_render	/* run */
};
441
442
443
444/**********************************************************************/
445/*                 Validate state at pipeline start                   */
446/**********************************************************************/
447
448
449/*-----------------------------------------------------------------------
450 * Manage TCL fallbacks
451 */
452
453
454static void transition_to_swtnl( struct gl_context *ctx )
455{
456   r100ContextPtr rmesa = R100_CONTEXT(ctx);
457   TNLcontext *tnl = TNL_CONTEXT(ctx);
458   GLuint se_cntl;
459
460   RADEON_NEWPRIM( rmesa );
461   rmesa->swtcl.vertex_format = 0;
462
463   radeonChooseVertexState( ctx );
464   radeonChooseRenderState( ctx );
465
466   _tnl_validate_shine_tables( ctx );
467
468   tnl->Driver.NotifyMaterialChange =
469      _tnl_validate_shine_tables;
470
471   radeonReleaseArrays( ctx, ~0 );
472
473   se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
474   se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
475
476   if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
477      RADEON_STATECHANGE( rmesa, set );
478      rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
479   }
480}
481
482
483static void transition_to_hwtnl( struct gl_context *ctx )
484{
485   r100ContextPtr rmesa = R100_CONTEXT(ctx);
486   TNLcontext *tnl = TNL_CONTEXT(ctx);
487   GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
488
489   se_coord_fmt &= ~(RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
490		     RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
491		     RADEON_VTX_W0_IS_NOT_1_OVER_W0);
492   se_coord_fmt |= RADEON_VTX_W0_IS_NOT_1_OVER_W0;
493
494   if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
495      RADEON_STATECHANGE( rmesa, set );
496      rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
497      _tnl_need_projected_coords( ctx, GL_FALSE );
498   }
499
500   radeonUpdateMaterial( ctx );
501
502   tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
503
504   if ( rmesa->radeon.dma.flush )
505      rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
506
507   rmesa->radeon.dma.flush = NULL;
508   rmesa->swtcl.vertex_format = 0;
509
510   //   if (rmesa->swtcl.indexed_verts.buf)
511   //      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
512   //			      __func__ );
513
514   if (RADEON_DEBUG & RADEON_FALLBACKS)
515      fprintf(stderr, "Radeon end tcl fallback\n");
516}
517
518static char *fallbackStrings[] = {
519   "Rasterization fallback",
520   "Unfilled triangles",
521   "Twosided lighting, differing materials",
522   "Materials in VB (maybe between begin/end)",
523   "Texgen unit 0",
524   "Texgen unit 1",
525   "Texgen unit 2",
526   "User disable",
527   "Fogcoord with separate specular lighting"
528};
529
530
531static char *getFallbackString(GLuint bit)
532{
533   int i = 0;
534   while (bit > 1) {
535      i++;
536      bit >>= 1;
537   }
538   return fallbackStrings[i];
539}
540
541
542
543void radeonTclFallback( struct gl_context *ctx, GLuint bit, GLboolean mode )
544{
545   r100ContextPtr rmesa = R100_CONTEXT(ctx);
546   GLuint oldfallback = rmesa->radeon.TclFallback;
547
548   if (mode) {
549      rmesa->radeon.TclFallback |= bit;
550      if (oldfallback == 0) {
551	 if (RADEON_DEBUG & RADEON_FALLBACKS)
552	    fprintf(stderr, "Radeon begin tcl fallback %s\n",
553		    getFallbackString( bit ));
554	 transition_to_swtnl( ctx );
555      }
556   }
557   else {
558      rmesa->radeon.TclFallback &= ~bit;
559      if (oldfallback == bit) {
560	 if (RADEON_DEBUG & RADEON_FALLBACKS)
561	    fprintf(stderr, "Radeon end tcl fallback %s\n",
562		    getFallbackString( bit ));
563	 transition_to_hwtnl( ctx );
564      }
565   }
566}
567