t_vertex.c revision b8e80941
1/*
2 * Copyright 2003 VMware, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
19 * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 *    Keith Whitwell <keithw@vmware.com>
26 */
27
28#include <stdio.h>
29#include "main/glheader.h"
30#include "main/context.h"
31#include "main/execmem.h"
32#include "swrast/s_chan.h"
33#include "t_context.h"
34#include "t_vertex.h"
35
36#define DBG 0
37
38/* Build and manage clipspace/ndc/window vertices.
39 */
40
41static GLboolean match_fastpath( struct tnl_clipspace *vtx,
42				 const struct tnl_clipspace_fastpath *fp)
43{
44   GLuint j;
45
46   if (vtx->attr_count != fp->attr_count)
47      return GL_FALSE;
48
49   for (j = 0; j < vtx->attr_count; j++)
50      if (vtx->attr[j].format != fp->attr[j].format ||
51	  vtx->attr[j].inputsize != fp->attr[j].size ||
52	  vtx->attr[j].vertoffset != fp->attr[j].offset)
53	 return GL_FALSE;
54
55   if (fp->match_strides) {
56      if (vtx->vertex_size != fp->vertex_size)
57	 return GL_FALSE;
58
59      for (j = 0; j < vtx->attr_count; j++)
60	 if (vtx->attr[j].inputstride != fp->attr[j].stride)
61	    return GL_FALSE;
62   }
63
64   return GL_TRUE;
65}
66
67static GLboolean search_fastpath_emit( struct tnl_clipspace *vtx )
68{
69   struct tnl_clipspace_fastpath *fp = vtx->fastpath;
70
71   for ( ; fp ; fp = fp->next) {
72      if (match_fastpath(vtx, fp)) {
73         vtx->emit = fp->func;
74	 return GL_TRUE;
75      }
76   }
77
78   return GL_FALSE;
79}
80
81void _tnl_register_fastpath( struct tnl_clipspace *vtx,
82			     GLboolean match_strides )
83{
84   struct tnl_clipspace_fastpath *fastpath = CALLOC_STRUCT(tnl_clipspace_fastpath);
85   GLuint i;
86
87   if (fastpath == NULL) {
88      _mesa_error_no_memory(__func__);
89      return;
90   }
91
92   fastpath->vertex_size = vtx->vertex_size;
93   fastpath->attr_count = vtx->attr_count;
94   fastpath->match_strides = match_strides;
95   fastpath->func = vtx->emit;
96   fastpath->attr = malloc(vtx->attr_count * sizeof(fastpath->attr[0]));
97
98   if (fastpath->attr == NULL) {
99      free(fastpath);
100      _mesa_error_no_memory(__func__);
101      return;
102   }
103
104   for (i = 0; i < vtx->attr_count; i++) {
105      fastpath->attr[i].format = vtx->attr[i].format;
106      fastpath->attr[i].stride = vtx->attr[i].inputstride;
107      fastpath->attr[i].size = vtx->attr[i].inputsize;
108      fastpath->attr[i].offset = vtx->attr[i].vertoffset;
109   }
110
111   fastpath->next = vtx->fastpath;
112   vtx->fastpath = fastpath;
113}
114
115
116
117/***********************************************************************
118 * Build codegen functions or return generic ones:
119 */
120static void choose_emit_func( struct gl_context *ctx, GLuint count, GLubyte *dest)
121{
122   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
123   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
124   struct tnl_clipspace_attr *a = vtx->attr;
125   const GLuint attr_count = vtx->attr_count;
126   GLuint j;
127
128   for (j = 0; j < attr_count; j++) {
129      GLvector4f *vptr = VB->AttribPtr[a[j].attrib];
130      a[j].inputstride = vptr->stride;
131      a[j].inputsize = vptr->size;
132      a[j].emit = a[j].insert[vptr->size - 1]; /* not always used */
133   }
134
135   vtx->emit = NULL;
136
137   /* Does this match an existing (hardwired, codegen or known-bad)
138    * fastpath?
139    */
140   if (search_fastpath_emit(vtx)) {
141      /* Use this result.  If it is null, then it is already known
142       * that the current state will fail for codegen and there is no
143       * point trying again.
144       */
145   }
146   else if (vtx->codegen_emit) {
147      vtx->codegen_emit(ctx);
148   }
149
150   if (!vtx->emit) {
151      _tnl_generate_hardwired_emit(ctx);
152   }
153
154   /* Otherwise use the generic version:
155    */
156   if (!vtx->emit)
157      vtx->emit = _tnl_generic_emit;
158
159   vtx->emit( ctx, count, dest );
160}
161
162
163
164static void choose_interp_func( struct gl_context *ctx,
165				GLfloat t,
166				GLuint edst, GLuint eout, GLuint ein,
167				GLboolean force_boundary )
168{
169   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
170   GLboolean unfilled = (ctx->Polygon.FrontMode != GL_FILL ||
171                         ctx->Polygon.BackMode != GL_FILL);
172   GLboolean twosided = ctx->Light.Enabled && ctx->Light.Model.TwoSide;
173
174   if (vtx->need_extras && (twosided || unfilled)) {
175      vtx->interp = _tnl_generic_interp_extras;
176   } else {
177      vtx->interp = _tnl_generic_interp;
178   }
179
180   vtx->interp( ctx, t, edst, eout, ein, force_boundary );
181}
182
183
184static void choose_copy_pv_func(  struct gl_context *ctx, GLuint edst, GLuint esrc )
185{
186   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
187   GLboolean unfilled = (ctx->Polygon.FrontMode != GL_FILL ||
188                         ctx->Polygon.BackMode != GL_FILL);
189
190   GLboolean twosided = ctx->Light.Enabled && ctx->Light.Model.TwoSide;
191
192   if (vtx->need_extras && (twosided || unfilled)) {
193      vtx->copy_pv = _tnl_generic_copy_pv_extras;
194   } else {
195      vtx->copy_pv = _tnl_generic_copy_pv;
196   }
197
198   vtx->copy_pv( ctx, edst, esrc );
199}
200
201
202/***********************************************************************
203 * Public entrypoints, mostly dispatch to the above:
204 */
205
206
207/* Interpolate between two vertices to produce a third:
208 */
209void _tnl_interp( struct gl_context *ctx,
210		  GLfloat t,
211		  GLuint edst, GLuint eout, GLuint ein,
212		  GLboolean force_boundary )
213{
214   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
215   vtx->interp( ctx, t, edst, eout, ein, force_boundary );
216}
217
218/* Copy colors from one vertex to another:
219 */
220void _tnl_copy_pv(  struct gl_context *ctx, GLuint edst, GLuint esrc )
221{
222   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
223   vtx->copy_pv( ctx, edst, esrc );
224}
225
226
227/* Extract a named attribute from a hardware vertex.  Will have to
228 * reverse any viewport transformation, swizzling or other conversions
229 * which may have been applied:
230 */
231void _tnl_get_attr( struct gl_context *ctx, const void *vin,
232			      GLenum attr, GLfloat *dest )
233{
234   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
235   const struct tnl_clipspace_attr *a = vtx->attr;
236   const GLuint attr_count = vtx->attr_count;
237   GLuint j;
238
239   for (j = 0; j < attr_count; j++) {
240      if (a[j].attrib == attr) {
241	 a[j].extract( &a[j], dest, (GLubyte *)vin + a[j].vertoffset );
242	 return;
243      }
244   }
245
246   /* Else return the value from ctx->Current.
247    */
248   if (attr == _TNL_ATTRIB_POINTSIZE) {
249      /* If the hardware vertex doesn't have point size then use size from
250       * struct gl_context.  XXX this will be wrong if drawing attenuated points!
251       */
252      dest[0] = ctx->Point.Size;
253   }
254   else {
255      memcpy( dest, ctx->Current.Attrib[attr], 4*sizeof(GLfloat));
256   }
257}
258
259
260/* Complementary operation to the above.
261 */
262void _tnl_set_attr( struct gl_context *ctx, void *vout,
263		    GLenum attr, const GLfloat *src )
264{
265   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
266   const struct tnl_clipspace_attr *a = vtx->attr;
267   const GLuint attr_count = vtx->attr_count;
268   GLuint j;
269
270   for (j = 0; j < attr_count; j++) {
271      if (a[j].attrib == attr) {
272	 a[j].insert[4-1]( &a[j], (GLubyte *)vout + a[j].vertoffset, src );
273	 return;
274      }
275   }
276}
277
278
279void *_tnl_get_vertex( struct gl_context *ctx, GLuint nr )
280{
281   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
282
283   return vtx->vertex_buf + nr * vtx->vertex_size;
284}
285
286void _tnl_invalidate_vertex_state( struct gl_context *ctx, GLuint new_state )
287{
288   /* if two-sided lighting changes or filled/unfilled polygon state changes */
289   if (new_state & (_NEW_LIGHT | _NEW_POLYGON) ) {
290      struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
291      vtx->new_inputs = ~0;
292      vtx->interp = choose_interp_func;
293      vtx->copy_pv = choose_copy_pv_func;
294   }
295}
296
297static void invalidate_funcs( struct tnl_clipspace *vtx )
298{
299   vtx->emit = choose_emit_func;
300   vtx->interp = choose_interp_func;
301   vtx->copy_pv = choose_copy_pv_func;
302   vtx->new_inputs = ~0;
303}
304
/* Install a new hardware-vertex layout described by 'map' (nr entries).
 * Each entry names a TNL attribute and an emit format; EMIT_PAD entries
 * insert padding bytes instead of an attribute.  If 'unpacked_size' is
 * non-zero, per-attribute offsets come from map[i].offset and the total
 * vertex size is fixed at unpacked_size; otherwise attributes are
 * packed back-to-back and the size is accumulated.  'vp' is an optional
 * viewport matrix pointer stored alongside each attribute.
 * Returns the resulting vertex size in bytes.
 */
GLuint _tnl_install_attrs( struct gl_context *ctx, const struct tnl_attr_map *map,
			   GLuint nr, const GLfloat *vp,
			   GLuint unpacked_size )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   GLuint offset = 0;   /* running byte offset for packed layouts */
   GLuint i, j;         /* i walks map[]; j counts real (non-pad) attrs */

   assert(nr < _TNL_ATTRIB_MAX);
   assert(nr == 0 || map[0].attrib == VERT_ATTRIB_POS);

   vtx->new_inputs = ~0;
   vtx->need_viewport = GL_FALSE;

   if (vp) {
      vtx->need_viewport = GL_TRUE;
   }

   for (j = 0, i = 0; i < nr; i++) {
      const GLuint format = map[i].format;
      if (format == EMIT_PAD) {
	 if (DBG)
	    printf("%d: pad %d, offset %d\n", i,
		   map[i].offset, offset);

	 /* For pad entries map[i].offset is a byte count to skip. */
	 offset += map[i].offset;

      }
      else {
	 GLuint tmpoffset;

	 if (unpacked_size)
	    tmpoffset = map[i].offset;
	 else
	    tmpoffset = offset;

	 /* Only rewrite the attr slot (and invalidate the chosen
	  * emit/interp/copy functions) when something actually changed.
	  * NOTE(review): vp is only re-stored on a layout change — a
	  * call that changes vp alone leaves attr[j].vp stale; verify
	  * callers always change layout alongside vp.
	  */
	 if (vtx->attr_count != j ||
	     vtx->attr[j].attrib != map[i].attrib ||
	     vtx->attr[j].format != format ||
	     vtx->attr[j].vertoffset != tmpoffset) {
	    invalidate_funcs(vtx);

	    vtx->attr[j].attrib = map[i].attrib;
	    vtx->attr[j].format = format;
	    vtx->attr[j].vp = vp;
	    vtx->attr[j].insert = _tnl_format_info[format].insert;
	    vtx->attr[j].extract = _tnl_format_info[format].extract;
	    vtx->attr[j].vertattrsize = _tnl_format_info[format].attrsize;
	    vtx->attr[j].vertoffset = tmpoffset;
	 }


	 if (DBG)
	    printf("%d: %s, vp %p, offset %d\n", i,
		   _tnl_format_info[format].name, (void *)vp,
		   vtx->attr[j].vertoffset);

	 /* Packed layouts accumulate size even when the slot matched. */
	 offset += _tnl_format_info[format].attrsize;
	 j++;
      }
   }

   vtx->attr_count = j;

   if (unpacked_size)
      vtx->vertex_size = unpacked_size;
   else
      vtx->vertex_size = offset;

   assert(vtx->vertex_size <= vtx->max_vertex_size);
   return vtx->vertex_size;
}
377
378
379
380void _tnl_invalidate_vertices( struct gl_context *ctx, GLuint newinputs )
381{
382   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
383   vtx->new_inputs |= newinputs;
384}
385
386
387/* This event has broader use beyond this file - will move elsewhere
388 * and probably invoke a driver callback.
389 */
/* This event has broader use beyond this file - will move elsewhere
 * and probably invoke a driver callback.
 */
void _tnl_notify_pipeline_output_change( struct gl_context *ctx )
{
   invalidate_funcs(GET_VERTEX_STATE(ctx));
}
395
396
397static void adjust_input_ptrs( struct gl_context *ctx, GLint diff)
398{
399   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
400   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
401   struct tnl_clipspace_attr *a = vtx->attr;
402   const GLuint count = vtx->attr_count;
403   GLuint j;
404
405   diff -= 1;
406   for (j=0; j<count; ++j) {
407           register GLvector4f *vptr = VB->AttribPtr[a->attrib];
408	   (a++)->inputptr += diff*vptr->stride;
409   }
410}
411
/* Point each attribute's inputptr at source element 'start' of its VB
 * array, and refresh the cached viewport scale/translate vectors.
 */
static void update_input_ptrs( struct gl_context *ctx, GLuint start )
{
   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   struct tnl_clipspace_attr *a = vtx->attr;
   const GLuint count = vtx->attr_count;
   GLuint j;

   for (j = 0; j < count; j++) {
      GLvector4f *vptr = VB->AttribPtr[a[j].attrib];

      /* Once a real emit function has been chosen, the layout captured
       * by choose_emit_func must still match the VB.
       */
      if (vtx->emit != choose_emit_func) {
	 assert(a[j].inputstride == vptr->stride);
	 assert(a[j].inputsize == vptr->size);
      }

      a[j].inputptr = ((GLubyte *)vptr->data) + start * vptr->stride;
   }

   /* NOTE(review): reads attr[0].vp unconditionally — assumes the first
    * attribute (position) carries the viewport pointer and that attr[0]
    * is valid even when attr_count is 0; confirm.
    */
   if (a->vp) {
      vtx->vp_scale[0] = a->vp[MAT_SX];
      vtx->vp_scale[1] = a->vp[MAT_SY];
      vtx->vp_scale[2] = a->vp[MAT_SZ];
      vtx->vp_scale[3] = 1.0;
      vtx->vp_xlate[0] = a->vp[MAT_TX];
      vtx->vp_xlate[1] = a->vp[MAT_TY];
      vtx->vp_xlate[2] = a->vp[MAT_TZ];
      vtx->vp_xlate[3] = 0.0;
   }
}
442
443
444void _tnl_build_vertices( struct gl_context *ctx,
445			  GLuint start,
446			  GLuint end,
447			  GLuint newinputs )
448{
449   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
450   update_input_ptrs( ctx, start );
451   vtx->emit( ctx, end - start,
452	      (GLubyte *)(vtx->vertex_buf +
453			  start * vtx->vertex_size));
454}
455
456/* Emit VB vertices start..end to dest.  Note that VB vertex at
457 * postion start will be emitted to dest at position zero.
458 */
459void *_tnl_emit_vertices_to_buffer( struct gl_context *ctx,
460				    GLuint start,
461				    GLuint end,
462				    void *dest )
463{
464   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
465
466   update_input_ptrs(ctx, start);
467   /* Note: dest should not be adjusted for non-zero 'start' values:
468    */
469   vtx->emit( ctx, end - start, (GLubyte*) dest );
470   return (void *)((GLubyte *)dest + vtx->vertex_size * (end - start));
471}
472
473/* Emit indexed VB vertices start..end to dest.  Note that VB vertex at
474 * postion start will be emitted to dest at position zero.
475 */
476
477void *_tnl_emit_indexed_vertices_to_buffer( struct gl_context *ctx,
478					    const GLuint *elts,
479					    GLuint start,
480					    GLuint end,
481					    void *dest )
482{
483   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
484   GLuint oldIndex;
485   GLubyte *cdest = dest;
486
487   update_input_ptrs(ctx, oldIndex = elts[start++]);
488   vtx->emit( ctx, 1, cdest );
489   cdest += vtx->vertex_size;
490
491   for (; start < end; ++start) {
492      adjust_input_ptrs(ctx, elts[start] - oldIndex);
493      oldIndex = elts[start];
494      vtx->emit( ctx, 1, cdest);
495      cdest += vtx->vertex_size;
496   }
497
498   return (void *) cdest;
499}
500
501
502void _tnl_init_vertices( struct gl_context *ctx,
503			GLuint vb_size,
504			GLuint max_vertex_size )
505{
506   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
507
508   _tnl_install_attrs( ctx, NULL, 0, NULL, 0 );
509
510   vtx->need_extras = GL_TRUE;
511   if (max_vertex_size > vtx->max_vertex_size) {
512      _tnl_free_vertices( ctx );
513      vtx->max_vertex_size = max_vertex_size;
514      vtx->vertex_buf = _mesa_align_calloc(vb_size * max_vertex_size, 32 );
515      invalidate_funcs(vtx);
516   }
517
518   switch(CHAN_TYPE) {
519   case GL_UNSIGNED_BYTE:
520      vtx->chan_scale[0] = 255.0;
521      vtx->chan_scale[1] = 255.0;
522      vtx->chan_scale[2] = 255.0;
523      vtx->chan_scale[3] = 255.0;
524      break;
525   case GL_UNSIGNED_SHORT:
526      vtx->chan_scale[0] = 65535.0;
527      vtx->chan_scale[1] = 65535.0;
528      vtx->chan_scale[2] = 65535.0;
529      vtx->chan_scale[3] = 65535.0;
530      break;
531   default:
532      vtx->chan_scale[0] = 1.0;
533      vtx->chan_scale[1] = 1.0;
534      vtx->chan_scale[2] = 1.0;
535      vtx->chan_scale[3] = 1.0;
536      break;
537   }
538
539   vtx->identity[0] = 0.0;
540   vtx->identity[1] = 0.0;
541   vtx->identity[2] = 0.0;
542   vtx->identity[3] = 1.0;
543
544   vtx->codegen_emit = NULL;
545
546#ifdef USE_SSE_ASM
547   if (!getenv("MESA_NO_CODEGEN"))
548      vtx->codegen_emit = _tnl_generate_sse_emit;
549#endif
550}
551
552
553void _tnl_free_vertices( struct gl_context *ctx )
554{
555   TNLcontext *tnl = TNL_CONTEXT(ctx);
556   if (tnl) {
557      struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
558      struct tnl_clipspace_fastpath *fp, *tmp;
559
560      _mesa_align_free(vtx->vertex_buf);
561      vtx->vertex_buf = NULL;
562
563      for (fp = vtx->fastpath ; fp ; fp = tmp) {
564         tmp = fp->next;
565         free(fp->attr);
566
567         /* KW: At the moment, fp->func is constrained to be allocated by
568          * _mesa_exec_alloc(), as the hardwired fastpaths in
569          * t_vertex_generic.c are handled specially.  It would be nice
570          * to unify them, but this probably won't change until this
571          * module gets another overhaul.
572          */
573         _mesa_exec_free((void *) fp->func);
574         free(fp);
575      }
576
577      vtx->fastpath = NULL;
578   }
579}
580