1848b8605Smrg/*
2848b8605Smrg * Mesa 3-D graphics library
3848b8605Smrg *
4848b8605Smrg * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
5848b8605Smrg *
6848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a
7848b8605Smrg * copy of this software and associated documentation files (the "Software"),
8848b8605Smrg * to deal in the Software without restriction, including without limitation
9848b8605Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10848b8605Smrg * and/or sell copies of the Software, and to permit persons to whom the
11848b8605Smrg * Software is furnished to do so, subject to the following conditions:
12848b8605Smrg *
13848b8605Smrg * The above copyright notice and this permission notice shall be included
14848b8605Smrg * in all copies or substantial portions of the Software.
15848b8605Smrg *
16848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17848b8605Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19848b8605Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20848b8605Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21848b8605Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22848b8605Smrg * OTHER DEALINGS IN THE SOFTWARE.
23848b8605Smrg *
24848b8605Smrg * Authors:
25848b8605Smrg *    Keith Whitwell <keithw@vmware.com>
26848b8605Smrg */
27848b8605Smrg
28848b8605Smrg#include "main/glheader.h"
29848b8605Smrg#include "main/context.h"
30848b8605Smrg#include "main/imports.h"
31848b8605Smrg#include "main/mtypes.h"
32848b8605Smrg
33848b8605Smrg#include "t_context.h"
34848b8605Smrg#include "t_pipeline.h"
35848b8605Smrg#include "t_vp_build.h"
36848b8605Smrg#include "t_vertex.h"
37848b8605Smrg
38848b8605Smrgvoid _tnl_install_pipeline( struct gl_context *ctx,
39848b8605Smrg			    const struct tnl_pipeline_stage **stages )
40848b8605Smrg{
41848b8605Smrg   TNLcontext *tnl = TNL_CONTEXT(ctx);
42848b8605Smrg   GLuint i;
43848b8605Smrg
44848b8605Smrg   tnl->pipeline.new_state = ~0;
45848b8605Smrg
46848b8605Smrg   /* Create a writeable copy of each stage.
47848b8605Smrg    */
48848b8605Smrg   for (i = 0 ; i < MAX_PIPELINE_STAGES && stages[i] ; i++) {
49848b8605Smrg      struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
50848b8605Smrg      memcpy(s, stages[i], sizeof(*s));
51848b8605Smrg      if (s->create)
52848b8605Smrg	 s->create(ctx, s);
53848b8605Smrg   }
54848b8605Smrg
55848b8605Smrg   tnl->pipeline.nr_stages = i;
56848b8605Smrg}
57848b8605Smrg
58848b8605Smrgvoid _tnl_destroy_pipeline( struct gl_context *ctx )
59848b8605Smrg{
60848b8605Smrg   TNLcontext *tnl = TNL_CONTEXT(ctx);
61848b8605Smrg   GLuint i;
62848b8605Smrg
63848b8605Smrg   for (i = 0 ; i < tnl->pipeline.nr_stages ; i++) {
64848b8605Smrg      struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
65848b8605Smrg      if (s->destroy)
66848b8605Smrg	 s->destroy(s);
67848b8605Smrg   }
68848b8605Smrg
69848b8605Smrg   tnl->pipeline.nr_stages = 0;
70848b8605Smrg}
71848b8605Smrg
72848b8605Smrg
73848b8605Smrg
74848b8605Smrgstatic GLuint check_input_changes( struct gl_context *ctx )
75848b8605Smrg{
76848b8605Smrg   TNLcontext *tnl = TNL_CONTEXT(ctx);
77848b8605Smrg   GLuint i;
78848b8605Smrg
79848b8605Smrg   for (i = 0; i <= _TNL_LAST_MAT; i++) {
80848b8605Smrg      if (tnl->vb.AttribPtr[i]->size != tnl->pipeline.last_attrib_size[i] ||
81848b8605Smrg	  tnl->vb.AttribPtr[i]->stride != tnl->pipeline.last_attrib_stride[i]) {
82848b8605Smrg	 tnl->pipeline.last_attrib_size[i] = tnl->vb.AttribPtr[i]->size;
83848b8605Smrg	 tnl->pipeline.last_attrib_stride[i] = tnl->vb.AttribPtr[i]->stride;
84848b8605Smrg	 tnl->pipeline.input_changes |= 1<<i;
85848b8605Smrg      }
86848b8605Smrg   }
87848b8605Smrg
88848b8605Smrg   return tnl->pipeline.input_changes;
89848b8605Smrg}
90848b8605Smrg
91848b8605Smrg
/**
 * Check for changes in the sizes/strides of the pipeline's output
 * (result) arrays.  The real comparison is disabled below, so the
 * function currently always reports that every output changed.
 */
static GLuint check_output_changes( struct gl_context *ctx )
{
#if 0
   TNLcontext *tnl = TNL_CONTEXT(ctx);

   /* NOTE(review): disabled reference code — as written it uses an
    * undeclared loop index 'i' and would need fixing before it could
    * be re-enabled.
    */
   for (i = 0; i < VARYING_SLOT_MAX; i++) {
      if (tnl->vb.ResultPtr[i]->size != tnl->last_result_size[i] ||
	  tnl->vb.ResultPtr[i]->stride != tnl->last_result_stride[i]) {
	 tnl->last_result_size[i] = tnl->vb.ResultPtr[i]->size;
	 tnl->last_result_stride[i] = tnl->vb.ResultPtr[i]->stride;
	 tnl->pipeline.output_changes |= 1<<i;
      }
   }

   if (tnl->pipeline.output_changes)
      tnl->Driver.NotifyOutputChanges( ctx, tnl->pipeline.output_changes );

   return tnl->pipeline.output_changes;
#else
   /* Conservatively claim all outputs changed. */
   return ~0;
#endif
}
114848b8605Smrg
/**
 * START/END_FAST_MATH macros:
 *
 * START_FAST_MATH: Set x86 FPU to faster, 32-bit precision mode (and save
 *                  original mode to a temporary).
 * END_FAST_MATH: Restore x86 FPU to original mode.
 */
#if defined(__GNUC__) && defined(__i386__)
/*
 * Set the x86 FPU control word to guarantee only 32 bits of precision
 * are stored in registers.  Allowing the FPU to store more introduces
 * differences between situations where numbers are pulled out of memory
 * vs. situations where the compiler is able to optimize register usage.
 *
 * In the worst case, we force the compiler to use a memory access to
 * truncate the float, by specifying the 'volatile' keyword.
 */
/* Hardware default: All exceptions masked, extended double precision,
 * round to nearest (IEEE compliant):
 */
#define DEFAULT_X86_FPU		0x037f
/* All exceptions masked, single precision, round to nearest:
 */
#define FAST_X86_FPU		0x003f
/* The fldcw instruction will cause any pending FP exceptions to be
 * raised prior to entering the block, and we clear any pending
 * exceptions before exiting the block.  Hence, asm code has free
 * rein over the FPU while in the fast math block.
 */
#if defined(NO_FAST_MATH)
/* NO_FAST_MATH build: still save/restore the control word, but load the
 * IEEE-compliant default mode instead of the reduced-precision one.
 */
#define START_FAST_MATH(x)						\
do {									\
   static GLuint mask = DEFAULT_X86_FPU;				\
   __asm__ ( "fnstcw %0" : "=m" (*&(x)) );				\
   __asm__ ( "fldcw %0" : : "m" (mask) );				\
} while (0)
#else
#define START_FAST_MATH(x)						\
do {									\
   static GLuint mask = FAST_X86_FPU;					\
   __asm__ ( "fnstcw %0" : "=m" (*&(x)) );				\
   __asm__ ( "fldcw %0" : : "m" (mask) );				\
} while (0)
#endif
/* Restore original FPU mode, and clear any exceptions that may have
 * occurred in the FAST_MATH block.
 */
#define END_FAST_MATH(x)						\
do {									\
   __asm__ ( "fnclex ; fldcw %0" : : "m" (*&(x)) );			\
} while (0)

#elif defined(_MSC_VER) && defined(_M_IX86)
#define DEFAULT_X86_FPU		0x037f /* See GCC comments above */
#define FAST_X86_FPU		0x003f /* See GCC comments above */
#if defined(NO_FAST_MATH)
#define START_FAST_MATH(x) do {\
	static GLuint mask = DEFAULT_X86_FPU;\
	__asm fnstcw word ptr [x]\
	__asm fldcw word ptr [mask]\
} while(0)
#else
#define START_FAST_MATH(x) do {\
	static GLuint mask = FAST_X86_FPU;\
	__asm fnstcw word ptr [x]\
	__asm fldcw word ptr [mask]\
} while(0)
#endif
#define END_FAST_MATH(x) do {\
	__asm fnclex\
	__asm fldcw word ptr [x]\
} while(0)

#else
/* Non-x86 (or other compiler): FPU control-word fiddling is a no-op.
 * The dummy assignment/cast keep the caller's saved-state variable used
 * so no unused-variable warning is emitted.
 */
#define START_FAST_MATH(x)  x = 0
#define END_FAST_MATH(x)  (void)(x)
#endif
193848b8605Smrg
194848b8605Smrgvoid _tnl_run_pipeline( struct gl_context *ctx )
195848b8605Smrg{
196848b8605Smrg   TNLcontext *tnl = TNL_CONTEXT(ctx);
197848b8605Smrg   unsigned short __tmp;
198848b8605Smrg   GLuint i;
199848b8605Smrg
200848b8605Smrg   if (!tnl->vb.Count)
201848b8605Smrg      return;
202848b8605Smrg
203848b8605Smrg   /* Check for changed input sizes or change in stride to/from zero
204848b8605Smrg    * (ie const or non-const).
205848b8605Smrg    */
206848b8605Smrg   if (check_input_changes( ctx ) || tnl->pipeline.new_state) {
207848b8605Smrg      if (ctx->VertexProgram._MaintainTnlProgram)
208848b8605Smrg	 _tnl_UpdateFixedFunctionProgram( ctx );
209848b8605Smrg
210848b8605Smrg      for (i = 0; i < tnl->pipeline.nr_stages ; i++) {
211848b8605Smrg	 struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
212848b8605Smrg	 if (s->validate)
213848b8605Smrg	    s->validate( ctx, s );
214848b8605Smrg      }
215848b8605Smrg
216848b8605Smrg      tnl->pipeline.new_state = 0;
217848b8605Smrg      tnl->pipeline.input_changes = 0;
218848b8605Smrg
219848b8605Smrg      /* Pipeline can only change its output in response to either a
220848b8605Smrg       * statechange or an input size/stride change.  No other changes
221848b8605Smrg       * are allowed.
222848b8605Smrg       */
223848b8605Smrg      if (check_output_changes( ctx ))
224848b8605Smrg	 _tnl_notify_pipeline_output_change( ctx );
225848b8605Smrg   }
226848b8605Smrg
227848b8605Smrg#ifndef _OPENMP
228848b8605Smrg   /* Don't adjust FPU precision mode in case multiple threads are to be used.
229848b8605Smrg    * This would require that the additional threads also changed the FPU mode
230848b8605Smrg    * which is quite a mess as this had to be done in all parallelized sections;
231848b8605Smrg    * otherwise the master thread and all other threads are running in different
232848b8605Smrg    * modes, producing inconsistent results.
233848b8605Smrg    * Note that all x64 implementations don't define/use START_FAST_MATH, so
234848b8605Smrg    * this is "hack" is only used in i386 mode
235848b8605Smrg    */
236848b8605Smrg   START_FAST_MATH(__tmp);
237848b8605Smrg#endif
238848b8605Smrg
239848b8605Smrg   for (i = 0; i < tnl->pipeline.nr_stages ; i++) {
240848b8605Smrg      struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
241848b8605Smrg      if (!s->run( ctx, s ))
242848b8605Smrg	 break;
243848b8605Smrg   }
244848b8605Smrg
245848b8605Smrg#ifndef _OPENMP
246848b8605Smrg   END_FAST_MATH(__tmp);
247848b8605Smrg#endif
248848b8605Smrg}
249848b8605Smrg
250848b8605Smrg
251848b8605Smrg
252848b8605Smrg/* The default pipeline.  This is useful for software rasterizers, and
253848b8605Smrg * simple hardware rasterizers.  For customization, I don't recommend
254848b8605Smrg * tampering with the internals of these stages in the way that
255848b8605Smrg * drivers did in Mesa 3.4.  These stages are basically black boxes,
256848b8605Smrg * and should be left intact.
257848b8605Smrg *
258848b8605Smrg * To customize the pipeline, consider:
259848b8605Smrg *
260848b8605Smrg * - removing redundant stages (making sure that the software rasterizer
261848b8605Smrg *   can cope with this on fallback paths).  An example is fog
262848b8605Smrg *   coordinate generation, which is not required in the FX driver.
263848b8605Smrg *
264848b8605Smrg * - replacing general-purpose machine-independent stages with
265848b8605Smrg *   general-purpose machine-specific stages.  There is no example of
266848b8605Smrg *   this to date, though it must be borne in mind that all subsequent
267848b8605Smrg *   stages that reference the output of the new stage must cope with
268848b8605Smrg *   any machine-specific data introduced.  This may not be easy
269848b8605Smrg *   unless there are no such stages (ie the new stage is the last in
270848b8605Smrg *   the pipe).
271848b8605Smrg *
272848b8605Smrg * - inserting optimized (but specialized) stages ahead of the
273848b8605Smrg *   general-purpose fallback implementation.  For example, the old
274848b8605Smrg *   fastpath mechanism, which only works when the VB->Elts input is
275848b8605Smrg *   available, can be duplicated by placing the fastpath stage at the
276848b8605Smrg *   head of this pipeline.  Such specialized stages are currently
 *   constrained to have no outputs (ie. they must either finish the
 *   pipeline by returning GL_FALSE from run(), or do nothing).
279848b8605Smrg *
280848b8605Smrg * Some work can be done to lift some of the restrictions in the final
281848b8605Smrg * case, if it becomes necessary to do so.
282848b8605Smrg */
/* The default pipeline, in execution order: fixed-function transform,
 * lighting and texture stages first, then vertex-program execution,
 * then the render stage.  The list is NULL-terminated.
 */
const struct tnl_pipeline_stage *_tnl_default_pipeline[] = {
   &_tnl_vertex_transform_stage,
   &_tnl_normal_transform_stage,
   &_tnl_lighting_stage,
   &_tnl_texgen_stage,
   &_tnl_texture_transform_stage,
   &_tnl_point_attenuation_stage,
   &_tnl_vertex_program_stage,
   &_tnl_fog_coordinate_stage,
   &_tnl_render_stage,
   NULL
};
295848b8605Smrg
/* Minimal pipeline: just vertex-program execution followed by the
 * render stage, skipping all fixed-function stages.  NULL-terminated.
 */
const struct tnl_pipeline_stage *_tnl_vp_pipeline[] = {
   &_tnl_vertex_program_stage,
   &_tnl_render_stage,
   NULL
};
301