head	3.10;
access;
symbols
	merge-1:3.8.2.3
	autoconf:3.8.0.4
	experimental-1:3.8.0.2
	mesa-3-1-with-kw3:3.1;
locks; strict;
comment	@ * @;


3.10
date	99.07.12.19.36.05;	author miklos;	state Exp;
branches;
next	3.9;

3.9
date	99.07.12.12.05.23;	author keithw;	state Exp;
branches;
next	3.8;

3.8
date	99.04.21.11.28.45;	author joukj;	state Exp;
branches
	3.8.2.1;
next	3.7;

3.7
date	99.04.07.22.19.04;	author brianp;	state Exp;
branches;
next	3.6;

3.6
date	99.04.06.01.11.49;	author keithw;	state Exp;
branches;
next	3.5;

3.5
date	99.03.31.20.18.38;	author keithw;	state Exp;
branches;
next	3.4;

3.4
date	99.03.20.18.56.47;	author brianp;	state Exp;
branches;
next	3.3;

3.3
date	99.03.17.12.08.22;	author keithw;	state Exp;
branches;
next	3.2;

3.2
date	99.03.16.09.29.20;	author joukj;	state Exp;
branches;
next	3.1;

3.1
date	99.02.25.14.12.30;	author keithw;	state Exp;
branches;
next	;

3.8.2.1
date	99.05.21.21.29.25;	author keithw;	state Exp;
branches;
next	3.8.2.2;

3.8.2.2
date	99.06.19.15.04.13;	author keithw;	state Exp;
branches;
next	3.8.2.3;

3.8.2.3
date	99.07.06.09.28.06;	author miklos;	state Exp;
branches;
next	3.8.2.4;

3.8.2.4
date	99.07.09.19.24.39;	author miklos;	state Exp;
branches;
next	;


desc
@@


3.10
log
@Typo fixed in powerpc code.
@
text
@/* $Id: clip_tmp.h,v 3.9 1999/07/12 12:05:23 keithw Exp $ */

/*
 * Mesa 3-D graphics library
 * Version:  3.1
 * 
 * Copyright (C) 1999  Brian Paul   All Rights Reserved.
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * New (3.1) transformation code written by Keith Whitwell.
 */


/* KW: a clever asm implementation would nestle integer versions
 * of the outcode calculation underneath the division.  Gcc won't
 * do this, strangely enough, so I only do the divide in
 * the case where the cliptest passes.  This isn't essential,
 * and an asm implementation needn't replicate that behaviour.
 */
static GLvector4f * TAG(cliptest_points4)( GLvector4f *clip_vec, /* in: points read as (x, y, z, w) */
					   GLvector4f *proj_vec, /* out: x/w, y/w, z/w, 1/w per unclipped point */
					   GLubyte clipMask[],	/* out: one outcode per point */
					   GLubyte *orMask, /* in/out: OR of the non-zero outcodes */
					   GLubyte *andMask )	/* in/out: AND of outcodes; 0 unless all clipped */
{  /* Tests each point against the six +/-w planes; returns proj_vec. */
   const GLuint stride = clip_vec->stride;	/* not used directly; presumably consumed by STRIDE_LOOP */
   const GLfloat *from = (GLfloat *)clip_vec->start;
   const GLuint count = clip_vec->count;
   GLuint c = 0;	/* number of points with a non-zero outcode */
   GLfloat (*vProj)[4] = (GLfloat (*)[4])proj_vec->start;	/* proj_vec storage viewed as rows of 4 floats */
   GLubyte tmpAndMask = *andMask;
   GLubyte tmpOrMask = *orMask;
   GLuint i;	/* point index; presumably set and stepped by STRIDE_LOOP (defined elsewhere) */
   STRIDE_LOOP {
      const GLfloat cx = from[0];
      const GLfloat cy = from[1];
      const GLfloat cz = from[2];
      const GLfloat cw = from[3];
   #if defined(macintosh)
      /* On PowerPC the cliptest is 17% faster when written this way. */
      GLuint mask;
      mask = (((cw < cx) << CLIP_RIGHT_SHIFT));	/* each comparison yields 0 or 1 */
      mask |= (((cw < -cx) << CLIP_LEFT_SHIFT));
      mask |= (((cw < cy) << CLIP_TOP_SHIFT));
      mask |= (((cw < -cy) << CLIP_BOTTOM_SHIFT));
      mask |= (((cw < cz) << CLIP_FAR_SHIFT));
      mask |= (((cw < -cz) << CLIP_NEAR_SHIFT));
    #else /* !defined(macintosh)) */
      GLubyte mask = 0;	/* outcode: one bit per violated clip plane */
      if (-cx + cw < 0) mask |= CLIP_RIGHT_BIT;	/* i.e. cx >  cw */
      if ( cx + cw < 0) mask |= CLIP_LEFT_BIT;	/* i.e. cx < -cw */
      if (-cy + cw < 0) mask |= CLIP_TOP_BIT;	/* i.e. cy >  cw */
      if ( cy + cw < 0) mask |= CLIP_BOTTOM_BIT;	/* i.e. cy < -cw */
      if (-cz + cw < 0) mask |= CLIP_FAR_BIT;	/* i.e. cz >  cw */
      if ( cz + cw < 0) mask |= CLIP_NEAR_BIT;	/* i.e. cz < -cw */
    #endif /* defined(macintosh) */

      clipMask[i] = mask;
      if (mask) {	/* outside at least one plane: skip the divide */
	 c++;
	 tmpAndMask &= mask;
	 tmpOrMask |= mask;
	 vProj[i][0] = 0;	/* no longer required? */
	 vProj[i][1] = 0;
	 vProj[i][2] = 0;
	 vProj[i][3] = 1;
      } else {	/* inside all six planes: divide through by w */
	 GLfloat oow = 1.0 / cw;	 /* NOTE(review): the point (0,0,0,0) gets here with cw == 0 */
	 vProj[i][3] = oow;
	 vProj[i][0] = cx * oow;
	 vProj[i][1] = cy * oow;
	 vProj[i][2] = cz * oow;      
      }	 
   }

   *orMask = tmpOrMask;
   *andMask = (c < count ? 0 : tmpAndMask);	/* any unclipped point forces the AND to 0 */

   proj_vec->flags |= VEC_SIZE_4;	/* all four components were written above */
   proj_vec->size = 3;	/* NOTE(review): size 3 alongside VEC_SIZE_4 -- confirm this is intended */
   proj_vec->count = clip_vec->count;
   return proj_vec;
}

static GLvector4f * TAG(cliptest_points3)( GLvector4f *clip_vec, /* in: points read as (x, y, z); w is not read */
					   GLvector4f *proj_vec, /* only handed to gl_vector4f_clean_elem below */
					   GLubyte clipMask[],	/* out: one outcode per point */
					   GLubyte *orMask, /* in/out: OR of every outcode */
					   GLubyte *andMask )	/* in/out: AND of every outcode, zero ones included */
{  /* Tests each point against the six +/-1.0 planes; no divide is done. */
   const GLuint stride = clip_vec->stride;	/* not used directly; presumably consumed by STRIDE_LOOP */
   const GLuint count = clip_vec->count;
   const GLfloat *from = (GLfloat *)clip_vec->start;

   GLubyte tmpOrMask = *orMask;
   GLubyte tmpAndMask = *andMask;
   GLuint i;	/* point index; presumably set and stepped by STRIDE_LOOP (defined elsewhere) */
   STRIDE_LOOP {
      const GLfloat cx = from[0], cy = from[1], cz = from[2];
      GLubyte mask = 0;	/* outcode: one bit per violated clip plane */
      if (cx >  1.0)       mask |= CLIP_RIGHT_BIT;	/* planes sit at +/-1.0 here, not +/-w */
      else if (cx < -1.0)  mask |= CLIP_LEFT_BIT;	/* else: a value cannot be beyond both planes of a pair */
      if (cy >  1.0)       mask |= CLIP_TOP_BIT;
      else if (cy < -1.0)  mask |= CLIP_BOTTOM_BIT;
      if (cz >  1.0)       mask |= CLIP_FAR_BIT;
      else if (cz < -1.0)  mask |= CLIP_NEAR_BIT;
      clipMask[i] = mask;
      tmpOrMask |= mask;
      tmpAndMask &= mask;	/* an unclipped point (mask 0) clears the AND */
   }

   gl_vector4f_clean_elem(proj_vec, count, 3);	/* defined elsewhere; NOTE(review): presumably resets element 3 (w) -- confirm */
      
   *orMask = tmpOrMask;
   *andMask = tmpAndMask;
   return clip_vec;	/* the input vector is returned, not proj_vec */
}

static GLvector4f * TAG(cliptest_points2)( GLvector4f *clip_vec, /* in: points read as (x, y); z and w are not read */
					   GLvector4f *proj_vec, /* only handed to gl_vector4f_clean_elem below */
					   GLubyte clipMask[],	/* out: one outcode per point */
					   GLubyte *orMask, /* in/out: OR of every outcode */
					   GLubyte *andMask )	/* in/out: AND of every outcode, zero ones included */
{  /* Tests each point against the four +/-1.0 x/y planes; no divide is done. */
   const GLuint stride = clip_vec->stride;	/* not used directly; presumably consumed by STRIDE_LOOP */
   const GLuint count = clip_vec->count;
   const GLfloat *from = (GLfloat *)clip_vec->start;

   GLubyte tmpOrMask = *orMask;
   GLubyte tmpAndMask = *andMask;
   GLuint i;	/* point index; presumably set and stepped by STRIDE_LOOP (defined elsewhere) */
   STRIDE_LOOP {
      const GLfloat cx = from[0], cy = from[1];
      GLubyte mask = 0;	/* outcode: near/far bits are never set in this variant */
      if (cx >  1.0)       mask |= CLIP_RIGHT_BIT;	/* planes sit at +/-1.0 here, not +/-w */
      else if (cx < -1.0)  mask |= CLIP_LEFT_BIT;	/* else: a value cannot be beyond both planes of a pair */
      if (cy >  1.0)       mask |= CLIP_TOP_BIT;
      else if (cy < -1.0)  mask |= CLIP_BOTTOM_BIT;
      clipMask[i] = mask;
      tmpOrMask |= mask;
      tmpAndMask &= mask;	/* an unclipped point (mask 0) clears the AND */
   }

   gl_vector4f_clean_elem(proj_vec, count, 3);	/* defined elsewhere; NOTE(review): presumably resets element 3 (w) -- confirm */

   *orMask = tmpOrMask;
   *andMask = tmpAndMask;
   return clip_vec;	/* the input vector is returned, not proj_vec */
}


static void TAG(init_c_cliptest)( void )	/* installs this file's cliptest routines in gl_clip_tab */
{  /* Slots 2, 3 and 4 receive the points2, points3 and points4 variants; other slots are untouched. */
   gl_clip_tab[4] = TAG(cliptest_points4);
   gl_clip_tab[3] = TAG(cliptest_points3);
   gl_clip_tab[2] = TAG(cliptest_points2);
}
@


3.9
log
@merge from experimental branch upto merge-1 tag
@
text
@d1 1
a1 1
/* $Id: clip_tmp.h,v 3.8.2.3 1999/07/06 09:28:06 miklos Exp $ */
d58 1
a58 1
      /* On PowerPC this is 17% faster */
d64 2
a65 2
      mask |= (((cw < cz) << CLIP_NEAR_SHIFT));
      mask |= (((cw < -cz) << CLIP_FAR_SHIFT));
@


3.8
log
@
 Unitialized proj[n] caused crashes on VMS (and other machines?)
@
text
@d1 1
a1 1
/* $Id: clip_tmp.h,v 3.7 1999/04/07 22:19:04 brianp Exp $ */
a52 1
      GLubyte mask = 0;
d57 20
a76 6
      if (cx >  cw) mask |= CLIP_RIGHT_BIT;
      if (cx < -cw) mask |= CLIP_LEFT_BIT;
      if (cy >  cw) mask |= CLIP_TOP_BIT;
      if (cy < -cw) mask |= CLIP_BOTTOM_BIT;
      if (cz >  cw) mask |= CLIP_FAR_BIT;
      if (cz < -cw) mask |= CLIP_NEAR_BIT;
a79 1
	 clipMask[i] = mask;
d81 1
a81 2
	 vProj[i][3] = 0;
	 vProj[i][0] = 0;
d83 2
a84 1
	 vProj[i][2] = 0;      
@


3.8.2.1
log
@Quake3 inspired optimizations
@
text
@d1 1
a1 1
/* $Id: clip_tmp.h,v 3.8 1999/04/21 11:28:45 joukj Exp $ */
d58 6
a63 6
      if (-cx + cw < 0) mask |= CLIP_RIGHT_BIT;
      if ( cx + cw < 0) mask |= CLIP_LEFT_BIT;
      if (-cy + cw < 0) mask |= CLIP_TOP_BIT;
      if ( cy + cw < 0) mask |= CLIP_BOTTOM_BIT;
      if (-cz + cw < 0) mask |= CLIP_FAR_BIT;
      if ( cz + cw < 0) mask |= CLIP_NEAR_BIT;
d69 2
a70 1
	 vProj[i][0] = 0;	/* no longer required? */
d72 1
a72 2
	 vProj[i][2] = 0;
	 vProj[i][3] = 1;
@


3.8.2.2
log
@Removed SGIS multitexture, added FX/X86 assm directory
@
text
@d1 1
a1 1
/* $Id: clip_tmp.h,v 3.8.2.1 1999/05/21 21:29:25 keithw Exp $ */
a63 2

      clipMask[i] = mask;
d67 1
@


3.8.2.3
log
@Cliptest_points4 optimalizations for PowerPC.
@
text
@d1 1
a1 1
/* $Id: clip_tmp.h,v 3.8.2.2 1999/06/19 15:04:13 keithw Exp $ */
d53 1
a57 11
   #if defined(macintosh)
      /* On PowerPC this is 17% faster */
      GLuint mask;
      mask = (((cw < cx) << CLIP_RIGHT_SHIFT));
      mask |= (((cw < -cx) << CLIP_LEFT_SHIFT));
      mask |= (((cw < cy) << CLIP_TOP_SHIFT));
      mask |= (((cw < -cy) << CLIP_BOTTOM_SHIFT));
      mask |= (((cw < cz) << CLIP_NEAR_SHIFT));
      mask |= (((cw < -cz) << CLIP_FAR_SHIFT));
    #else /* !defined(macintosh)) */
      GLubyte mask = 0;
a63 1
    #endif /* defined(macintosh) */
@


3.8.2.4
log
@Typo fixed in PowerPC optimalizations.
@
text
@d1 1
a1 1
/* $Id: clip_tmp.h,v 3.8.2.3 1999/07/06 09:28:06 miklos Exp $ */
d58 1
a58 1
      /* On PowerPC cliptest is 17% faster in this way*/
d64 2
a65 2
      mask |= (((cw < cz) << CLIP_FAR_SHIFT));
      mask |= (((cw < -cz) << CLIP_NEAR_SHIFT));
@


3.7
log
@inserted copyright info
@
text
@d1 1
a1 1
/* $Id$ */
d69 4
@


3.6
log
@user-clip bug fixes, faster FX vertex snapping
@
text
@d1 30
@


3.5
log
@Compiled vertex arrays
@
text
@d84 1
a84 1
   if (proj_vec->flags & VEC_DIRTY_3) gl_vector4f_clean_elem(proj_vec, 3);
d116 1
a116 1
   if (proj_vec->flags & VEC_DIRTY_3) gl_vector4f_clean_elem(proj_vec, 3);
@


3.4
log
@fixed IRIX compiler warnings
@
text
@a6 2
 *
 * KW: Removed old CLIP_4D flag as part of fix for Jouk's FPE problems.
a23 1
      const GLfloat cw = from[3];
d27 1
d40 1
a40 1
	 GLfloat oow = 1.0 / from[3];	 
d84 1
a84 1
   if (proj_vec->flags & VEC_DIRTY_3) gl_clean_elem(3, proj_vec);
d116 1
a116 1
   if (proj_vec->flags & VEC_DIRTY_3) gl_clean_elem(3, proj_vec);
@


3.3
log
@Removed CLIP_4D_BIT, added CLIP_CULLED_BIT.  Clipmask is now used
to drive culling in vertex transformation, allowing us to skip
both clipped and culled vertices with a single test.
@
text
@d126 1
a126 4



static void TAG(init_c_cliptest)()
a131 3



@


3.2
log
@
    Patch got from joshv@@planet.net, which solves the overflow problem
    with gears
@
text
@d7 2
a8 6
 *   
 * For clipped primitives with W all +ve, we can ignore planes not
 * in the union of the bitmasks of the vertices.  With mixed +,-
 * this doesn't seem to be possible.  Thus the test for cw < 0 and
 * the new flag.  This test could alternately be done in 
 * viewclip_polygon_4.
d26 1
d30 6
a35 7
      const GLfloat cw = from[3];
      if (cx >  cw)       mask |= CLIP_RIGHT_BIT;
      else if (cx < -cw)  mask |= CLIP_LEFT_BIT;
      if (cy >  cw)       mask |= CLIP_TOP_BIT;
      else if (cy < -cw)  mask |= CLIP_BOTTOM_BIT;
      if (cz >  cw)       mask |= CLIP_FAR_BIT;
      else if (cz < -cw)  mask |= CLIP_NEAR_BIT;
a36 1
	 if (cw <= 0.0) mask |= CLIP_4D; /* can't skip planes in clipping */
a40 12
	 vProj[i][3] = 0.0;
	 vProj[i][0] = 0.0;
	 vProj[i][1] = 0.0;
	 vProj[i][2] = 0.0;
#if 0
      } else if (cw == 0) {
	 /* only get here for 0,0,0,0 - not really sure what
	  * the correct behaviour should be - at the moment 
	  * I'm inclined to ignore it.
	  */
	 clipMask[i] = tmpOrMask = CLIP_ALL_BITS|CLIP_4D;
#endif
d42 1
a42 1
	 GLfloat oow = 1.0 / cw; 
d46 2
a47 2
	 vProj[i][2] = cz * oow;
      }
d49 1
@


3.1
log
@Merged in kw3 patch
@
text
@d46 4
@

