Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] View of /trunk/src/compiler/cl-target/fragments/eigen3x3.in
ViewVC logotype

View of /trunk/src/compiler/cl-target/fragments/eigen3x3.in

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1875 - (download) (annotate)
Wed May 16 23:07:38 2012 UTC (7 years, 2 months ago) by jhr
File size: 10302 byte(s)
  Change "eval->" to "(*eval)." to satisfy the AMD OpenCL compiler.
#define ROOT_TRIPLE 2           /* ell_cubic_root_triple */
#define ROOT_SINGLE_DOUBLE 3    /* ell_cubic_root_single_double */
#define ROOT_THREE 4            /* ell_cubic_root_three */

// from http://en.wikipedia.org/wiki/Square_root_of_3
#define M_SQRT3 1.732050807568877293527446341506

/*
** All the three given vectors span only a 2D space, and this finds
** the normal to that plane.  Simply sums up all the pair-wise
** cross-products to get a good estimate.  Trick is getting the cross
** products to line up before summing.
*/
STATIC_INLINE Diderot_vec3_t nullspace1 (
    const Diderot_vec3_t r0,
    const Diderot_vec3_t r1,
    const Diderot_vec3_t r2)
{
    Diderot_vec3_t ret, crs;
  
    ret = cross(r0, r1);
    crs = cross(r1, r2);
  /* ret += crs or ret -= crs; whichever makes ret longer */
    if (dot(ret, crs) > 0.0)
	ret += crs;
    else
	ret -= crs;

    crs = cross(r0, r2);
  /* ret += crs or ret -= crs; whichever makes ret longer */
    if (dot(ret, crs) > 0.0)
	ret += crs;
    else
	ret -= crs;

    return ret;
}

/*
** All vectors are in the same 1D space, we have to find two 
** mutually vectors perpendicular to that span
*/
static void nullspace2 (Diderot_vec3_t rets[2],
    const Diderot_vec3_t r0,
    const Diderot_vec3_t r1,
    const Diderot_vec3_t r2)
{
    Diderot_vec3_t sqr, sum;
    int idx;

    sum = r0;
    if (dot(sum, r1) > 0)
	sum += r1;
    else
	sum -= r1;
    if (dot(sum, r2) > 0)
	sum += r2;
    else
	sum -= r2;
  // find largest component, to get most stable expression for a perpendicular vector
    sqr = sum*sum;
    idx = 0;
    if ((sqr.s0 > sqr.s1) && (sqr.s0 > sqr.s2)) {
      // sum[0] is largest component
        rets[0] = VEC3(sum.s1 - sum.s2, -sum.s0, sum.s0);
    }
    else if (sqr.s1 > sqr.s2) {
      // sum[0] is largest component
        rets[0] = VEC3(-sum.s1, sum.s0 - sum.s2, sum.s1);
    }
    else {
      // sum[0] is largest component
        rets[0] = VEC3(-sum.s2, sum.s2, sum.s0 - sum.s1);
    }
  
    rets[1] = cross(rets[0], sum);
    return;
}

/*
** Eigensolver for symmetric 3x3 matrix:
**
**  M00  M01  M02
**  M01  M11  M12
**  M02  M12  M22
**
** Must be passed pointer to eval vector, and will compute eigenvalues
** only if evec[9] is non-NULL.  Computed eigenvectors are at evec+0,
** evec+3, and evec+6.
**
** Return value indicates something about the eigenvalue solution to
** the cubic characteristic equation; see ROOT_ #defines above
** 
** HEY: the numerical precision issues here are very subtle, and
** merit some more scrutiny.  With evals (1.000001, 1, 1), for example,
** whether it comes back as a single/double root, vs three distinct roots,
** is determines by the comparison between "D" and "epsilon", and the 
** setting of epsilon seems pretty arbitrary at this point...
** 
*/
int Diderot_evals3x3 (
    float3 *eval,
    const Diderot_real_t _M00, const Diderot_real_t _M01, const Diderot_real_t _M02, 
    const Diderot_real_t _M11, const Diderot_real_t _M12, 
    const Diderot_real_t _M22)
{
    Diderot_real_t mean, norm, rnorm, Q, R, QQQ, D, theta, M00, M01, M02, M11, M12, M22;
    int roots;

  /* copy the given matrix elements */
    M00 = _M00;
    M01 = _M01;
    M02 = _M02;
    M11 = _M11;
    M12 = _M12;
    M22 = _M22;

  /*
  ** subtract out the eigenvalue mean (will add back to evals later);
  ** helps with numerical stability
  */
    mean = (M00 + M11 + M22)/3.0;
    M00 -= mean;
    M11 -= mean;
    M22 -= mean;
  
  /* 
  ** divide out L2 norm of eigenvalues (will multiply back later);
  ** this too seems to help with stability
  */
    norm = sqrt(M00*M00 + 2*M01*M01 + 2*M02*M02 +
		M11*M11 + 2*M12*M12 +
		M22*M22);
// QUESTION: what if norm is very close to zero?
    rnorm = norm ? 1.0/norm : 1.0;
    M00 *= rnorm;
    M01 *= rnorm;
    M02 *= rnorm;
    M11 *= rnorm;
    M12 *= rnorm;
    M22 *= rnorm;

  /* this code is a mix of prior Teem code and ideas from Eberly's
   * "Eigensystems for 3 x 3 Symmetric Matrices (Revisited)"
   */
    Q = (M01*M01 + M02*M02 + M12*M12 - M00*M11 - M00*M22 - M11*M22)/3.0;
    QQQ = Q*Q*Q;
    R = (M00*M11*M22 + M02*(2*M01*M12 - M02*M11) 
	 - M00*M12*M12 - M01*M01*M22)/2.0;
    D = QQQ - R*R;
    if (D > EPSILON) {
      /* three distinct roots- this is the most common case */
	Diderot_real_t  mm, ss, cc;
	theta = atan2(sqrt(D), R)/3.0;
	mm = sqrt(Q);
	ss = sin(theta);
	cc = cos(theta);
	(*eval)s0 = 2*mm*cc;
	(*eval)s1 = mm*(-cc + M_SQRT3*ss);
	(*eval)s2 = mm*(-cc - M_SQRT3*ss);
	roots = ROOT_THREE;
    }
  /* else D is near enough to zero */
    else if ((R < -EPSILON) || (EPSILON < R)) {
	Diderot_real_t  U;
      /* one double root and one single root */
	U = cbrt(R); /* cube root function */
	if (U > 0) {
	    (*eval)s0 = 2*U;
	    (*eval)s1 = -U;
	    (*eval)s2 = -U;
	}
	else {
	    (*eval)s0 = -U;
	    (*eval)s1 = -U;
	    (*eval)s2 = 2*U;
	}
	roots = ROOT_SINGLE_DOUBLE;
    }
    else {
      /* a triple root! */
	(*eval)s0 = (*eval)s1 = (*eval)s2 = 0.0;
	roots = ROOT_TRIPLE;
    }

  /* multiply back by eigenvalue L2 norm */
    (*eval)s0 /= rnorm;
    (*eval)s1 /= rnorm;
    (*eval)s2 /= rnorm;

  /* add back in the eigenvalue mean */
    (*eval)s0 += mean;
    (*eval)s1 += mean;
    (*eval)s2 += mean;

    return roots;
}

int Diderot_evecs3x3 (
	float3 * eval, Diderot_vec3_t evecs[3],
	const Diderot_real_t _M00, const Diderot_real_t _M01, const Diderot_real_t _M02, 
	const Diderot_real_t _M11, const Diderot_real_t _M12, 
	const Diderot_real_t _M22)
{
    Diderot_real_t len;

    Diderot_real_t mean, norm, rnorm, Q, R, QQQ, D, theta, M00, M01, M02, M11, M12, M22;
    Diderot_real_t epsilon = 1.0E-12;
    int roots;

  /* copy the given matrix elements */
    M00 = _M00;
    M01 = _M01;
    M02 = _M02;
    M11 = _M11;
    M12 = _M12;
    M22 = _M22;

  /*
  ** subtract out the eigenvalue mean (will add back to evals later);
  ** helps with numerical stability
  */
    mean = (M00 + M11 + M22)/3.0;
    M00 -= mean;
    M11 -= mean;
    M22 -= mean;
  
  /* 
  ** divide out L2 norm of eigenvalues (will multiply back later);
  ** this too seems to help with stability
  */
    norm = sqrt(M00*M00 + 2*M01*M01 + 2*M02*M02 +
                M11*M11 + 2*M12*M12 +
                M22*M22);
    rnorm = norm ? 1.0/norm : 1.0;
    M00 *= rnorm;
    M01 *= rnorm;
    M02 *= rnorm;
    M11 *= rnorm;
    M12 *= rnorm;
    M22 *= rnorm;

  /* this code is a mix of prior Teem code and ideas from Eberly's
   * "Eigensystems for 3 x 3 Symmetric Matrices (Revisited)"
   */
    Q = (M01*M01 + M02*M02 + M12*M12 - M00*M11 - M00*M22 - M11*M22)/3.0;
    QQQ = Q*Q*Q;
    R = (M00*M11*M22 + M02*(2*M01*M12 - M02*M11) - M00*M12*M12 - M01*M01*M22)/2.0;
    D = QQQ - R*R;
    if (D > epsilon) {
      /* three distinct roots- this is the most common case */
        Diderot_real_t  mm, ss, cc;
        theta = atan2(sqrt(D), R)/3.0;
        mm = sqrt(Q);
        ss = sin(theta);
        cc = cos(theta);
        (*eval)s0 = 2*mm*cc;
        (*eval)s1 = mm*(-cc + sqrt(3.0)*ss);
        (*eval)s2 = mm*(-cc - sqrt(3.0)*ss);
        roots = ROOT_THREE;
    }
  /* else D is near enough to zero */
    else if (R < -epsilon || epsilon < R) {
        Diderot_real_t  U;
      /* one double root and one single root */
        U = cbrt(R); /* cube root function */
        if (U > 0) {
            (*eval)s0 = 2*U;
            (*eval)s1 = -U;
            (*eval)s2 = -U;
        } else {
            (*eval)s0 = -U;
            (*eval)s1 = -U;
            (*eval)s2 = 2*U;
        }
        roots = ROOT_SINGLE_DOUBLE;
    }
    else {
      /* a triple root! */
        (*eval)s0 = (*eval)s1 = (*eval)s2 = 0.0;
        roots = ROOT_TRIPLE;
    }

    Diderot_vec3_t ev = VEC3((*eval)s0, (*eval)s1, (*eval)s2);
    if (ROOT_THREE == roots) {
	evecs[0] = nullspace1 (
		VEC3(M00 - (*eval)s0, M01, M02),
		VEC3(M01, M11 - (*eval)s0, M12),
		VEC3(M02, M12, M22 - (*eval)s0));
	evecs[1] = nullspace1 (
		VEC3(M00 - (*eval)s1, M01, M02),
		VEC3(M01, M11 - (*eval)s1, M12),
		VEC3(M02, M12, M22 - (*eval)s1));
	evecs[2] = nullspace1 (
		VEC3(M00 - (*eval)s2, M01, M02),
		VEC3(M01, M11 - (*eval)s2, M12),
		VEC3(M02, M12, M22 - (*eval)s2));
    }
    else if (ROOT_SINGLE_DOUBLE == roots) {
	if ((*eval)s1 == (*eval)s2) {
	  /* one big (eval[0]) , two small (eval[1,2]) */
	    evecs[0] = nullspace1 (
		    VEC3(M00 - (*eval)s0, M01, M02),
		    VEC3(M01, M11 - (*eval)s0, M12),
		    VEC3(M02, M12, M22 - (*eval)s0));
	    nullspace2 (evecs+1,
		    VEC3(M00 - (*eval)s1, M01, M02),
		    VEC3(M01, M11 - (*eval)s1, M12),
		    VEC3(M02, M12, M22 - (*eval)s1));
	}
	else {
	  /* two big (eval[0,1]), one small (eval[2]) */
	    nullspace2 (evecs,
		    VEC3(M00 - (*eval)s0, M01, M02),
		    VEC3(M01, M11 - (*eval)s0, M12),
		    VEC3(M02, M12, M22 - (*eval)s0));
	    evecs[2] = nullspace1 (
		    VEC3(M00 - (*eval)s2, M01, M02),
		    VEC3(M01, M11 - (*eval)s2, M12),
		    VEC3(M02, M12, M22 - (*eval)s2));
	}
    }
    else {
      /* ROOT_TRIPLE == roots; use any basis for eigenvectors */
	evecs[0] = VEC3(1, 0, 0);
	evecs[1] = VEC3(0, 1, 0);
	evecs[2] = VEC3(0, 0, 1);
    }
  /* we always make sure it's really orthonormal; keeping fixed the
   * eigenvector associated with the largest-magnitude eigenvalue
   */
    if (fabs((*eval)s0) > fabs((*eval)s2)) {
      /* normalize evec[0] but don't move it */
	evecs[0] = normalize(evecs[0]);
	evecs[1] -= dot(evecs[1], evecs[0]) * evecs[0];
	evecs[1] = normalize(evecs[1]);
        evecs[2] = cross(evecs[0], evecs[1]);
    }
    else {
      /* normalize evec[2] but don't move it */
	evecs[2] = normalize(evecs[2]);
	evecs[1] -= dot(evecs[1], evecs[2]) * evecs[2];
	evecs[1] = normalize(evecs[1]);
        evecs[0] = cross(evecs[1], evecs[2]);
    }
    /* note that the right-handedness check has been folded into
       the code above to enforce orthogonality.  Indeed, some work
       could be removed by never really bothering to find all three
       eigenvectors; just find two and then use the cross-product. 
       The next iteration of the code will do that */
    
  /* multiply back by eigenvalue L2 norm */
    (*eval)s0 /= rnorm;
    (*eval)s1 /= rnorm;
    (*eval)s2 /= rnorm;

  /* add back in the eigenvalue mean */
    (*eval)s0 += mean;
    (*eval)s1 += mean;
    (*eval)s2 += mean;

    return roots;
}

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0