Home My Page Projects Code Snippets Project Openings diderot

# SCM Repository

[diderot] View of /branches/pure-cfg/src/compiler/cl-target/fragments/eigen3x3.in
 [diderot] / branches / pure-cfg / src / compiler / cl-target / fragments / eigen3x3.in # View of /branches/pure-cfg/src/compiler/cl-target/fragments/eigen3x3.in

Tue Nov 29 10:43:54 2011 UTC (9 years, 10 months ago) by lamonts
File size: 10208 byte(s)
`Fixed some bugs on eigen implementation for OpenCL`
```#define ROOT_TRIPLE 2           /* ell_cubic_root_triple */
#define ROOT_SINGLE_DOUBLE 3    /* ell_cubic_root_single_double */
#define ROOT_THREE 4            /* ell_cubic_root_three */

// from http://en.wikipedia.org/wiki/Square_root_of_3
#define M_SQRT3 1.732050807568877293527446341506

/*
** All the three given vectors span only a 2D space, and this finds
** the normal to that plane.  Simply sums up all the pair-wise
** cross-products to get a good estimate.  Trick is getting the cross
** products to line up before summing.
*/
STATIC_INLINE Diderot_vec3_t nullspace1 (
const Diderot_vec3_t r0,
const Diderot_vec3_t r1,
const Diderot_vec3_t r2)
{
Diderot_vec3_t ret, crs;

ret = cross(r0, r1);
crs = cross(r1, r2);
/* ret += crs or ret -= crs; whichever makes ret longer */
if (dot(ret, crs) > 0.0)
ret += crs;
else
ret -= crs;

crs = cross(r0, r2);
/* ret += crs or ret -= crs; whichever makes ret longer */
if (dot(ret, crs) > 0.0)
ret += crs;
else
ret -= crs;

return ret;
}

/*
** All vectors are in the same 1D space, we have to find two
** mutually vectors perpendicular to that span
*/
static void nullspace2 (Diderot_vec3_t rets,
const Diderot_vec3_t r0,
const Diderot_vec3_t r1,
const Diderot_vec3_t r2)
{
Diderot_vec3_t sqr, sum;
int idx;

sum = r0;
if (dot(sum, r1) > 0)
sum += r1;
else
sum -= r1;
if (dot(sum, r2) > 0)
sum += r2;
else
sum -= r2;
// find largest component, to get most stable expression for a perpendicular vector
sqr = sum*sum;
idx = 0;
if ((sqr.s0 > sqr.s1) && (sqr.s0 > sqr.s2)) {
// sum is largest component
rets = VEC3(sum.s1 - sum.s2, -sum.s0, sum.s0);
}
else if (sqr.s1 > sqr.s2) {
// sum is largest component
rets = VEC3(-sum.s1, sum.s0 - sum.s2, sum.s1);
}
else {
// sum is largest component
rets = VEC3(-sum.s2, sum.s2, sum.s0 - sum.s1);
}

rets = cross(rets, sum);
return;
}

/*
** Eigensolver for symmetric 3x3 matrix:
**
**  M00  M01  M02
**  M01  M11  M12
**  M02  M12  M22
**
** Must be passed pointer to eval vector, and will compute eigenvalues
** only if evec is non-NULL.  Computed eigenvectors are at evec+0,
** evec+3, and evec+6.
**
** Return value indicates something about the eigenvalue solution to
** the cubic characteristic equation; see ROOT_ #defines above
**
** HEY: the numerical precision issues here are very subtle, and
** merit some more scrutiny.  With evals (1.000001, 1, 1), for example,
** whether it comes back as a single/double root, vs three distinct roots,
** is determines by the comparison between "D" and "epsilon", and the
** setting of epsilon seems pretty arbitrary at this point...
**
*/
int Diderot_evals3x3 (
float3 * eval,
const Diderot_real_t _M00, const Diderot_real_t _M01, const Diderot_real_t _M02,
const Diderot_real_t _M11, const Diderot_real_t _M12,
const Diderot_real_t _M22)
{
Diderot_real_t mean, norm, rnorm, Q, R, QQQ, D, theta, M00, M01, M02, M11, M12, M22;
int roots;

/* copy the given matrix elements */
M00 = _M00;
M01 = _M01;
M02 = _M02;
M11 = _M11;
M12 = _M12;
M22 = _M22;

/*
** subtract out the eigenvalue mean (will add back to evals later);
** helps with numerical stability
*/
mean = (M00 + M11 + M22)/3.0;
M00 -= mean;
M11 -= mean;
M22 -= mean;

/*
** divide out L2 norm of eigenvalues (will multiply back later);
** this too seems to help with stability
*/
norm = sqrt(M00*M00 + 2*M01*M01 + 2*M02*M02 +
M11*M11 + 2*M12*M12 +
M22*M22);
// QUESTION: what if norm is very close to zero?
rnorm = norm ? 1.0/norm : 1.0;
M00 *= rnorm;
M01 *= rnorm;
M02 *= rnorm;
M11 *= rnorm;
M12 *= rnorm;
M22 *= rnorm;

/* this code is a mix of prior Teem code and ideas from Eberly's
* "Eigensystems for 3 x 3 Symmetric Matrices (Revisited)"
*/
Q = (M01*M01 + M02*M02 + M12*M12 - M00*M11 - M00*M22 - M11*M22)/3.0;
QQQ = Q*Q*Q;
R = (M00*M11*M22 + M02*(2*M01*M12 - M02*M11)
- M00*M12*M12 - M01*M01*M22)/2.0;
D = QQQ - R*R;
if (D > EPSILON) {
/* three distinct roots- this is the most common case */
double mm, ss, cc;
theta = atan2(sqrt(D), R)/3.0;
mm = sqrt(Q);
ss = sin(theta);
cc = cos(theta);
eval->s0 = 2*mm*cc;
eval->s1 = mm*(-cc + M_SQRT3*ss);
eval->s2 = mm*(-cc - M_SQRT3*ss);
roots = ROOT_THREE;
}
/* else D is near enough to zero */
else if ((R < -EPSILON) || (EPSILON < R)) {
double U;
/* one double root and one single root */
U = cbrt(R); /* cube root function */
if (U > 0) {
eval->s0 = 2*U;
eval->s1 = -U;
eval->s2 = -U;
}
else {
eval->s0 = -U;
eval->s1 = -U;
eval->s2 = 2*U;
}
roots = ROOT_SINGLE_DOUBLE;
}
else {
/* a triple root! */
eval->s0 = eval->s1 = eval->s2 = 0.0;
roots = ROOT_TRIPLE;
}

/* multiply back by eigenvalue L2 norm */
eval->s0 /= rnorm;
eval->s1 /= rnorm;
eval->s2 /= rnorm;

/* add back in the eigenvalue mean */
eval->s0 += mean;
eval->s1 += mean;
eval->s2 += mean;

return roots;
}

int Diderot_evecs3x3 (
float3 * eval, Diderot_vec3_t evecs,
const Diderot_real_t _M00, const Diderot_real_t _M01, const Diderot_real_t _M02,
const Diderot_real_t _M11, const Diderot_real_t _M12,
const Diderot_real_t _M22)
{
Diderot_real_t len, dot;

Diderot_real_t mean, norm, rnorm, Q, R, QQQ, D, theta, M00, M01, M02, M11, M12, M22;
Diderot_real_t epsilon = 1.0E-12;
int roots;

/* copy the given matrix elements */
M00 = _M00;
M01 = _M01;
M02 = _M02;
M11 = _M11;
M12 = _M12;
M22 = _M22;

/*
** subtract out the eigenvalue mean (will add back to evals later);
** helps with numerical stability
*/
mean = (M00 + M11 + M22)/3.0;
M00 -= mean;
M11 -= mean;
M22 -= mean;

/*
** divide out L2 norm of eigenvalues (will multiply back later);
** this too seems to help with stability
*/
norm = sqrt(M00*M00 + 2*M01*M01 + 2*M02*M02 +
M11*M11 + 2*M12*M12 +
M22*M22);
rnorm = norm ? 1.0/norm : 1.0;
M00 *= rnorm;
M01 *= rnorm;
M02 *= rnorm;
M11 *= rnorm;
M12 *= rnorm;
M22 *= rnorm;

/* this code is a mix of prior Teem code and ideas from Eberly's
* "Eigensystems for 3 x 3 Symmetric Matrices (Revisited)"
*/
Q = (M01*M01 + M02*M02 + M12*M12 - M00*M11 - M00*M22 - M11*M22)/3.0;
QQQ = Q*Q*Q;
R = (M00*M11*M22 + M02*(2*M01*M12 - M02*M11) - M00*M12*M12 - M01*M01*M22)/2.0;
D = QQQ - R*R;
if (D > epsilon) {
/* three distinct roots- this is the most common case */
double mm, ss, cc;
theta = atan2(sqrt(D), R)/3.0;
mm = sqrt(Q);
ss = sin(theta);
cc = cos(theta);
eval->s0 = 2*mm*cc;
eval->s1 = mm*(-cc + sqrt(3.0)*ss);
eval->s2 = mm*(-cc - sqrt(3.0)*ss);
roots = ROOT_THREE;
}
/* else D is near enough to zero */
else if (R < -epsilon || epsilon < R) {
double U;
/* one double root and one single root */
U = cbrt(R); /* cube root function */
if (U > 0) {
eval->s0 = 2*U;
eval->s1 = -U;
eval->s2 = -U;
} else {
eval->s0 = -U;
eval->s1 = -U;
eval->s2 = 2*U;
}
roots = ROOT_SINGLE_DOUBLE;
}
else {
/* a triple root! */
eval->s0 = eval->s1 = eval->s2 = 0.0;
roots = ROOT_TRIPLE;
}

Diderot_vec3_t ev = VEC3(eval->s0, eval->s1, eval->s2);
if (ROOT_THREE == roots) {
evecs = nullspace1 (
VEC3(M00 - eval->s0, M01, M02),
VEC3(M01, M11 - eval->s0, M12),
VEC3(M02, M12, M22 - eval->s0));
evecs = nullspace1 (
VEC3(M00 - eval->s1, M01, M02),
VEC3(M01, M11 - eval->s1, M12),
VEC3(M02, M12, M22 - eval->s1));
evecs = nullspace1 (
VEC3(M00 - eval->s2, M01, M02),
VEC3(M01, M11 - eval->s2, M12),
VEC3(M02, M12, M22 - eval->s2));
}
else if (ROOT_SINGLE_DOUBLE == roots) {
if (eval->s1 == eval->s2) {
/* one big (eval) , two small (eval[1,2]) */
evecs = nullspace1 (
VEC3(M00 - eval->s0, M01, M02),
VEC3(M01, M11 - eval->s0, M12),
VEC3(M02, M12, M22 - eval->s0));
nullspace2 (evecs+1,
VEC3(M00 - eval->s1, M01, M02),
VEC3(M01, M11 - eval->s1, M12),
VEC3(M02, M12, M22 - eval->s1));
}
else {
/* two big (eval[0,1]), one small (eval) */
nullspace2 (evecs,
VEC3(M00 - eval->s0, M01, M02),
VEC3(M01, M11 - eval->s0, M12),
VEC3(M02, M12, M22 - eval->s0));
evecs = nullspace1 (
VEC3(M00 - eval->s2, M01, M02),
VEC3(M01, M11 - eval->s2, M12),
VEC3(M02, M12, M22 - eval->s2));
}
}
else {
/* ROOT_TRIPLE == roots; use any basis for eigenvectors */
evecs = VEC3(1, 0, 0);
evecs = VEC3(0, 1, 0);
evecs = VEC3(0, 0, 1);
}
/* we always make sure it's really orthonormal; keeping fixed the
* eigenvector associated with the largest-magnitude eigenvalue
*/
if (fabs(eval->s0) > fabs(eval->s2)) {
/* normalize evec but don't move it */
evecs = normalize(evecs);
evecs -= dot(evecs, evecs) * evecs;
evecs = normalize(evecs);
evecs = cross(evecs, evecs);
}
else {
/* normalize evec but don't move it */
evecs = normalize(evecs);
evecs -= dot(evecs, evecs) * evecs;
evecs = normalize(evecs);
evecs = cross(evecs, evecs);
}
/* note that the right-handedness check has been folded into
the code above to enforce orthogonality.  Indeed, some work
could be removed by never really bothering to find all three
eigenvectors; just find two and then use the cross-product.
The next iteration of the code will do that */

/* multiply back by eigenvalue L2 norm */
eval->s0 /= rnorm;
eval->s1 /= rnorm;
eval->s2 /= rnorm;

/* add back in the eigenvalue mean */
eval->s0 += mean;
eval->s1 += mean;
eval->s2 += mean;

return roots;
}
```