SCM Repository
[diderot] / branches / cuda / src / compiler / cuda-target / fragments / eigen3x3.in |
View of /branches/cuda/src/compiler/cuda-target/fragments/eigen3x3.in
Parent Directory
|
Revision Log
Revision 1677 -
(download)
(annotate)
Fri Dec 16 22:17:33 2011 UTC (9 years, 1 month ago) by jhr
File size: 10208 byte(s)
Fri Dec 16 22:17:33 2011 UTC (9 years, 1 month ago) by jhr
File size: 10208 byte(s)
Working on CUDA target
#define ROOT_TRIPLE 2 /* ell_cubic_root_triple */ #define ROOT_SINGLE_DOUBLE 3 /* ell_cubic_root_single_double */ #define ROOT_THREE 4 /* ell_cubic_root_three */ // from http://en.wikipedia.org/wiki/Square_root_of_3 #define M_SQRT3 1.732050807568877293527446341506 /* ** All the three given vectors span only a 2D space, and this finds ** the normal to that plane. Simply sums up all the pair-wise ** cross-products to get a good estimate. Trick is getting the cross ** products to line up before summing. */ STATIC_INLINE Diderot_vec3_t nullspace1 ( const Diderot_vec3_t r0, const Diderot_vec3_t r1, const Diderot_vec3_t r2) { Diderot_vec3_t ret, crs; ret = cross(r0, r1); crs = cross(r1, r2); /* ret += crs or ret -= crs; whichever makes ret longer */ if (dot(ret, crs) > 0.0) ret += crs; else ret -= crs; crs = cross(r0, r2); /* ret += crs or ret -= crs; whichever makes ret longer */ if (dot(ret, crs) > 0.0) ret += crs; else ret -= crs; return ret; } /* ** All vectors are in the same 1D space, we have to find two ** mutually vectors perpendicular to that span */ static void nullspace2 (Diderot_vec3_t rets[2], const Diderot_vec3_t r0, const Diderot_vec3_t r1, const Diderot_vec3_t r2) { Diderot_vec3_t sqr, sum; int idx; sum = r0; if (dot(sum, r1) > 0) sum += r1; else sum -= r1; if (dot(sum, r2) > 0) sum += r2; else sum -= r2; // find largest component, to get most stable expression for a perpendicular vector sqr = sum*sum; idx = 0; if ((sqr.s0 > sqr.s1) && (sqr.s0 > sqr.s2)) { // sum[0] is largest component rets[0] = VEC3(sum.s1 - sum.s2, -sum.s0, sum.s0); } else if (sqr.s1 > sqr.s2) { // sum[0] is largest component rets[0] = VEC3(-sum.s1, sum.s0 - sum.s2, sum.s1); } else { // sum[0] is largest component rets[0] = VEC3(-sum.s2, sum.s2, sum.s0 - sum.s1); } rets[1] = cross(rets[0], sum); return; } /* ** Eigensolver for symmetric 3x3 matrix: ** ** M00 M01 M02 ** M01 M11 M12 ** M02 M12 M22 ** ** Must be passed pointer to eval vector, and will compute eigenvalues ** only if evec[9] is non-NULL. Computed eigenvectors are at evec+0, ** evec+3, and evec+6. ** ** Return value indicates something about the eigenvalue solution to ** the cubic characteristic equation; see ROOT_ #defines above ** ** HEY: the numerical precision issues here are very subtle, and ** merit some more scrutiny. With evals (1.000001, 1, 1), for example, ** whether it comes back as a single/double root, vs three distinct roots, ** is determines by the comparison between "D" and "epsilon", and the ** setting of epsilon seems pretty arbitrary at this point... ** */ int Diderot_evals3x3 ( float3 * eval, const Diderot_real_t _M00, const Diderot_real_t _M01, const Diderot_real_t _M02, const Diderot_real_t _M11, const Diderot_real_t _M12, const Diderot_real_t _M22) { Diderot_real_t mean, norm, rnorm, Q, R, QQQ, D, theta, M00, M01, M02, M11, M12, M22; int roots; /* copy the given matrix elements */ M00 = _M00; M01 = _M01; M02 = _M02; M11 = _M11; M12 = _M12; M22 = _M22; /* ** subtract out the eigenvalue mean (will add back to evals later); ** helps with numerical stability */ mean = (M00 + M11 + M22)/3.0; M00 -= mean; M11 -= mean; M22 -= mean; /* ** divide out L2 norm of eigenvalues (will multiply back later); ** this too seems to help with stability */ norm = sqrt(M00*M00 + 2*M01*M01 + 2*M02*M02 + M11*M11 + 2*M12*M12 + M22*M22); // QUESTION: what if norm is very close to zero? rnorm = norm ? 1.0/norm : 1.0; M00 *= rnorm; M01 *= rnorm; M02 *= rnorm; M11 *= rnorm; M12 *= rnorm; M22 *= rnorm; /* this code is a mix of prior Teem code and ideas from Eberly's * "Eigensystems for 3 x 3 Symmetric Matrices (Revisited)" */ Q = (M01*M01 + M02*M02 + M12*M12 - M00*M11 - M00*M22 - M11*M22)/3.0; QQQ = Q*Q*Q; R = (M00*M11*M22 + M02*(2*M01*M12 - M02*M11) - M00*M12*M12 - M01*M01*M22)/2.0; D = QQQ - R*R; if (D > EPSILON) { /* three distinct roots- this is the most common case */ double mm, ss, cc; theta = atan2(sqrt(D), R)/3.0; mm = sqrt(Q); ss = sin(theta); cc = cos(theta); eval->s0 = 2*mm*cc; eval->s1 = mm*(-cc + M_SQRT3*ss); eval->s2 = mm*(-cc - M_SQRT3*ss); roots = ROOT_THREE; } /* else D is near enough to zero */ else if ((R < -EPSILON) || (EPSILON < R)) { double U; /* one double root and one single root */ U = cbrt(R); /* cube root function */ if (U > 0) { eval->s0 = 2*U; eval->s1 = -U; eval->s2 = -U; } else { eval->s0 = -U; eval->s1 = -U; eval->s2 = 2*U; } roots = ROOT_SINGLE_DOUBLE; } else { /* a triple root! */ eval->s0 = eval->s1 = eval->s2 = 0.0; roots = ROOT_TRIPLE; } /* multiply back by eigenvalue L2 norm */ eval->s0 /= rnorm; eval->s1 /= rnorm; eval->s2 /= rnorm; /* add back in the eigenvalue mean */ eval->s0 += mean; eval->s1 += mean; eval->s2 += mean; return roots; } int Diderot_evecs3x3 ( float3 * eval, Diderot_vec3_t evecs[3], const Diderot_real_t _M00, const Diderot_real_t _M01, const Diderot_real_t _M02, const Diderot_real_t _M11, const Diderot_real_t _M12, const Diderot_real_t _M22) { Diderot_real_t len, dot; Diderot_real_t mean, norm, rnorm, Q, R, QQQ, D, theta, M00, M01, M02, M11, M12, M22; Diderot_real_t epsilon = 1.0E-12; int roots; /* copy the given matrix elements */ M00 = _M00; M01 = _M01; M02 = _M02; M11 = _M11; M12 = _M12; M22 = _M22; /* ** subtract out the eigenvalue mean (will add back to evals later); ** helps with numerical stability */ mean = (M00 + M11 + M22)/3.0; M00 -= mean; M11 -= mean; M22 -= mean; /* ** divide out L2 norm of eigenvalues (will multiply back later); ** this too seems to help with stability */ norm = sqrt(M00*M00 + 2*M01*M01 + 2*M02*M02 + M11*M11 + 2*M12*M12 + M22*M22); rnorm = norm ? 1.0/norm : 1.0; M00 *= rnorm; M01 *= rnorm; M02 *= rnorm; M11 *= rnorm; M12 *= rnorm; M22 *= rnorm; /* this code is a mix of prior Teem code and ideas from Eberly's * "Eigensystems for 3 x 3 Symmetric Matrices (Revisited)" */ Q = (M01*M01 + M02*M02 + M12*M12 - M00*M11 - M00*M22 - M11*M22)/3.0; QQQ = Q*Q*Q; R = (M00*M11*M22 + M02*(2*M01*M12 - M02*M11) - M00*M12*M12 - M01*M01*M22)/2.0; D = QQQ - R*R; if (D > epsilon) { /* three distinct roots- this is the most common case */ double mm, ss, cc; theta = atan2(sqrt(D), R)/3.0; mm = sqrt(Q); ss = sin(theta); cc = cos(theta); eval->s0 = 2*mm*cc; eval->s1 = mm*(-cc + sqrt(3.0)*ss); eval->s2 = mm*(-cc - sqrt(3.0)*ss); roots = ROOT_THREE; } /* else D is near enough to zero */ else if (R < -epsilon || epsilon < R) { double U; /* one double root and one single root */ U = cbrt(R); /* cube root function */ if (U > 0) { eval->s0 = 2*U; eval->s1 = -U; eval->s2 = -U; } else { eval->s0 = -U; eval->s1 = -U; eval->s2 = 2*U; } roots = ROOT_SINGLE_DOUBLE; } else { /* a triple root! */ eval->s0 = eval->s1 = eval->s2 = 0.0; roots = ROOT_TRIPLE; } Diderot_vec3_t ev = VEC3(eval->s0, eval->s1, eval->s2); if (ROOT_THREE == roots) { evecs[0] = nullspace1 ( VEC3(M00 - eval->s0, M01, M02), VEC3(M01, M11 - eval->s0, M12), VEC3(M02, M12, M22 - eval->s0)); evecs[1] = nullspace1 ( VEC3(M00 - eval->s1, M01, M02), VEC3(M01, M11 - eval->s1, M12), VEC3(M02, M12, M22 - eval->s1)); evecs[2] = nullspace1 ( VEC3(M00 - eval->s2, M01, M02), VEC3(M01, M11 - eval->s2, M12), VEC3(M02, M12, M22 - eval->s2)); } else if (ROOT_SINGLE_DOUBLE == roots) { if (eval->s1 == eval->s2) { /* one big (eval[0]) , two small (eval[1,2]) */ evecs[0] = nullspace1 ( VEC3(M00 - eval->s0, M01, M02), VEC3(M01, M11 - eval->s0, M12), VEC3(M02, M12, M22 - eval->s0)); nullspace2 (evecs+1, VEC3(M00 - eval->s1, M01, M02), VEC3(M01, M11 - eval->s1, M12), VEC3(M02, M12, M22 - eval->s1)); } else { /* two big (eval[0,1]), one small (eval[2]) */ nullspace2 (evecs, VEC3(M00 - eval->s0, M01, M02), VEC3(M01, M11 - eval->s0, M12), VEC3(M02, M12, M22 - eval->s0)); evecs[2] = nullspace1 ( VEC3(M00 - eval->s2, M01, M02), VEC3(M01, M11 - eval->s2, M12), VEC3(M02, M12, M22 - eval->s2)); } } else { /* ROOT_TRIPLE == roots; use any basis for eigenvectors */ evecs[0] = VEC3(1, 0, 0); evecs[1] = VEC3(0, 1, 0); evecs[2] = VEC3(0, 0, 1); } /* we always make sure it's really orthonormal; keeping fixed the * eigenvector associated with the largest-magnitude eigenvalue */ if (fabs(eval->s0) > fabs(eval->s2)) { /* normalize evec[0] but don't move it */ evecs[0] = normalize(evecs[0]); evecs[1] -= dot(evecs[1], evecs[0]) * evecs[0]; evecs[1] = normalize(evecs[1]); evecs[2] = cross(evecs[0], evecs[1]); } else { /* normalize evec[2] but don't move it */ evecs[2] = normalize(evecs[2]); evecs[1] -= dot(evecs[1], evecs[2]) * evecs[2]; evecs[1] = normalize(evecs[1]); evecs[0] = cross(evecs[1], evecs[2]); } /* note that the right-handedness check has been folded into the code above to enforce orthogonality. Indeed, some work could be removed by never really bothering to find all three eigenvectors; just find two and then use the cross-product. The next iteration of the code will do that */ /* multiply back by eigenvalue L2 norm */ eval->s0 /= rnorm; eval->s1 /= rnorm; eval->s2 /= rnorm; /* add back in the eigenvalue mean */ eval->s0 += mean; eval->s1 += mean; eval->s2 += mean; return roots; }
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |