Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Annotation of /branches/pure-cfg/src/lib/c-target/eigen3x3.c
ViewVC logotype

Annotation of /branches/pure-cfg/src/lib/c-target/eigen3x3.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1544 - (view) (download) (as text)

1 : jhr 1172 /*! \file eigen.c
2 :     *
3 :     * \author Gordon Kindlmann
4 :     */
5 :    
6 : jhr 1106 /*
7 : jhr 1172 * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
8 :     * All rights reserved.
9 :     */
10 :    
11 :     /*
12 : jhr 1106 Teem: Tools to process and visualize scientific data and images
13 :     Copyright (C) 2011, 2010, 2009, University of Chicago
14 :     Copyright (C) 2008, 2007, 2006, 2005 Gordon Kindlmann
15 :     Copyright (C) 2004, 2003, 2002, 2001, 2000, 1999, 1998 University of Utah
16 :    
17 :     This library is free software; you can redistribute it and/or
18 :     modify it under the terms of the GNU Lesser General Public License
19 :     (LGPL) as published by the Free Software Foundation; either
20 :     version 2.1 of the License, or (at your option) any later version.
21 :     The terms of redistributing and/or modifying this software also
22 :     include exceptions to the LGPL that facilitate static linking.
23 :    
24 :     This library is distributed in the hope that it will be useful,
25 :     but WITHOUT ANY WARRANTY; without even the implied warranty of
26 :     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 :     Lesser General Public License for more details.
28 :    
29 :     You should have received a copy of the GNU Lesser General Public License
30 :     along with this library; if not, write to Free Software Foundation, Inc.,
31 :     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
32 :     */
33 :    
34 : jhr 1172 #include <Diderot/diderot.h>
35 : jhr 1106
36 :     #define ROOT_TRIPLE 2 /* ell_cubic_root_triple */
37 :     #define ROOT_SINGLE_DOUBLE 3 /* ell_cubic_root_single_double */
38 :     #define ROOT_THREE 4 /* ell_cubic_root_three */
39 :    
40 : jhr 1174 // from http://en.wikipedia.org/wiki/Square_root_of_3
41 :     #define M_SQRT3 1.732050807568877293527446341506
42 : jhr 1106
43 : jhr 1174 #ifdef DIDEROT_DOUBLE_PRECISION
44 : jhr 1544 # define ABS(x) fabs(x)
45 :     # define SQRT(x) sqrt(x)
46 :     # define SIN(x) sin(x)
47 :     # define COS(x) cos(x)
48 :     # define ATAN2(x,y) atan2(x,y)
49 :     # define CBRT(x) cbrt(x)
50 :     # define VEC(a,b,c) vec3d(a,b,c)
51 :     # define DOT(u,v) dot3d(u,v)
52 :     # define CROSS(u,v) cross3d(u,v)
53 :     # define SCALE(s,v) scale3d(s,v)
54 :     # define NORMALIZE(v) normalize3d(v)
55 : jhr 1174 #else
56 :     # define ABS(x) fabsf(x)
57 :     # define SQRT(x) sqrtf(x)
58 :     # define SIN(x) sinf(x)
59 :     # define COS(x) cosf(x)
60 :     # define ATAN2(x,y) atan2f(x,y)
61 :     # define CBRT(x) cbrtf(x)
62 : jhr 1509 # define VEC(a,b,c) vec3f(a,b,c)
63 :     # define DOT(u,v) dot3f(u,v)
64 :     # define CROSS(u,v) cross3f(u,v)
65 :     # define SCALE(s,v) scale3f(s,v)
66 :     # define NORMALIZE(v) normalize3f(v)
67 : jhr 1174 #endif
68 : jhr 1511 #define SUB(v,i) (((Diderot_union3_t)(v)).r[i])
69 : jhr 1174
70 : glk 1534 // OpenCL 1.0 does not specify the C representation of the host types
71 :     // for OpenCL vector types (e.g., cl_float4), so we have to handle this
72 :     // mechanism with a macro.
73 :     #if defined(CL_HOST_VECTORS_ARE_STRUCTS)
74 :     # define VSUB(v,i) (v).s[i]
75 :     #elif defined(CL_HOST_VECTORS_ARE_ARRAYS)
76 :     # define VSUB(v,i) (v)[i]
77 :     #else
78 :     # error unable to access elements of host vectors
79 : lamonts 1513 #endif
80 : jhr 1544
81 : jhr 1106 /*
82 :     ** All the three given vectors span only a 2D space, and this finds
83 :     ** the normal to that plane. Simply sums up all the pair-wise
84 :     ** cross-products to get a good estimate. Trick is getting the cross
85 :     ** products to line up before summing.
86 :     */
87 : jhr 1174 STATIC_INLINE Diderot_vec3_t nullspace1 (
88 :     const Diderot_vec3_t r0,
89 :     const Diderot_vec3_t r1,
90 :     const Diderot_vec3_t r2)
91 :     {
92 :     Diderot_vec3_t ret, crs;
93 : jhr 1106
94 : jhr 1509 ret = CROSS(r0, r1);
95 :     crs = CROSS(r1, r2);
96 : jhr 1106 /* ret += crs or ret -= crs; whichever makes ret longer */
97 : jhr 1509 if (DOT(ret, crs) > 0.0)
98 : jhr 1174 ret += crs;
99 :     else
100 :     ret -= crs;
101 :    
102 : jhr 1509 crs = CROSS(r0, r2);
103 : jhr 1106 /* ret += crs or ret -= crs; whichever makes ret longer */
104 : jhr 1509 if (DOT(ret, crs) > 0.0)
105 : jhr 1174 ret += crs;
106 :     else
107 :     ret -= crs;
108 : jhr 1106
109 : jhr 1174 return ret;
110 : jhr 1106 }
111 :    
112 :     /*
113 :     ** All vectors are in the same 1D space, we have to find two
114 :     ** mutually vectors perpendicular to that span
115 :     */
116 : jhr 1544 static void nullspace2 (Diderot_vec3_t rets[2],
117 : jhr 1174 const Diderot_vec3_t r0,
118 :     const Diderot_vec3_t r1,
119 :     const Diderot_vec3_t r2)
120 :     {
121 : jhr 1511 Diderot_vec3_t sqr, sum;
122 :     int idx;
123 : jhr 1106
124 : jhr 1544 sum = r0;
125 : jhr 1509 if (DOT(sum, r1) > 0)
126 : jhr 1184 sum += r1;
127 :     else
128 :     sum -= r1;
129 : jhr 1509 if (DOT(sum, r2) > 0)
130 : jhr 1184 sum += r2;
131 :     else
132 :     sum -= r2;
133 : lamonts 1513 // find largest component, to get most stable expression for a perpendicular vector
134 : jhr 1184 sqr = sum*sum;
135 :     idx = 0;
136 : jhr 1511 if (SUB(sqr,0) < SUB(sqr,1))
137 : jhr 1184 idx = 1;
138 : jhr 1512 if (SUB(sqr, idx) < SUB(sqr,2))
139 : jhr 1184 idx = 2;
140 : lamonts 1513
141 : jhr 1184 if (0 == idx) {
142 : jhr 1511 rets[0] = VEC(SUB(sum,1) - SUB(sum,2), -SUB(sum,0), SUB(sum,0));
143 : jhr 1184 } else if (1 == idx) {
144 : jhr 1511 rets[0] = VEC(-SUB(sum,1), SUB(sum,0) - SUB(sum,2), SUB(sum,1));
145 : jhr 1184 } else {
146 : jhr 1511 rets[0] = VEC(-SUB(sum,2), SUB(sum,2), SUB(sum,0) - SUB(sum,1));
147 : jhr 1184 }
148 : lamonts 1513
149 : jhr 1509 rets[1] = CROSS(rets[0], sum);
150 : jhr 1184 return;
151 : jhr 1106 }
152 :    
153 :     /*
154 :     ** Eigensolver for symmetric 3x3 matrix:
155 :     **
156 :     ** M00 M01 M02
157 :     ** M01 M11 M12
158 :     ** M02 M12 M22
159 :     **
160 : jhr 1174 ** Must be passed pointer to eval vector, and will compute eigenvalues
161 : jhr 1106 ** only if evec[9] is non-NULL. Computed eigenvectors are at evec+0,
162 :     ** evec+3, and evec+6.
163 :     **
164 :     ** Return value indicates something about the eigenvalue solution to
165 :     ** the cubic characteristic equation; see ROOT_ #defines above
166 :     **
167 :     ** HEY: the numerical precision issues here are very subtle, and
168 :     ** merit some more scrutiny. With evals (1.000001, 1, 1), for example,
169 :     ** whether it comes back as a single/double root, vs three distinct roots,
170 :     ** is determines by the comparison between "D" and "epsilon", and the
171 :     ** setting of epsilon seems pretty arbitrary at this point...
172 :     **
173 :     */
174 : jhr 1544 int Diderot_evals3x3 (
175 :     Diderot_real_t eval[3],
176 :     const Diderot_real_t _M00, const Diderot_real_t _M01, const Diderot_real_t _M02,
177 :     const Diderot_real_t _M11, const Diderot_real_t _M12,
178 :     const Diderot_real_t _M22)
179 : jhr 1174 {
180 :     Diderot_real_t mean, norm, rnorm, Q, R, QQQ, D, theta, M00, M01, M02, M11, M12, M22;
181 :     // FIXME: probably want a different value of epsilon for single-precision
182 :     Diderot_real_t epsilon = 1.0E-12;
183 :     int roots;
184 : jhr 1106
185 : jhr 1174 /* copy the given matrix elements */
186 :     M00 = _M00;
187 :     M01 = _M01;
188 :     M02 = _M02;
189 :     M11 = _M11;
190 :     M12 = _M12;
191 :     M22 = _M22;
192 : jhr 1106
193 : jhr 1174 /*
194 :     ** subtract out the eigenvalue mean (will add back to evals later);
195 :     ** helps with numerical stability
196 :     */
197 :     mean = (M00 + M11 + M22)/3.0;
198 :     M00 -= mean;
199 :     M11 -= mean;
200 :     M22 -= mean;
201 :    
202 :     /*
203 :     ** divide out L2 norm of eigenvalues (will multiply back later);
204 :     ** this too seems to help with stability
205 :     */
206 :     norm = SQRT(M00*M00 + 2*M01*M01 + 2*M02*M02 +
207 :     M11*M11 + 2*M12*M12 +
208 :     M22*M22);
209 :     // QUESTION: what if norm is very close to zero?
210 :     rnorm = norm ? 1.0/norm : 1.0;
211 :     M00 *= rnorm;
212 :     M01 *= rnorm;
213 :     M02 *= rnorm;
214 :     M11 *= rnorm;
215 :     M12 *= rnorm;
216 :     M22 *= rnorm;
217 : jhr 1106
218 : jhr 1174 /* this code is a mix of prior Teem code and ideas from Eberly's
219 :     * "Eigensystems for 3 x 3 Symmetric Matrices (Revisited)"
220 :     */
221 :     Q = (M01*M01 + M02*M02 + M12*M12 - M00*M11 - M00*M22 - M11*M22)/3.0;
222 :     QQQ = Q*Q*Q;
223 :     R = (M00*M11*M22 + M02*(2*M01*M12 - M02*M11)
224 :     - M00*M12*M12 - M01*M01*M22)/2.0;
225 :     D = QQQ - R*R;
226 :     if (D > epsilon) {
227 :     /* three distinct roots- this is the most common case */
228 :     double mm, ss, cc;
229 :     theta = ATAN2(SQRT(D), R)/3.0;
230 :     mm = SQRT(Q);
231 :     ss = SIN(theta);
232 :     cc = COS(theta);
233 :     eval[0] = 2*mm*cc;
234 :     eval[1] = mm*(-cc + M_SQRT3*ss);
235 :     eval[2] = mm*(-cc - M_SQRT3*ss);
236 :     roots = ROOT_THREE;
237 :     }
238 :     /* else D is near enough to zero */
239 :     else if (R < -epsilon || epsilon < R) {
240 :     double U;
241 :     /* one double root and one single root */
242 :     U = CBRT(R); /* cube root function */
243 :     if (U > 0) {
244 :     eval[0] = 2*U;
245 :     eval[1] = -U;
246 :     eval[2] = -U;
247 :     }
248 :     else {
249 :     eval[0] = -U;
250 :     eval[1] = -U;
251 :     eval[2] = 2*U;
252 :     }
253 :     roots = ROOT_SINGLE_DOUBLE;
254 :     }
255 :     else {
256 :     /* a triple root! */
257 :     eval[0] = eval[1] = eval[2] = 0.0;
258 :     roots = ROOT_TRIPLE;
259 :     }
260 :    
261 :     /* multiply back by eigenvalue L2 norm */
262 :     eval[0] /= rnorm;
263 :     eval[1] /= rnorm;
264 :     eval[2] /= rnorm;
265 :    
266 :     /* add back in the eigenvalue mean */
267 :     eval[0] += mean;
268 :     eval[1] += mean;
269 :     eval[2] += mean;
270 :    
271 :     return roots;
272 : jhr 1106 }
273 :    
274 : jhr 1544 int Diderot_evecs3x3 (
275 : jhr 1174 Diderot_real_t eval[3], Diderot_vec3_t evecs[3],
276 :     const Diderot_real_t _M00, const Diderot_real_t _M01, const Diderot_real_t _M02,
277 :     const Diderot_real_t _M11, const Diderot_real_t _M12,
278 :     const Diderot_real_t _M22)
279 :     {
280 :     Diderot_vec3_t r0, r1, r2, crs;
281 :     Diderot_real_t len, dot;
282 : jhr 1106
283 : jhr 1509 /* BEGIN #include "teigen-evals-A.c" */
284 :     Diderot_real_t mean, norm, rnorm, Q, R, QQQ, D, theta, M00, M01, M02, M11, M12, M22;
285 :     Diderot_real_t epsilon = 1.0E-12;
286 :     int roots;
287 : jhr 1106
288 : jhr 1509 /* copy the given matrix elements */
289 :     M00 = _M00;
290 :     M01 = _M01;
291 :     M02 = _M02;
292 :     M11 = _M11;
293 :     M12 = _M12;
294 :     M22 = _M22;
295 :    
296 :     /*
297 :     ** subtract out the eigenvalue mean (will add back to evals later);
298 :     ** helps with numerical stability
299 :     */
300 :     mean = (M00 + M11 + M22)/3.0;
301 :     M00 -= mean;
302 :     M11 -= mean;
303 :     M22 -= mean;
304 :    
305 :     /*
306 :     ** divide out L2 norm of eigenvalues (will multiply back later);
307 :     ** this too seems to help with stability
308 :     */
309 :     norm = SQRT(M00*M00 + 2*M01*M01 + 2*M02*M02 +
310 :     M11*M11 + 2*M12*M12 +
311 :     M22*M22);
312 :     rnorm = norm ? 1.0/norm : 1.0;
313 :     M00 *= rnorm;
314 :     M01 *= rnorm;
315 :     M02 *= rnorm;
316 :     M11 *= rnorm;
317 :     M12 *= rnorm;
318 :     M22 *= rnorm;
319 :    
320 :     /* this code is a mix of prior Teem code and ideas from Eberly's
321 :     * "Eigensystems for 3 x 3 Symmetric Matrices (Revisited)"
322 :     */
323 :     Q = (M01*M01 + M02*M02 + M12*M12 - M00*M11 - M00*M22 - M11*M22)/3.0;
324 :     QQQ = Q*Q*Q;
325 :     R = (M00*M11*M22 + M02*(2*M01*M12 - M02*M11) - M00*M12*M12 - M01*M01*M22)/2.0;
326 :     D = QQQ - R*R;
327 :     if (D > epsilon) {
328 :     /* three distinct roots- this is the most common case */
329 :     double mm, ss, cc;
330 :     theta = ATAN2(SQRT(D), R)/3.0;
331 :     mm = SQRT(Q);
332 :     ss = SIN(theta);
333 :     cc = COS(theta);
334 :     eval[0] = 2*mm*cc;
335 :     eval[1] = mm*(-cc + SQRT(3.0)*ss);
336 :     eval[2] = mm*(-cc - SQRT(3.0)*ss);
337 :     roots = ROOT_THREE;
338 :     }
339 :     /* else D is near enough to zero */
340 :     else if (R < -epsilon || epsilon < R) {
341 :     double U;
342 :     /* one double root and one single root */
343 :     U = CBRT(R); /* cube root function */
344 :     if (U > 0) {
345 :     eval[0] = 2*U;
346 :     eval[1] = -U;
347 :     eval[2] = -U;
348 :     } else {
349 :     eval[0] = -U;
350 :     eval[1] = -U;
351 :     eval[2] = 2*U;
352 :     }
353 :     roots = ROOT_SINGLE_DOUBLE;
354 :     }
355 :     else {
356 :     /* a triple root! */
357 :     eval[0] = eval[1] = eval[2] = 0.0;
358 :     roots = ROOT_TRIPLE;
359 :     }
360 :     /* END #include "teigen-evals-A.c" */
361 :    
362 :     /* r0, r1, r2 are the vectors we manipulate to find the nullspaces of M - lambda*I */
363 :     r0 = VEC(0.0, M01, M02);
364 :     r1 = VEC(M01, 0.0, M12);
365 :     r2 = VEC(M02, M12, 0.0);
366 :     Diderot_vec3_t ev = VEC(eval[0], eval[1], eval[2]);
367 : jhr 1174 if (ROOT_THREE == roots) {
368 : jhr 1509 evecs[0] = nullspace1 (
369 :     VEC(M00 - eval[0], M01, M02),
370 :     VEC(M01, M11 - eval[0], M12),
371 :     VEC(M02, M12, M22 - eval[0]));
372 :     evecs[1] = nullspace1 (
373 :     VEC(M00 - eval[1], M01, M02),
374 :     VEC(M01, M11 - eval[1], M12),
375 :     VEC(M02, M12, M22 - eval[1]));
376 :     evecs[2] = nullspace1 (
377 :     VEC(M00 - eval[2], M01, M02),
378 :     VEC(M01, M11 - eval[2], M12),
379 :     VEC(M02, M12, M22 - eval[2]));
380 : jhr 1106 }
381 : jhr 1174 else if (ROOT_SINGLE_DOUBLE == roots) {
382 :     if (eval[1] == eval[2]) {
383 :     /* one big (eval[0]) , two small (eval[1,2]) */
384 : jhr 1509 evecs[0] = nullspace1 (
385 :     VEC(M00 - eval[0], M01, M02),
386 :     VEC(M01, M11 - eval[0], M12),
387 :     VEC(M02, M12, M22 - eval[0]));
388 :     nullspace2 (evecs+1,
389 :     VEC(M00 - eval[1], M01, M02),
390 :     VEC(M01, M11 - eval[1], M12),
391 :     VEC(M02, M12, M22 - eval[1]));
392 : jhr 1174 }
393 :     else {
394 :     /* two big (eval[0,1]), one small (eval[2]) */
395 : jhr 1509 nullspace2 (evecs,
396 :     VEC(M00 - eval[0], M01, M02),
397 :     VEC(M01, M11 - eval[0], M12),
398 :     VEC(M02, M12, M22 - eval[0]));
399 :     evecs[2] = nullspace1 (
400 :     VEC(M00 - eval[2], M01, M02),
401 :     VEC(M01, M11 - eval[2], M12),
402 :     VEC(M02, M12, M22 - eval[2]));
403 : jhr 1174 }
404 :     }
405 : jhr 1106 else {
406 : jhr 1174 /* ROOT_TRIPLE == roots; use any basis for eigenvectors */
407 : jhr 1509 evecs[0] = VEC(1, 0, 0);
408 :     evecs[1] = VEC(0, 1, 0);
409 :     evecs[2] = VEC(0, 0, 1);
410 : jhr 1106 }
411 : jhr 1184 /* we always make sure it's really orthonormal; keeping fixed the
412 : jhr 1174 * eigenvector associated with the largest-magnitude eigenvalue
413 :     */
414 :     if (ABS(eval[0]) > ABS(eval[2])) {
415 : jhr 1184 /* normalize evec[0] but don't move it */
416 : jhr 1509 evecs[0] = NORMALIZE(evecs[0]);
417 :     evecs[1] -= SCALE(DOT(evecs[0], evecs[1]), evecs[0]);
418 :     evecs[1] = NORMALIZE(evecs[1]);
419 :     evecs[2] -= SCALE(DOT(evecs[0], evecs[2]), evecs[0]);
420 :     evecs[2] -= SCALE(DOT(evecs[1], evecs[2]), evecs[1]);
421 :     evecs[2] = NORMALIZE(evecs[2]);
422 : jhr 1174 }
423 : jhr 1184 else {
424 :     /* normalize evec[2] but don't move it */
425 : jhr 1509 evecs[2] = NORMALIZE(evecs[2]);
426 :     evecs[1] -= SCALE(DOT(evecs[2], evecs[1]), evecs[2]);
427 :     evecs[1] = NORMALIZE(evecs[1]);
428 :     evecs[0] -= SCALE(DOT(evecs[1], evecs[0]), evecs[1]);
429 :     evecs[0] -= SCALE(DOT(evecs[2], evecs[0]), evecs[2]);
430 :     evecs[0] = NORMALIZE(evecs[0]);
431 : jhr 1184 }
432 : jhr 1106
433 :     /* to be nice, make it right-handed */
434 : jhr 1509 crs = CROSS(evecs[0], evecs[1]);
435 :     if (0 > DOT(crs, evecs[2])) {
436 :     evecs[2] = -evecs[2];
437 : jhr 1174 }
438 : jhr 1106
439 : jhr 1509 /* BEGIN #include "teigen-evals-B.c" */
440 : jhr 1174 /* multiply back by eigenvalue L2 norm */
441 :     eval[0] /= rnorm;
442 :     eval[1] /= rnorm;
443 :     eval[2] /= rnorm;
444 : jhr 1106
445 : jhr 1174 /* add back in the eigenvalue mean */
446 :     eval[0] += mean;
447 :     eval[1] += mean;
448 :     eval[2] += mean;
449 : jhr 1509 /* END #include "teigen-evals-B.c" */
450 : jhr 1174
451 :     return roots;
452 : jhr 1106 }

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0