26 |
|
|
27 |
/* SSE vector types */ |
/* SSE vector types */ |
28 |
typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); |
typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); |
29 |
typedef float vec2f __attribute__ ((vector_size (8))); |
typedef float vec2f_t __attribute__ ((vector_size (8))); |
30 |
typedef float vec4f __attribute__ ((vector_size (16))); |
typedef float vec4f_t __attribute__ ((vector_size (16))); |
31 |
typedef float vec8f __attribute__ ((vector_size (32))); |
typedef float vec8f_t __attribute__ ((vector_size (32))); |
32 |
|
|
33 |
typedef int32_t vec2i __attribute__ ((vector_size (8))); |
typedef int32_t vec2i_t __attribute__ ((vector_size (8))); |
34 |
typedef int32_t vec4i __attribute__ ((vector_size (16))); |
typedef int32_t vec4i_t __attribute__ ((vector_size (16))); |
35 |
typedef int32_t vec8i __attribute__ ((vector_size (32))); |
typedef int32_t vec8i_t __attribute__ ((vector_size (32))); |
36 |
|
|
37 |
typedef double vec2d __attribute__ ((vector_size (16))); /* 2 x double; element type must match union2d's double r[2] */ |
typedef double vec2d_t __attribute__ ((vector_size (16))); /* 2 x double; element type must match union2d_t's double r[2] */ |
38 |
typedef double vec4d __attribute__ ((vector_size (32))); /* 4 x double */ |
typedef double vec4d_t __attribute__ ((vector_size (32))); /* 4 x double */ |
39 |
typedef double vec8d __attribute__ ((vector_size (64))); /* 8 x double */ |
typedef double vec8d_t __attribute__ ((vector_size (64))); /* 8 x double */ |
40 |
|
|
41 |
typedef int64_t vec2l __attribute__ ((vector_size (16))); |
typedef int64_t vec2l_t __attribute__ ((vector_size (16))); |
42 |
typedef int64_t vec4l __attribute__ ((vector_size (32))); |
typedef int64_t vec4l_t __attribute__ ((vector_size (32))); |
43 |
typedef int64_t vec8l __attribute__ ((vector_size (64))); |
typedef int64_t vec8l_t __attribute__ ((vector_size (64))); |
44 |
|
|
45 |
typedef union { float r[2]; vec2f v; } union2f; |
typedef union { float r[2]; vec2f_t v; } union2f_t; |
46 |
typedef union { float r[4]; vec4f v; } union4f; |
typedef union { float r[4]; vec4f_t v; } union4f_t; |
47 |
typedef union { float r[8]; vec8f v; } union8f; |
typedef union { float r[8]; vec8f_t v; } union8f_t; |
48 |
|
|
49 |
typedef union { int32_t i[2]; vec2i v; } union2i; |
typedef union { int32_t i[2]; vec2i_t v; } union2i_t; |
50 |
typedef union { int32_t i[4]; vec4i v; } union4i; |
typedef union { int32_t i[4]; vec4i_t v; } union4i_t; |
51 |
typedef union { int32_t i[8]; vec8i v; } union8i; |
typedef union { int32_t i[8]; vec8i_t v; } union8i_t; |
52 |
|
|
53 |
typedef union { double r[2]; vec2d v; } union2d; |
typedef union { double r[2]; vec2d_t v; } union2d_t; |
54 |
typedef union { double r[4]; vec4d v; } union4d; |
typedef union { double r[4]; vec4d_t v; } union4d_t; |
55 |
typedef union { double r[8]; vec8d v; } union8d; |
typedef union { double r[8]; vec8d_t v; } union8d_t; |
56 |
|
|
57 |
typedef union { int64_t r[2]; vec2l v; } union2l; |
typedef union { int64_t r[2]; vec2l_t v; } union2l_t; |
58 |
typedef union { int64_t r[4]; vec4l v; } union4l; |
typedef union { int64_t r[4]; vec4l_t v; } union4l_t; |
59 |
typedef union { int64_t r[8]; vec8l v; } union8l; |
typedef union { int64_t r[8]; vec8l_t v; } union8l_t; |
60 |
|
|
61 |
/* vector lengths must be power of 2, but vec3 is useful, so we pad to 4 */ |
/* vector lengths must be power of 2, but vec3 is useful, so we pad to 4 */ |
62 |
typedef vec4f vec3f; |
typedef vec4f_t vec3f_t; |
63 |
typedef vec4d vec3d; |
typedef union4f_t union3f_t; |
64 |
typedef vec4i vec3i; |
typedef vec4d_t vec3d_t; |
65 |
typedef vec4l vec3l; |
typedef union4d_t union3d_t; |
66 |
|
typedef vec4i_t vec3i_t; |
67 |
|
typedef union4i_t union3i_t; |
68 |
|
typedef vec4l_t vec3l_t; |
69 |
|
typedef union4l_t union3l_t; |
70 |
|
|
71 |
/* typedefs for Diderot types */ |
/* typedefs for Diderot types */ |
72 |
#if defined(DIDEROT_SINGLE_PRECISION) |
#if defined(DIDEROT_SINGLE_PRECISION) |
73 |
typedef int32_t Diderot_int_t; |
typedef int32_t Diderot_int_t; |
74 |
typedef float Diderot_real_t; |
typedef float Diderot_real_t; |
75 |
typedef union4f Diderot_vec3_t; // padded to fit in SSE register |
typedef union4f_t Diderot_vec3_t; // padded to fit in SSE register |
76 |
typedef union4f Diderot_vec4_t; |
typedef union4f_t Diderot_vec4_t; |
77 |
#else |
#else |
78 |
typedef int64_t Diderot_int_t; |
typedef int64_t Diderot_int_t; |
79 |
typedef double Diderot_real_t; |
typedef double Diderot_real_t; |
80 |
typedef union4d Diderot_vec3_t; // padded to 4 lanes (4 x double = 32 bytes, wider than a 128-bit SSE register) |
typedef union4d_t Diderot_vec3_t; // padded to 4 lanes (4 x double = 32 bytes, wider than a 128-bit SSE register) |
81 |
typedef union4d Diderot_vec4_t; |
typedef union4d_t Diderot_vec4_t; |
82 |
#endif |
#endif |
83 |
typedef const char *Diderot_string_t; |
typedef const char *Diderot_string_t; |
84 |
|
|
117 |
/* Diderot library functions */ |
/* Diderot library functions */ |
118 |
|
|
119 |
/* load image data from Nrrd files */ |
/* load image data from Nrrd files */ |
120 |
extern Status_t Diderot_LoadImage1D (Diderot_string_t name, Diderot_image1D_t *img); |
extern Status_t Diderot_LoadImage1D (Diderot_string_t name, Diderot_image1D_t **img); |
121 |
extern Status_t Diderot_LoadImage2D (Diderot_string_t name, Diderot_image2D_t *img); |
extern Status_t Diderot_LoadImage2D (Diderot_string_t name, Diderot_image2D_t **img); |
122 |
extern Status_t Diderot_LoadImage3D (Diderot_string_t name, Diderot_image3D_t *img); |
extern Status_t Diderot_LoadImage3D (Diderot_string_t name, Diderot_image3D_t **img); |
123 |
|
|
124 |
/* functions to get input-parameter values */ |
/* functions to get input-parameter values */ |
125 |
extern Status_t Diderot_InputString (const char *, const char **, bool); |
extern Status_t Diderot_InputString (const char *, const char **, bool); |
126 |
extern Status_t Diderot_InputReal (const char *, Diderot_real_t *, bool); |
extern Status_t Diderot_Inputf (const char *, float *, bool); |
127 |
extern Status_t Diderot_InputVec3 (const char *, Diderot_vec3_t *, bool); |
extern Status_t Diderot_InputVec3f (const char *, vec3f_t *, bool); |
128 |
|
|
129 |
|
/********** scalar math functions **********/ |
130 |
|
|
131 |
|
STATIC_INLINE float maxf (float a, float b) |
132 |
|
{ |
133 |
|
return (a < b)? b : a; |
134 |
|
} |
135 |
|
|
136 |
/********** 3-element vector functions **********/ |
/********** 3-element vector functions **********/ |
137 |
|
|
138 |
STATIC_INLINE vec3f Diderot_Vec3 (Diderot_real_t a, Diderot_real_t b, Diderot_real_t c) |
STATIC_INLINE vec3f_t vec3f (float a, float b, float c) |
139 |
|
{ |
140 |
|
return __extension__ (vec4f_t){ a, b, c, 0.0f }; |
141 |
|
} |
142 |
|
|
143 |
|
STATIC_INLINE vec3f_t scale3f (float s, vec3f_t v) |
144 |
{ |
{ |
145 |
return __extension__ (vec4f){ a, b, c, 0.0f }; |
return vec3f(s, s, s) * v; |
146 |
} |
} |
147 |
|
|
148 |
STATIC_INLINE vec4f Diderot_ScaleV3 (Diderot_real_t s, vec4f v) |
STATIC_INLINE vec3f_t floor3f (vec3f_t v) |
149 |
{ |
{ |
150 |
return Diderot_Vec3(s, s, s) * v; |
union4f_t u; |
151 |
|
u.v = v; |
152 |
|
return vec3f(floorf(u.r[0]), floorf(u.r[1]), floorf(u.r[2])); |
153 |
} |
} |
154 |
|
|
155 |
STATIC_INLINE vec3i Diderot_TruncToIntV3 (vec3f v) |
STATIC_INLINE float length3f (vec3f_t v) |
156 |
{ |
{ |
157 |
union4f t; |
union4f_t u; u.v = v; return sqrtf(u.r[0]*u.r[0] + u.r[1]*u.r[1] + u.r[2]*u.r[2]); /* Euclidean norm of lanes 0-2; the padding lane 3 is ignored */ |
158 |
|
} |
159 |
|
|
160 |
|
STATIC_INLINE vec3i_t truncToInt3f (vec3f_t v) |
161 |
|
{ |
162 |
|
union4f_t t; |
163 |
t.v = v; |
t.v = v; |
164 |
return __extension__ (vec4i){ |
return __extension__ (vec4i_t){ |
165 |
(int32_t)truncf(t.r[0]), |
(int32_t)truncf(t.r[0]), |
166 |
(int32_t)truncf(t.r[1]), |
(int32_t)truncf(t.r[1]), |
167 |
(int32_t)truncf(t.r[2]), |
(int32_t)truncf(t.r[2]), |
171 |
|
|
172 |
/********** 4-element vector functions **********/ |
/********** 4-element vector functions **********/ |
173 |
|
|
174 |
STATIC_INLINE vec4f Diderot_Vec4 (Diderot_real_t a, Diderot_real_t b, Diderot_real_t c, Diderot_real_t d) |
STATIC_INLINE vec4f_t vec4f (float a, float b, float c, float d) |
175 |
{ |
{ |
176 |
return __extension__ (vec4f){ a, b, c, d }; |
return __extension__ (vec4f_t){ a, b, c, d }; |
177 |
|
} |
178 |
|
|
179 |
|
STATIC_INLINE float dot4f (vec4f_t u, vec4f_t v) |
180 |
|
{ |
181 |
|
union4f_t a, b; a.v = u; b.v = v; return a.r[0]*b.r[0] + a.r[1]*b.r[1] + a.r[2]*b.r[2] + a.r[3]*b.r[3]; /* dot product: sum of the four lane-wise products */ |
182 |
} |
} |
183 |
|
|
184 |
|
|
186 |
|
|
187 |
// check if pos is inside the img, assuming that we have a border of width s. |
// check if pos is inside the img, assuming that we have a border of width s. |
188 |
// |
// |
189 |
STATIC_INLINE bool Diderot_Inside3D (Diderot_vec3_t pos, Diderot_image3D_t *img, int s) |
STATIC_INLINE bool Diderot_Inside3f (vec3f_t posArg, Diderot_image3D_t *img, int s) |
190 |
{ |
{ |
191 |
// NOTE: there might be a vectorized way to do this compare! |
// NOTE: there might be a vectorized way to do this compare! |
192 |
// cvtps2pi -- converts vector of floats to vector of int32_t values |
// cvtps2pi -- converts vector of floats to vector of int32_t values |
193 |
|
|
194 |
|
union4f_t pos; |
195 |
|
pos.v = posArg; |
196 |
return ((s <= pos.r[0]) && (pos.r[0] < (img->size[0] - s)) |
return ((s <= pos.r[0]) && (pos.r[0] < (img->size[0] - s)) |
197 |
&& (s <= pos.r[1]) && (pos.r[1] < (img->size[1] - s)) |
&& (s <= pos.r[1]) && (pos.r[1] < (img->size[1] - s)) |
198 |
&& (s <= pos.r[2]) && (pos.r[2] < (img->size[2] - s))); |
&& (s <= pos.r[2]) && (pos.r[2] < (img->size[2] - s))); |
199 |
} |
} |
200 |
|
|
201 |
|
vec3f_t Diderot_ToImageSpace3f (Diderot_image3D_t *img, vec3f_t posArg); |
202 |
|
|
203 |
#endif /* !_DIDEROT_H_ */ |
#endif /* !_DIDEROT_H_ */ |