/*
 * Revision 622
 * Mon Mar 14 19:56:40 2011 UTC
 * File size: 9765 byte(s)
 *   Added runtime interface to support creation of initial array/collection
 *   of strands
 */
/*! \file diderot.h
 *
 * \author John Reppy
 *
 * This is the interface to the Diderot runtime for the C target.  For now,
 * we are targeting single-precision computations.
 */

/*
 * COPYRIGHT (c) 2010 The Diderot Project (http://diderot-language.cs.uchicago.edu)
 * All rights reserved.
 */

#ifndef _DIDEROT_H_
#define _DIDEROT_H_

/* gcc has a bug that breaks code that uses 8-byte vectors (e.g., vec2f_t), but clang
 * handles the code correctly.  VEC2_OK is defined only when compiling with clang;
 * the declarations below test it to choose between 8-byte and padded 16-byte
 * two-element vectors.
 */
#if defined(__clang__)
#  define VEC2_OK
#endif

/* STATIC_INLINE is the storage/inline specifier used for the helper functions
 * defined in this header: "static inline" when NDEBUG is defined, and plain
 * "static" otherwise.
 */
#ifdef NDEBUG
#define STATIC_INLINE	static inline
#else
#define STATIC_INLINE	static
#endif

#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include <math.h>

/* Status code returned by the Diderot library calls declared in this header. */
typedef enum {
    DIDEROT_OK = 0,		// success
    DIDEROT_FAIL = -1		// failure
} Status_t;

/* update method return type */

/* SSE vector types */
typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));

/* Single-precision vectors.  The two-element vector is 8 bytes only when
 * VEC2_OK is defined; otherwise it is padded to 16 bytes.
 */
#ifdef VEC2_OK
typedef float vec2f_t __attribute__ ((vector_size (8)));
#else
typedef float vec2f_t __attribute__ ((vector_size (16)));  // pad so that gcc aligns correctly
#endif
typedef float vec4f_t __attribute__ ((vector_size (16)));
typedef float vec8f_t __attribute__ ((vector_size (32)));

/* 32-bit integer vectors.  As with vec2f_t, the two-element vector is 8 bytes
 * only when VEC2_OK is defined; otherwise it is padded to 16 bytes.
 */
#ifdef VEC2_OK
typedef int32_t vec2i_t __attribute__ ((vector_size (8)));
#else
typedef int32_t vec2i_t __attribute__ ((vector_size (16)));  // pad so that gcc aligns correctly
#endif
typedef int32_t vec4i_t __attribute__ ((vector_size (16)));
typedef int32_t vec8i_t __attribute__ ((vector_size (32)));

/* Double-precision vectors.  The element type is double so that the lanes line
 * up with the `double r[]` arrays of the union2d_t/union4d_t/union8d_t overlays
 * (a float element type would give twice as many lanes of the wrong type).
 */
typedef double vec2d_t __attribute__ ((vector_size (16)));
typedef double vec4d_t __attribute__ ((vector_size (32)));
typedef double vec8d_t __attribute__ ((vector_size (64)));

/* 64-bit integer vectors of 2, 4, and 8 lanes (16, 32, and 64 bytes). */
typedef int64_t vec2l_t __attribute__ ((vector_size (2 * sizeof(int64_t))));
typedef int64_t vec4l_t __attribute__ ((vector_size (4 * sizeof(int64_t))));
typedef int64_t vec8l_t __attribute__ ((vector_size (8 * sizeof(int64_t))));

/* Overlays giving element-wise access (r) to the single-precision vector
 * types (v).
 */
typedef union {
    float	r[2];
    vec2f_t	v;
} union2f_t;

typedef union {
    float	r[4];
    vec4f_t	v;
} union4f_t;

typedef union {
    float	r[8];
    vec8f_t	v;
} union8f_t;

/* Overlays giving element-wise access (i) to the 32-bit integer vector
 * types (v).
 */
typedef union {
    int32_t	i[2];
    vec2i_t	v;
} union2i_t;

typedef union {
    int32_t	i[4];
    vec4i_t	v;
} union4i_t;

typedef union {
    int32_t	i[8];
    vec8i_t	v;
} union8i_t;

/* Overlays giving element-wise access (r) to the double-precision vector
 * types (v).
 */
typedef union {
    double	r[2];
    vec2d_t	v;
} union2d_t;

typedef union {
    double	r[4];
    vec4d_t	v;
} union4d_t;

typedef union {
    double	r[8];
    vec8d_t	v;
} union8d_t;

/* Overlays giving element-wise access to the 64-bit integer vector types (v).
 * Note that the array member is named r here, not i as in the 32-bit unions.
 */
typedef union {
    int64_t	r[2];
    vec2l_t	v;
} union2l_t;

typedef union {
    int64_t	r[4];
    vec4l_t	v;
} union4l_t;

typedef union {
    int64_t	r[8];
    vec8l_t	v;
} union8l_t;

/* Vector lengths must be a power of 2.  Three-element vectors are useful, so
 * each vec3 type (and its union) is an alias for the corresponding
 * four-element type.
 */

/* vector aliases */
typedef vec4f_t vec3f_t;
typedef vec4d_t vec3d_t;
typedef vec4i_t vec3i_t;
typedef vec4l_t vec3l_t;

/* union aliases */
typedef union4f_t union3f_t;
typedef union4d_t union3d_t;
typedef union4i_t union3i_t;
typedef union4l_t union3l_t;

/* typedefs for Diderot types
 *
 * Exactly one of the two precision-specific sets may be declared; declaring
 * both gives conflicting definitions of the same typedef names.
 *
 * NOTE(review): the conditional that selected between the two sets is not
 * present in this copy of the file.  The macro name DIDEROT_DOUBLE_PRECISION
 * and the single-precision default (chosen because the file header says the
 * runtime currently targets single precision) are assumptions -- confirm
 * against the build configuration.
 */
#if defined(DIDEROT_DOUBLE_PRECISION)
typedef int64_t Diderot_int_t;
typedef double Diderot_real_t;
typedef union4d_t Diderot_vec3_t;	// padded to four lanes
typedef union4d_t Diderot_vec4_t;
#else
typedef int32_t Diderot_int_t;
typedef float Diderot_real_t;
typedef union4f_t Diderot_vec3_t;	// padded to fit in SSE register
typedef union4f_t Diderot_vec4_t;
#endif
typedef const char *Diderot_string_t;

/* 2x2 and 3x3 matrices are arrays of 4 and 9 Diderot_real_t elements. */
typedef Diderot_real_t Diderot_Mat2x2_t[2 * 2];
typedef Diderot_real_t Diderot_Mat3x3_t[3 * 3];

/* A 4x4 matrix is represented as four row vectors.  (An alternative, unused
 * representation is a struct holding a 16-byte aligned `float m[16]` of
 * individual elements.)
 */
typedef union4f_t Diderot_Mat4x4_t[4];

/* Wrapper for 1D image data. */
typedef struct {
    /* dimension (== 1) */
    uint32_t		dim;
    /* size along the single axis */
    uint32_t		size[1];
    /* the image data; the element layout is not described in this header */
    void		*data;
    /* image to world-space transform */
    Diderot_Mat2x2_t	m;
    /* world to image-space transform (m inverse) */
    Diderot_Mat2x2_t	mInv;
} Diderot_image1D_t;

/* Wrapper for 2D image data. */
typedef struct {
    /* dimension (== 2) */
    uint32_t		dim;
    /* sizes along each axis (fast to slow) */
    uint32_t		size[2];
    /* the image data; the element layout is not described in this header */
    void		*data;
    /* image to world-space transform */
    Diderot_Mat3x3_t	m;
    /* world to image-space transform (m inverse) */
    Diderot_Mat3x3_t	mInv;
    /* image to world-space transform for gradients (m inverse transpose) */
    Diderot_Mat3x3_t	mInvT;
} Diderot_image2D_t;

/* Wrapper for 3D image data. */
typedef struct {
    /* dimension (== 3) */
    uint32_t		dim;
    /* sizes along each axis (fast to slow) */
    uint32_t		size[3];
    /* the image data; the element layout is not described in this header */
    void		*data;
    /* image to world-space transform */
    Diderot_Mat4x4_t	m;
    /* world to image-space transform (m inverse) */
    Diderot_Mat4x4_t	mInv;
    /* image to world-space transform for gradients (m inverse transpose) */
    Diderot_Mat4x4_t	mInvT;
} Diderot_image3D_t;

/* Function-pointer types for the per-strand entry points stored in Strand_t.
 * Both return a StrandStatus_t.  The meaning of the arguments is not
 * documented in this header.
 */
typedef StrandStatus_t (*strand_init_t) (
    void *,
    int,
    int);

typedef StrandStatus_t (*update_method_t) (
    void *,
    void *);

/* Strand descriptor: the hooks generated by the compiler for the runtime. */
typedef struct {
    /* name of strand */
    char		*name;
    /* size of strand state (presumably in bytes, going by the name -- confirm) */
    size_t		stateSzb;
    /* initialization function */
    strand_init_t	init;
    /* update method */
    update_method_t	update;
} Strand_t;

/* the number of strands in the program */
extern int Diderot_NumStrands;

/* array of Diderot_NumStrands strand descriptors */
extern Strand_t Diderot_Strands[];

/* Description of the initial set of strands. */
typedef struct {
    /* is the initialization an array or collection? */
    bool		isArray;
    /* depth of iteration nesting */
    uint32_t		nDims;
    /* array of nDims base indices */
    int32_t		*base;
    /* array of nDims iteration sizes */
    uint32_t		*size;
} Diderot_Initially_t;

/* Opaque world type; struct struct_world is not defined in this header. */
typedef struct struct_world Diderot_World_t;

/* Entry points declared here but not defined in this header.  Both are
 * declared with an empty (unspecified) parameter list.
 */
extern void Diderot_InitGlobals ();

extern Diderot_World_t *Diderot_Initially ();

/* Diderot library functions */

/* Block allocation of an initial collection of strands.  Takes the strand
 * descriptor and the description of the initial iteration, and returns a
 * pointer to a world.
 */
extern Diderot_World_t *Diderot_AllocInitially (
    Strand_t *strand,
    Diderot_Initially_t *init);

/* Accessors for strand i of the world wrld.  Diderot_InState and
 * Diderot_OutState return strand state pointers; Diderot_IsActive returns a
 * boolean.  The precise semantics are defined by the runtime implementation.
 */
extern void *Diderot_InState (
    Diderot_World_t *wrld,
    uint32_t i);

extern void *Diderot_OutState (
    Diderot_World_t *wrld,
    uint32_t i);

extern bool Diderot_IsActive (
    Diderot_World_t *wrld,
    uint32_t i);

/* Strand allocation (no initialization) for the given strand descriptor. */
extern void *Diderot_AllocStrand (
    Strand_t *strand);

/* Load image data from Nrrd files.  Each function takes the name of the image
 * and the address of an image pointer (presumably set on success -- confirm
 * against the implementation), and returns a Status_t.
 */
extern Status_t Diderot_LoadImage1D (
    Diderot_string_t name,
    Diderot_image1D_t **img);

extern Status_t Diderot_LoadImage2D (
    Diderot_string_t name,
    Diderot_image2D_t **img);

extern Status_t Diderot_LoadImage3D (
    Diderot_string_t name,
    Diderot_image3D_t **img);

/* Functions to get input-parameter values, one per value type (string, float,
 * and 3-element float vector).  Each takes a string, a pointer to a value of
 * that type, and a boolean flag, and returns a Status_t.  The parameters are
 * unnamed in this header, so their exact roles must be checked against the
 * implementation.
 */
extern Status_t Diderot_InputString (
    const char *,
    const char **,
    bool);

extern Status_t Diderot_Inputf (
    const char *,
    float *,
    bool);

extern Status_t Diderot_InputVec3f (
    const char *,
    vec3f_t *,
    bool);

/********** scalar math functions **********/

/* Return the larger of a and b.  When `a < b` is false (which includes the
 * case where either argument is a NaN), a is returned.
 */
STATIC_INLINE float maxf (float a, float b)
{
    return (a < b)? b : a;
}

/********** 2-element vector functions **********/

/* Build a two-element vector <a, b>.  When VEC2_OK is not defined, vec2f_t is
 * a padded four-lane vector and the two extra lanes are set to zero.
 */
STATIC_INLINE vec2f_t vec2f (float a, float b)
{
#ifdef VEC2_OK
    return __extension__ (vec2f_t){ a, b };
#else
    return __extension__ (vec2f_t){ a, b, 0.0f, 0.0f };
#endif
}

/* Dot product of two 2-element vectors: u[0]*v[0] + u[1]*v[1]. */
STATIC_INLINE float dot2f (vec2f_t u, vec2f_t v)
{
    // go through the union member to read individual lanes of the product
    union2f_t uv;
    uv.v = u * v;
    return uv.r[0] + uv.r[1];
}

/********** 3-element vector functions **********/

/* Convert the first three lanes of a float vector to int32_t (C conversion,
 * which discards the fractional part); the fourth lane of the result is 0.
 */
STATIC_INLINE vec3i_t vec3ftoi (vec3f_t v)
{
    union4f_t u;
    u.v = v;
    return __extension__ (vec3i_t){ (int32_t)u.r[0], (int32_t)u.r[1], (int32_t)u.r[2], 0 };
}

/* Convert the first three lanes of an int32_t vector to float; the fourth
 * lane of the result is 0.
 */
STATIC_INLINE vec3f_t vec3itof (vec3i_t v)
{
    union4i_t u;
    u.v = v;
    return __extension__ (vec3f_t){ (float)u.i[0], (float)u.i[1], (float)u.i[2], 0 };
}

/* Build a three-element vector <a, b, c>; the padding (fourth) lane is 0. */
STATIC_INLINE vec3f_t vec3f (float a, float b, float c)
{
    return __extension__ (vec4f_t){ a, b, c, 0.0f };
}

/* Scale the first three lanes of v by s.  The padding lane of v is multiplied
 * by 0 (the padding lane of vec3f(s, s, s)).
 */
STATIC_INLINE vec3f_t scale3f (float s, vec3f_t v)
{
    return vec3f(s, s, s) * v;
}

/* Apply floorf to each of the first three lanes of v. */
STATIC_INLINE vec3f_t floor3f (vec3f_t v)
{
    union4f_t u;
    u.v = v;
    return vec3f(floorf(u.r[0]), floorf(u.r[1]), floorf(u.r[2]));
}

/* Dot product of two 3-element vectors; the padding lane is ignored. */
STATIC_INLINE float dot3f (vec3f_t u, vec3f_t v)
{
    // go through the union member to read individual lanes of the product
    union3f_t uv;
    uv.v = u * v;
    return uv.r[0] + uv.r[1] + uv.r[2];
}

/* Euclidean length of a 3-element vector: sqrtf(dot3f(v, v)). */
STATIC_INLINE float length3f (vec3f_t v)
{
    return sqrtf(dot3f(v, v));
}

/* Truncate the first three lanes of v to int32_t; the padding lane of the
 * result is 0.
 *
 * NOTE(review): the per-lane initializers were missing from this copy of the
 * file (only the trailing 0 remained, so the input was ignored).  They are
 * reconstructed from the function name -- confirm against the original.
 */
STATIC_INLINE vec3i_t truncToInt3f (vec3f_t v)
{
    union4f_t t;
    t.v = v;
    // C float-to-integer conversion discards the fractional part (truncation toward zero)
    return __extension__ (vec4i_t){
	(int32_t)t.r[0],
	(int32_t)t.r[1],
	(int32_t)t.r[2],
	0 };
}

/********** 4-element vector functions **********/

/* Build a four-element vector <a, b, c, d>. */
STATIC_INLINE vec4f_t vec4f (float a, float b, float c, float d)
{
    return __extension__ (vec4f_t){ a, b, c, d };
}

/* Dot product of two 4-element vectors. */
STATIC_INLINE float dot4f (vec4f_t u, vec4f_t v)
{
    // go through the union member to read individual lanes of the product
    union4f_t uv;
    uv.v = u * v;
    return uv.r[0] + uv.r[1] + uv.r[2] + uv.r[3];
}

/********** other Diderot support functions **********/

// check if pos is inside the img, assuming that we have a border of width s.
// For each of the three axes i the test is:  s-1 < pos[i] < img->size[i] - s.
// Returns false if any comparison fails (including when a coordinate is NaN).
STATIC_INLINE bool Diderot_Inside3f (vec3f_t posArg, Diderot_image3D_t *img, int s)
{
// NOTE: there might be a vectorized way to do this compare!
// cvtps2pi -- converts vector of floats to vector of int32_t values

    union4f_t pos;
    pos.v = posArg;
    for (int i = 0;  i < 3;  i++) {
	// compute the upper bound in signed 64-bit arithmetic: size[i] is unsigned, so
	// "size[i] - s" would wrap to a huge value when s is larger than the image
	int64_t upper = (int64_t)img->size[i] - (int64_t)s;
	if (! ((s-1 < pos.r[i]) && (pos.r[i] < upper)))
	    return false;
    }
    return true;
}

/* Transform the position posArg into the image space of img by applying the
 * first three rows of img->mInv (the world to image-space transform).
 *
 * NOTE(review): the arguments of the vec4f() call were missing from this copy
 * of the file.  They are reconstructed as the homogeneous point (x, y, z, 1)
 * so that the fourth column of mInv contributes to the result -- confirm
 * against the original.
 */
STATIC_INLINE vec3f_t Diderot_ToImageSpace3f (Diderot_image3D_t *img, vec3f_t posArg)
{
    union4f_t pos;
    pos.v = posArg;

    // the padding lane of a vec3f_t is not guaranteed to be 1, so set it explicitly
    vec4f_t p = vec4f(pos.r[0], pos.r[1], pos.r[2], 1.0f);

    // each row of mInv is a row vector, so each output coordinate is one dot product
    return vec3f(
	dot4f(img->mInv[0].v, p),
	dot4f(img->mInv[1].v, p),
	dot4f(img->mInv[2].v, p));
}

#endif /* !_DIDEROT_H_ */

