From 368c28bcf932224a9fad7815b182480d759e874e Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Thu, 14 Dec 2017 07:38:36 -0600 Subject: [PATCH] HYC: Made vectors and matrices aligned to 4 and 16 bytes, respectively --- src/useful.h | 49 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/src/useful.h b/src/useful.h index addd00f..d3a70fb 100644 --- a/src/useful.h +++ b/src/useful.h @@ -12,11 +12,21 @@ #define DEVICE #endif +#if defined(__CUDACC__) // NVCC + #define MY_ALIGN(n) __align__(n) +#elif defined(__GNUC__) // GCC + #define MY_ALIGN(n) __attribute__((aligned(n))) +#elif defined(_MSC_VER) // MSVC + #define MY_ALIGN(n) __declspec(align(n)) +#else + #error "Please provide a definition for MY_ALIGN macro for your host compiler!" +#endif + #include <cmath> #include <cstring> #include <cstdio> #include <cstdlib> -//#include <cuda_runtime.h> +#include <cuda_runtime.h> // using namespace std; @@ -119,13 +129,14 @@ String operator+(String s1, String s2); // class Vector3 // Operations on 3D float vectors // -class Vector3 { +class MY_ALIGN(16) Vector3 { public: HOST DEVICE inline Vector3() : x(0), y(0), z(0) {} HOST DEVICE inline Vector3(float s):x(s), y(s), z(s) {} HOST DEVICE inline Vector3(const Vector3& v):x(v.x), y(v.y), z(v.z) {} HOST DEVICE inline Vector3(float x0, float y0, float z0) : x(x0), y(y0), z(z0) {} HOST DEVICE inline Vector3(const float* d) : x(d[0]), y(d[1]), z(d[2]) {} + DEVICE inline Vector3(const float4 a) : x(a.x ), y(a.y ), z(a.z ) {} static Vector3 random(float s); @@ -240,9 +251,9 @@ public: HOST DEVICE inline void print() const { printf("%0.3f %0.3f %0.3f\n", x,y,z); } - + + float x, y, z, w; //append a member w String toString() const; - float x, y, z; }; HOST DEVICE inline Vector3 operator*(float s, Vector3 v) { @@ -267,7 +278,7 @@ HOST DEVICE inline Vector3 operator/(float s, Vector3 v) { // class Matrix3 // Operations on 3D float matrices -class Matrix3 { +class MY_ALIGN(16) Matrix3 { friend class TrajectoryWriter; friend class BaseGrid; friend class RigidBodyController; /* for trajectory writing */ @@ -380,19 +391,29 @@ public: Matrix3 inverse() const; float det() const; - + //Han-Yi Chou HOST DEVICE inline Matrix3 normalized() const { + Vector3 x = this->ex(); Vector3 y = this->ey(); + /* + x = x / x.length(); float error = x.dot(y); - x = x-(0.5*error)*y; - y = y-(0.5*error)*x; + y = y-(error*x); + y = y / y.length(); Vector3 z = x.cross(y); - - x = (0.5*(3-x.dot(x)))*x; /* approximate normalization */ - y = (0.5*(3-y.dot(y)))*y; - z = (0.5*(3-z.dot(z)))*z; - return Matrix3(x,y,z); + z = z / z.length();*/ + //x = (0.5*(3-x.dot(x)))*x; /* approximate normalization */ + //y = (0.5*(3-y.dot(y)))*y; + //z = (0.5*(3-z.dot(z)))*z; + //return Matrix3(x,y,z); + Vector3 z = x.cross(y); + z = z / z.length(); + x = x / x.length(); + y = z.cross(x); + y = y / y.length(); + + return Matrix3(x,y,z); } HOST DEVICE void setIsDiag() { @@ -428,7 +449,7 @@ private: // class IndexList // A growable list of integers. -class IndexList { +class MY_ALIGN(16) IndexList { public: IndexList(); IndexList(const IndexList& l); -- GitLab