From a8a60fa427396a8ba2e329392c9aa77fb55c36b5 Mon Sep 17 00:00:00 2001
From: Chris Maffeo <cmaffeo2@illinois.edu>
Date: Tue, 10 Nov 2015 19:10:22 -0600
Subject: [PATCH] copying memory for RB to device compiles

---
 BaseGrid.cu        |  10 +++
 BaseGrid.h         |   4 -
 Configuration.cpp  | 193 ++++++++++++++++++++++++++++++++++++++++-----
 Configuration.h    |   2 +-
 GrandBrownTown.cu  |   9 +++
 GrandBrownTown.cuh |   7 ++
 RigidBodyType.cu   |  21 ++++-
 RigidBodyType.h    |  46 +++++------
 makefile           |   6 +-
 runBrownTown.cpp   |   2 +
 10 files changed, 245 insertions(+), 55 deletions(-)

diff --git a/BaseGrid.cu b/BaseGrid.cu
index 5ef2390..6db492b 100644
--- a/BaseGrid.cu
+++ b/BaseGrid.cu
@@ -19,7 +19,17 @@ void BaseGrid::init() {
 }
 BaseGrid::BaseGrid() {
 	BaseGrid tmp(Matrix3(),Vector3(),1,1,1);
+	val = new float[1];
 	*this = tmp;									// TODO: verify that this is OK
+	
+	// basis = Matrix3();
+	// origin = Vector3();
+	// nx = 1;
+	// ny = 1;
+	// nz = 1;
+	
+	// init();
+	// zero();
 }
 
 // The most obvious of constructors.
diff --git a/BaseGrid.h b/BaseGrid.h
index b6ff917..9156f95 100644
--- a/BaseGrid.h
+++ b/BaseGrid.h
@@ -37,15 +37,12 @@ class BaseGrid {
 private:
   // Initialize the variables that get used a lot.
   // Also, allocate the main value array.
-	HOST DEVICE
   void init();
 
 public:
-	HOST DEVICE
 		BaseGrid(); // cmaffeo2 (2015) moved this out of protected, cause I wanted BaseGrid in a struct
 
   // The most obvious of constructors.
-	HOST DEVICE
 		BaseGrid(Matrix3 basis0, Vector3 origin0, int nx0, int ny0, int nz0);
 
   // Make an orthogonal grid given the box dimensions and resolution.
@@ -91,7 +88,6 @@ public:
   // Write the valies in a single column.
   virtual void writePotential(const char* fileName) const;
   
-	HOST DEVICE
 	virtual ~BaseGrid();
 
   void zero();
diff --git a/Configuration.cpp b/Configuration.cpp
index c1f1a43..1fddb08 100644
--- a/Configuration.cpp
+++ b/Configuration.cpp
@@ -175,16 +175,17 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) :
 	currSerial = 0;
 
 	// RigidBodies...
-	if (numRBs > 0) {
+	if (numRigidTypes > 0) {
 		printf("\nCounting rigid bodies specified in the configuration file.\n");
 		numRB = 0;
-		for (int i = 0; i < numRBs; i++) num += rigidBody[i].num;
+		for (int i = 0; i < numRigidTypes; i++) numRB += rigidBody[i].num;
 
 		// // state data
 		// rbPos = new Vector3[numRB * simNum];
 		// type = new int[numRB * simNum];
 		
 	}
+	printf("Initial RigidBodies: %d\n", numRB);
 	
 
 	// Create exclusions from the exclude rule, if it was specified in the config file
@@ -400,17 +401,13 @@ void Configuration::copyToCUDA() {
 	printf("Copying to GPU %d\n", GPUManager::current());
 
 	BrownianParticleType **part_addr = new BrownianParticleType*[numParts];
-	RigidBodyType **rb_addr = new RigidBodyType*[numRBs];	
+	RigidBodyType **rb_addr = new RigidBodyType*[numRigidTypes];	
 
 	// Copy the BaseGrid objects and their member variables/objects
 	gpuErrchk(cudaMalloc(&part_d, sizeof(BrownianParticleType*) * numParts));
-	gpuErrchk(cudaMalloc(&rbType_d, sizeof(RigidBodyType*) * numRBs));
+	gpuErrchk(cudaMalloc(&rbType_d, sizeof(RigidBodyType*) * numRigidTypes));
 	// TODO: The above line fails when there is not enough memory. If it fails, stop.
 	
-	// TODO: what's going on here?
-	// pmf_h is made as a copy of *part[i].pmf, which is then asynchronously copied to Device, and
-	// is not deleted
-	// seems like bad code, but not 100% sure
 	for (int i = 0; i < numParts; i++) {
 		BaseGrid *pmf = NULL, *diffusionGrid = NULL;
 		BrownianParticleType *b = new BrownianParticleType(part[i]);
@@ -451,14 +448,174 @@ void Configuration::copyToCUDA() {
 				cudaMemcpyHostToDevice));
 	}
 
-	// TODO ? utilize Thrust more extensively 
-	for (int i = 0; i < numRBs; i++) {
-		gpuErrchk(cudaMalloc(&rbType_d, sizeof(RigidBodyType*) * numRBs));
-		rigidBody[i].potentialGrids_D = rigidBody[i].potentialGrids;
-		rigidBody[i].densityGrids_D = rigidBody[i].densityGrids;
+	printf("copying RBs\n");
+	// Copy rigidbody types 
+	// http://stackoverflow.com/questions/16024087/copy-an-object-to-device
+ 	for (int i = 0; i < numRigidTypes; i++) {
+		printf("working on RB %d\n",i);
+		RigidBodyType *rb = &(rigidBody[i]); // temporary for convenience
+		rb->updateRaw();
+
+		int ng = rb->numPotGrids;
+
+		// copy rigidbody to device
+		RigidBodyType *rb_d;
+    gpuErrchk(cudaMalloc((void **) &rb_d, sizeof(RigidBodyType)));
+		gpuErrchk(cudaMemcpy(rb_d, rb, sizeof(RigidBodyType),
+												 cudaMemcpyHostToDevice));
+
+		// copy rb->grid to device
+		BaseGrid * gtmp;
+		gtmp = new BaseGrid[ng];
+		size_t sz = sizeof(BaseGrid)*ng;
 		
-	}
-	
+		// allocate grids on device
+		// copy temporary host pointer to device pointer
+		// copy grids to device through temporary host poin
+		gpuErrchk(cudaMalloc((void **) &gtmp, sz));
+		gpuErrchk(cudaMemcpy(&(rb_d->rawPotentialGrids), &gtmp, 
+												 sizeof(BaseGrid*) * ng, cudaMemcpyHostToDevice ));
+		gpuErrchk(cudaMemcpy(gtmp, &(rb->rawPotentialGrids),
+												 sizeof(BaseGrid)  * ng, cudaMemcpyHostToDevice ));
+		
+		// RBTODO: segfault when gtmp is deleted --> why?
+		//    delete[] gtmp;
+
+
+		// // copy grid data to device
+		// for (int gid = 0; gid < ng; gid++) { 
+		// 	BaseGrid *g = &(rb->rawPotentialGrids[gid]); // convenience
+		// 	int len = g->getSize();
+		// 	float *tmpData;
+		// 	tmpData = new float[len];
+			
+    //   // allocate grid data on device
+		// 	// copy temporary host pointer to device pointer
+		// 	// copy data to device through temporary host pointer
+		// 	sz = sizeof(float*) * len;
+		// 	gpuErrchk(cudaMalloc((void **) &tmpData, sz)); 
+		// 	gpuErrchk(cudaMemcpy( (rb_d->rawPotentialGrids[gid].val), &tmpData,
+		// 												sizeof(float*), cudaMemcpyHostToDevice));
+		// 	sz = sizeof(float) * len;
+		// 	gpuErrchk(cudaMemcpy( tmpData, g->val, sz, cudaMemcpyHostToDevice));
+		// 	// RBTODO: why can't this be deleted? 
+		// 	// delete[] tmpData;
+		// }
+
+
+
+
+		// // gpuErrchk(cudaMemcpy(&(rb_d->potentialGrids[i]), &gtmp, 
+		// // 											sz, cudaMemcpyHostToDevice ));
+				
+		// // size_t sz;
+		// // for (int gid = 0; gid < ng; gid++) { 
+		// // 	gpuErrchk(cudaMalloc((void**) &(gtmp[i]), sizeof(BaseGrid)));
+		// // 	gpuErrchk(cudaMemcpy(&(gtmp[i]->potentialGrids[i]), &(gtmp[i]),
+		// // 												sizeof(BaseGrid*), cudaMemcpyHostToDevice));
+		// // }
+
+		
+		// {
+		// 	BaseGrid *tmpData;
+		// 	size_t sz = sizeof(BaseGrid)*ng;
+
+		// 	gpuErrchk(cudaMalloc((void **) &tmpData, sz));
+		// 	gpuErrchk(cudaMemcpy(&(rb_d->potentialGrids[i]), &tmpData, 
+		// 											 sizeof(BaseGrid*), cudaMemcpyHostToDevice ));
+		// 	sz = sizeof(float) * ng
+		// 	gpuErrchk(cudaMemcpy(tmpData, rbg->val, sz, cudaMemcpyHostToDevice));
+		// }
+
+
+			// size_t sz = sizeof(float) * rb->potentialGrids[gid]->getSize();
+			// // gpuErrchk(cudaMalloc(&g_d, sizeof(BaseGrid)));
+
+			// gpuErrchk(cudaMalloc(&tmpData, sz));
+			// gpuErrchk(cudaMemcpyAsync(g_d, rb->potentialGrids[gid], 
+			// 													sizeof(BaseGrid), cudaMemcpyHostToDevice));
+
+		  // gpuErrchk(cudaMemcpy(val_d, rb->potentialGrids[gid], sz, cudaMemcpyHostToDevice));
+			
+			// set rb pointers appropriately
+			// rb_d->potentialGrids[gid] = g_d;
+			// g_d->val = val_d; // hopefully?
+				
+
+
+	// 		BaseGrid *g_d;
+	// 		float *val_d;
+	// 		size_t sz = sizeof(float) * rb.potentialGrids[gid]->getSize();
+	// 		gpuErrchk(cudaMalloc(&g_d, sizeof(BaseGrid)));
+	// 		gpuErrchk(cudaMalloc(&val_d, sz));
+	// 		gpuErrchk(cudaMemcpyAsync(g_d, rb->potentialGrids[gid], 
+	// 															sizeof(BaseGrid), cudaMemcpyHostToDevice));
+	// 	  gpuErrchk(cudaMemcpy(val_d, rb->potentialGrids[gid], sz, cudaMemcpyHostToDevice));
+			
+	// 		// set rb pointers appropriately
+	// 		rb_d->potentialGrids[gid] = g_d;
+	// 		g_d->val = val_d; // hopefully?
+
+	// 	}
+
+
+	// // Copy rigidbody types 
+ 	// for (int i = 0; i < numRigidTypes; i++) {
+	// 	RigidBodyType *rb = rigidBody[i]; // temporary for convenience
+	// 	// gpuErrchk(cudaMemcpy(&rb, sizeof(RigidBodyType)));
+
+	// 	RigidBodyType *rb_d;
+
+		
+	// 	for (int gid = 0; i < rb.numPotGrids) { 
+	// 		BaseGrid *g_d;
+	// 		float *val_d;
+	// 		size_t sz = sizeof(float) * rb.potentialGrids[gid]->getSize();
+	// 		gpuErrchk(cudaMalloc(&g_d, sizeof(BaseGrid)));
+	// 		gpuErrchk(cudaMalloc(&val_d, sz));
+	// 		gpuErrchk(cudaMemcpyAsync(g_d, rb->potentialGrids[gid], 
+	// 															sizeof(BaseGrid), cudaMemcpyHostToDevice));
+	// 	  gpuErrchk(cudaMemcpy(val_d, rb->potentialGrids[gid], sz, cudaMemcpyHostToDevice));
+			
+	// 		// set rb pointers appropriately
+	// 		rb_d->potentialGrids[gid] = g_d;
+	// 		g_d->val = val_d; // hopefully?
+
+	// 	}
+	// 	for (int gid = 0; i < rb.numDenGrids) { 
+	// 		BaseGrid *g_d;
+	// 		float *val_d;
+	// 		size_t sz = sizeof(float) * rb->densityGrids[gid]->getSize();
+	// 		gpuErrchk(cudaMalloc(&g_d, sizeof(BaseGrid)));
+	// 		gpuErrchk(cudaMalloc(&val_d, sz));
+	// 		gpuErrchk(cudaMemcpyAsync(g_d, rb->densityGrids[gid], 
+	// 															sizeof(BaseGrid), cudaMemcpyHostToDevice));
+	// 	  gpuErrchk(cudaMemcpy(val_d, rb->densityGrids[gid], sz, cudaMemcpyHostToDevice));
+			
+	// 	}
+		
+		
+	// 	// RBTODO
+	// 	// b->pmf = pmf;
+	// 	// b->diffusionGrid = diffusionGrid;
+	// 	gpuErrchk(cudaMalloc(&rb_addr[i], sizeof(BrownianParticleType)));
+	// 	gpuErrchk(cudaMemcpyAsync( rb_addr[i], rb_d, 
+	// 														sizeof(BrownianParticleType), cudaMemcpyHostToDevice));
+		
+
+	// 	gpuErrchk(cudaMalloc(&rbType_d, sizeof(RigidBodyType*) * numRigidTypes));
+	// 	rigidBody[i].potentialGrids_D = rigidBody[i].potentialGrids;
+	// 	rigidBody[i].densityGrids_D = rigidBody[i].densityGrids;
+		
+	// 	gpuErrchk(cudaMemcpyAsync(rb_addr[i], rb, sizeof(RigidBodyType),
+	// 														cudaMemcpyHostToDevice));
+		
+	// 	gpuErrchk(cudaMemcpyAsync(part_d, part_addr, sizeof(BrownianParticleType*) * numParts,
+	// 														cudaMemcpyHostToDevice));
+
+	// 	}
+  }	
+	printf("Done copying RBs\n");
 
 	// kTGrid_d
 	kTGrid_d = NULL;
@@ -574,7 +731,7 @@ int Configuration::readParameters(const char * config_file) {
 	// Get the number of particles.
 	const int numParams = config.length();
 	numParts = config.countParameter("particle");
-	numRBs = config.countParameter("rigidBody");
+	numRigidTypes = config.countParameter("rigidBody");
 
 	// Allocate the particle variables.
 	part = new BrownianParticleType[numParts];
@@ -591,7 +748,7 @@ int Configuration::readParameters(const char * config_file) {
 	partTableIndex1 = new int[numParts*numParts];
 
   // Allocate rigid body types
-	rigidBody = new RigidBodyType[numRBs];
+	rigidBody = new RigidBodyType[numRigidTypes];
 	
 	int btfcap = 10;
 	bondTableFile = new String[btfcap];
@@ -765,7 +922,7 @@ int Configuration::readParameters(const char * config_file) {
 		} 
 		// RIGID BODY
 		else if (param == String("rigidBody")) {
-			part[++currPart] = BrownianParticleType(value);
+			// part[++currPart] = BrownianParticleType(value);
 			rigidBody[++currRB] = RigidBodyType(value);
 			currPartClass = partClassRB;
 		}
diff --git a/Configuration.h b/Configuration.h
index ecefd16..0734a33 100644
--- a/Configuration.h
+++ b/Configuration.h
@@ -192,7 +192,7 @@ public:
 
 	// RigidBody parameters.
 	RigidBodyType* rigidBody;
-	int numRBs;
+	int numRigidTypes;
 
 
 };
diff --git a/GrandBrownTown.cu b/GrandBrownTown.cu
index b25d8ec..4ded8c3 100644
--- a/GrandBrownTown.cu
+++ b/GrandBrownTown.cu
@@ -9,6 +9,9 @@ inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) {
 	}
 }
 
+#include "RigidBodyType.h" //temporary
+
+
 bool GrandBrownTown::DEBUG;
 
 cudaEvent_t START, STOP;
@@ -27,6 +30,11 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 		outCurrFiles.push_back(curr_file.str());
 		restartFiles.push_back(restart_file.str());
 		outFilePrefixes.push_back(out_prefix.str());
+		
+		printf("About to devicePrint\n");
+		devicePrint<<<1,1>>>(&(c.rigidBody[0]));
+		printf("Done with devicePrint\n");
+
 	}
 
 	GrandBrownTown::DEBUG = debug;
@@ -146,6 +154,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 	} else {
 		r_seed = seed + randomSeed;
 	}
+	printf("Setting up Random Generator\n");
 	randoGen = new Random(num * numReplicas, r_seed);
 	printf("Random Generator Seed: %lu -> %lu\n", randomSeed, r_seed);
 
diff --git a/GrandBrownTown.cuh b/GrandBrownTown.cuh
index 64853d3..7989fdd 100644
--- a/GrandBrownTown.cuh
+++ b/GrandBrownTown.cuh
@@ -141,3 +141,10 @@ Vector3 step(Vector3 r0, float kTlocal, Vector3 force, float diffusion,
 
 	return sys->getBasis().transform(l) + sys->getOrigin();
 }
+
+__global__ void devicePrint(RigidBodyType* rb) {
+	printf("RigidBodyType: numGrids = %d\n", rb->numPotGrids);
+}
+// __device__ void devicePrint(BaseGrid g) {
+// 	printf("RigidBodyType: numGrids = %d\n", numPotGrids);
+// };
diff --git a/RigidBodyType.cu b/RigidBodyType.cu
index cf2cc13..f8f1da6 100644
--- a/RigidBodyType.cu
+++ b/RigidBodyType.cu
@@ -10,11 +10,14 @@ void RigidBodyType::clear() {
 	// TODO: make sure that this actually removes grid data
 	potentialGrids.clear();
 	densityGrids.clear();
-	potentialGrids_D.clear();
-	densityGrids_D.clear();
 
+	if (numPotGrids > 0) delete[] rawPotentialGrids;
+	if (numDenGrids > 0) delete[] rawDensityGrids;
+	rawPotentialGrids = NULL;
+	rawDensityGrids = NULL;
 }
 
+
 // void RigidBodyType::copy(const RigidBodyType& src) {
 // 	this = new RigidBodyType(src.name);
 // 	num = src.num;
@@ -64,6 +67,8 @@ void RigidBodyType::addPotentialGrid(String s) {
 	BaseGrid g(token[1]);
 	
 	potentialGrids.push_back( g );
+	// 	[numPotGrids] = g;
+	// numPotGrids++;
 }
 void RigidBodyType::addDensityGrid(String s) {
 	// tokenize and return
@@ -79,3 +84,15 @@ void RigidBodyType::addDensityGrid(String s) {
 	
 	densityGrids.push_back( g );
 }
+
+void RigidBodyType::updateRaw() {
+	if (numPotGrids > 0) delete[] rawPotentialGrids;
+	if (numDenGrids > 0) delete[] rawDensityGrids;
+	numPotGrids = potentialGrids.size();
+	numDenGrids = densityGrids.size();
+	if (numPotGrids > 0)
+		rawPotentialGrids = new BaseGrid[numPotGrids];
+	if (numDenGrids > 0)
+		rawDensityGrids = new BaseGrid[numDenGrids];
+}
+
diff --git a/RigidBodyType.h b/RigidBodyType.h
index e1826d4..d20f1e6 100644
--- a/RigidBodyType.h
+++ b/RigidBodyType.h
@@ -2,13 +2,15 @@
 // Author: Chris Maffeo <cmaffeo2@illinois.edu>
 
 #pragma once
-// #include <vector>
-#include <thrust/host_vector.h>
-#include <thrust/device_vector.h>
+#include <vector>
+/* #include <thrust/host_vector.h> */
+/* #include <thrust/device_vector.h> */
 #include "Reservoir.h"
 #include "useful.h"
 #include "BaseGrid.h"
 
+#include <cstdio>
+
 class RigidBodyType {
 private:
 	// Deletes all members
@@ -25,25 +27,7 @@ public:
 RigidBodyType(const String& name = "") :
 	name(name), num(0),
 		reservoir(NULL), mass(1.0f), inertia(), transDamping(),
-		rotDamping() {
-		/* potentialGrids = 	*(new thrust::host_vector<BaseGrid>()); */
-		/* densityGrids = 	*(new thrust::host_vector<BaseGrid>()); */
-		/* potentialGrids = 	*(new thrust::host_vector<BaseGrid>()); */
-		/* densityGrids = 	*(new thrust::host_vector<BaseGrid>()); */
-
-		/* thrust::host_vector<BaseGrid> potentialGrids; */
-		/* thrust::host_vector<BaseGrid> densityGrids; */
-		/* thrust::device_vector<BaseGrid> potentialGrids_D; */
-		/* thrust::device_vector<BaseGrid> densityGrids_D; */
-
-		/* potentialGrids = thrust::host_vector<BaseGrid>(); */
-		/* densityGrids	 = thrust::host_vector<BaseGrid>(); */
-		/* potentialGrids = thrust::host_vector<BaseGrid>(); */
-		/* densityGrids	 = thrust::host_vector<BaseGrid>(); */
-
-	}
-
-
+		rotDamping(), numPotGrids(0), numDenGrids(0) { }
 	
 	/* RigidBodyType(const RigidBodyType& src) { copy(src); } */
 	~RigidBodyType() { clear(); }
@@ -52,8 +36,10 @@ RigidBodyType(const String& name = "") :
 
   void addPotentialGrid(String s);
 	void addDensityGrid(String s);
-	
+	void updateRaw();
+
 public:
+
 	String name;
 	int num; // number of particles of this type
 
@@ -64,8 +50,14 @@ public:
 	Vector3 transDamping;
 	Vector3 rotDamping;
 
-	thrust::host_vector<BaseGrid> potentialGrids;
-	thrust::host_vector<BaseGrid> densityGrids;
-	thrust::device_vector<BaseGrid> potentialGrids_D;
-	thrust::device_vector<BaseGrid> densityGrids_D;
+	std::vector<BaseGrid> potentialGrids;
+	std::vector<BaseGrid> densityGrids;
+	
+	// for device
+	int numPotGrids;
+	int numDenGrids;
+	BaseGrid* rawPotentialGrids;
+	BaseGrid* rawDensityGrids;
+	
 };
+
diff --git a/makefile b/makefile
index 32d4d6d..c64722a 100644
--- a/makefile
+++ b/makefile
@@ -9,8 +9,9 @@ include ./findcudalib.mk
 
 INCLUDE = $(CUDA_PATH)/include
 
-CC_FLAGS = -Wall -Wno-write-strings -I$(INCLUDE)
-NV_FLAGS =
+DEBUG = -g
+CC_FLAGS = -Wall -Wno-write-strings -I$(INCLUDE) $(DEBUG) -std=c++0x -pedantic
+NV_FLAGS = $(DEBUG)
 EX_FLAGS = -O3 -m$(OS_SIZE)
 
 ifneq ($(MAVERICKS),)
@@ -40,7 +41,6 @@ NV_FLAGS += $(CODE_10) $(CODE_12) $(CODE_20) $(CODE_30) $(CODE_35)
 
 
 ### Sources
-
 CC_SRC := $(wildcard *.cpp)
 CC_SRC := $(filter-out runBrownTown.cpp, $(CC_SRC))
 CU_SRC := $(wildcard *.cu)
diff --git a/runBrownTown.cpp b/runBrownTown.cpp
index 5de068f..4cd7714 100644
--- a/runBrownTown.cpp
+++ b/runBrownTown.cpp
@@ -103,7 +103,9 @@ int main(int argc, char* argv[]) {
 	GPUManager::init();
 	GPUManager::safe(safe);
 	Configuration config(configFile, replicas, debug);
+	printf("Done with config\n");
 	GPUManager::set(0);
+	printf("Done with gpumanager\n");
 	config.copyToCUDA();
 
 	GrandBrownTown brown(config, outArg, randomSeed,
-- 
GitLab