From fd77880007ff51885a67efb2e0b9d515d88a45f6 Mon Sep 17 00:00:00 2001
From: Chris Maffeo <cmaffeo2@illinois.edu>
Date: Tue, 17 May 2016 17:28:57 -0500
Subject: [PATCH] added __restrict__ for pairlist speedup

---
 ComputeForce.cu  |  2 +-
 ComputeForce.cuh | 13 +++++++------
 GPUController.h  | 44 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 7 deletions(-)
 create mode 100644 GPUController.h

diff --git a/ComputeForce.cu b/ComputeForce.cu
index 6566354..d1cfc0b 100644
--- a/ComputeForce.cu
+++ b/ComputeForce.cu
@@ -413,7 +413,7 @@ void ComputeForce::decompose(Vector3* pos, int type[]) {
 		int tmp = 0;
 		gpuErrchk(cudaMemcpyAsync(numPairs_d, &tmp,
 															sizeof(int), cudaMemcpyHostToDevice));
-		gpuErrchk(cudaDeviceSynchronize()); /* RBTOOD: maybe unnecessary */
+		gpuErrchk(cudaDeviceSynchronize());
 	}
 
 	
diff --git a/ComputeForce.cuh b/ComputeForce.cuh
index 10454f2..715df18 100644
--- a/ComputeForce.cuh
+++ b/ComputeForce.cuh
@@ -286,10 +286,10 @@ void createPairlistsOld(Vector3* __restrict__ pos, int num, int numReplicas,
 
 __global__
 void createPairlists(Vector3* __restrict__ pos, int num, int numReplicas,
-				BaseGrid* sys, CellDecomposition* __restrict__ decomp,
+				const BaseGrid* __restrict__ sys, const CellDecomposition* __restrict__ decomp,
 				const int nCells,
 				int* g_numPairs, int2* g_pair,
-				int numParts, int type[], int* __restrict__ g_pairTabPotType,
+				int numParts, const int* __restrict__ type, int* __restrict__ g_pairTabPotType,
 				float pairlistdist2) {
 	// Loop over threads searching for atom pairs
   //   Each thread has designated values in shared memory as a buffer
@@ -299,7 +299,7 @@ void createPairlists(Vector3* __restrict__ pos, int num, int numReplicas,
 	const int split = 32;					/* numblocks should be divisible by split */
 	/* const int blocksPerCell = gridDim.x/split;  */
 	
-	const CellDecomposition::cell_t* pairs = decomp->getCells();
+	const CellDecomposition::cell_t* __restrict__ pairs = decomp->getCells();
 	for (int cID = 0 + (blockIdx.x % split); cID < nCells; cID += split) {
 	// for (int cID = blockIdx.x/blocksPerCell; cID < nCells; cID += split ) {
 		for (int repID = 0; repID < numReplicas; repID++) {
@@ -314,8 +314,8 @@ void createPairlists(Vector3* __restrict__ pos, int num, int numReplicas,
 				const int ai = pairs[ci].particle;
 				// const CellDecomposition::cell_t celli = decomp->getCellForParticle(ai);
 				const CellDecomposition::cell_t celli = pairs[ci];
-				const Vector3 posi = pos[ai];
-				
+				// Vector3 posi = pos[ai];
+
 				for (int x = -1; x <= 1; ++x) {
 					for (int y = -1; y <= 1; ++y) {
 						for (int z = -1; z <= 1; ++z) {					
@@ -329,7 +329,8 @@ void createPairlists(Vector3* __restrict__ pos, int num, int numReplicas,
 								if (aj <= ai) continue;
 								
 								// skip ones that are too far away
-								float dr = (sys->wrapDiff(pos[aj] - pos[ai])).length2();
+								const float dr = (sys->wrapDiff(pos[aj] - pos[ai])).length2();
+								// const float dr = (sys->wrapDiff(pos[aj] - posi)).length2();
 								if (dr > pairlistdist2) continue;
 								
 								int gid = atomicAggInc( g_numPairs, warpLane );
diff --git a/GPUController.h b/GPUController.h
new file mode 100644
index 0000000..088a3dd
--- /dev/null
+++ b/GPUController.h
@@ -0,0 +1,44 @@
+// provides interface between main CPU loop and various GPUs
+//   -- holds data for each GPU
+
+#pragma once
+#include "useful.h"
+
+class GPUcontroller {
+public:
+	GPUcontroller(const Configuration& c, const long int randomSeed,
+			bool debug, int numReplicas = 0);
+	~GPUcontroller();
+
+	static bool DEBUG;	// declaration only; the definition must live in a source file
+
+private:
+	// NOTE(review): presumably copies host-side state to the device -- confirm
+	void copyToCUDA();
+
+	// 'conf' is a reference member: the referenced Configuration must outlive
+	// this object.
+	const Configuration& conf;
+	int numReplicas;
+
+	// Integrator variables
+	BaseGrid* sys;
+	ComputeForce* internal;
+	Vector3* forceInternal;
+
+	// CUDA device variables
+	Vector3 *pos_d, *forceInternal_d, *force_d;
+	int *type_d;
+	BrownianParticleType **part_d;
+	BaseGrid *sys_d, *kTGrid_d;
+	Random *randoGen_d;
+	Bond* bonds_d;
+	int2* bondMap_d;
+	Exclude* excludes_d;
+	int2* excludeMap_d;
+	Angle* angles_d;
+	Dihedral* dihedrals_d;
+
+};
+	
+	
-- 
GitLab