From b35d7abb27c64d8efd5a2661feb8403f5d2e9d03 Mon Sep 17 00:00:00 2001
From: Emmanuel Guzman <eguzman6@illinois.edu>
Date: Fri, 15 Jul 2016 15:12:32 -0500
Subject: [PATCH] ComputeForce class manages all GPU data for the system.
 Tested successfully for bonds tests.

---
 ComputeForce.cu   | 195 ++++++++++++++++++++++++++++++++--------------
 ComputeForce.cuh  |  19 +----
 ComputeForce.h    | 101 ++++++++++++++++++++----
 Configuration.cpp |  39 +++++-----
 Configuration.h   |  14 ++--
 GrandBrownTown.cu | 111 ++++++++++++++------------
 GrandBrownTown.h  |  18 ++---
 runBrownTown.cpp  |   2 +
 8 files changed, 320 insertions(+), 179 deletions(-)

diff --git a/ComputeForce.cu b/ComputeForce.cu
index 7f9207a..aee09cd 100644
--- a/ComputeForce.cu
+++ b/ComputeForce.cu
@@ -129,14 +129,29 @@ ComputeForce::~ComputeForce() {
 			delete tableAngle[j];
 	delete[] tableAngle;
 	delete[] tableAngle_addr;
-	gpuErrchk(cudaFree(tableAngle_d));
 
-	gpuErrchk(cudaFree(energies_d));
-
-	gpuErrchk(cudaFree(sys_d));
+	if(type_d != NULL)
+	{
+		gpuErrchk(cudaFree(tableAngle_d));
+
+		gpuErrchk(cudaFree(energies_d));
+
+		gpuErrchk(cudaFree(sys_d));
+
+		gpuErrchk( cudaFree(pos_d) );
+		gpuErrchk( cudaFree(forceInternal_d) );
+		gpuErrchk( cudaFree(type_d) );
+		gpuErrchk( cudaFree(bonds_d) );
+		gpuErrchk( cudaFree(bondMap_d) );
+		gpuErrchk( cudaFree(excludes_d) );
+		gpuErrchk( cudaFree(excludeMap_d) );
+		gpuErrchk( cudaFree(angles_d) );
+		gpuErrchk( cudaFree(dihedrals_d) );
+		gpuErrchk( cudaFree(bondList_d) );
+	}
 }
 
-void ComputeForce::updateNumber(Vector3* pos, int type[], int newNum) {
+void ComputeForce::updateNumber(int newNum) {
 	if (newNum == num or newNum < 0) return;
 
 	// Set the new number.
@@ -145,7 +160,7 @@ void ComputeForce::updateNumber(Vector3* pos, int type[], int newNum) {
 	// Reallocate the neighbor list.
 	//delete[] neigh;
 	//neigh = new IndexList[num];
-	decompose(pos, type);
+	decompose();
 
 	printf("updateNumber() called\n");
 	// Reallocate CUDA arrays
@@ -242,8 +257,8 @@ bool ComputeForce::addTabulatedPotential(String fileName, int type0, int type1)
 	return true;
 }
 
-bool ComputeForce::addBondPotential(String fileName, int ind,
-																		Bond bonds[], Bond bonds_d[]) {
+bool ComputeForce::addBondPotential(String fileName, int ind, Bond bonds[])
+{
 	if (tableBond[ind] != NULL) {
 		delete tableBond[ind];
 		gpuErrchk(cudaFree(tableBond_addr[ind]));
@@ -285,7 +300,7 @@ bool ComputeForce::addBondPotential(String fileName, int ind,
 	return true;
 }
 
-bool ComputeForce::addAnglePotential(String fileName, int ind, Angle* angles, Angle* angles_d) {
+bool ComputeForce::addAnglePotential(String fileName, int ind, Angle* angles) {
 	if (tableAngle[ind] != NULL) {
 		delete tableAngle[ind];
 		gpuErrchk(cudaFree(tableAngle_addr[ind]));
@@ -319,9 +334,8 @@ bool ComputeForce::addAnglePotential(String fileName, int ind, Angle* angles, An
 	return true;
 }
 
-bool ComputeForce::addDihedralPotential(String fileName, int ind,
-																				Dihedral dihedrals[],
-																				Dihedral dihedrals_d[]) {
+bool ComputeForce::addDihedralPotential(String fileName, int ind, Dihedral dihedrals[])
+{
 	for (int i = 0; i < numDihedrals; i++)
 		if (dihedrals[i].fileName == fileName)
 			dihedrals[i].tabFileIndex = ind;
@@ -356,7 +370,7 @@ bool ComputeForce::addDihedralPotential(String fileName, int ind,
 	return true;
 }
 
-void ComputeForce::decompose(Vector3* pos, int type[]) {
+void ComputeForce::decompose() {
 	gpuErrchk( cudaProfilerStart() );
 	// Reset the cell decomposition.
 	bool newDecomp = false;
@@ -365,7 +379,7 @@ void ComputeForce::decompose(Vector3* pos, int type[]) {
 	else
 		newDecomp = true;
 		
-	decomp.decompose_d(pos, num);
+	decomp.decompose_d(pos_d, num);
 	decomp_d = decomp.copyToCUDA();
 
 	// Update pairlists using cell decomposition (not sure this is really needed or good) 
@@ -409,21 +423,14 @@ void ComputeForce::decompose(Vector3* pos, int type[]) {
 	/* 																					 numPairs_d, pairListListI_d, pairListListJ_d); */
 	/* gpuErrchk(cudaDeviceSynchronize());	 */
 
-	{
-		int tmp = 0;
-		gpuErrchk(cudaMemcpyAsync(numPairs_d, &tmp,
-															sizeof(int), cudaMemcpyHostToDevice));
-		gpuErrchk(cudaDeviceSynchronize());
-	}
-
+	int tmp = 0;
+	gpuErrchk(cudaMemcpyAsync(numPairs_d, &tmp,	sizeof(int), cudaMemcpyHostToDevice));
+	gpuErrchk(cudaDeviceSynchronize());
 	
 	float pairlistdist2 = (sqrt(cutoff2) + 2.0f);
 	pairlistdist2 = pairlistdist2*pairlistdist2;
 	
-	createPairlists<<< 2048, 64 >>>(pos, num, numReplicas,
-					sys_d, decomp_d, nCells,
-					numPairs_d, pairLists_d,
-					numParts, type, pairTabPotType_d, pairlistdist2);
+	createPairlists<<< 2048, 64 >>>(pos_d, num, numReplicas, sys_d, decomp_d, nCells, numPairs_d, pairLists_d, numParts, type_d, pairTabPotType_d, pairlistdist2);
 	/* createPairlistsOld<<< nBlocks, NUMTHREADS >>>(pos, num, numReplicas, */
 	/* 																					 sys_d, decomp_d, nCells, blocksPerCell, */
 	/* 																					 numPairs_d, pairLists_d, */
@@ -466,14 +473,14 @@ int* ComputeForce::neighborhood(Vector3 r) {
 	return NULL;
 }
 
-float ComputeForce::computeFull(Vector3* force, Vector3* pos, int* type, bool get_energy) {
+float ComputeForce::computeFull(bool get_energy) {
 	float energy = 0.0f;
 	gridSize = (num * numReplicas) / NUM_THREADS + 1;
 	dim3 numBlocks(gridSize, 1, 1);
 	dim3 numThreads(NUM_THREADS, 1, 1);
 
 	// Call the kernel to calculate forces
-	computeFullKernel<<< numBlocks, numThreads >>>(force, pos, type, tableAlpha_d,
+	computeFullKernel<<< numBlocks, numThreads >>>(forceInternal_d, pos_d, type_d, tableAlpha_d,
 		tableEps_d, tableRad6_d, num, numParts, sys_d, energies_d, gridSize,
 		numReplicas, get_energy);
 
@@ -487,14 +494,14 @@ float ComputeForce::computeFull(Vector3* force, Vector3* pos, int* type, bool ge
 	return energy;
 }
 
-float ComputeForce::computeSoftcoreFull(Vector3* force, Vector3* pos, int* type, bool get_energy) {
+float ComputeForce::computeSoftcoreFull(bool get_energy) {
 	float energy = 0.0f;
 	gridSize = (num * numReplicas) / NUM_THREADS + 1;
 	dim3 numBlocks(gridSize, 1, 1);
 	dim3 numThreads(NUM_THREADS, 1, 1);
 
 	// Call the kernel to calculate forces
-	computeSoftcoreFullKernel<<<numBlocks, numThreads>>>(force, pos, type,
+	computeSoftcoreFullKernel<<<numBlocks, numThreads>>>(forceInternal_d, pos_d, type_d,
 			tableEps_d, tableRad6_d, num, numParts, sys_d, energies_d, gridSize,
 			numReplicas, get_energy);
 
@@ -508,8 +515,7 @@ float ComputeForce::computeSoftcoreFull(Vector3* force, Vector3* pos, int* type,
 	return energy;
 }
 
-float ComputeForce::computeElecFull(Vector3* force, Vector3* pos,
-		int* type, bool get_energy) {
+float ComputeForce::computeElecFull(bool get_energy) {
 	float energy = 0.0f;
 
 	gridSize = num/NUM_THREADS + 1;
@@ -517,7 +523,7 @@ float ComputeForce::computeElecFull(Vector3* force, Vector3* pos,
 	dim3 numThreads(NUM_THREADS, 1, 1);
 
 	// Call the kernel to calculate forces
-	computeElecFullKernel<<<numBlocks, numThreads>>>(force, pos, type,
+	computeElecFullKernel<<<numBlocks, numThreads>>>(forceInternal_d, pos_d, type_d,
 			tableAlpha_d, num, numParts, sys_d, energies_d, gridSize, numReplicas,
 			get_energy);
 
@@ -532,7 +538,7 @@ float ComputeForce::computeElecFull(Vector3* force, Vector3* pos,
 }
 
 
-float ComputeForce::compute(Vector3 force[], Vector3 pos[], int type[], bool get_energy) {
+float ComputeForce::compute(bool get_energy) {
 	float energy = 0.0f;
 
 	gridSize = (num * numReplicas) / NUM_THREADS + 1;
@@ -540,7 +546,7 @@ float ComputeForce::compute(Vector3 force[], Vector3 pos[], int type[], bool get
 	dim3 numThreads(NUM_THREADS, 1, 1);
 
 	// Call the kernel to calculate forces
-	computeKernel<<<numBlocks, numThreads>>>(force, pos, type,
+	computeKernel<<<numBlocks, numThreads>>>(forceInternal_d, pos_d, type_d,
 			tableAlpha_d, tableEps_d, tableRad6_d, num, numParts, sys_d,
 			decomp_d, energies_d, switchStart, switchLen, gridSize, numReplicas,
 			get_energy);
@@ -560,9 +566,7 @@ float ComputeForce::compute(Vector3 force[], Vector3 pos[], int type[], bool get
 /*float ComputeForce::computeTabulated(Vector3* force, Vector3* pos, int* type,
 		Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap,
 		Angle* angles, Dihedral* dihedrals, bool get_energy, Bond* bondList) {*/
-float ComputeForce::computeTabulated(Vector3* force, Vector3* pos, int* type,
-		Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap,
-		Angle* angles, Dihedral* dihedrals, bool get_energy, int3* bondList_d) {
+float ComputeForce::computeTabulated(bool get_energy) {
 	float energy = 0.0f;
 
 	gridSize = (num * numReplicas) / NUM_THREADS + 1;
@@ -575,32 +579,35 @@ float ComputeForce::computeTabulated(Vector3* force, Vector3* pos, int* type,
 	const int nb = 800;
 	// printf("ComputeTabulated\n");
 	
-	if (get_energy) {
+	if (get_energy) 
+	{
 		clearEnergies<<< nb, numThreads >>>(energies_d,num);
 		gpuErrchk(cudaDeviceSynchronize());
-		computeTabulatedEnergyKernel<<< nb, numThreads >>>(force, pos, type,
-						tablePot_d, tableBond_d, sys_d,
-						bonds, bondMap,	numBonds,	energies_d,	cutoff2,
-						numPairs_d, pairLists_d, pairTabPotType_d);
-	} else {
-		computeTabulatedKernel<<< nb, numThreads >>>(force, pos, type,
+		computeTabulatedEnergyKernel<<< nb, numThreads >>>(forceInternal_d, pos_d, type_d, tablePot_d, sys_d, energies_d, cutoff2, numPairs_d, pairLists_d, pairTabPotType_d);
+	}
+	
+	else
+	{
+		computeTabulatedKernel<<< nb, numThreads >>>(forceInternal_d, pos_d, type_d,
 						tablePot_d, tableBond_d, sys_d,
-						bonds, bondMap,	numBonds, cutoff2,
+						bonds_d, bondMap_d,	numBonds, cutoff2,
 						numPairs_d, pairLists_d, pairTabPotType_d);
 	}
 	/* printPairForceCounter<<<1,32>>>(); */
 	/* gpuErrchk(cudaDeviceSynchronize()); */
 
-	computeAngles<<<numBlocks, numThreads>>>(force, pos, angles,
+	computeAngles<<<numBlocks, numThreads>>>(forceInternal_d, pos_d, angles_d,
 			tableAngle_d, numAngles, num, sys_d, energies_d, get_energy);
 
 	//Mlog: the commented function doesn't use bondList, uncomment for testing.
-	if(bondMap != NULL && tableBond_d != NULL)
+	//if(bondMap_d != NULL && tableBond_d != NULL)
+	if(bondList_d != NULL && tableBond_d != NULL)
+
 	{
-		//computeTabulatedBonds <<<numBlocks, numThreads>>> ( force, pos, num, numParts, sys_d, bonds, bondMap, numBonds, numReplicas, energies_d, get_energy, tableBond_d);
-		computeTabulatedBonds <<<numBlocks, numThreads>>> ( force, pos, num, numParts, sys_d, bondList_d, (numBonds/2), numReplicas, energies_d, get_energy, tableBond_d);
+		//computeTabulatedBonds <<<numBlocks, numThreads>>> ( force, pos, num, numParts, sys_d, bonds, bondMap_d, numBonds, numReplicas, energies_d, get_energy, tableBond_d);
+		computeTabulatedBonds <<<numBlocks, numThreads>>> ( forceInternal_d, pos_d, num, numParts, sys_d, bondList_d, (numBonds/2), numReplicas, energies_d, get_energy, tableBond_d);
 	}
-	computeDihedrals<<<numBlocks, numThreads>>>(force, pos, dihedrals,
+	computeDihedrals<<<numBlocks, numThreads>>>(forceInternal_d, pos_d, dihedrals_d,
 			tableDihedral_d, numDihedrals, num, sys_d, energies_d, get_energy);
 
 	// Calculate the energy based on the array created by the kernel
@@ -613,9 +620,7 @@ float ComputeForce::computeTabulated(Vector3* force, Vector3* pos, int* type,
 	return energy;
 }
 
-float ComputeForce::computeTabulatedFull(Vector3* force, Vector3* pos, int* type,
-		Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap,
-		Angle* angles, Dihedral* dihedrals, bool get_energy) {
+float ComputeForce::computeTabulatedFull(bool get_energy) {
 	energy = 0.0f;
 
 	gridSize = (num * numReplicas) / NUM_THREADS + 1;
@@ -623,17 +628,14 @@ float ComputeForce::computeTabulatedFull(Vector3* force, Vector3* pos, int* type
 	dim3 numThreads(NUM_THREADS, 1, 1);
 
 	// Call the kernel to calculate forces
-	computeTabulatedFullKernel<<< numBlocks, numThreads >>>(force, pos, type,
-			tablePot_d, tableBond_d, num, numParts, sys_d, bonds, bondMap, numBonds,
-			excludes, excludeMap, numExcludes, energies_d, gridSize, numReplicas,
-			get_energy, angles);
+	computeTabulatedFullKernel<<< numBlocks, numThreads >>>(forceInternal_d, pos_d, type_d,	tablePot_d, tableBond_d, num, numParts, sys_d, bonds_d, bondMap_d, numBonds, excludes_d, excludeMap_d, numExcludes, energies_d, gridSize, numReplicas, get_energy, angles_d);
 	gpuErrchk(cudaDeviceSynchronize());
 
-	computeAngles<<< numBlocks, numThreads >>>(force, pos, angles, tableAngle_d,
+	computeAngles<<< numBlocks, numThreads >>>(forceInternal_d, pos_d, angles_d, tableAngle_d,
 																						 numAngles, num, sys_d, energies_d,
 																						 get_energy);
 	gpuErrchk(cudaDeviceSynchronize());
-	computeDihedrals<<< numBlocks, numThreads >>>(force, pos, dihedrals,
+	computeDihedrals<<< numBlocks, numThreads >>>(forceInternal_d, pos_d, dihedrals_d,
 																							  tableDihedral_d, numDihedrals,
 																								num, sys_d, energies_d,
 																								get_energy);
@@ -646,3 +648,78 @@ float ComputeForce::computeTabulatedFull(Vector3* force, Vector3* pos, int* type
 
 	return energy;
 }
+
+void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos)
+{
+	const size_t tot_num = num * numReplicas;
+
+	gpuErrchk(cudaMalloc(&pos_d, sizeof(Vector3) * tot_num));
+	gpuErrchk(cudaMemcpyAsync(pos_d, pos, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice));
+
+	gpuErrchk(cudaMalloc(&forceInternal_d, sizeof(Vector3) * num * numReplicas));
+	gpuErrchk(cudaMemcpyAsync(forceInternal_d, forceInternal, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice));
+
+	gpuErrchk(cudaDeviceSynchronize());
+}
+
+void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals)
+{
+	// type_d
+	gpuErrchk(cudaMalloc(&type_d, sizeof(int) * num * simNum));
+	gpuErrchk(cudaMemcpyAsync(type_d, type, sizeof(int) * num * simNum, cudaMemcpyHostToDevice));
+	
+	if (numBonds > 0)
+	{
+		// bonds_d
+		gpuErrchk(cudaMalloc(&bonds_d, sizeof(Bond) * numBonds));
+		gpuErrchk(cudaMemcpyAsync(bonds_d, bonds, sizeof(Bond) * numBonds, cudaMemcpyHostToDevice));
+		
+		// bondMap_d
+		gpuErrchk(cudaMalloc(&bondMap_d, sizeof(int2) * num));
+		gpuErrchk(cudaMemcpyAsync(bondMap_d, bondMap, sizeof(int2) * num, cudaMemcpyHostToDevice));
+	}
+
+	if (numExcludes > 0) {
+		// excludes_d
+		gpuErrchk(cudaMalloc(&excludes_d, sizeof(Exclude) * numExcludes));
+		gpuErrchk(cudaMemcpyAsync(excludes_d, excludes, sizeof(Exclude) * numExcludes,
+				cudaMemcpyHostToDevice));
+		
+		// excludeMap_d
+		gpuErrchk(cudaMalloc(&excludeMap_d, sizeof(int2) * num));
+		gpuErrchk(cudaMemcpyAsync(excludeMap_d, excludeMap, sizeof(int2) * num,
+				cudaMemcpyHostToDevice));
+	}
+
+	if (numAngles > 0) {
+		// angles_d
+		gpuErrchk(cudaMalloc(&angles_d, sizeof(Angle) * numAngles));
+		gpuErrchk(cudaMemcpyAsync(angles_d, angles, sizeof(Angle) * numAngles,
+				cudaMemcpyHostToDevice));
+	}
+
+	if (numDihedrals > 0) {
+		// dihedrals_d
+		gpuErrchk(cudaMalloc(&dihedrals_d, sizeof(Dihedral) * numDihedrals));
+		gpuErrchk(cudaMemcpyAsync(dihedrals_d, dihedrals,
+												 		  sizeof(Dihedral) * numDihedrals,
+														 	cudaMemcpyHostToDevice));
+	}
+
+	gpuErrchk(cudaDeviceSynchronize());
+}
+
+void ComputeForce::createBondList(int3 *bondList)
+{
+	size_t size = (numBonds / 2) * numReplicas * sizeof(int3);
+	gpuErrchk( cudaMalloc( &bondList_d, size ) );
+	gpuErrchk( cudaMemcpyAsync( bondList_d, bondList, size, cudaMemcpyHostToDevice) );
+
+	for(int i = 0 ; i < (numBonds / 2) * numReplicas ; i++)
+	{
+		cout << "Displaying: bondList_d["<< i <<"].x = " << bondList[i].x << ".\n"
+			<< "Displaying: bondList_d["<< i <<"].y = " << bondList[i].y << ".\n"
+			<< "Displaying: bondList_d["<< i <<"].z = " << bondList[i].z << ".\n";
+
+	}
+}
diff --git a/ComputeForce.cuh b/ComputeForce.cuh
index 359b879..cfb2b96 100644
--- a/ComputeForce.cuh
+++ b/ComputeForce.cuh
@@ -470,13 +470,7 @@ __global__ void clearEnergies(float* g_energies, int num) {
 		g_energies[i] = 0.0f;
 	}
 }
-__global__ void computeTabulatedEnergyKernel(Vector3* force, Vector3* __restrict__ pos,
-				int* type, TabulatedPotential** __restrict__ tablePot,
-				TabulatedPotential** __restrict__ tableBond,
-				BaseGrid* __restrict__ sys, Bond* __restrict__ bonds,
-				int2* __restrict__ bondMap, int numBonds, float* g_energies, float cutoff2,
-				int* __restrict__ g_numPairs,	int2* __restrict__ g_pair,
-				int* __restrict__ g_pairTabPotType) {
+__global__ void computeTabulatedEnergyKernel(Vector3* force, Vector3* __restrict__ pos,	int* type, TabulatedPotential** __restrict__ tablePot, BaseGrid* __restrict__ sys,  float* g_energies, float cutoff2, int* __restrict__ g_numPairs,	int2* __restrict__ g_pair, int* __restrict__ g_pairTabPotType) {
 	
 	const int numPairs = *g_numPairs;
 	// RBTODO: BONDS (handle through an independent kernel call?)
@@ -512,15 +506,8 @@ __global__ void computeTabulatedEnergyKernel(Vector3* force, Vector3* __restrict
 // NOT using cell decomposition
 //
 __global__
-void computeTabulatedFullKernel(Vector3 force[], Vector3 pos[], int type[],
-																TabulatedPotential* tablePot[],
-																TabulatedPotential* tableBond[],
-																int num, int numParts, BaseGrid *sys,
-																Bond bonds[], int2 bondMap[], int numBonds,
-																Exclude excludes[], int2 excludeMap[],
-																int numExcludes, float g_energies[],
-																int gridSize, int numReplicas, bool get_energy,
-																Angle angles[]) {
+void computeTabulatedFullKernel(Vector3 force[], Vector3 pos[], int type[], TabulatedPotential* tablePot[], TabulatedPotential* tableBond[], int num, int numParts, BaseGrid *sys, Bond bonds[], int2 bondMap[], int numBonds, Exclude excludes[], int2 excludeMap[], int numExcludes, float g_energies[], int gridSize, int numReplicas, bool get_energy, Angle angles[]) 
+{
 	// Thread's unique ID.
 	const int i = blockIdx.x * blockDim.x + threadIdx.x;
 
diff --git a/ComputeForce.h b/ComputeForce.h
index d1d0b35..22fd9ea 100644
--- a/ComputeForce.h
+++ b/ComputeForce.h
@@ -39,15 +39,15 @@ public:
 			int numDihedrals, int numTabDihedralFiles, int numReplicas = 1);
 	~ComputeForce();
 
-	void updateNumber(Vector3* pos, int type[], int newNum);
+	void updateNumber(int newNum);
 	void makeTables(const BrownianParticleType* part);
 
 	bool addTabulatedPotential(String fileName, int type0, int type1);
-	bool addBondPotential(String fileName, int ind, Bond* bonds, Bond* bonds_d);
-	bool addAnglePotential(String fileName, int ind, Angle* angles, Angle* angles_d);
-	bool addDihedralPotential(String fileName, int ind, Dihedral* dihedrals, Dihedral* dihedrals_d);
+	bool addBondPotential(String fileName, int ind, Bond* bonds);
+	bool addAnglePotential(String fileName, int ind, Angle* angles);
+	bool addDihedralPotential(String fileName, int ind, Dihedral* dihedrals);
 
-	void decompose(Vector3* pos, int type[]);
+	void decompose();
 	
 	CellDecomposition getDecomp();
 	IndexList decompDim() const;
@@ -57,22 +57,76 @@ public:
 	// Does nothing
 	int* neighborhood(Vector3 r);
 
-	float computeSoftcoreFull(Vector3* force, Vector3* pos, int* type, bool get_energy);
-	float computeElecFull(Vector3* force, Vector3* pos, int* type, bool get_energy);
+	float computeSoftcoreFull(bool get_energy);
+	float computeElecFull(bool get_energy);
 	
-	float compute(Vector3* force, Vector3* pos, int* type, bool get_energy);
-	float computeFull(Vector3* force, Vector3* pos, int* type, bool get_energy);
+	float compute(bool get_energy);
+	float computeFull(bool get_energy);
 	
 	//MLog: the commented function doesn't use bondList, uncomment for testing.
 	/*float computeTabulated(Vector3* force, Vector3* pos, int* type,
 			Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap,
 			Angle* angles, Dihedral* dihedrals, bool get_energy);*/
-	float computeTabulated(Vector3* force, Vector3* pos, int* type,
-			Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap,
-			Angle* angles, Dihedral* dihedrals, bool get_energy, int3* bondList_d);
-	float computeTabulatedFull(Vector3* force, Vector3* pos, int* type,
-			Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap,
-			Angle* angles, Dihedral* dihedrals, bool get_energy);
+	float computeTabulated(bool get_energy);
+	float computeTabulatedFull(bool get_energy);
+	
+	//MLog: new copy function to allocate memory required by ComputeForce class.
+	void copyToCUDA(Vector3* forceInternal, Vector3* pos);
+	void copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals);
+	
+	void createBondList(int3 *bondList);
+
+	//MLog: because of the move of a lot of private variables, some functions get starved necessary memory access to these variables, below is a list of functions that return the specified private variable.
+	Vector3* getPos_d()
+	{
+		return pos_d;
+	}
+
+	Vector3* getForceInternal_d()
+	{
+		return forceInternal_d;
+	}
+
+	int* getType_d()
+	{
+		return type_d;
+	}
+
+	Bond* getBonds_d()
+	{
+		return bonds_d;
+	}
+
+	int2* getBondMap_d()
+	{
+		return bondMap_d;
+	}
+
+	Exclude* getExcludes_d()
+	{
+		return excludes_d;
+	}
+
+	int2* getExcludeMap_d()
+	{
+		return excludeMap_d;
+	}
+
+	Angle* getAngles_d()
+	{
+		return angles_d;
+	}
+
+	Dihedral* getDihedrals_d()
+	{
+		return dihedrals_d;
+	}
+
+	int3* getBondList_d()
+	{
+		return bondList_d;
+	}
+	
 
 	HOST DEVICE
 	static EnergyForce coulombForce(Vector3 r, float alpha,float start, float len);
@@ -120,7 +174,22 @@ private:
 	int2 *pairLists_d;
 	int *pairTabPotType_d;
 
-	int *numPairs_d;	
+	int *numPairs_d;
+	
+	//MLog: List of variables that need to be moved over to ComputeForce class. Members of this list will be set to static to avoid large alterations in working code, thereby allowing us to access these variables easily.
+	//BrownianParticleType* part;
+	//float electricConst;
+	//int fullLongRange;
+	Vector3* pos_d;
+	Vector3* forceInternal_d;
+	int* type_d; 
+	Bond* bonds_d; 
+	int2* bondMap_d; 
+	Exclude* excludes_d; 
+	int2* excludeMap_d; 
+	Angle* angles_d;
+	Dihedral* dihedrals_d;
+	int3* bondList_d;
 };
 
 #endif
diff --git a/Configuration.cpp b/Configuration.cpp
index 8373e27..68b5db6 100644
--- a/Configuration.cpp
+++ b/Configuration.cpp
@@ -12,14 +12,14 @@ inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) {
 Configuration::Configuration(const char* config_file, int simNum, bool debug) :
 		simNum(simNum) {
 	// Read the parameters.
-	type_d = NULL;
+	//type_d = NULL;
 	kTGrid_d = NULL;
-	bonds_d = NULL;
-	bondMap_d = NULL;
-	excludes_d = NULL;
-	excludeMap_d = NULL;
-	angles_d = NULL;
-	dihedrals_d = NULL;
+	//bonds_d = NULL;
+	//bondMap_d = NULL;
+	//excludes_d = NULL;
+	//excludeMap_d = NULL;
+	//angles_d = NULL;
+	//dihedrals_d = NULL;
 	setDefaults();
 	readParameters(config_file);
 	if (readPartsFromFile) readAtoms();
@@ -397,18 +397,18 @@ Configuration::~Configuration() {
 
 	delete[] dihedralTableFile;
 
-	if (type_d != NULL) {
-		gpuErrchk(cudaFree(type_d));
+	//if (type_d != NULL) {
+		//gpuErrchk(cudaFree(type_d));
 		gpuErrchk(cudaFree(sys_d));
 		gpuErrchk(cudaFree(kTGrid_d));
 		gpuErrchk(cudaFree(part_d));
-		gpuErrchk(cudaFree(bonds_d));
-		gpuErrchk(cudaFree(bondMap_d));
-		gpuErrchk(cudaFree(excludes_d));
-		gpuErrchk(cudaFree(excludeMap_d));
-		gpuErrchk(cudaFree(angles_d));
-		gpuErrchk(cudaFree(dihedrals_d));
-	}
+		//gpuErrchk(cudaFree(bonds_d));
+		//gpuErrchk(cudaFree(bondMap_d));
+		//gpuErrchk(cudaFree(excludes_d));
+		//gpuErrchk(cudaFree(excludeMap_d));
+		//gpuErrchk(cudaFree(angles_d));
+		//gpuErrchk(cudaFree(dihedrals_d));
+	//}
 }
 
 void Configuration::copyToCUDA() {
@@ -468,11 +468,10 @@ void Configuration::copyToCUDA() {
 	}
 
 	// type_d and sys_d
-	gpuErrchk(cudaMalloc(&type_d, sizeof(int) * num * simNum));
-	gpuErrchk(cudaMemcpyAsync(type_d, type, sizeof(int) * num * simNum, cudaMemcpyHostToDevice));
 	gpuErrchk(cudaMalloc(&sys_d, sizeof(BaseGrid)));
 	gpuErrchk(cudaMemcpyAsync(sys_d, sys, sizeof(BaseGrid), cudaMemcpyHostToDevice));
-
+	/*gpuErrchk(cudaMalloc(&type_d, sizeof(int) * num * simNum));
+	gpuErrchk(cudaMemcpyAsync(type_d, type, sizeof(int) * num * simNum, cudaMemcpyHostToDevice));
 	
 	if (numBonds > 0) {
 		// bonds_d
@@ -509,7 +508,7 @@ void Configuration::copyToCUDA() {
 		gpuErrchk(cudaMemcpyAsync(dihedrals_d, dihedrals,
 												 		  sizeof(Dihedral) * numDihedrals,
 														 	cudaMemcpyHostToDevice));
-	}
+	}*/
 	gpuErrchk(cudaDeviceSynchronize());
 }
 
diff --git a/Configuration.h b/Configuration.h
index 4829e40..058b0bb 100644
--- a/Configuration.h
+++ b/Configuration.h
@@ -80,15 +80,15 @@ public:
 	bool loadedCoordinates;
 
 	// Device Variables
-	int *type_d;
+	//int *type_d;
 	BrownianParticleType **part_d;
 	BaseGrid *sys_d, *kTGrid_d;
-	Bond* bonds_d;
-	int2* bondMap_d;
-	Exclude* excludes_d;
-	int2* excludeMap_d;
-	Angle* angles_d;
-	Dihedral* dihedrals_d;
+	//Bond* bonds_d;
+	//int2* bondMap_d;
+	//Exclude* excludes_d;
+	//int2* excludeMap_d;
+	//Angle* angles_d;
+	//Dihedral* dihedrals_d;
 
 	// number of simulations
 	int simNum;
diff --git a/GrandBrownTown.cu b/GrandBrownTown.cu
index c2db8bf..b459fed 100644
--- a/GrandBrownTown.cu
+++ b/GrandBrownTown.cu
@@ -138,16 +138,16 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 	numTabDihedralFiles = c.numTabDihedralFiles;
 
 	// Device parameters
-	type_d = c.type_d;
+	//type_d = c.type_d;
 	part_d = c.part_d;
 	sys_d = c.sys_d;
 	kTGrid_d = c.kTGrid_d;
-	bonds_d = c.bonds_d;
-	bondMap_d = c.bondMap_d;
-	excludes_d = c.excludes_d;
-	excludeMap_d = c.excludeMap_d;
-	angles_d = c.angles_d;
-	dihedrals_d = c.dihedrals_d;
+	//bonds_d = c.bonds_d;
+	//bondMap_d = c.bondMap_d;
+	//excludes_d = c.excludes_d;
+	//excludeMap_d = c.excludeMap_d;
+	//angles_d = c.angles_d;
+	//dihedrals_d = c.dihedrals_d;
 
 	// Seed random number generator
 	long unsigned int r_seed;
@@ -179,6 +179,9 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 	internal = new ComputeForce(num, part, numParts, sys, switchStart, switchLen, coulombConst,
 			fullLongRange, numBonds, numTabBondFiles, numExcludes, numAngles, numTabAngleFiles,
 			numDihedrals, numTabDihedralFiles, numReplicas);
+	
+	//MLog: I did the other halve of the copyToCUDA function from the Configuration class here, keep an eye on any mistakes that may occur due to the location.
+	internal -> copyToCUDA(c.simNum, c.type, c.bonds, c.bondMap, c.excludes, c.excludeMap, c.angles, c.dihedrals);
 
 	// TODO: check for duplicate potentials 
 	if (c.tabulatedPotential) {
@@ -199,7 +202,8 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 		printf("Loading the tabulated bond potentials...\n");
 		for (int p = 0; p < numTabBondFiles; p++)
 			if (bondTableFile[p].length() > 0) {
-				internal->addBondPotential(bondTableFile[p].val(), p, bonds, bonds_d);
+				//MLog: make sure to add to all GPUs
+				internal->addBondPotential(bondTableFile[p].val(), p, bonds);
 				printf("%s\n",bondTableFile[p].val());
 			}
 	}
@@ -208,14 +212,17 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 		printf("Loading the tabulated angle potentials...\n");
 		for (int p = 0; p < numTabAngleFiles; p++)
 			if (angleTableFile[p].length() > 0)
-				internal->addAnglePotential(angleTableFile[p].val(), p, angles, angles_d);
+			{
+				//MLog: make sure to do this for every GPU
+				internal->addAnglePotential(angleTableFile[p].val(), p, angles);
+			}
 	}
 
 	if (c.readDihedralsFromFile) {
 		printf("Loading the tabulated dihedral potentials...\n");
 		for (int p = 0; p < numTabDihedralFiles; p++)
 			if (dihedralTableFile[p].length() > 0)
-				internal->addDihedralPotential(dihedralTableFile[p].val(), p, dihedrals, dihedrals_d);
+				internal->addDihedralPotential(dihedralTableFile[p].val(), p, dihedrals);
 	}
 
 	//Mlog: this is where we create the bondList.
@@ -237,7 +244,7 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
 		}
 	}
 
-	createBondList();
+	internal -> createBondList(bondList);
 
 	// TODO: copy data to device (not here)
 	// TODO: use bondList in computeTabulatedBonds kernel
@@ -271,10 +278,10 @@ GrandBrownTown::~GrandBrownTown() {
 	for (int i = 0; i < numReplicas; ++i)
 		delete writers[i];
 
-	gpuErrchk(cudaFree(pos_d));
-	gpuErrchk(cudaFree(forceInternal_d));
+	//gpuErrchk(cudaFree(pos_d));
+	//gpuErrchk(cudaFree(forceInternal_d));
 	gpuErrchk(cudaFree(randoGen_d));
-	gpuErrchk( cudaFree(bondList_d) );
+	//gpuErrchk( cudaFree(bondList_d) );
 }
 
 // Run the Brownian Dynamics steps.
@@ -298,6 +305,7 @@ void GrandBrownTown::run() {
 	timerS = rt_timer_create();
 
 	copyToCUDA();
+	internal -> copyToCUDA(forceInternal, pos);
 
 	// IMD Stuff
 	void* sock = NULL;
@@ -333,7 +341,10 @@ void GrandBrownTown::run() {
 	// We haven't done any steps yet.
 	// Do decomposition if we have to
 	if (fullLongRange == 0)
-		internal->decompose(pos_d, type_d);
+	{
+		cudaSetDevice(0);
+		internal->decompose();
+	}
 
 	float t; // simulation time
 
@@ -360,22 +371,18 @@ void GrandBrownTown::run() {
 				// 1 - do not use cutoff [ N^2 ]
 				switch (fullLongRange) {
 					case 0: // [ N*log(N) ] interactions, + cutoff | decomposition
-						if (s % decompPeriod == 0) {
-							internal->decompose(pos_d, type_d);
-							//internal->updatePairlists(pos_d); // perhaps this way?
+						if (s % decompPeriod == 0)
+						{
+							cudaSetDevice(0);
+							internal -> decompose();
 						}
 						
 						//MLog: added Bond* bondList to the list of passed in variables.
 						/*energy = internal->computeTabulated(forceInternal_d, pos_d, type_d, bonds_d, bondMap_d, excludes_d, excludeMap_d,	angles_d, dihedrals_d, get_energy);*/
-						energy = internal -> computeTabulated(forceInternal_d, pos_d, type_d, bonds_d, bondMap_d, excludes_d, excludeMap_d,	angles_d, dihedrals_d, get_energy, bondList_d);
+						energy = internal -> computeTabulated(get_energy);
 						break;
 					default: // [ N^2 ] interactions, no cutoff | decompositions
-						energy =
-								internal->computeTabulatedFull(forceInternal_d, pos_d, type_d,
-																							 bonds_d, bondMap_d,
-																							 excludes_d, excludeMap_d,
-																							 angles_d, dihedrals_d,
-																							 get_energy);
+						energy = internal->computeTabulatedFull(get_energy);
 						break;
 				}
 			
@@ -386,24 +393,23 @@ void GrandBrownTown::run() {
 
 					case 0: // Use cutoff | cell decomposition.
 						if (s % decompPeriod == 0)
-							internal->decompose(pos_d, type_d);
-						energy =
-								internal->compute(forceInternal_d, pos_d, type_d, get_energy);
+						{
+							cudaSetDevice(0);
+							internal->decompose();
+						}
+						energy = internal->compute(get_energy);
 						break;
 
 					case 1: // Do not use cutoff
-						energy = internal->computeFull(forceInternal_d,
-																					 pos_d, type_d, get_energy);
+						energy = internal->computeFull(get_energy);
 						break;
 
 					case 2: // Compute only softcore forces.
-						energy = internal->computeSoftcoreFull(forceInternal_d,
-																									 pos_d, type_d, get_energy);
+						energy = internal->computeSoftcoreFull(get_energy);
 						break;
 
 					case 3: // Compute only electrostatic forces.
-						energy = internal->computeElecFull(forceInternal_d,
-																							 pos_d, type_d, get_energy);
+						energy = internal->computeElecFull(get_energy);
 						break;
 				}
 			}
@@ -428,11 +434,9 @@ void GrandBrownTown::run() {
 
 		RBC.updateForces(s);					/* update RB forces before update particle positions... */
 
-		// Call the kernel to update the positions of each particle
-		updateKernel<<< numBlocks, NUM_THREADS >>>(pos_d, forceInternal_d, type_d,
-																							 part_d, kT, kTGrid_d,
-																							 electricField, tl, timestep, num,
-																							 sys_d, randoGen_d, numReplicas);
+		//MLog: Call the kernel to update the positions of each particle
+		cudaSetDevice(0);
+		updateKernel<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d(), internal -> getForceInternal_d(), internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, sys_d, randoGen_d, numReplicas);
 		//gpuErrchk(cudaPeekAtLastError()); // Does not work on old GPUs (like mine). TODO: write a better wrapper around Peek
 		
 		/* Time position computations.
@@ -472,7 +476,8 @@ void GrandBrownTown::run() {
 				}
 			}
 			if (clientsock) {
-				gpuErrchk(cudaMemcpy(pos, pos_d, sizeof(Vector3) * num, cudaMemcpyDeviceToHost));
+				cudaSetDevice(0);
+				gpuErrchk(cudaMemcpy(pos, internal -> getPos_d(), sizeof(Vector3) * num, cudaMemcpyDeviceToHost));
 				float* coords = new float[num * 3];
 				for (size_t i = 0; i < num; i++) {
 					Vector3 p = pos[i];
@@ -488,7 +493,8 @@ void GrandBrownTown::run() {
 		// Output trajectories (to files)
 		if (s % outputPeriod == 0) {
 			// Copy particle positions back to CPU
-			gpuErrchk(cudaMemcpy(pos, pos_d, sizeof(Vector3) * num * numReplicas,
+			cudaSetDevice(0);
+			gpuErrchk(cudaMemcpy(pos, internal ->  getPos_d(), sizeof(Vector3) * num * numReplicas,
 					cudaMemcpyDeviceToHost));
 
 			// Nanoseconds computed
@@ -522,10 +528,11 @@ void GrandBrownTown::run() {
 		// Output energy.
 		if (get_energy) {
 			// Stop the timer.
+			cudaSetDevice(0);
 			rt_timer_stop(timerS);
 
 			// Copy back forces to display (internal only)
-			gpuErrchk(cudaMemcpy(&force0, forceInternal_d, sizeof(Vector3), cudaMemcpyDeviceToHost));
+			gpuErrchk(cudaMemcpy(&force0, internal -> getForceInternal_d(), sizeof(Vector3), cudaMemcpyDeviceToHost));
 			
 			// Nanoseconds computed
 			t = s * timestep;
@@ -546,7 +553,7 @@ void GrandBrownTown::run() {
 		*/
 
 			// Copy positions from GPU to CPU.
-			gpuErrchk(cudaMemcpy(pos, pos_d, sizeof(Vector3) * num * numReplicas,
+			gpuErrchk(cudaMemcpy(pos, internal -> getPos_d(), sizeof(Vector3) * num * numReplicas,
 													 cudaMemcpyDeviceToHost));
 
 			// Write restart files for each replica.
@@ -559,7 +566,8 @@ void GrandBrownTown::run() {
 
 		{
 			int numBlocks = (num * numReplicas) / NUM_THREADS + (num * numReplicas % NUM_THREADS == 0 ? 0 : 1);
-			clearInternalForces<<< numBlocks, NUM_THREADS >>>(forceInternal_d, num, numReplicas);
+			//MLog: along with calls to internal (ComputeForce class) this function should execute once per GPU.
+			clearInternalForces<<< numBlocks, NUM_THREADS >>>(internal -> getForceInternal_d(), num, numReplicas);
 		}
 		
 	} // done with all Brownian dynamics steps
@@ -604,7 +612,8 @@ void GrandBrownTown::run() {
 	else if (tot_min > 0) printf("%dm%.1fs\n", tot_min, tot_sec);
 	else printf("%.2fs\n", tot_sec);
 
-	gpuErrchk(cudaMemcpy(pos, pos_d, sizeof(Vector3) * num * numReplicas, cudaMemcpyDeviceToHost));
+	cudaSetDevice(0);
+	gpuErrchk(cudaMemcpy(pos, internal -> getPos_d(), sizeof(Vector3) * num * numReplicas, cudaMemcpyDeviceToHost));
 
 	// Write the restart file (once again)
 	for (int repID = 0; repID < numReplicas; ++repID)
@@ -912,7 +921,7 @@ void GrandBrownTown::addParticles(int n, int typ, Vector3 r0, Vector3 r1, float
 		type[i] = typ;
 		num++;
 		// Update the cell decomposition
-		internal->updateNumber(pos, type, num); /* RBTODO: unsure if type arg is ok */
+		internal->updateNumber(num); /* RBTODO: unsure if type arg is ok */
 	}
 }
 
@@ -964,20 +973,20 @@ void GrandBrownTown::updateReservoirs() {
 	} // end particle loop
 
 	if (numberChange)
-		internal->updateNumber(pos, type, num);
+		internal->updateNumber(num);
 }
 
 // -----------------------------------------------------------------------------
 // Allocate memory on GPU(s) and copy to device
 void GrandBrownTown::copyToCUDA() {
 	const size_t tot_num = num * numReplicas;
-	gpuErrchk(cudaMalloc(&pos_d, sizeof(Vector3) * tot_num));
+	/*gpuErrchk(cudaMalloc(&pos_d, sizeof(Vector3) * tot_num));
 	gpuErrchk(cudaMemcpyAsync(pos_d, pos, sizeof(Vector3) * tot_num,
 														cudaMemcpyHostToDevice));
 
 	gpuErrchk(cudaMalloc(&forceInternal_d, sizeof(Vector3) * num * numReplicas));
 	gpuErrchk(cudaMemcpyAsync(forceInternal_d, forceInternal, sizeof(Vector3) * tot_num,
-														cudaMemcpyHostToDevice));
+														cudaMemcpyHostToDevice));*/
 
 	gpuErrchk(cudaMalloc(&randoGen_d, sizeof(Random)));
 	gpuErrchk(cudaMemcpyAsync(randoGen_d, randoGen, sizeof(Random),
@@ -985,7 +994,7 @@ void GrandBrownTown::copyToCUDA() {
 	gpuErrchk(cudaDeviceSynchronize());
 }
 
-void GrandBrownTown::createBondList()
+/*void GrandBrownTown::createBondList()
 {
 	size_t size = (numBonds / 2) * numReplicas * sizeof(int3);
 	gpuErrchk( cudaMalloc( &bondList_d, size ) );
@@ -998,4 +1007,4 @@ void GrandBrownTown::createBondList()
 			<< "Displaying: bondList_d["<< i <<"].z = " << bondList[i].z << ".\n";
 
 	}
-}
+}*/
diff --git a/GrandBrownTown.h b/GrandBrownTown.h
index dc8c3be..57bd2c1 100644
--- a/GrandBrownTown.h
+++ b/GrandBrownTown.h
@@ -98,7 +98,6 @@ private:
 	void getDebugForce();
 	
 	void copyToCUDA();
-	void createBondList();
 
 public:
 	// Compute the current in nanoamperes.
@@ -155,18 +154,17 @@ private:
 	Vector3* rbPos; 		// rigid body positions
 	
 	// CUDA device variables
-	Vector3 *pos_d, *forceInternal_d, *force_d;
-	int *type_d;
+	//Vector3 *pos_d, *forceInternal_d, *force_d;
+	//int *type_d;
 	BrownianParticleType **part_d;
 	BaseGrid *sys_d, *kTGrid_d;
 	Random *randoGen_d;
-	Bond* bonds_d;
-	int2* bondMap_d;
-	Exclude* excludes_d;
-	int2* excludeMap_d;
-	Angle* angles_d;
-	Dihedral* dihedrals_d;
-	int3 *bondList_d;
+	//Bond* bonds_d;
+	//int2* bondMap_d;
+	//Exclude* excludes_d;
+	//int2* excludeMap_d;
+	//Angle* angles_d;
+	//Dihedral* dihedrals_d;
 
 	// System parameters
 	String outputName;
diff --git a/runBrownTown.cpp b/runBrownTown.cpp
index 49b272d..ebd1eea 100644
--- a/runBrownTown.cpp
+++ b/runBrownTown.cpp
@@ -119,7 +119,9 @@ int main(int argc, char* argv[]) {
 	Configuration config(configFile, replicas, debug);
 	// GPUManager::set(0);
 	GPUManager::set(gpuID);
+	//MLog: this copyToCUDA function (along with the one in GrandBrownTown.cpp) was split into pieces to allocate memory into the ComputeForce, due to the location of this call we may get some memory error as a ComputeForce class isn't allocated until later on.
 	config.copyToCUDA();
+	
 
 	GrandBrownTown brown(config, outArg, randomSeed,
 			debug, imd_on, imd_port, replicas);
-- 
GitLab