diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index de54914892a0718327a8cc30e81374c7c93858f9..0000000000000000000000000000000000000000
--- a/.gitignore
+++ /dev/null
@@ -1,11 +0,0 @@
-*.o
-.svn
-*~
-.backup
-TAGS
-.dir-locals.el
-BD_example*
-arbd
-runBrownCUDA
-arbd*
-*.tar
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8904dc51bd97a0732600cdbbbf969e487ee7e8c8..22cecac16f5694c0e7b7610e42a75b611180690b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,6 +7,7 @@ endif()
 
 # option(USE_CUDA "Use CUDA" ON)
 set(USE_CUDA ON)
+set(USE_MPI OFF)  # hard-coded rather than exposed via option(), same as USE_CUDA above
 
 ## specify the C++ standard
 set(CMAKE_CXX_STANDARD 14)
@@ -97,40 +98,13 @@ set(CMAKE_BUILD_RPATH "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}")
 
 # set(CMAKE_VERBOSE_MAKEFILE True)
 add_executable("${PROJECT_NAME}" src/arbd.cpp
-  src/Configuration.cpp
-  src/FlowForce.cpp
-  src/GPUManager.cpp
-  src/Scatter.cpp
+  src/ParticlePatch.cpp
+  src/Integrator.cpp
+  src/Integrator/CPU.cpp
+  src/Integrator/CUDA.cu
   src/SignalManager.cpp
-  src/WKFUtils.cpp
-  src/Angle.cu
-  src/BaseGrid.cu
-  src/BrownianParticle.cu
-  src/BrownianParticleType.cpp
-  src/RigidBodyController.cu
-  src/RigidBody.cu
-  src/RigidBodyGrid.cu
-  src/RigidBodyType.cu
-  src/CellDecomposition.cu
-  src/ComputeForce.cuh
-  src/ComputeForce.cu
-  src/ComputeGridGrid.cuh
-  src/ComputeGridGrid.cu
-  src/CudaUtil.cu
-  src/CudaUtil.cuh
-  src/Dihedral.cu
-  src/Exclude.cu
-  src/GrandBrownTown.cu
-  src/GrandBrownTown.cuh
-  src/imd.cpp
-  src/vmdsock.cpp
-  src/JamesBond.cu
-  src/RandomCUDA.cu
-  src/Reservoir.cu
-  src/TabulatedAngle.cu
-  src/TabulatedDihedral.cu
-  src/TabulatedMethods.cuh
-  src/TabulatedPotential.cu
+  src/GPUManager.cpp
+  src/SimManager.cpp
   src/useful.cu
   )
 
diff --git a/README b/README
new file mode 100644
index 0000000000000000000000000000000000000000..aff911c707105ebeef2c0f3c213e8d6827eb0dde
--- /dev/null
+++ b/README
@@ -0,0 +1,112 @@
+/==========================================================\
+| Atomic Resolution Brownian Dynamics (ARBD) - beta Oct 19 |
+\==========================================================/
+
+Brownian dynamics (BD) simulation is a method for studying biomolecules, ions, and
+nanomaterials that balances detail with computational efficiency.
+
+ARBD supports tabulated non-bonded and bonded interactions between BD
+particles that can also be influenced by grid-specified
+potentials. Uniquely, ARBD also allows grid-specified densities and
+potentials to be associated with rigid body particles that rotate and
+translate to represent larger molecules. Most importantly, the code is
+designed to run quickly on modern NVIDIA GPUs.
+
+ARBD is a rewrite of the BrownianMover code, moving almost all computations to
+the GPU and enabling grid-specified particle models. Please be aware that ARBD
+is being actively developed and is offered without warranty.
+
+
+/==========\
+| Building |
+\==========/
+
+To build, please run `make` in the src directory.
+
+If your CUDA toolkit is installed in a nonstandard location, you may specify
+that location using the CUDA_PATH environment variable. For example:
+make CUDA_PATH=/nonstandard/path/to/cuda
+
+Note that ARBD has been developed using CUDA-8.0 and targets NVIDIA GPUs featuring
+6.0 compute capability. The code should work with devices with compute capability >=2.0,
+but there are no guarantees.
+
+Older versions of CUDA are not compatible with SM 6.0, so you may need to change
+the SMS variable in the makefile, or specify it as an argument to make.
+
+
+/==============\
+| Installation |
+\==============/
+
+Please explore the examples in the 'tests' directory.
+
+For example, try the following commands:
+
+cd tests/argon-small
+mkdir output
+../../src/arbd BrownDyn.bd output/BrownDyn > output/BrownDyn.log
+
+You may use the '-g n' option to specify the n-th GPU on your machine, counting from 0.
+
+If compilation or linking fails, we recommend running `make clean` to
+remove stale object files before rebuilding. We have occasionally seen
+CUDA-related errors in binaries built in a "dirty" environment.
+
+
+/========\
+| Citing |
+\========/
+
+If you publish results obtained using ARBD, please cite the following manuscripts:
+
+"DNA base-calling from a nanopore using a Viterbi algorithm"
+Winston Timp, Jeffrey Comer, and Aleksei Aksimentiev
+Biophys J 102(10) L37-9 (2012)
+
+"Predicting the DNA sequence dependence of nanopore ion current using atomic-resolution Brownian dynamics"
+Jeffrey Comer and Aleksei Aksimentiev.
+J Phys Chem C Nanomater Interfaces 116:3376-3393 (2012).
+
+"Atoms-to-microns model for small solute transport through sticky nanochannels"
+Rogan Carr, Jeffrey Comer, Mark D. Ginsberg, and Aleksei Aksimentiev
+Lab Chip 11(22) 3766-73 (2011)
+
+
+/=========\
+| Authors |
+\=========/
+
+ARBD is developed by the Aksimentiev group (http://bionano.physics.illinois.edu)
+as a part of the NIH Center for Macromolecular Modeling and Bioinformatics
+(http://www.ks.uiuc.edu/).
+
+Please direct questions or problems to Christopher Maffeo (email below).
+
+Christopher Maffeo <cmaffeo2@illinois.edu>
+Han-yi Chao
+Jeffrey Comer
+Max Belkin
+Emmanual Guzman
+Justin Dufresne
+Terrance Howard
+
+
+/====================\
+| Outstanding issues |
+\====================/
+
+-- Not implemented --
+
+* There are no checks to ensure that pairlists are recalculated before
+  particles further than the pairlist distance move to within the
+  cutoff
+
+-- Bugs --
+
+* A large amount of GPU memory for pairlists is allocated statically,
+  which may cause out-of-memory crashes on older hardware
+
+* If the number of pairs in the system exceeds the length of the array
+  allocated for pairlists, the non-bonded kernel will try to access
+  forbidden regions of memory, causing a crash
diff --git a/README.md b/README.md
deleted file mode 100644
index 258dd3f1ed24b91ea48cfa746a788aba300f9d3e..0000000000000000000000000000000000000000
--- a/README.md
+++ /dev/null
@@ -1,110 +0,0 @@
-# Atomic Resolution Brownian Dynamics (ARBD) - Nov 22
-
-Brownian dynamics (BD) simulation is method for studying biomolecules,
-ions, and nanomaterials that balances detail with computational
-efficiency.
-
-ARBD supports tabulated non-bonded and bonded interactions between BD
-particles that can also be influenced by grid-specified
-potentials. Uniquely, ARBD also allows grid-specified densities and
-potentials to be associated with rigid body particles that rotate and
-translate to represent larger molecules. Most importantly, the code is
-designed to run quickly on modern NVIDIA GPUs.
-
-ARBD is a rewrite of the BrownianMover code, moving almost all
-computations to the GPU and enabling grid-specified particle
-models. Please be aware that ARBD is being actively developed and is
-offered without warranty.
-
-
-## Building
-
-### Dependencies
-
-Only tested on Linux with:
-  - CMake >= 3.9
-  - gcc >= 4.9
-  - cuda >= 9.0
-
-### Build process
-
-From the root arbd directory (where this README is found), run:
-```
-cmake -S src -B build &&
-(
-  cd build
-  make -j
-)
-```
-
-If your CUDA toolkit is installed in a nonstandard location that CMake
-is unable to find, you may provide use the environement variable
-`CMAKE_CUDA_COMPILER` to specify the path to nvcc. You may also find
-it neccesary to set the environment variable `CUDA_INCLUDE_DIRS` if
-compilation fails due to the compiler being unable to find <cuda.h>.
-
-Note that ARBD has been developed using CUDA-9.0 and targets NVIDIA
-GPUs featuring 6.0 compute capability. The code should work with
-devices with compute capability >=2.0, but there are no guarantees.
-
-Older versions of CUDA are not compatible with SM 6.0, so you may need
-to change the SMS variable in the makefile, or specify it as an
-argument to make.
-
-
-## Usage
-
-Please explore the examples in the 'tests' directory.
-
-For example, try the following commands:
-
-cd tests/argon-small
-mkdir output
-../../src/arbd BrownDyn.bd output/BrownDyn > output/BrownDyn.log
-
-You may use the '-g n' option to specify the n-th GPU on your machine,
-counting from 0.
-
-## Citing
-
-If you publish results obtained using ARBD, please cite the
-following manuscripts:
-
-"DNA base-calling from a nanopore using a Viterbi algorithm"
-Winston Timp, Jeffrey Comer, and Aleksei Aksimentiev
-Biophys J 102(10) L37-9 (2012)
-
-"Predicting the DNA sequence dependence of nanopore ion current using atomic-resolution Brownian dynamics"
-Jeffrey Comer and Aleksei Aksimentiev.
-J Phys Chem C Nanomater Interfaces 116:3376-3393 (2012).
-
-"Atoms-to-microns model for small solute transport through sticky nanochannels"
-Rogan Carr, Jeffrey Comer, Mark D. Ginsberg, and Aleksei Aksimentiev
-Lab Chip 11(22) 3766-73 (2011)
-
-
-## Authors
-
-ARBD is developed by the Aksimentiev group
-(http://bionano.physics.illinois.edu) as a part of the NIH Center for
-Macromolecular Modeling and Bioinformatics (http://www.ks.uiuc.edu/).
-
-Please direct questions or problems to Chris.
-
-Christopher Maffeo <cmaffeo2@illinois.edu>
-Han-yi Chao
-Jeffrey Comer
-Max Belkin
-Emmanual Guzman
-Justin Dufresne
-Terrance Howard
-
-
-## Outstanding issues
-
-* There are no checks to ensure that pairlists are recalculated before
-  particles further than the pairlist distance move to within the
-  cutoff
-
-* A large amount of GPU memory for pairlists is allocated statically,
-  which may cause out-of-memory crashes in older hardware
diff --git a/doc/Makefile b/doc/Makefile
deleted file mode 100644
index 48deba2cf38b75370a0ab57e7001e6ede4cfda2b..0000000000000000000000000000000000000000
--- a/doc/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-
-arbd-user-guide.pdf: arbd-user-guide.tex
-	pdflatex $<
diff --git a/doc/arbd-user-guide.tex b/doc/arbd-user-guide.tex
deleted file mode 100644
index a7de12374e4cb6dcfe76f125eae21ad5bda8349c..0000000000000000000000000000000000000000
--- a/doc/arbd-user-guide.tex
+++ /dev/null
@@ -1,327 +0,0 @@
-\documentclass[10pt]{article}
-% \documentclass[10pt,twocolumn]{article}
-\usepackage{achemso,caption,graphicx}
-\usepackage[margin=2.5cm]{geometry}
-% \setlength{\columnsep}{0.8cm}
-
-\captionsetup{font=small}
-
-\usepackage[version=3]{mhchem} % Formula subscripts using \ce{}
-\usepackage{color}             % for comments in color
-\usepackage{nameref}
-
-%% \usepackage{sectsty}
-%% \sectionfont{\large}
-\usepackage[compact,small]{titlesec}
-% \titlespacing*{\section}{0pt}{10pt}{5pt}
-% \titleformat{\section}{small}
-
-\usepackage{listings}
-
-% MACROS
-% \input{macros}
-\newcommand{\ARBDFull}{Atomic Resolution Browian Dynamics}
-\newcommand{\ARBD}{ARBD}
-% \newcommand{\code}[1]{\colorbox{blue}{\lstinline[basicstyle=\ttfamily\color{white}]#1}}
-\newcommand{\code}[1]{\texttt{#1}}
-
-
-% TITLE
-% \title{\hrule\vspace{0.1cm}\Large \ARBDFull\ User's Guide \vspace{0.0cm}\hrule}
-\title{\Large \ARBDFull\ User's Guide}
-\author{\normalsize The Center for Macromolecular Modeling and Bioninformatics}
-% \date{}
-
-\begin{document}
-\maketitle
-% \clearpage
-
-% ABSTRACT
-%% \begin{abstract}
-%% \end{abstract}
-
-\section{Introduction}
-
-\ARBDFull\ (ARBD) is a gpu-accelerated coarse-grained biomolecular simulation package.
-ARBD currently supports models composed of two basic units: point-like particles, and rigid body particles.
-Point particles can be connected through bonded potentials to construct polymers.
-
-% TODO: NVT ensemble, integrators
-
-ARBD is invoked from the command line and requires a configuration file for specification of the system.
-Currently ARBD's configuration file parser does not include a scripting interface or mathematical operations, so advanced users may want to script the generation of configuration files.
-An extensible python-based system for generating ARBD configuration files (including coordinate files) is provided in the examples directory.
-
-%% ARBD is offered as a Beta release and, so far, has 
-%%  only been tested on Unix platforms. 
-%% Nevertheless compilation should...
-
-ARBD uses the following units: kcal/mol for energy, ns for time, \AA\ for distance, and K for temperature.
-
-% \section{Compilation}
-
-\section{Configuration file}
-The configuration file is read by ARBD line-by-line. 
-The first space-delimited word of each line is expected to be a case-sensitive keyword, followed by arguments.
-These keywords configure the simulation engine and describe the simulation model.
-
-Most keywords are global and can be placed anywhere in the file and will produce the same effect.
-When a global keyword is repeated, the last use of the keyword overrides earlier usages.
-The keywords \textit{particle} and \textit{rigidBody} can be used multiple times, each declaring a new type of particle.
-Certain subsequent keywords such as \textit{mass} or \textit{num} describe the previously declared particle.
-Multiple uses of these between \textit{particle} and \textit{rigidBody} keywords.
-
-%% Args: keyword, accepted values, default value, description
-\newcommand{\keyword}[4]{\noindent\textbf{#1}\\\textit{Expected Value:} #2.  \textit{Default Value:} #3\\#4\\}
-
-\subsection{General simulation parameters}
-
-\keyword{outputName}{String}{``out''}
-{The file prefix for all output}
-
-\subsection{Simulation engine}
-
-
-\keyword{steps}{Integer}{100}
-{The number of steps the simulation will run.}
-
-\keyword{timestep}{Decimal}{10e-5}
-{The timestep for the integrator in nanoseconds.}
-
-\keyword{rigidBodyGridGridPeriod}{Integer}{1}
-{The number of steps between re-evalulation of the force between rigid body objects.}
-
-\keyword{seed}{Integer}{Number determined from the system clock}
-{The seed for the random number generator.}
-
-\keyword{origin}{Three decimals}{Determined from first point-particle grid}
-{The position of the corner of the simulation system.}
-
-\keyword{systemSize}{Three decimals}{Determined from first point-particle grid}
-{The size of the (orthonormal) simulation system. Please note that at this time, periodic boundaries are only used by point particles and not rigid body particles. For simulations employing rigid body particles, a confining ``gridFile'' is recommended. }
-
-\keyword{basis1}{Three decimals}{Determined from first point-particle grid}
-{The first basis vector for a possibly non-orthonormal simulation system. Overrides the systemSize parameter.}
-
-\keyword{basis2}{Three decimals}{Determined from first point-particle grid}
-{The second basis vector for a possibly non-orthonormal simulation system. Overrides the systemSize parameter.}
-
-\keyword{basis3}{Three decimals}{Determined from first point-particle grid}
-{The third basis vector for a possibly non-orthonormal simulation system. Overrides the systemSize parameter.}
-
-\keyword{temperature}{Decimal}{295 K}
-{The temperature of the system.}
-
-\keyword{temperatureGrid}{Path to Dx file}{None}
-{The location-dependent temperature of in the system.}
-
-\keyword{scaleIMDForce}{Decimal}{1}
-{Scaling factor for forces communicated to ARBD using the IMD protocol.}
-
-\keyword{outputPeriod}{Integer}{200}
-{Steps between writing trajectory output.}
-
-\keyword{outputFormat}{``dcd'', ``pdb'' or ``traj''}{``dcd''}
-{The format for the trajectory output file.}
-
-\subsection{Non-bonded interactions}
-\keyword{cutoff}{Decimal}{10 \AA}
-{The cutoff for non-bonded interactions.}
-
-\keyword{switchLen}{Decimal}{2 \AA}
-{The non-bonded potential is smoothly truncated (by linear scaling) between cutoff$-$switchLen and cutoff.}
-
-\keyword{decompPeriod}{Integer}{10}
-{Number of steps between domain decomposition of point particles and creation of pairlists. The current implementation of pairlist creation is quite slow, so users are advised to tune this parameter to optimize performance. WARNING: ARBD does not check whether particles have moved too far between decomposition steps, thus invalidating the pairlist.}
-
-\keyword{pairlistDistance}{Decimal}{2 \AA}
-{The pairlist contains all pairs of point particles within this distance plus the cutoff. Interactions are only calculated for pairs of particles in the pairlist.}
-
-%% \keyword{tabulatedPotential}{0 or 1}{0 \AA}
-%% {}
-
-\keyword{tabulatedFile}{String}{None}
-{A string following the format \textit{path/to/file}@\textit{indexA}@\textit{indexB} that indicates that the particle types corresponding to \textit{indexA} and \textit{indexB} interact through the tabulated potential desribed in \textit{path/to/file}. 
-The tabulated potential file should contain two columns giving the $r$ coordinates (in \AA) and the potential (in kcal/mol).
-The $r$ coordinates are assumed to start at 0 and be uniformly spaced. 
-The indeces \textit{indexA} and \textit{indexB} are 0-based integers.}
-
-\keyword{inputBonds}{Filename}{None}
-{File specifying the bonds in the system.
-Each line of the file should follow the format: \\\noindent
-\code{BOND [ADD|REPLACE] \textit{indexA} \textit{indexB} \textit{bondFile}} \\\noindent
-The contents of \textit{bondFile} should be columns giving the $r$ coordinates (in \AA) and the potential (in kcal/mol).
-The $r$ coordinates are assumed to start at 0 and be uniformly spaced. 
-}
-
-
-%% \keyword{tabulatedBondFile}{Filename}{None}
-%% {Declares a file used in tabulated bond The tabulated bond file should contain two columns giving the $r$ coordinates (in \AA) and the potential (in kcal/mol).
-%% The $r$ coordinates are assumed to start at 0 and be uniformly spaced. 
-%% The indeces \textit{indexA} and \textit{indexB} are 0-based integers.}
-
-\keyword{inputAngles}{Filename}{None}
-{File specifying the angles in the system.
-Each line of the file should follow the format: \\\noindent
-\code{ANGLE \textit{indexA} \textit{indexB} \textit{indexC} \textit{angleFile}} \\ \noindent
-The tabulated angle file should contain two columns giving the angle coordinates (in degrees from 0 to 180) and the potential (in kcal/mol).
-The angle coordinates are assumed to be uniformly spaced. 
-}
-
-\keyword{inputDihedrals}{Filename}{None}
-{File specifying the dihedral angles in the system.
-Each line of the file should follow the format: \\\noindent
-\code{DIHEDRAL \textit{indexA} \textit{indexB} \textit{indexC} \textit{indexD} \textit{dihedralFile}} \\ \noindent
-The contents of \textit{dihedralFile} should be two columns that give the angle coordinates (in degrees from -180 to 180) and the potential (in kcal/mol).
-The dihedral angle coordinates are assumed to be uniformly spaced.
-}
-
-\keyword{inputExcludes}{Filename}{None}
-{File specifying the non-bonded exclusions in the system.
-Each line of the file should follow the format: \\ \noindent
-\code{EXCLUDE \textit{indexA} \textit{indexB}}  \\ \noindent
-}
-
-
-\subsection{Point particles}
-
-\keyword{particle}{Unique string}{None}
-{Declares and provides a name for a new point particle type. This keyword should precede the keywords described in this subsection.}
-
-\keyword{num}{Integer}{0}
-{The number of particles of the preceding type in the system. This parameter will be overridden by the contents of the {\textbf inputParticles} file, if provided.}
-
-\keyword{diffusion}{Decimal}{0}
-{The diffusion coefficient for the particle in \AA$^2$/ns. Used to derive a Langevin coefficient when a Langevin integrator is utilized.}
-
-\keyword{diffusionGridFile}{Name of Dx file}{None}
-{The location-dependent diffusion coefficient for the particle in \AA$^2$/ns. The grid should span the entire system.}
-
-\keyword{gridFile}{Name of Dx file}{None}
-{The location-dependent environment potential affecting the preceding particle type in kcal/mol. The potential is nominally zero outside the boundary of the grid potential.}
-
-\keyword{gridFileScale}{Decimal}{1.0}
-{A scaling factor to apply to the ``gridFile'' potential.}
-
-\keyword{rigidBodyPotential}{Keyword}{None}
-{The location-dependent environment potential affecting the preceding particle type in kcal/mol. The potential is nominally zero outside the boundary of the grid potential.
-This option may be specified multiple times. See ``potentialGrid'' below  in the ``Rigid body particles'' section for additional details.
-}
-
-%% TODO: interpolation scheme
-
-
-\subsection{Rigid body particles}
-
-\keyword{rigidBody}{Unique string}{None}
-{Declares and provides a name for a new rigid body particle type. This keyword should precede the keywords described in this subsection.}
-
-\keyword{num}{Integer}{0}
-{The number of particles of the preceding type in the system. This parameter adds to the particles contained in the {\textbf inputParticles} file, if provided.}
-
-\keyword{diffusion}{Decimal}{0}
-{The diffusion coefficient for the particle in \AA$^2$/ns. Used to derive a Langevin coefficient when a Langevin integrator is utilized.}
-
-%% \keyword{diffusionGridFile}{Name of Dx file}{None}
-%% {The location-dependent diffusion coefficient for the particle in \AA$^2$/ns. The grid should span the entire system.}
-
-\keyword{gridFile}{Key and name of Dx file}{None}
-{A 3-dimensional potential acting on the rigid body, specified in kcal/mol.
-The potential is associated with a user-specified key.
-Each voxel of a ``densityGrid'' in the rigid body using the same key will produce a force and torque due to the potential.
-The potential described by gridFile is fixed in space and can be considered an environement potential. 
-This option may be specified multiple times with different keys.
-}
-
-\keyword{potentialGrid}{Key and name of Dx file}{None}
-{A 3-dimensional potential associated with the rigid body particle in kcal/mol.
-Any point particle that declares the same key through the ``rigidBodyPotential'' keyword will experience forces due to the potential.
-Similarly each voxel of a ``densityGrid'' in another rigid body using the same key will experience a force due to the potential.
-All forces applied by the potential are inverted and applied to the rigid body along with a corresponding torque.
-This option may be specified multiple times with different keys.
-}
-
-\keyword{densityGrid}{Key and name of Dx file}{None}
-{A 3-dimensional density associated with the rigid body particle. 
-Voxels of this density will experience forces and torques from ``gridFile'' potentials with the same key ascribed to the same rigid body and from ``potentialGrid'' potentials with the same key in different rigid body particles.
-This option may be specified multiple times with different keys.
-Please note that in the beta release of ARBD, the densityGrid is not scaled by the volume of each voxel. This behavior may change in the future.
-}
-
-\keyword{pmfScale}{Key and decimal}{1.0}
-{A scaling factor to apply to the ``gridFile'' potential of the same key.}
-
-\keyword{potentialGridScale}{Key and decimal}{1.0}
-{A scaling factor to apply to the ``potentialGrid'' of the same key.}
-
-\keyword{densityGridScale}{Key and decimal}{1.0}
-{A scaling factor to apply to the ``densityGrid'' of the same key.}
-
-
-
-\subsection{Coordinates}
-
-%% There are three ways to specity the initial coordinates for point particles.
-%% Each of the keywords below overrides the keywords above.
-%% If none of the keywords is provided, ARBD places particles randomly in the lowest regions of the gridFile potential for the particle type. 
-
-\keyword{inputCoordinates}{Filename}{None}
-{Name of a file specifying point particle positions. 
-Each line of the file should hold three floating point values for $x$, $y$ and $z$.
-Coordinates are assigned in the order of particle index.
-By default, all particles of the first type defined in the ARBD configuration file have the first block of indices, particles of the second type defined in the file have the second block, etc.
-}
-
-%% \keyword{inputParticle}{Filename}{None}
-%% %% TODO
-%% {Name of a file specifying point particle positions. 
-%% Each line of the file should conform to the following: \\ \noindent
-%% \code{ATOM \textit{index} \textit{name} \textit{x-coord Y-coord Z-coord}} \\ \noindent
-%% where \textit{index} 
-%% }
-
-\keyword{restartCoordinates}{Filename}{None}
-{Loads a point particle restart file written by ARBD.
-The file describes the particles in order, each line of the file describes a point particle in order as follows: \\ \noindent
-\code{\textit{particle\_type} \textit{x} \textit{y} \textit{z}} \\ \noindent
-This keyword overrides the \code{inputCoordinates} and \code{num} keywords.
-}
-
-\keyword{copyReplicaCoorinates}{Integer}{1}
-{If greater than zero, copy coordinates from the first replica to all other replicas. 
-Coordinates for replicas otherwise can be provided in a \code{inputCoordinates} 
-% or \code{restartCoordinates}
- file containing lines for all replicas.
-%% One can concatenate the per-replica restart files generated by ARBD to generate a \code{restartCoordinates} file.
-}
-
-\keyword{inputRBCoordinates}{Filename}{None}
-{Reads rigid body coordinates from \textit{Filename}.
-Each line of the file specifies a rigid body particle's center (x y z) followed by the 3-by-3 orientation matrix (xx xy xz yx yy yz zx zy zz).
-The coordinates are assigned to the rigid body particles in order.
-% By default, rigid body particles are placed randomly in the simulation system.
-}
-
-%% \subsection{Deprecated}
-
-%% numberFluct
-%% numberFluctPeriod
-%% interparticleForce
-%% fullLongRange
-%% coulombConst
-%% electricField
-%% numCap
-
-%% outputEnergyPeriod
-%% currentSegmentZ
-%% forceXGridFile
-%% forceYGridFile
-%% forceZGridFile
-
-%% charge
-%% radius
-%% eps
-%% reservoirFile
-
-%% tabulatedPotential
-\end{document}
diff --git a/src/Angle.cu b/src/Angle.cu
deleted file mode 100644
index 220118759eb45508b49bea6f02b24eb9ac820825..0000000000000000000000000000000000000000
--- a/src/Angle.cu
+++ /dev/null
@@ -1,29 +0,0 @@
-// Angle.cu
-// Copyright Justin Dufresne and Terrance Howard, 2013
-
-#include "Angle.h"
-
-void Angle::print()
-{
-//	printf("about to print fileName %p\n", fileName.val());
-//	fileName.print();
-	printf("ANGLE (%d %d %d) %s\n", ind1, ind2, ind3, fileName.val());
-}
-
-String Angle::toString()
-{
-	return String("ANGLE ") + ind1 + " " + ind2 + " " + ind3 + " " + fileName;
-}
-
-// void BondAngle::print()
-// {
-// //	printf("about to print fileName %p\n", fileName.val());
-// //	fileName.print();
-//     printf("BONDANGLE (%d %d %d) %s; %s; %s\n", ind1, ind2, ind3, angleFileName.val(), bondFileName1.val(), bondFileName2.val());
-// }
-
-// String BondAngle::toString()
-// {
-//     return String("BONDANGLE ") + ind1 + " " + ind2 + " " + ind3 + " " + angleFileName + " " + bondFileName1 + " " + bondFileName2;
-// }
-
diff --git a/src/Angle.h b/src/Angle.h
deleted file mode 100644
index e03a77914ec9a22eff5a67df20f4d79416d32a88..0000000000000000000000000000000000000000
--- a/src/Angle.h
+++ /dev/null
@@ -1,114 +0,0 @@
-// Angle.h
-// Copyright Justin Dufresne and Terrance Howard, 2013
-
-#ifndef ANGLE_H
-#define ANGLE_H
-
-#ifdef __CUDACC__
-    #define HOST __host__
-    #define DEVICE __device__
-#else
-    #define HOST 
-    #define DEVICE 
-#endif
-
-#include "useful.h"
-#include "BaseGrid.h"
-#include <cuda.h>
-
-class Angle
-{
-public:
-	Angle() {}
-	Angle(int ind1, int ind2, int ind3, String fileName) :
-	ind1(ind1), ind2(ind2), ind3(ind3), fileName(fileName), tabFileIndex(-1) { }
-	
-	int ind1, ind2, ind3;
-	String fileName;
-	// tabFileIndex will be assigned after ComputeForce loads the
-	// TabulatedAnglePotentials. The tabefileIndex is used by ComputeForce to
-	// discern which TabulatedAnglePotential this Angle uses.
-	int tabFileIndex;
-
-	inline Angle(const Angle& a) : ind1(a.ind1), ind2(a.ind2), ind3(a.ind3),
-		fileName(a.fileName),
-		tabFileIndex(a.tabFileIndex) { }
-
-	HOST DEVICE inline float calcAngle(Vector3* pos, BaseGrid* sys) {
-		const Vector3& posa = pos[ind1];
-		const Vector3& posb = pos[ind2];
-		const Vector3& posc = pos[ind3];
-		const float distab = sys->wrapDiff(posa - posb).length();
-		const float distbc = sys->wrapDiff(posb - posc).length();
-		const float distac = sys->wrapDiff(posc - posa).length();	
-		float cos = (distbc * distbc + distab * distab - distac * distac)
-							  / (2.0f * distbc * distab);
-		if (cos < -1.0f) cos = -1.0f;
-		else if (cos > 1.0f) cos = 1.0f;
-		float angle = acos(cos);
-		return angle;
-	}	
-
-	HOST DEVICE inline int getIndex(int index) {
-		if (index == ind1) return 1;
-		if (index == ind2) return 2;
-		if (index == ind3) return 3;
-		return -1;
-	}
-
-	String toString();
-	void print();
-};
-
-// TODO consolidate with Angle using inheritence
-class BondAngle
-{
-public:
-	BondAngle() {}
-    BondAngle(int ind1, int ind2, int ind3, int ind4, String angleFileName1, String bondFileName, String angleFileName2) :
-	ind1(ind1), ind2(ind2), ind3(ind3), ind4(ind4), angleFileName1(angleFileName1), bondFileName(bondFileName), angleFileName2(angleFileName2), tabFileIndex1(-1), tabFileIndex2(-1), tabFileIndex3(-1) { }
-
-	int ind1, ind2, ind3, ind4;
-
-	String angleFileName1;
-	String bondFileName;
-	String angleFileName2;
-	// tabFileIndex will be assigned after ComputeForce loads the
-	// TabulatedAnglePotentials. The tabefileIndex is used by ComputeForce to
-	// discern which TabulatedAnglePotential this Angle uses.
-	int tabFileIndex1;
-	int tabFileIndex2;
-	int tabFileIndex3;
-
-    inline BondAngle(const BondAngle& a) : ind1(a.ind1), ind2(a.ind2), ind3(a.ind3), ind4(a.ind4),
-					   angleFileName1(a.angleFileName1), bondFileName(a.bondFileName), angleFileName2(a.angleFileName2),
-					   tabFileIndex1(a.tabFileIndex1), tabFileIndex2(a.tabFileIndex2), tabFileIndex3(a.tabFileIndex3) { }
-
-	// HOST DEVICE inline float calcAngle(Vector3* pos, BaseGrid* sys) {
-	// 	const Vector3& posa = pos[ind1];
-	// 	const Vector3& posb = pos[ind2];
-	// 	const Vector3& posc = pos[ind3];
-	// 	const float distab = sys->wrapDiff(posa - posb).length();
-	// 	const float distbc = sys->wrapDiff(posb - posc).length();
-	// 	const float distac = sys->wrapDiff(posc - posa).length();
-	// 	float cos = (distbc * distbc + distab * distab - distac * distac)
-	// 						  / (2.0f * distbc * distab);
-	// 	if (cos < -1.0f) cos = -1.0f;
-	// 	else if (cos > 1.0f) cos = 1.0f;
-	// 	float angle = acos(cos);
-	// 	return angle;
-	// }
-
-	// HOST DEVICE inline int getIndex(int index) {
-	// 	if (index == ind1) return 1;
-	// 	if (index == ind2) return 2;
-	// 	if (index == ind3) return 3;
-	// 	if (index == ind4) return 4;
-	// 	return -1;
-	// }
-
-	// String toString();
-	// void print();
-};
-
-#endif
diff --git a/src/BaseGrid.cu b/src/BaseGrid.cu
deleted file mode 100644
index 0a8d9701061761b2a8105f3d95d0d2227bcdd91e..0000000000000000000000000000000000000000
--- a/src/BaseGrid.cu
+++ /dev/null
@@ -1,678 +0,0 @@
-
-//////////////////////////////////////////////////////////////////////
-// Grid base class that does just the basics.
-// Author: Jeff Comer <jcomer2@illinois.edu>
-
-#include "BaseGrid.h"
-#include <cuda.h>
-
-
-#define STRLEN 512
-
-// Initialize the variables that get used a lot.
-// Also, allocate the main value array.
-void BaseGrid::init() {
-	basisInv = basis.inverse();
-	size = nx*ny*nz;
-	val = new float[size];
-}
-BaseGrid::BaseGrid() {
-	BaseGrid tmp(Matrix3(),Vector3(),1,1,1);
-	val = new float[1];
-	*this = tmp;									// TODO: verify that this is OK
-	
-	// basis = Matrix3();
-	// origin = Vector3();
-	// nx = 1;
-	// ny = 1;
-	// nz = 1;
-	
-	// init();
-	// zero();
-}
-
-// The most obvious of constructors.
-BaseGrid::BaseGrid(Matrix3 basis0, Vector3 origin0, int nx0, int ny0, int nz0) {
-	basis = basis0;
-	origin = origin0;
-	nx = abs(nx0);
-	ny = abs(ny0);
-	nz = abs(nz0);
-	
-	init();
-	zero();
-}
-
-// Make an orthogonal grid given the box dimensions and resolution.
-BaseGrid::BaseGrid(Vector3 box, float dx) {
-	dx = fabsf(dx);
-	box.x = fabsf(box.x);
-	box.y = fabsf(box.y);
-	box.z = fabsf(box.z);
-
-	// Tile the grid into the system box.
-	// The grid spacing is always a bit smaller than dx.
-	nx = int(ceilf(box.x/dx));
-	ny = int(ceilf(box.y/dx));
-	nz = int(ceilf(box.z/dx));
-	if (nx <= 0) nx = 1;
-	if (ny <= 0) ny = 1;
-	if (nz <= 0) nz = 1;
-	basis = Matrix3(box.x/nx, box.y/ny, box.z/nz);
-	origin = -0.5f*box;
-
-	init();
-	zero();
-}
-
-// The box gives the system geometry.
-// The grid point numbers define the resolution.
-BaseGrid::BaseGrid(Matrix3 box, int nx0, int ny0, int nz0) {
-	nx = nx0;
-	ny = ny0;
-	nz = nz0;
-
-	// Tile the grid into the system box.
-	if (nx <= 0) nx = 1;
-	if (ny <= 0) ny = 1;
-	if (nz <= 0) nz = 1;
-	basis = Matrix3(box.ex()/nx, box.ey()/ny, box.ez()/nz);
-	origin = -0.5f*(box.ex() + box.ey() + box.ez());
-
-	init();
-	zero();
-}
-
-// The box gives the system geometry.
-// dx is the approx. resolution.
-// The grid spacing is always a bit larger than dx.
-BaseGrid::BaseGrid(Matrix3 box, Vector3 origin0, float dx) {
-	dx = fabs(dx);
-	
-	// Tile the grid into the system box.
-	// The grid spacing is always a bit larger than dx.
-	nx = int(floor(box.ex().length()/dx))-1;
-	ny = int(floor(box.ey().length()/dx))-1;
-	nz = int(floor(box.ez().length()/dx))-1;
-	if (nx <= 0) nx = 1;
-	if (ny <= 0) ny = 1;
-	if (nz <= 0) nz = 1;
-
-	basis = Matrix3(box.ex()/nx, box.ey()/ny, box.ez()/nz);
-	origin = origin0;
-
-	init();
-	zero();
-}
-
-// The box gives the system geometry.
-// dx is the approx. resolution.
-// The grid spacing is always a bit smaller than dx.
-BaseGrid::BaseGrid(Matrix3 box, float dx) {
-	dx = fabs(dx);
-	
-	// Tile the grid into the system box.
-	// The grid spacing is always a bit smaller than dx.
-	nx = int(ceilf(box.ex().length()/dx));
-	ny = int(ceilf(box.ey().length()/dx));
-	nz = int(ceilf(box.ez().length()/dx));
-	if (nx <= 0) nx = 1;
-	if (ny <= 0) ny = 1;
-	if (nz <= 0) nz = 1;
-
-	basis = Matrix3(box.ex()/nx, box.ey()/ny, box.ez()/nz);
-	origin = -0.5f*(box.ex() + box.ey() + box.ez());
-
-	init();
-	zero();
-}
-
-// Make an exact copy of a grid.
-BaseGrid::BaseGrid(const BaseGrid& g) {
-	nx = g.nx;
-	ny = g.ny;
-	nz = g.nz;
-	basis = g.basis;
-	origin = g.origin;
-	
-	init();
-	for (int i = 0; i < size; i++) val[i] = g.val[i];
-}
-
-BaseGrid BaseGrid::mult(const BaseGrid& g) {
-	for (int i = 0; i < size; i++) val[i] *= g.val[i];
-	return *this;
-}
-
-BaseGrid& BaseGrid::operator=(const BaseGrid& g) {
-	if(val != NULL) delete[] val;
-	val = NULL;
-	nx = g.nx;
-	ny = g.ny;
-	nz = g.nz;
-	basis = g.basis;
-	origin = g.origin;
-	
-	init();
-	for (int i = 0; i < size; i++) val[i] = g.val[i];
-
-	return *this;
-}
-
-
-// Make a copy of a grid, but at a different resolution.
-BaseGrid::BaseGrid(const BaseGrid& g, int nx0, int ny0, int nz0) : nx(nx0),  ny(ny0), nz(nz0) {
-	if (nx <= 0) nx = 1;
-	if (ny <= 0) ny = 1;
-	if (nz <= 0) nz = 1;
-
-	// Tile the grid into the box of the template grid.
-	Matrix3 box = g.getBox();
-	basis = Matrix3(box.ex()/nx, box.ey()/ny, box.ez()/nz);
-
-	origin = g.origin;
-	init();
-
-	// Do an interpolation to obtain the values.
-	for (int i = 0; i < size; i++) {
-		Vector3 r = getPosition(i);
-		val[i] = g.interpolatePotential(r);
-	}
-}
-
-// Read a grid from a file.
-BaseGrid::BaseGrid(const char* fileName) {
-		 // Open the file.
-	FILE* inp = fopen(fileName,"r");
-	if (inp == NULL) {
-		printf("ERROR BaseGrid::BaseGrid Couldn't open file %s.\n",fileName);
-		exit(-1);
-	}
-	//printf("Reading dx file %s...\n", fileName);
-	
-	size = 0;
-	nx = 0;
-	ny = 0;
-	nz = 0;
-	basis = Matrix3(1.0f);
-	origin = Vector3(0.0f);    
-
-	int n = 0;
-	float x, y, z;
-	char line[STRLEN];
-	int p, nRead;
-	int deltaCount = 0;
-	Vector3 base[3];
-	while (fgets(line, STRLEN, inp) != NULL) {
-		// Ignore comments.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-	
-		if (isInt(line[0]) && n < size) {
-// Read grid values.
-nRead = sscanf(line, "%f %f %f", &x, &y, &z);
-if (size > 0) {
-	switch(nRead) {
-	case 1:
-		val[n] = x;
-		n++;
-		if (n != size) {
-			printf("ERROR BaseGrid::BaseGrid Improperly formatted dx file %s.\n", fileName);
-			printf("line `%s'\n", line);
-		}
-		break;
-	case 2:
-		val[n] = x;
-		val[n+1] = y;
-		n += 2;
-		if (n != size) {
-			printf("ERROR BaseGrid::BaseGrid Improperly formatted dx file %s.\n", fileName);
-			printf("line `%s'\n", line);
-		}
-		break;
-	case 3:
-		val[n] = x;
-		val[n+1] = y;
-		val[n+2] = z;
-		n += 3;
-		break;
-	}
-}
-		} else if (len > 5) {
-// Read the grid parameters.
-char start[6];
-for (int i = 0; i < 5; i++) start[i] = line[i];
-start[5] = '\0';
-
-if(strcmp("origi", start) == 0) {
-	// Get an origin line.
-	p = firstSpace(line, STRLEN);
-	sscanf(&(line[p+1]), "%f %f %f", &x, &y, &z);
-	origin = Vector3(x, y, z);
-	//printf("Origin: %.12g %.12g %.12g\n", x, y, z);
-} else if(strcmp("delta", start) == 0) {
-	// Get a delta matrix line.
-	p = firstSpace(line, STRLEN);
-	sscanf(&(line[p+1]), "%f %f %f", &x, &y, &z);
-	base[deltaCount] = Vector3(x, y, z);
-	//printf("Delta %d: %.12g %.12g %.12g\n", deltaCount, x, y, z);
-	if (deltaCount < 2) deltaCount = deltaCount + 1;
-} else if(strcmp("objec", start) == 0) {
-	//printf("%s", line);
-	// Get the system dimensions.
-	if (line[7] != '1') continue;
-	int read = sscanf(line, "object 1 class gridpositions counts %d %d %d\n", &nx, &ny, &nz);
-	//printf("Size: %d %d %d\n", nx, ny, nz);
-	if (read == 3) {
-		size = nx*ny*nz;
-		val = new float[size];
-		zero();
-	}
-}
-		}
-	}
-	fclose(inp);
-
-	basis = Matrix3(base[0], base[1], base[2]);
-	basisInv = basis.inverse();
-	if (size == 0 || n != size) {
-		printf("ERROR BaseGrid::BaseGrid Improperly formatted dx file %s.\n",fileName);
-		printf("declared size: %d, items: %d\n", size, n);
-		printf("first value: %10g, final value: %.10g\n", val[0], val[n-1]);
-		exit(-1);
-	}
-}  
-
-// Write without comments.
-void BaseGrid::write(const char* fileName) const {
-	write(fileName, "");
-}
-
-// Writes the grid as a file in the dx format.
-void BaseGrid::write(const char* fileName, const char* comments) const {
-	// Open the file.
-	FILE* out = fopen(fileName,"w");
-	if (out == NULL) {
-		printf("ERROR BaseGrid::write Couldn't open file %s.\n",fileName);
-		exit(-1);
-	}
-
-	// Write the header.
-	fprintf(out, "# %s\n", comments);
-	fprintf(out, "object 1 class gridpositions counts %d %d %d\n", nx, ny, nz);
-	fprintf(out, "origin %.12g %.12g %.12g\n", origin.x, origin.y, origin.z);
-	fprintf(out, "delta %.12g %.12g %.12g\n", basis.exx, basis.eyx, basis.ezx);
-	fprintf(out, "delta %.12g %.12g %.12g\n", basis.exy, basis.eyy, basis.ezy);
-	fprintf(out, "delta %.12g %.12g %.12g\n", basis.exz, basis.eyz, basis.ezz);
-	fprintf(out, "object 2 class gridconnections counts %d %d %d\n", nx, ny, nz);
-	fprintf(out, "object 3 class array type float rank 0 items %d data follows\n", size);
-	
-	// Write the data.
-	int penultima = 3*(size/3);
-	int mod = size - penultima;
-
-	int i;
-	for (i = 0; i < penultima; i+=3) {
-		fprintf(out, "%.12g %.12g %.12g\n", val[i], val[i+1], val[i+2]);
-	}
-	if (mod == 1) {
-		fprintf(out, "%.12g\n", val[size-1]);
-	} else if (mod == 2) {
-		fprintf(out, "%.12g %.12g\n", val[size-2], val[size-1]);
-	}
-	fclose(out);
-}
-
-// Writes the grid data as a single column in the order:
-// nx ny nz ox oy oz dxx dyx dzx dxy dyy dzy dxz dyz dzz val0 val1 val2 ...
-void BaseGrid::writeData(const char* fileName) {
-	// Open the file.
-	FILE* out = fopen(fileName,"w");
-	if (out == NULL) {
-		printf("Couldn't open file %s.\n",fileName);
-		exit(-1);
-	}
-
-	fprintf(out, "%d\n%d\n%d\n", nx, ny, nz);
-	fprintf(out, "%.12g\n%.12g\n%.12g\n", origin.x, origin.y, origin.z);
-	fprintf(out, "%.12g\n%.12g\n%.12g\n", basis.exx, basis.eyx, basis.ezx);
-	fprintf(out, "%.12g\n%.12g\n%.12g\n", basis.exx, basis.eyx, basis.ezx);
-	fprintf(out, "%.12g\n%.12g\n%.12g\n", basis.exx, basis.eyx, basis.ezx);
-
-	for (int i = 0; i < size; i++) fprintf(out, "%.12g\n", val[i]);
-	fclose(out);
-}
-
-// Write the valies in a single column.
-void BaseGrid::writePotential(const char* fileName) const {
-	FILE* out = fopen(fileName, "w");
-	for (int i = 0; i < size; i++) fprintf(out, "%.12g\n", val[i]);
-	fclose(out);
-}
-
-BaseGrid::~BaseGrid() {
-	if (val != NULL)
-		delete[] val;
-}
-
-void BaseGrid::zero() {
-	for (int i = 0; i < size; i++) val[i] = 0.0f;
-}
-
-bool BaseGrid::setValue(int j, float v) {
-	if (j < 0 || j >= size) return false;
-	val[j] = v;
-	return true;
-}
-
-bool BaseGrid::setValue(int ix, int iy, int iz, float v) {
-	if (ix < 0 || ix >= nx) return false;
-	if (iy < 0 || iy >= ny) return false;
-	if (iz < 0 || iz >= nz) return false;
-	int j = iz + iy*nz + ix*ny*nz;
-
-	val[j] = v;
-	return true;
-}
-
-float BaseGrid::getValue(int j) const {
-	if (j < 0 || j >= size) return 0.0f;
-	return val[j];
-}
-/*
-float BaseGrid::getValue(int ix, int iy, int iz) const {
-	if (ix < 0 || ix >= nx) return 0.0f;
-	if (iy < 0 || iy >= ny) return 0.0f;
-	if (iz < 0 || iz >= nz) return 0.0f;
-	
-	int j = iz + iy*nz + ix*ny*nz;
-	return val[j];
-}
-*/
-Vector3 BaseGrid::getPosition(int ix, int iy, int iz) const {
-	return basis.transform(Vector3(ix, iy, iz)) + origin;
-}
-
-Vector3 BaseGrid::getPosition(int j) const {
-	int iz = j%nz;
-	int iy = (j/nz)%ny;
-	int ix = j/(nz*ny);
-
-	return basis.transform(Vector3(ix, iy, iz)) + origin;
-}
-
-// Does the point r fall in the grid?
-// Obviously this is without periodic boundary conditions.
-bool BaseGrid::inGrid(Vector3 r) const {
-	Vector3 l = basisInv.transform(r-origin);
-
-	if (l.x < 0.0f || l.x >= nx) return false;
-	if (l.y < 0.0f || l.y >= ny) return false;
-	if (l.z < 0.0f || l.z >= nz) return false;
-	return true;
-}
-
-bool BaseGrid::inGridInterp(Vector3 r) const {
-	Vector3 l = basisInv.transform(r-origin);
-
-	if (l.x < 2.0f || l.x >= nx-3.0f) return false;
-	if (l.y < 2.0f || l.y >= ny-3.0f) return false;
-	if (l.z < 2.0f || l.z >= nz-3.0f) return false;
-	return true;
-}
-
-Vector3 BaseGrid::transformTo(Vector3 r) const {
-	return basisInv.transform(r-origin);
-}
-Vector3 BaseGrid::transformFrom(Vector3 l) const {
-	return basis.transform(l) + origin;
-}
-
-IndexList BaseGrid::index(int j) const {
-	int iz = j%nz;
-	int iy = (j/nz)%ny;
-	int ix = j/(nz*ny);
-	IndexList ret;
-	ret.add(ix);
-	ret.add(iy);
-	ret.add(iz);
-	return ret;
-}
-int BaseGrid::indexX(int j) const { return j/(nz*ny); }
-int BaseGrid::indexY(int j) const { return (j/nz)%ny; }
-int BaseGrid::indexZ(int j) const { return j%nz; }
-int BaseGrid::index(int ix, int iy, int iz) const { return iz + iy*nz + ix*ny*nz; }
-
-int BaseGrid::index(Vector3 r) const {
-	Vector3 l = basisInv.transform(r-origin);
-	
-	int ix = int(floor(l.x));
-	int iy = int(floor(l.y));
-	int iz = int(floor(l.z));
-
-	ix = wrap(ix, nx);
-	iy = wrap(iy, ny);
-	iz = wrap(iz, nz);
-	
-	return iz + iy*nz + ix*ny*nz;
-}
-
-int BaseGrid::nearestIndex(Vector3 r) const {
-	Vector3 l = basisInv.transform(r-origin);
-	
-	int ix = int(floorf(l.x + 0.5f));
-	int iy = int(floorf(l.y + 0.5f));
-	int iz = int(floorf(l.z + 0.5f));
-
-	ix = wrap(ix, nx);
-	iy = wrap(iy, ny);
-	iz = wrap(iz, nz);
-	
-	return iz + iy*nz + ix*ny*nz;
-}
-
-// A matrix defining the basis for the entire system.
-Matrix3 BaseGrid::getBox() const {
-	return Matrix3(nx*basis.ex(), ny*basis.ey(), nz*basis.ez());
-} 
-// The longest diagonal of the system.
-Vector3 BaseGrid::getExtent() const {
-	return basis.transform(Vector3(nx,ny,nz));
-}
-// The longest diagonal of the system.
-float BaseGrid::getDiagonal() const {
-	return getExtent().length();
-}
-// The position farthest from the origin.
-Vector3 BaseGrid::getDestination() const {
-	return basis.transform(Vector3(nx,ny,nz)) + origin;
-}
-// The center of the grid.
-Vector3 BaseGrid::getCenter() const {
-	return basis.transform(Vector3(0.5f*nx,0.5f*ny,0.5f*nz)) + origin;
-}
-// The volume of a single cell.
-float BaseGrid::getCellVolume() const {
-	return fabs(basis.det());
-}
-// The volume of the entire system.
-float BaseGrid::getVolume() const {
-	return getCellVolume()*size;
-}
-Vector3 BaseGrid::getCellDiagonal() const {
-	return basis.ex() + basis.ey() + basis.ez();
-}
-
-// Add a fixed value to the grid.
-void BaseGrid::shift(float s) {
-	for (int i = 0; i < size; i++) val[i] += s;
-}
-
-// Multiply the grid by a fixed value.
-void BaseGrid::scale(float s) {
-	for (int i = 0; i < size; i++) val[i] *= s;
-}
-
-// Get the mean of the entire grid.
-float BaseGrid::mean() const {
-	float sum = 0.0f;
-	for (int i = 0; i < size; i++) sum += val[i];
-	return sum/size;
-}
-
-// Compute the average profile along an axis.
-// Assumes that the grid axis with index "axis" is aligned with the world axis of index "axis".
-void BaseGrid::averageProfile(const char* fileName, int axis) {
-	FILE* out = fopen(fileName,"w");
-	if (out == NULL) {
-		printf("Couldn't open file %s.\n",fileName);
-		exit(-1);
-	}
-
-	int dir0 = wrap(axis, 3);
-	int dir1 = (axis+1)%3;
-	int dir2 = (axis+2)%3;
-
-	int jump[3];
-	jump[0] = ny*nz;
-	jump[1] = nz;
-	jump[2] = 1;
-
-	int n[3];
-	n[0] = nx;
-	n[1] = ny;
-	n[2] = nz;
- 
-	for (int i0 = 0; i0 < n[dir0]; i0++) {
-		float sum = 0;
-
-		for (int i1 = 0; i1 < n[dir1]; i1++) {
-			for (int i2 = 0; i2 < n[dir2]; i2++) {
-				int j = i0*jump[dir0] + i1*jump[dir1] + i2*jump[dir2];
-				sum += val[j];
-			}
-		}
-		
-		float v = sum/(n[dir1]*n[dir2]);
-		float x = 0.0f;
-		switch (dir0) {
-		case 0:
-			x = origin.x + i0*basis.exx;
-			break;
-		case 1:
-			x = origin.y + i0*basis.eyy;
-			break;
-		case 2:
-			x = origin.z + i0*basis.ezz;
-			break;
-		}
-		fprintf(out, "%0.10g %0.10g\n", x, v);
-	}
-
-	fclose(out);
-}
-
-// Get the potential at the closest node.
-float BaseGrid::getPotential(Vector3 pos) const {
-	// Find the nearest node.
-	int j = nearestIndex(pos);
-
-	return val[j];
-}
-
-bool BaseGrid::crop(int x0, int y0, int z0, int x1, int y1, int z1, bool keep_origin) {
-	if (x0 < 0 || x0 >= 2 * nx) x0 = 0;
-	if (y0 < 0 || y0 >= 2 * ny) y0 = 0;
-	if (z0 < 0 || z0 >= 2 * nz) z0 = 0;
-	if (x1 < 0 || x1 >= 2 * nx) x1 = 2 * nx - 1;
-	if (y1 < 0 || y1 >= 2 * ny) y1 = 2 * ny - 1;
-	if (z1 < 0 || z1 >= 2 * nz) z1 = 2 * nz - 1;
-	printf("Cropping to (%d, %d, %d) -> (%d, %d, %d)\n", x0, y0, z0, x1, y1, z1);
-
-	if (x0 >= x1 || y0 >= y1 || z0 >= z1)
-		return false;
-
-	int new_nx = x1 - x0 + 1;
-	int new_ny = y1 - y0 + 1;
-	int new_nz = z1 - z0 + 1;
-	int new_size = new_nx * new_ny * new_nz;
-	float *new_val = new float[new_size];
-
-	int ind = 0;
-	for (int i = x0; i < x1; i++)
-		for (int j = y0; j < y1; j++)
-			for (int k = z0; k < z1; k++) {
-				int ind1 = k + j * nz + i * ny*nz;
-				new_val[ind++] = val[ind1];
-			}
-
-	if (!keep_origin)
-		origin += basis.transform(Vector3(x0, y0, z0));
-	nx = new_nx;
-	ny = new_ny;
-	nz = new_nz;
-	size = new_size;
-	delete[] val;
-	val = new_val;
-
-	return true;
-}
-
-Vector3 BaseGrid::wrapDiffNearest(Vector3 r) const {
-	Vector3 l = basisInv.transform(r);
-	l.x = wrapDiff(l.x, nx);
-	l.y = wrapDiff(l.y, ny);
-	l.z = wrapDiff(l.z, nz);
-
-	float length2 = basis.transform(l).length2();
-
-	for (int dx = -1; dx <= 1; dx++) {
-		for (int dy = -1; dy <= 1; dy++) {
-			for (int dz = -1; dz <= 1; dz++) {
-				//if (dx == 0 && dy == 0 && dz == 0) continue;
-				Vector3 tmp = Vector3(l.x+dx*nx, l.y+dy*ny, l.z+dz*nz);
-				if (basis.transform(tmp).length2() < length2) {
-					l = tmp;
-					length2 = basis.transform(l).length2();
-				}
-			}
-		}
-	}
-
-	return basis.transform(l);
-}
-
-
-// Includes the home node.
-// indexBuffer must have a size of at least 27.
-void BaseGrid::getNeighbors(int j, int* indexBuffer) const {
-	int jx = indexX(j);
-	int jy = indexY(j);
-	int jz = indexZ(j);
-
-	int k = 0;
-	for (int ix = -1; ix <= 1; ix++) {
-		for (int iy = -1; iy <= 1; iy++) {
-			for (int iz = -1; iz <= 1; iz++) {
-				int ind = wrap(jz+iz,nz) + nz*wrap(jy+iy,ny) + ny*nz*wrap(jx+ix,nx);
-				indexBuffer[k] = ind;
-				k++;
-			}
-		}
-	}
-}
-
-// Get the values at the neighbors of a node.
-// Note that homeX, homeY, and homeZ do not need to be wrapped,
-// since we do it here.
-void BaseGrid::getNeighborValues(NeighborList* neigh, int homeX, int homeY, int homeZ) const {
-	for (int ix = -1; ix <= 1; ix++) {
-		for (int iy = -1; iy <= 1; iy++) {
-			for (int iz = -1; iz <= 1; iz++) {
-				int ind = wrap(homeZ+iz,nz) + nz*wrap(homeY+iy,ny) + ny*nz*wrap(homeX+ix,nx);
-				neigh->v[ix+1][iy+1][iz+1] = val[ind];
-			}
-		}
-	}
-}  
diff --git a/src/BaseGrid.h b/src/BaseGrid.h
deleted file mode 100644
index b3a4cfa170d62790c73306dc3c659328b9208815..0000000000000000000000000000000000000000
--- a/src/BaseGrid.h
+++ /dev/null
@@ -1,1100 +0,0 @@
-//////////////////////////////////////////////////////////////////////
-// Grid base class that does just the basics.
-// Author: Jeff Comer <jcomer2@illinois.edu>
-#ifndef BASEGRID_H
-#define BASEGRID_H
-
-
-#ifdef __CUDACC__
-    #define HOST __host__
-    #define DEVICE __device__
-#else
-    #define HOST 
-    #define DEVICE 
-#endif
-
-#include "useful.h"
-#include <cmath>
-#include <cstring>
-#include <cstdio>
-#include <cstdlib>
-#include <ctime>
-// #include <cuda.h>
-
-#ifndef gpuErrchk
-#define delgpuErrchk
-#define gpuErrchk(code) { if ((code) != cudaSuccess) {			                            \
-	    fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), __FILE__, __LINE__); \
-	}}
-#endif
-
-
-enum BoundaryCondition { dirichlet, neumann, periodic };
-enum InterpolationOrder { linear = 1, cubic = 3 };
-
-#define INTERPOLATE_FORCE(result, function, boundary_condition, args) \
-switch (boundary_condition) { \
-case dirichlet: \
-    result = function<dirichlet>(args); break; \
-case neumann: \
-    result = function<neumann>(args); break; \
-case periodic: \
-    result = function<periodic>(args); break; \
-}
-
-// using namespace std;
-
-#define STRLEN 512
-
-DEVICE float __fdividef(float x, float y);
-
-class NeighborList {
-public:
-  float v[3][3][3];
-};
-
-class BaseGrid {
-  friend class SparseGrid;
- 
-private:
-  // Initialize the variables that get used a lot.
-  // Also, allocate the main value array.
-  void init();
-
-public:
-
-	/*                               \
-	| CONSTRUCTORS, DESTRUCTORS, I/O |
-	\===============================*/
-	
-	// RBTODO Fix?
-	BaseGrid(); // cmaffeo2 (2015) moved this out of protected, cause I wanted BaseGrid in a struct
-  // The most obvious of constructors.
-		BaseGrid(Matrix3 basis0, Vector3 origin0, int nx0, int ny0, int nz0);
-
-  // Make an orthogonal grid given the box dimensions and resolution.
-  BaseGrid(Vector3 box, float dx);
-
-  // The box gives the system geometry.
-  // The grid point numbers define the resolution.
-  BaseGrid(Matrix3 box, int nx0, int ny0, int nz0);
-
-  // The box gives the system geometry.
-  // dx is the approx. resolution.
-  // The grid spacing is always a bit larger than dx.
-  BaseGrid(Matrix3 box, Vector3 origin0, float dx);
-
-  // The box gives the system geometry.
-  // dx is the approx. resolution.
-  // The grid spacing is always a bit smaller than dx.
-  BaseGrid(Matrix3 box, float dx);
-
-  // Make an exact copy of a grid.
-  BaseGrid(const BaseGrid& g);
-
-  BaseGrid mult(const BaseGrid& g);
-
-  BaseGrid& operator=(const BaseGrid& g);
-
-  // Make a copy of a grid, but at a different resolution.
-  BaseGrid(const BaseGrid& g, int nx0, int ny0, int nz0);
-
-  // Read a grid from a file.
-  BaseGrid(const char* fileName);
-  
-  // Write without comments.
-  virtual void write(const char* fileName) const;
-
-  // Writes the grid as a file in the dx format.
-  virtual void write(const char* fileName, const char* comments) const;
-
-  // Writes the grid data as a single column in the order:
-  // nx ny nz ox oy oz dxx dyx dzx dxy dyy dzy dxz dyz dzz val0 val1 val2 ...
-  virtual void writeData(const char* fileName);
- 
-  // Write the valies in a single column.
-  virtual void writePotential(const char* fileName) const;
-  
-	virtual ~BaseGrid();
-
-	/*             \
-	| DATA METHODS |
-	\=============*/
-		
-	void zero();
-  
-  bool setValue(int j, float v);
-
-  bool setValue(int ix, int iy, int iz, float v);
-
-  virtual float getValue(int j) const;
-
-  //virtual float getValue(int ix, int iy, int iz) const;
-
-  Vector3 getPosition(int ix, int iy, int iz) const;
-
-  Vector3 getPosition(int j) const;
-
-  // Does the point r fall in the grid?
-  // Obviously this is without periodic boundary conditions.
-  bool inGrid(Vector3 r) const;
-
-  bool inGridInterp(Vector3 r) const;
-
-  Vector3 transformTo(Vector3 r) const;
-
-  Vector3 transformFrom(Vector3 l) const;
-
-  IndexList index(int j) const;
-  int indexX(int j) const;
-  int indexY(int j) const;
-  int indexZ(int j) const;
-  int index(int ix, int iy, int iz) const;
-  
-  int index(Vector3 r) const;
-
-  int nearestIndex(Vector3 r) const;
-
-  HOST DEVICE inline int length() const {
-		return size;
-	}
-  void setBasis(const Matrix3& b);
-  void setOrigin(const Vector3& o);
-
-  HOST DEVICE inline Vector3 getOrigin() const {return origin;}
-  HOST DEVICE inline Matrix3 getBasis() const {return basis;}
-  HOST DEVICE inline Matrix3 getInverseBasis() const {return basisInv;}
-  HOST DEVICE inline int getNx() const {return nx;}
-  HOST DEVICE inline int getNy() const {return ny;}
-  HOST DEVICE inline int getNz() const {return nz;}
-  HOST DEVICE inline int getSize() const {return size;}
-
-  
-  // A matrix defining the basis for the entire system.
-  Matrix3 getBox() const;
-  // The diagonal (nx,ny,nz) of the system.
-  Vector3 getExtent() const;
-  // The length of diagonal (nx,ny,nz) of the system.
-  float getDiagonal() const;
-
-  HOST DEVICE inline int getRadius() const {
-	  // return radius of smallest sphere circumscribing grid
-	  float radius = basis.transform(Vector3(nx,ny,nz)).length2();
-	  
-	  float tmp = basis.transform(Vector3(-nx,ny,nz)).length2();
-	  radius = tmp > radius ? tmp : radius;
-
-	  tmp = basis.transform(Vector3(nx,-ny,nz)).length2();
-	  radius = tmp > radius ? tmp : radius;
-
-	  tmp = basis.transform(Vector3(nx,ny,-nz)).length2();
-	  radius = tmp > radius ? tmp : radius;
-
-	  return 0.5 * sqrt(radius);
-  }
-
-  // The position farthest from the origin.
-  Vector3 getDestination() const;
-  // The center of the grid.
-  Vector3 getCenter() const;
-  // The volume of a single cell.
-  float getCellVolume() const;
-  // The volume of the entire system.
-  float getVolume() const;
-  Vector3 getCellDiagonal() const;
-
-  // Add a fixed value to the grid.
-  void shift(float s);
-
-  // Multiply the grid by a fixed value.
-  void scale(float s);
-
-	/*         \
-	| COMPUTED |
-	\=========*/
-	
-  // Get the mean of the entire grid.
-  float mean() const;
-	
-  // Compute the average profile along an axis.
-  // Assumes that the grid axis with index "axis" is aligned with the world axis of index "axis".
-  void averageProfile(const char* fileName, int axis);
-
-  // Get the potential at the closest node.
-  virtual float getPotential(Vector3 pos) const;
-
-	// crop
-	// Cuts the grid down
-	// @param		boundries to crop to (x0, y0, z0) -> (x1, y1, z1);
-	//					whether to change the origin
-	// @return	success of the function
-	bool crop(int x0, int y0, int z0, int x1, int y1, int z1, bool keep_origin);
-
-  // Added by Rogan for times when simpler calculations are required.
-  // virtual float interpolatePotentialLinearly(Vector3 pos) const;
-
-  HOST DEVICE inline float interpolateDiffX(Vector3 pos, float w[3], float g1[4][4][4]) const {
-    float a0, a1, a2, a3;
-
-		// RBTODO parallelize loops?
-		
-    // Mix along x, taking the derivative.
-    float g2[4][4];
-    for (int iy = 0; iy < 4; iy++) {
-      for (int iz = 0; iz < 4; iz++) {
-				a3 = 0.5f*(-g1[0][iy][iz] + 3.0f*g1[1][iy][iz] - 3.0f*g1[2][iy][iz] + g1[3][iy][iz]);
-				a2 = 0.5f*(2.0f*g1[0][iy][iz] - 5.0f*g1[1][iy][iz] + 4.0f*g1[2][iy][iz] - g1[3][iy][iz]);
-				a1 = 0.5f*(-g1[0][iy][iz] + g1[2][iy][iz]);
-				a0 = g1[1][iy][iz];
-
-				//g2[iy][iz] = a3*w[0]*w[0]*w[0] + a2*w[0]*w[0] + a1*w[0] + a0;
-				g2[iy][iz] = 3.0f*a3*w[0]*w[0] + 2.0f*a2*w[0] + a1;
-      }
-    }
-
-
-    // Mix along y.
-    float g3[4];
-    for (int iz = 0; iz < 4; iz++) {
-      a3 = 0.5f*(-g2[0][iz] + 3.0f*g2[1][iz] - 3.0f*g2[2][iz] + g2[3][iz]);
-      a2 = 0.5f*(2.0f*g2[0][iz] - 5.0f*g2[1][iz] + 4.0f*g2[2][iz] - g2[3][iz]);
-      a1 = 0.5f*(-g2[0][iz] + g2[2][iz]);
-      a0 = g2[1][iz];
-   
-      g3[iz] = a3*w[1]*w[1]*w[1] + a2*w[1]*w[1] + a1*w[1] + a0;
-    }
-
-    // Mix along z.
-    a3 = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3]);
-    a2 = 0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3]);
-    a1 = 0.5f*(-g3[0] + g3[2]);
-    a0 = g3[1];
- 
-    float retval = -(a3*w[2]*w[2]*w[2] + a2*w[2]*w[2] + a1*w[2] + a0);
-    return retval;
-  }
-
-  HOST DEVICE inline float interpolateDiffY(Vector3 pos, float w[3], float g1[4][4][4]) const {
-    float a0, a1, a2, a3;
-  
-    // Mix along x, taking the derivative.
-    float g2[4][4];
-    for (int iy = 0; iy < 4; iy++) {
-      for (int iz = 0; iz < 4; iz++) {
-				a3 = 0.5f*(-g1[0][iy][iz] + 3.0f*g1[1][iy][iz] - 3.0f*g1[2][iy][iz] + g1[3][iy][iz]);
-				a2 = 0.5f*(2.0f*g1[0][iy][iz] - 5.0f*g1[1][iy][iz] + 4.0f*g1[2][iy][iz] - g1[3][iy][iz]);
-				a1 = 0.5f*(-g1[0][iy][iz] + g1[2][iy][iz]);
-				a0 = g1[1][iy][iz];
-
-				g2[iy][iz] = a3*w[0]*w[0]*w[0] + a2*w[0]*w[0] + a1*w[0] + a0;
-      }
-    }
-
-    // Mix along y.
-    float g3[4];
-    for (int iz = 0; iz < 4; iz++) {
-      a3 = 0.5f*(-g2[0][iz] + 3.0f*g2[1][iz] - 3.0f*g2[2][iz] + g2[3][iz]);
-      a2 = 0.5f*(2.0f*g2[0][iz] - 5.0f*g2[1][iz] + 4.0f*g2[2][iz] - g2[3][iz]);
-      a1 = 0.5f*(-g2[0][iz] + g2[2][iz]);
-      a0 = g2[1][iz];
-   
-      //g3[iz] = a3*w[1]*w[1]*w[1] + a2*w[1]*w[1] + a1*w[1] + a0;
-      g3[iz] = 3.0f*a3*w[1]*w[1] + 2.0f*a2*w[1] + a1;
-    }
-
-    // Mix along z.
-    a3 = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3]);
-    a2 = 0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3]);
-    a1 = 0.5f*(-g3[0] + g3[2]);
-    a0 = g3[1];
-
-    return -(a3*w[2]*w[2]*w[2] + a2*w[2]*w[2] + a1*w[2] + a0);
-  }
-
-  HOST DEVICE inline float interpolateDiffZ(Vector3 pos, float w[3], float g1[4][4][4]) const {
-    float a0, a1, a2, a3;
-  
-    // Mix along x, taking the derivative.
-    float g2[4][4];
-    for (int iy = 0; iy < 4; iy++) {
-      for (int iz = 0; iz < 4; iz++) {
-				a3 = 0.5f*(-g1[0][iy][iz] + 3.0f*g1[1][iy][iz] - 3.0f*g1[2][iy][iz] + g1[3][iy][iz]);
-				a2 = 0.5f*(2.0f*g1[0][iy][iz] - 5.0f*g1[1][iy][iz] + 4.0f*g1[2][iy][iz] - g1[3][iy][iz]);
-				a1 = 0.5f*(-g1[0][iy][iz] + g1[2][iy][iz]);
-				a0 = g1[1][iy][iz];
-
-				g2[iy][iz] = a3*w[0]*w[0]*w[0] + a2*w[0]*w[0] + a1*w[0] + a0;
-      }
-    }
-
-    // Mix along y.
-    float g3[4];
-    for (int iz = 0; iz < 4; iz++) {
-      a3 = 0.5f*(-g2[0][iz] + 3.0f*g2[1][iz] - 3.0f*g2[2][iz] + g2[3][iz]);
-      a2 = 0.5f*(2.0f*g2[0][iz] - 5.0f*g2[1][iz] + 4.0f*g2[2][iz] - g2[3][iz]);
-      a1 = 0.5f*(-g2[0][iz] + g2[2][iz]);
-      a0 = g2[1][iz];
-   
-      g3[iz] = a3*w[1]*w[1]*w[1] + a2*w[1]*w[1] + a1*w[1] + a0;
-    }
-
-    // Mix along z.
-    a3 = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3]);
-    a2 = 0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3]);
-    a1 = 0.5f*(-g3[0] + g3[2]);
-    a0 = g3[1];
-
-    return -(3.0f*a3*w[2]*w[2] + 2.0f*a2*w[2] + a1);
-  }
-
-  HOST DEVICE inline float interpolatePotential(const Vector3& pos) const {
-    // Find the home node.
-    Vector3 l = basisInv.transform(pos - origin);
-
-		const int homeX = int(floor(l.x));
-		const int homeY = int(floor(l.y));
-		const int homeZ = int(floor(l.z));
-		const float wx = l.x - homeX;
-		const float wy = l.y - homeY;
-		const float wz = l.z - homeZ;
-		const float wx2 = wx*wx;
-		const float wy2 = wy*wy;
-		const float wz2 = wz*wz;
-
-		float g3[4];
-		for (int iz = 0; iz < 4; iz++) {
-			float g2[4];
-			const int jz = (iz + homeZ - 1);
-			for (int iy = 0; iy < 4; iy++) {
-				float v[4];
-				const int jy = (iy + homeY - 1);
-				for (int ix = 0; ix < 4; ix++) {
-					const int jx = (ix + homeX - 1);
-					const int ind = jz + jy*nz + jx*nz*ny;
-					v[ix] = jz < 0 || jz >= nz || jy < 0 || jy >= ny || jx < 0 || jx >= nx ?
-						0 : val[ind];
-				}
-				g2[iy] = 0.5f*(-v[0] + 3.0f*v[1] - 3.0f*v[2] + v[3])*wx2*wx +
-					0.5f*(2.0f*v[0] - 5.0f*v[1] + 4.0f*v[2] - v[3])   *wx2  +
-					0.5f*(-v[0] + v[2])                               *wx +
-					v[1];
-			}
-
-			// Mix along y.
-			g3[iz] = 0.5f*(-g2[0] + 3.0f*g2[1] - 3.0f*g2[2] + g2[3])*wy2*wy +
-				0.5f*(2.0f*g2[0] - 5.0f*g2[1] + 4.0f*g2[2] - g2[3])   *wy2  +
-				0.5f*(-g2[0] + g2[2])                                 *wy +
-				g2[1];
-		}
-		// Mix along z.
-		const float e = 0.5f*(-g3[0] + 3.0f*g3[1] - 3.0f*g3[2] + g3[3])*wz2*wz +
-			0.5f*(2.0f*g3[0] - 5.0f*g3[1] + 4.0f*g3[2] - g3[3])          *wz2  +
-			0.5f*(-g3[0] + g3[2])                                        *wz +
-			g3[1];
-    return e;
-  }
-
-  HOST DEVICE inline static int wrap(int i, int n) {
-		while (i < 0) {
-			//i %= n;
-			i += n;
-		}
-		// The portion above allows i == n, so no else keyword.
-		if (i >= n) {
-			i %= n;
-		} 
-		return i;
-	}
-	HOST DEVICE float interpolatePotentialLinearly(const Vector3& pos) const {
-		Vector3 f;
- 		const Vector3 l = basisInv.transform(pos - origin);
-
-		// Find the home node.
-		const int homeX = int(floor(l.x));
-		const int homeY = int(floor(l.y));
-		const int homeZ = int(floor(l.z));
-
-		const float wx = l.x - homeX;
-		const float wy = l.y - homeY;	
-		const float wz = l.z - homeZ;
-
-		float v[2][2][2];
-		for (int iz = 0; iz < 2; iz++) {
-			int jz = (iz + homeZ);
-			for (int iy = 0; iy < 2; iy++) {
-				int jy = (iy + homeY);
-				for (int ix = 0; ix < 2; ix++) {
-					int jx = (ix + homeX);
-					int ind = jz + jy*nz + jx*nz*ny;
-					v[ix][iy][iz] = jz < 0 || jz >= nz || jy < 0 || jy >= ny || jx < 0 || jx >= nx ?
-						0 : val[ind];
-				}
-			}
-		}
-
-		float g3[2];
-		for (int iz = 0; iz < 2; iz++) {
-			float g2[2];
-			for (int iy = 0; iy < 2; iy++) {
-				g2[iy] = wx * (v[1][iy][iz] - v[0][iy][iz]) + v[0][iy][iz];
-			}
-			// Mix along y.
-			g3[iz] = wy * (g2[1] - g2[0]) + g2[0];
-		}
-		// Mix along z.
-		float e = wz * (g3[1] - g3[0]) + g3[0];
-		return e;
-	}
-
-
-	/** interpolateForce() to be used on CUDA Device.
-	 *  Cubic interpolation over the 4x4x4 block of nodes around pos (offsets
-	 *  -1..2 from the home node on each axis); nodes outside the grid are read
-	 *  as 0. Returns ForceEnergy(f, e): e is the interpolated grid value and f
-	 *  is minus its gradient in grid coordinates, passed through
-	 *  basisInv.transpose().transform(). **/
-	DEVICE inline ForceEnergy interpolateForceD(const Vector3& pos) const {
-		Vector3 f;
- 		const Vector3 l = basisInv.transform(pos - origin); // pos in grid-index units
-
-		const int homeX = int(floor(l.x)); // home node = floor of the grid coordinate
-		const int homeY = int(floor(l.y));
-		const int homeZ = int(floor(l.z));
-		const float wx = l.x - homeX; // fractional offsets inside the home cell
-		const float wy = l.y - homeY;
-		const float wz = l.z - homeZ;
-		const float wx2 = wx*wx;
-
-	/* Per z-plane iz: g3[0][iz] carries the x-derivative, g3[1][iz] the
-	 * y-derivative and g3[2][iz] the value (used for f.z and the energy). */
-	float g3[3][4];
-	for (int iz = 0; iz < 4; iz++) {
-		float g2[2][4];
-		const int jz = (iz + homeZ - 1);
-		for (int iy = 0; iy < 4; iy++) {
-			float v[4];
-			const int jy = (iy + homeY - 1);
-			for (int ix = 0; ix < 4; ix++) {
-				const int jx = (ix + homeX - 1);
-				const int ind = jz + jy*nz + jx*nz*ny;
-				v[ix] = jz < 0 || jz >= nz || jy < 0 || jy >= ny || jx < 0 || jx >= nx ? // outside the grid -> 0
-					0 : val[ind];
-			}
-			const float a3 = 0.5f*(-v[0] + 3.0f*v[1] - 3.0f*v[2] + v[3])*wx2; // cubic coefficient times wx^2
-			const float a2 = 0.5f*(2.0f*v[0] - 5.0f*v[1] + 4.0f*v[2] - v[3])*wx; // quadratic coefficient times wx
-			const float a1 = 0.5f*(-v[0] + v[2]); // linear coefficient
-			g2[0][iy] = 3.0f*a3 + 2.0f*a2 + a1;				/* f.x (derivative) */
-			g2[1][iy] = a3*wx + a2*wx + a1*wx + v[1]; /* f.y & f.z */
-		}
-
-		// Mix along y.
-		{
-			g3[0][iz] = 0.5f*(-g2[0][0] + 3.0f*g2[0][1] - 3.0f*g2[0][2] + g2[0][3])*wy*wy*wy +
-				0.5f*(2.0f*g2[0][0] - 5.0f*g2[0][1] + 4.0f*g2[0][2] - g2[0][3])      *wy*wy +
-				0.5f*(-g2[0][0] + g2[0][2])                                          *wy +
-				g2[0][1];
-		}
-
-		{
-			const float a3 = 0.5f*(-g2[1][0] + 3.0f*g2[1][1] - 3.0f*g2[1][2] + g2[1][3])*wy*wy;
-			const float a2 = 0.5f*(2.0f*g2[1][0] - 5.0f*g2[1][1] + 4.0f*g2[1][2] - g2[1][3])*wy;
-			const float a1 = 0.5f*(-g2[1][0] + g2[1][2]);
-			g3[1][iz] = 3.0f*a3 + 2.0f*a2 + a1;						/* f.y */
-			g3[2][iz] = a3*wy + a2*wy + a1*wy + g2[1][1]; /* f.z */
-		}
-	}
-
-	// Mix along z. f.x and f.y interpolate their derivative rows; f.z differentiates the value row.
-	f.x = -0.5f*(-g3[0][0] + 3.0f*g3[0][1] - 3.0f*g3[0][2] + g3[0][3])*wz*wz*wz +
-		-0.5f*(2.0f*g3[0][0] - 5.0f*g3[0][1] + 4.0f*g3[0][2] - g3[0][3])*wz*wz +
-		-0.5f*(-g3[0][0] + g3[0][2])                                    *wz -
-		g3[0][1];
-	f.y = -0.5f*(-g3[1][0] + 3.0f*g3[1][1] - 3.0f*g3[1][2] + g3[1][3])*wz*wz*wz +
-		-0.5f*(2.0f*g3[1][0] - 5.0f*g3[1][1] + 4.0f*g3[1][2] - g3[1][3])*wz*wz +
-		-0.5f*(-g3[1][0] + g3[1][2])                                    *wz -
-		g3[1][1];
-	f.z = -1.5f*(-g3[2][0] + 3.0f*g3[2][1] - 3.0f*g3[2][2] + g3[2][3])*wz*wz -
-		(2.0f*g3[2][0] - 5.0f*g3[2][1] + 4.0f*g3[2][2] - g3[2][3])      *wz -
-		0.5f*(-g3[2][0] + g3[2][2]);
-	float e = 0.5f*(-g3[2][0] + 3.0f*g3[2][1] - 3.0f*g3[2][2] + g3[2][3])*wz*wz*wz + // energy: value row interpolated along z
-		0.5f*(2.0f*g3[2][0] - 5.0f*g3[2][1] + 4.0f*g3[2][2] - g3[2][3])    *wz*wz +
-		0.5f*(-g3[2][0] + g3[2][2])                                        *wz +
-		g3[2][1];
-
-	f = basisInv.transpose().transform(f); // map the grid-coordinate gradient through the transposed inverse basis
-	return ForceEnergy(f,e);
-	
-	}
-
-	/**
-	 * Device-side trilinear lookup returning ForceEnergy(f, e).
-	 *
-	 * The eight nodes of the cell containing pos are read according to the
-	 * compile-time boundary condition bc:
-	 *   dirichlet - nodes outside the grid are taken as 0;
-	 *   neumann   - indices are clamped to the nearest valid node, and a query
-	 *               whose home node is more than one cell outside the grid
-	 *               yields a default-constructed ForceEnergy;
-	 *   periodic  - an index below 0 maps to the last node and an index past
-	 *               the end maps to node 0.
-	 * e is the interpolated value; f is minus the gradient in grid coordinates
-	 * passed through basisInv.transpose().transform().
-	 */
-	template <BoundaryCondition bc>
-	DEVICE inline ForceEnergy interpolateForceDLinearly(const Vector3& pos) const {
-		// Query point in grid-index coordinates.
-		const Vector3 cell = basisInv.transform(pos - origin);
-
-		// Home node and fractional offsets inside the home cell.
-		const int homeX = int(floor(cell.x));
-		const int homeY = int(floor(cell.y));
-		const int homeZ = int(floor(cell.z));
-		const float wx = cell.x - homeX;
-		const float wy = cell.y - homeY;
-		const float wz = cell.z - homeZ;
-
-		if (bc == neumann) {
-			const bool farOutside =
-				homeX < -1 || homeX >= nx+1 ||
-				homeY < -1 || homeY >= ny+1 ||
-				homeZ < -1 || homeZ >= nz+1;
-			if (farOutside)
-				return ForceEnergy();
-		}
-
-		// Corner values, indexed [ix][iy][iz].
-		float v[2][2][2];
-		for (int iz = 0; iz < 2; iz++) {
-			const int jz = homeZ + iz;
-			for (int iy = 0; iy < 2; iy++) {
-				const int jy = homeY + iy;
-				for (int ix = 0; ix < 2; ix++) {
-					const int jx = homeX + ix;
-					if (bc == dirichlet) {
-						const bool outside =
-							jz < 0 || jz >= nz ||
-							jy < 0 || jy >= ny ||
-							jx < 0 || jx >= nx;
-						v[ix][iy][iz] = outside ? 0 : val[jz + jy*nz + jx*nz*ny];
-					} else if (bc == neumann) {
-						const int cz = jz < 0 ? 0 : jz >= nz ? nz-1 : jz;
-						const int cy = jy < 0 ? 0 : jy >= ny ? ny-1 : jy;
-						const int cx = jx < 0 ? 0 : jx >= nx ? nx-1 : jx;
-						v[ix][iy][iz] = val[cz + cy*nz + cx*nz*ny];
-					} else if (bc == periodic) {
-						const int pz = jz < 0 ? nz-1 : jz >= nz ? 0 : jz;
-						const int py = jy < 0 ? ny-1 : jy >= ny ? 0 : jy;
-						const int px = jx < 0 ? nx-1 : jx >= nx ? 0 : jx;
-						v[ix][iy][iz] = val[pz + py*nz + px*nz*ny];
-					}
-				}
-			}
-		}
-
-		// plane[0]: d/dx, plane[1]: d/dy, plane[2]: value, one entry per z-plane.
-		float plane[3][2];
-		for (int iz = 0; iz < 2; iz++) {
-			float row[2][2];
-			for (int iy = 0; iy < 2; iy++) {
-				const float dx = (v[1][iy][iz] - v[0][iy][iz]);
-				row[0][iy] = dx;                      /* f.x */
-				row[1][iy] = wx * dx + v[0][iy][iz];  /* f.y & f.z */
-			}
-			// Blend along y.
-			const float dy = (row[1][1] - row[1][0]);
-			plane[0][iz] = wy * (row[0][1] - row[0][0]) + row[0][0];
-			plane[1][iz] = dy;
-			plane[2][iz] = wy * dy + row[1][0];
-		}
-
-		// Blend along z.
-		const float dz = (plane[2][1] - plane[2][0]);
-		Vector3 f;
-		f.x = -(wz * (plane[0][1] - plane[0][0]) + plane[0][0]);
-		f.y = -(wz * (plane[1][1] - plane[1][0]) + plane[1][0]);
-		f.z = -      dz;
-
-		f = basisInv.transpose().transform(f);
-		const float e = wz * dz + plane[2][0];
-		return ForceEnergy(f,e);
-	}
-        // Device-side force/energy lookup built on the 64-coefficient helpers:
-        // compute_b() gathers values and finite differences at the eight cell
-        // corners, compute_a() converts them into polynomial coefficients, and
-        // compute_V()/compute_dV() evaluate the polynomial and minus its
-        // gradient at the fractional position inside the cell.
-        DEVICE inline ForceEnergy interpolateForceDnamd(const Vector3& pos) const {
-                const Vector3 l = basisInv.transform(pos - origin);
-
-                // Home node of the enclosing cell.
-                int inds[3];
-                inds[0] = int(floor(l.x));
-                inds[1] = int(floor(l.y));
-                inds[2] = int(floor(l.z));
-
-                // Fractional offsets inside the cell.
-                const float wx = l.x - inds[0];
-                const float wy = l.y - inds[1];
-                const float wz = l.z - inds[2];
-                Vector3 dg = Vector3(wx,wy,wz);
-
-                // TODO: handle edges
-
-                // Corner data, then polynomial coefficients.
-                float b[64];
-                compute_b(b, inds);
-                float a[64];
-                compute_a(a, b);
-
-                // Powers of the offsets, e.g. x[2] = dg.x^2.
-                float x[4], y[4], z[4];
-                x[0] = 1; y[0] = 1; z[0] = 1;
-                for (int p = 1; p < 4; p++) {
-                    x[p] = x[p-1] * dg.x;
-                    y[p] = y[p-1] * dg.y;
-                    z[p] = z[p-1] * dg.z;
-                }
-
-                const float e = compute_V(a, x, y, z);
-                Vector3 f = compute_dV(a, x, y, z);
-                f = basisInv.transpose().transform(f);
-                return ForceEnergy(f,e);
-        }
-        // Evaluate the polynomial sum over j,k,l in 0..3 of
-        // a[j + 4*k + 16*l] * x[j] * y[k] * z[l], where x, y, z hold the
-        // caller-supplied powers of the three offsets.
-        DEVICE inline float compute_V(float *a, float *x, float *y, float *z) const
-        {
-            float total = 0.0;
-            for (int pz = 0; pz < 4; pz++) {
-                for (int py = 0; py < 4; py++) {
-                    const int rowStart = 16*pz + 4*py;
-                    for (int px = 0; px < 4; px++) {
-                        total += a[rowStart + px] * x[px] * y[py] * z[pz];
-                    }
-                }
-            }
-            return total;
-        }
-        // Minus the gradient of the polynomial evaluated by compute_V():
-        // each term a[j + 4*k + 16*l] * x[j] * y[k] * z[l] is differentiated
-        // with respect to one offset by lowering that power by one and
-        // multiplying by the original exponent. The sum is negated on return.
-        DEVICE inline Vector3 compute_dV(float *a, float *x, float *y, float *z) const
-        {
-            Vector3 grad = Vector3(0.0f);
-            for (int pz = 0; pz < 4; pz++) {
-                for (int py = 0; py < 4; py++) {
-                    for (int px = 0; px < 4; px++) {
-                        const float coeff = a[16*pz + 4*py + px];
-                        if (px > 0) {
-                            grad.x += coeff * px * x[px-1] * y[py]   * z[pz];       // dV/dx
-                        }
-                        if (py > 0) {
-                            grad.y += coeff * py * x[px]   * y[py-1] * z[pz];       // dV/dy
-                        }
-                        if (pz > 0) {
-                            grad.z += coeff * pz * x[px]   * y[py]   * z[pz-1];     // dV/dz
-                        }
-                    }
-                }
-            }
-            return grad*(-1.f);
-        }
-        DEVICE inline void compute_a(float *a, float *b) const
-        {
-            /* Static sparse 64x64 matrix times vector ... nicer looking way than this?
-             *
-             * Each a[m] below is a fixed integer-weighted combination of entries of b.
-             * compute_b() lays b out as eight groups of eight corner entries (value,
-             * then the x, y, z, xy, xz, yz and xyz differences), and compute_V() /
-             * compute_dV() consume a as a[j + 4*k + 16*l], the weight of
-             * x[j]*y[k]*z[l]. Only a is written here; b is only read. */
-                           a[0] = b[0];
-            a[1] = b[8];
-            a[2] = -3*b[0] + 3*b[1] - 2*b[8] - b[9];
-            a[3] = 2*b[0] - 2*b[1] + b[8] + b[9];
-            a[4] = b[16];
-            a[5] = b[32];
-            a[6] = -3*b[16] + 3*b[17] - 2*b[32] - b[33];
-            a[7] = 2*b[16] - 2*b[17] + b[32] + b[33];
-            a[8] = -3*b[0] + 3*b[2] - 2*b[16] - b[18];
-            a[9] = -3*b[8] + 3*b[10] - 2*b[32] - b[34];
-            a[10] = 9*b[0] - 9*b[1] - 9*b[2] + 9*b[3] + 6*b[8] + 3*b[9] - 6*b[10] - 3*b[11]
-                + 6*b[16] - 6*b[17] + 3*b[18] - 3*b[19] + 4*b[32] + 2*b[33] + 2*b[34] + b[35];
-            a[11] = -6*b[0] + 6*b[1] + 6*b[2] - 6*b[3] - 3*b[8] - 3*b[9] + 3*b[10] + 3*b[11]
-                - 4*b[16] + 4*b[17] - 2*b[18] + 2*b[19] - 2*b[32] - 2*b[33] - b[34] - b[35];
-            a[12] = 2*b[0] - 2*b[2] + b[16] + b[18];
-            a[13] = 2*b[8] - 2*b[10] + b[32] + b[34];
-            a[14] = -6*b[0] + 6*b[1] + 6*b[2] - 6*b[3] - 4*b[8] - 2*b[9] + 4*b[10] + 2*b[11]
-                - 3*b[16] + 3*b[17] - 3*b[18] + 3*b[19] - 2*b[32] - b[33] - 2*b[34] - b[35];
-            a[15] = 4*b[0] - 4*b[1] - 4*b[2] + 4*b[3] + 2*b[8] + 2*b[9] - 2*b[10] - 2*b[11]
-                + 2*b[16] - 2*b[17] + 2*b[18] - 2*b[19] + b[32] + b[33] + b[34] + b[35];
-            a[16] = b[24];
-            a[17] = b[40];
-            a[18] = -3*b[24] + 3*b[25] - 2*b[40] - b[41];
-            a[19] = 2*b[24] - 2*b[25] + b[40] + b[41];
-            a[20] = b[48];
-            a[21] = b[56];
-            a[22] = -3*b[48] + 3*b[49] - 2*b[56] - b[57];
-            a[23] = 2*b[48] - 2*b[49] + b[56] + b[57];
-            a[24] = -3*b[24] + 3*b[26] - 2*b[48] - b[50];
-            a[25] = -3*b[40] + 3*b[42] - 2*b[56] - b[58];
-            a[26] = 9*b[24] - 9*b[25] - 9*b[26] + 9*b[27] + 6*b[40] + 3*b[41] - 6*b[42] - 3*b[43]
-                + 6*b[48] - 6*b[49] + 3*b[50] - 3*b[51] + 4*b[56] + 2*b[57] + 2*b[58] + b[59];
-            a[27] = -6*b[24] + 6*b[25] + 6*b[26] - 6*b[27] - 3*b[40] - 3*b[41] + 3*b[42] + 3*b[43]
-                - 4*b[48] + 4*b[49] - 2*b[50] + 2*b[51] - 2*b[56] - 2*b[57] - b[58] - b[59];
-            a[28] = 2*b[24] - 2*b[26] + b[48] + b[50];
-            a[29] = 2*b[40] - 2*b[42] + b[56] + b[58];
-            a[30] = -6*b[24] + 6*b[25] + 6*b[26] - 6*b[27] - 4*b[40] - 2*b[41] + 4*b[42] + 2*b[43]
-                - 3*b[48] + 3*b[49] - 3*b[50] + 3*b[51] - 2*b[56] - b[57] - 2*b[58] - b[59];
-            a[31] = 4*b[24] - 4*b[25] - 4*b[26] + 4*b[27] + 2*b[40] + 2*b[41] - 2*b[42] - 2*b[43]
-                + 2*b[48] - 2*b[49] + 2*b[50] - 2*b[51] + b[56] + b[57] + b[58] + b[59];
-            a[32] = -3*b[0] + 3*b[4] - 2*b[24] - b[28];
-            a[33] = -3*b[8] + 3*b[12] - 2*b[40] - b[44];
-            a[34] = 9*b[0] - 9*b[1] - 9*b[4] + 9*b[5] + 6*b[8] + 3*b[9] - 6*b[12] - 3*b[13]
-                + 6*b[24] - 6*b[25] + 3*b[28] - 3*b[29] + 4*b[40] + 2*b[41] + 2*b[44] + b[45];
-            a[35] = -6*b[0] + 6*b[1] + 6*b[4] - 6*b[5] - 3*b[8] - 3*b[9] + 3*b[12] + 3*b[13]
-                - 4*b[24] + 4*b[25] - 2*b[28] + 2*b[29] - 2*b[40] - 2*b[41] - b[44] - b[45];
-            a[36] = -3*b[16] + 3*b[20] - 2*b[48] - b[52];
-            a[37] = -3*b[32] + 3*b[36] - 2*b[56] - b[60];
-            a[38] = 9*b[16] - 9*b[17] - 9*b[20] + 9*b[21] + 6*b[32] + 3*b[33] - 6*b[36] - 3*b[37]
-                + 6*b[48] - 6*b[49] + 3*b[52] - 3*b[53] + 4*b[56] + 2*b[57] + 2*b[60] + b[61];
-            a[39] = -6*b[16] + 6*b[17] + 6*b[20] - 6*b[21] - 3*b[32] - 3*b[33] + 3*b[36] + 3*b[37]
-                - 4*b[48] + 4*b[49] - 2*b[52] + 2*b[53] - 2*b[56] - 2*b[57] - b[60] - b[61];
-            a[40] = 9*b[0] - 9*b[2] - 9*b[4] + 9*b[6] + 6*b[16] + 3*b[18] - 6*b[20] - 3*b[22]
-                + 6*b[24] - 6*b[26] + 3*b[28] - 3*b[30] + 4*b[48] + 2*b[50] + 2*b[52] + b[54];
-            a[41] = 9*b[8] - 9*b[10] - 9*b[12] + 9*b[14] + 6*b[32] + 3*b[34] - 6*b[36] - 3*b[38]
-                + 6*b[40] - 6*b[42] + 3*b[44] - 3*b[46] + 4*b[56] + 2*b[58] + 2*b[60] + b[62];
-            a[42] = -27*b[0] + 27*b[1] + 27*b[2] - 27*b[3] + 27*b[4] - 27*b[5] - 27*b[6] + 27*b[7]
-                - 18*b[8] - 9*b[9] + 18*b[10] + 9*b[11] + 18*b[12] + 9*b[13] - 18*b[14] - 9*b[15]
-                - 18*b[16] + 18*b[17] - 9*b[18] + 9*b[19] + 18*b[20] - 18*b[21] + 9*b[22] - 9*b[23]
-                - 18*b[24] + 18*b[25] + 18*b[26] - 18*b[27] - 9*b[28] + 9*b[29] + 9*b[30] - 9*b[31]
-                - 12*b[32] - 6*b[33] - 6*b[34] - 3*b[35] + 12*b[36] + 6*b[37] + 6*b[38] + 3*b[39]
-                - 12*b[40] - 6*b[41] + 12*b[42] + 6*b[43] - 6*b[44] - 3*b[45] + 6*b[46] + 3*b[47]
-                - 12*b[48] + 12*b[49] - 6*b[50] + 6*b[51] - 6*b[52] + 6*b[53] - 3*b[54] + 3*b[55]
-                - 8*b[56] - 4*b[57] - 4*b[58] - 2*b[59] - 4*b[60] - 2*b[61] - 2*b[62] - b[63];
-            a[43] = 18*b[0] - 18*b[1] - 18*b[2] + 18*b[3] - 18*b[4] + 18*b[5] + 18*b[6] - 18*b[7]
-                + 9*b[8] + 9*b[9] - 9*b[10] - 9*b[11] - 9*b[12] - 9*b[13] + 9*b[14] + 9*b[15]
-                + 12*b[16] - 12*b[17] + 6*b[18] - 6*b[19] - 12*b[20] + 12*b[21] - 6*b[22] + 6*b[23]
-                + 12*b[24] - 12*b[25] - 12*b[26] + 12*b[27] + 6*b[28] - 6*b[29] - 6*b[30] + 6*b[31]
-                + 6*b[32] + 6*b[33] + 3*b[34] + 3*b[35] - 6*b[36] - 6*b[37] - 3*b[38] - 3*b[39]
-                + 6*b[40] + 6*b[41] - 6*b[42] - 6*b[43] + 3*b[44] + 3*b[45] - 3*b[46] - 3*b[47]
-                + 8*b[48] - 8*b[49] + 4*b[50] - 4*b[51] + 4*b[52] - 4*b[53] + 2*b[54] - 2*b[55]
-                + 4*b[56] + 4*b[57] + 2*b[58] + 2*b[59] + 2*b[60] + 2*b[61] + b[62] + b[63];
-            a[44] = -6*b[0] + 6*b[2] + 6*b[4] - 6*b[6] - 3*b[16] - 3*b[18] + 3*b[20] + 3*b[22]
-                - 4*b[24] + 4*b[26] - 2*b[28] + 2*b[30] - 2*b[48] - 2*b[50] - b[52] - b[54];
-            a[45] = -6*b[8] + 6*b[10] + 6*b[12] - 6*b[14] - 3*b[32] - 3*b[34] + 3*b[36] + 3*b[38]
-                - 4*b[40] + 4*b[42] - 2*b[44] + 2*b[46] - 2*b[56] - 2*b[58] - b[60] - b[62];
-            a[46] = 18*b[0] - 18*b[1] - 18*b[2] + 18*b[3] - 18*b[4] + 18*b[5] + 18*b[6] - 18*b[7]
-                + 12*b[8] + 6*b[9] - 12*b[10] - 6*b[11] - 12*b[12] - 6*b[13] + 12*b[14] + 6*b[15]
-                + 9*b[16] - 9*b[17] + 9*b[18] - 9*b[19] - 9*b[20] + 9*b[21] - 9*b[22] + 9*b[23]
-                + 12*b[24] - 12*b[25] - 12*b[26] + 12*b[27] + 6*b[28] - 6*b[29] - 6*b[30] + 6*b[31]
-                + 6*b[32] + 3*b[33] + 6*b[34] + 3*b[35] - 6*b[36] - 3*b[37] - 6*b[38] - 3*b[39]
-                + 8*b[40] + 4*b[41] - 8*b[42] - 4*b[43] + 4*b[44] + 2*b[45] - 4*b[46] - 2*b[47]
-                + 6*b[48] - 6*b[49] + 6*b[50] - 6*b[51] + 3*b[52] - 3*b[53] + 3*b[54] - 3*b[55]
-                + 4*b[56] + 2*b[57] + 4*b[58] + 2*b[59] + 2*b[60] + b[61] + 2*b[62] + b[63];
-            a[47] = -12*b[0] + 12*b[1] + 12*b[2] - 12*b[3] + 12*b[4] - 12*b[5] - 12*b[6] + 12*b[7]
-                - 6*b[8] - 6*b[9] + 6*b[10] + 6*b[11] + 6*b[12] + 6*b[13] - 6*b[14] - 6*b[15]
-                - 6*b[16] + 6*b[17] - 6*b[18] + 6*b[19] + 6*b[20] - 6*b[21] + 6*b[22] - 6*b[23]
-                - 8*b[24] + 8*b[25] + 8*b[26] - 8*b[27] - 4*b[28] + 4*b[29] + 4*b[30] - 4*b[31]
-                - 3*b[32] - 3*b[33] - 3*b[34] - 3*b[35] + 3*b[36] + 3*b[37] + 3*b[38] + 3*b[39]
-                - 4*b[40] - 4*b[41] + 4*b[42] + 4*b[43] - 2*b[44] - 2*b[45] + 2*b[46] + 2*b[47]
-                - 4*b[48] + 4*b[49] - 4*b[50] + 4*b[51] - 2*b[52] + 2*b[53] - 2*b[54] + 2*b[55]
-                - 2*b[56] - 2*b[57] - 2*b[58] - 2*b[59] - b[60] - b[61] - b[62] - b[63];
-            a[48] = 2*b[0] - 2*b[4] + b[24] + b[28];
-            a[49] = 2*b[8] - 2*b[12] + b[40] + b[44];
-            a[50] = -6*b[0] + 6*b[1] + 6*b[4] - 6*b[5] - 4*b[8] - 2*b[9] + 4*b[12] + 2*b[13]
-                - 3*b[24] + 3*b[25] - 3*b[28] + 3*b[29] - 2*b[40] - b[41] - 2*b[44] - b[45];
-            a[51] = 4*b[0] - 4*b[1] - 4*b[4] + 4*b[5] + 2*b[8] + 2*b[9] - 2*b[12] - 2*b[13]
-                + 2*b[24] - 2*b[25] + 2*b[28] - 2*b[29] + b[40] + b[41] + b[44] + b[45];
-            a[52] = 2*b[16] - 2*b[20] + b[48] + b[52];
-            a[53] = 2*b[32] - 2*b[36] + b[56] + b[60];
-            a[54] = -6*b[16] + 6*b[17] + 6*b[20] - 6*b[21] - 4*b[32] - 2*b[33] + 4*b[36] + 2*b[37]
-                - 3*b[48] + 3*b[49] - 3*b[52] + 3*b[53] - 2*b[56] - b[57] - 2*b[60] - b[61];
-            a[55] = 4*b[16] - 4*b[17] - 4*b[20] + 4*b[21] + 2*b[32] + 2*b[33] - 2*b[36] - 2*b[37]
-                + 2*b[48] - 2*b[49] + 2*b[52] - 2*b[53] + b[56] + b[57] + b[60] + b[61];
-            a[56] = -6*b[0] + 6*b[2] + 6*b[4] - 6*b[6] - 4*b[16] - 2*b[18] + 4*b[20] + 2*b[22]
-                - 3*b[24] + 3*b[26] - 3*b[28] + 3*b[30] - 2*b[48] - b[50] - 2*b[52] - b[54];
-            a[57] = -6*b[8] + 6*b[10] + 6*b[12] - 6*b[14] - 4*b[32] - 2*b[34] + 4*b[36] + 2*b[38]
-                - 3*b[40] + 3*b[42] - 3*b[44] + 3*b[46] - 2*b[56] - b[58] - 2*b[60] - b[62];
-            a[58] = 18*b[0] - 18*b[1] - 18*b[2] + 18*b[3] - 18*b[4] + 18*b[5] + 18*b[6] - 18*b[7]
-                + 12*b[8] + 6*b[9] - 12*b[10] - 6*b[11] - 12*b[12] - 6*b[13] + 12*b[14] + 6*b[15]
-                + 12*b[16] - 12*b[17] + 6*b[18] - 6*b[19] - 12*b[20] + 12*b[21] - 6*b[22] + 6*b[23]
-                + 9*b[24] - 9*b[25] - 9*b[26] + 9*b[27] + 9*b[28] - 9*b[29] - 9*b[30] + 9*b[31]
-                + 8*b[32] + 4*b[33] + 4*b[34] + 2*b[35] - 8*b[36] - 4*b[37] - 4*b[38] - 2*b[39]
-                + 6*b[40] + 3*b[41] - 6*b[42] - 3*b[43] + 6*b[44] + 3*b[45] - 6*b[46] - 3*b[47]
-                + 6*b[48] - 6*b[49] + 3*b[50] - 3*b[51] + 6*b[52] - 6*b[53] + 3*b[54] - 3*b[55]
-                + 4*b[56] + 2*b[57] + 2*b[58] + b[59] + 4*b[60] + 2*b[61] + 2*b[62] + b[63];
-            a[59] = -12*b[0] + 12*b[1] + 12*b[2] - 12*b[3] + 12*b[4] - 12*b[5] - 12*b[6] + 12*b[7]
-                - 6*b[8] - 6*b[9] + 6*b[10] + 6*b[11] + 6*b[12] + 6*b[13] - 6*b[14] - 6*b[15]
-                - 8*b[16] + 8*b[17] - 4*b[18] + 4*b[19] + 8*b[20] - 8*b[21] + 4*b[22] - 4*b[23]
-                - 6*b[24] + 6*b[25] + 6*b[26] - 6*b[27] - 6*b[28] + 6*b[29] + 6*b[30] - 6*b[31]
-                - 4*b[32] - 4*b[33] - 2*b[34] - 2*b[35] + 4*b[36] + 4*b[37] + 2*b[38] + 2*b[39]
-                - 3*b[40] - 3*b[41] + 3*b[42] + 3*b[43] - 3*b[44] - 3*b[45] + 3*b[46] + 3*b[47]
-                - 4*b[48] + 4*b[49] - 2*b[50] + 2*b[51] - 4*b[52] + 4*b[53] - 2*b[54] + 2*b[55]
-                - 2*b[56] - 2*b[57] - b[58] - b[59] - 2*b[60] - 2*b[61] - b[62] - b[63];
-            a[60] = 4*b[0] - 4*b[2] - 4*b[4] + 4*b[6] + 2*b[16] + 2*b[18] - 2*b[20] - 2*b[22]
-                + 2*b[24] - 2*b[26] + 2*b[28] - 2*b[30] + b[48] + b[50] + b[52] + b[54];
-            a[61] = 4*b[8] - 4*b[10] - 4*b[12] + 4*b[14] + 2*b[32] + 2*b[34] - 2*b[36] - 2*b[38]
-                + 2*b[40] - 2*b[42] + 2*b[44] - 2*b[46] + b[56] + b[58] + b[60] + b[62];
-            a[62] = -12*b[0] + 12*b[1] + 12*b[2] - 12*b[3] + 12*b[4] - 12*b[5] - 12*b[6] + 12*b[7]
-                - 8*b[8] - 4*b[9] + 8*b[10] + 4*b[11] + 8*b[12] + 4*b[13] - 8*b[14] - 4*b[15]
-                - 6*b[16] + 6*b[17] - 6*b[18] + 6*b[19] + 6*b[20] - 6*b[21] + 6*b[22] - 6*b[23]
-                - 6*b[24] + 6*b[25] + 6*b[26] - 6*b[27] - 6*b[28] + 6*b[29] + 6*b[30] - 6*b[31]
-                - 4*b[32] - 2*b[33] - 4*b[34] - 2*b[35] + 4*b[36] + 2*b[37] + 4*b[38] + 2*b[39]
-                - 4*b[40] - 2*b[41] + 4*b[42] + 2*b[43] - 4*b[44] - 2*b[45] + 4*b[46] + 2*b[47]
-                - 3*b[48] + 3*b[49] - 3*b[50] + 3*b[51] - 3*b[52] + 3*b[53] - 3*b[54] + 3*b[55]
-                - 2*b[56] - b[57] - 2*b[58] - b[59] - 2*b[60] - b[61] - 2*b[62] - b[63];
-            a[63] = 8*b[0] - 8*b[1] - 8*b[2] + 8*b[3] - 8*b[4] + 8*b[5] + 8*b[6] - 8*b[7]
-                + 4*b[8] + 4*b[9] - 4*b[10] - 4*b[11] - 4*b[12] - 4*b[13] + 4*b[14] + 4*b[15]
-                + 4*b[16] - 4*b[17] + 4*b[18] - 4*b[19] - 4*b[20] + 4*b[21] - 4*b[22] + 4*b[23]
-                + 4*b[24] - 4*b[25] - 4*b[26] + 4*b[27] + 4*b[28] - 4*b[29] - 4*b[30] + 4*b[31]
-                + 2*b[32] + 2*b[33] + 2*b[34] + 2*b[35] - 2*b[36] - 2*b[37] - 2*b[38] - 2*b[39]
-                + 2*b[40] + 2*b[41] - 2*b[42] - 2*b[43] + 2*b[44] + 2*b[45] - 2*b[46] - 2*b[47]
-                + 2*b[48] - 2*b[49] + 2*b[50] - 2*b[51] + 2*b[52] - 2*b[53] + 2*b[54] - 2*b[55]
-                + b[56] + b[57] + b[58] + b[59] + b[60] + b[61] + b[62] + b[63];
-        }
-        DEVICE void compute_b(float * __restrict__ b, int * __restrict__ inds) const
-        { /* Fills b[0..63] for the cell whose lower corner is inds[0..2].
-           * For corner i0 (bit 0 -> +1 in x, bit 1 -> +1 in y, bit 2 -> +1 in z):
-           *   b[i0]      node value
-           *   b[8+i0]    central difference in x     b[16+i0]  in y     b[24+i0]  in z
-           *   b[32+i0]   mixed xy difference         b[40+i0]  xz       b[48+i0]  yz
-           *   b[56+i0]   mixed xyz difference
-           * All difference entries of a corner are set to 0 when any of its indices
-           * is 0 or the last index along that axis. */
-            int k[3];
-            k[0] = nx;
-            k[1] = ny;
-            k[2] = nz;
-
-            int inds2[3] = {0,0,0};
-
-            for (int i0 = 0; i0 < 8; i0++) { // one pass per cell corner
-                inds2[0] = 0;
-                inds2[1] = 0;
-                inds2[2] = 0;
-
-                /* printf("%d\n", inds2[0]); */
-                /* printf("%d\n", inds2[1]); */
-                /* printf("%d\n", inds2[2]); */
-
-                bool zero_derivs = false;
-
-                int bit = 1;    // bit = 2^i1 in the below loop
-                for (int i1 = 0; i1 < 3; i1++) {
-                    inds2[i1] = (inds[i1] + ((i0 & bit) ? 1 : 0)) % k[i1]; // corner index, reduced with % (stays negative for negative input)
-                    bit <<= 1;  // i.e. multiply by 2
-                }
-                int d_lo[3] = {1, 1, 1}; // backward step used by the differences below
-                float voffs[3] = {0.0f, 0.0f, 0.0f}; // additive offsets; always 0 in this code
-                float dscales[3] = {0.5, 0.5, 0.5}; // 1/2 factor of a two-node central difference
-
-                for (int i1 = 0; i1 < 3; i1++) {
-                    if (inds2[i1] == 0) {
-                        zero_derivs = true;
-                    }
-                    else if (inds2[i1] == k[i1]-1) {
-                        zero_derivs = true;
-                    }
-                    else {
-                        // printf("%d\n",i1);
-                        voffs[i1] = 0.0;
-                    }
-                }
-
-                // V
-                b[i0] = getValue(inds2[0],inds2[1],inds2[2]);
-
-                if (zero_derivs) {
-                    b[8+i0] = 0.0;
-                    b[16+i0] = 0.0;
-                    b[24+i0] = 0.0;
-                    b[32+i0] = 0.0;
-                    b[40+i0] = 0.0;
-                    b[48+i0] = 0.0;
-                    b[56+i0] = 0.0;
-                } else {
-                    b[8+i0]  = dscales[0] * (getValue(inds2[0]+1,inds2[1],inds2[2]) - getValue(inds2[0]-d_lo[0],inds2[1],inds2[2]) + voffs[0]); // x
-                    b[16+i0] = dscales[1] * (getValue(inds2[0],inds2[1]+1,inds2[2]) - getValue(inds2[0],inds2[1]-d_lo[1],inds2[2]) + voffs[1]); // y
-                    b[24+i0] = dscales[2] * (getValue(inds2[0],inds2[1],inds2[2]+1) - getValue(inds2[0],inds2[1],inds2[2]-d_lo[2]) + voffs[2]); // z
-                    b[32+i0] = dscales[0] * dscales[1] // xy
-                        * (getValue(inds2[0]+1,inds2[1]+1,inds2[2]) - getValue(inds2[0]-d_lo[0],inds2[1]+1,inds2[2]) -
-                           getValue(inds2[0]+1,inds2[1]-d_lo[1],inds2[2]) + getValue(inds2[0]-d_lo[0],inds2[1]-d_lo[1],inds2[2]));
-                    b[40+i0] = dscales[0] * dscales[2] // xz
-                        * (getValue(inds2[0]+1,inds2[1],inds2[2]+1) - getValue(inds2[0]-d_lo[0],inds2[1],inds2[2]+1) -
-                           getValue(inds2[0]+1,inds2[1],inds2[2]-d_lo[2]) + getValue(inds2[0]-d_lo[0],inds2[1],inds2[2]-d_lo[2]));
-                    b[48+i0] = dscales[1] * dscales[2] // yz
-                        * (getValue(inds2[0],inds2[1]+1,inds2[2]+1) - getValue(inds2[0],inds2[1]-d_lo[1],inds2[2]+1) -
-                           getValue(inds2[0],inds2[1]+1,inds2[2]-d_lo[2]) + getValue(inds2[0],inds2[1]-d_lo[1],inds2[2]-d_lo[2]));
-                    b[56+i0] = dscales[0] * dscales[1] * dscales[2] // xyz: each term's sign is the product of its three step signs
-                        * (getValue(inds2[0]+1,inds2[1]+1,inds2[2]+1) - getValue(inds2[0]+1,inds2[1]+1,inds2[2]-d_lo[2]) -
-                           getValue(inds2[0]+1,inds2[1]-d_lo[1],inds2[2]+1) - getValue(inds2[0]-d_lo[0],inds2[1]+1,inds2[2]+1) +
-                           getValue(inds2[0]+1,inds2[1]-d_lo[1],inds2[2]-d_lo[2]) + getValue(inds2[0]-d_lo[0],inds2[1]+1,inds2[2]-d_lo[2]) +
-                           getValue(inds2[0]-d_lo[0],inds2[1]-d_lo[1],inds2[2]+1) - getValue(inds2[0]-d_lo[0],inds2[1]-d_lo[1],inds2[2]-d_lo[2]));
-                }
-            }
-        }
-        // Bounds-checked read of node (ix, iy, iz): returns 0.0f when any index
-        // is outside the grid, otherwise val at flat index iz + iy*nz + ix*ny*nz.
-        // (An index-clamping variant used to be kept here as commented-out code.)
-        HOST DEVICE inline float getValue(int ix, int iy, int iz) const {
-            const bool inside = ix >= 0 && ix < nx &&
-                                iy >= 0 && iy < ny &&
-                                iz >= 0 && iz < nz;
-            if (!inside) {
-                return 0.0f;
-            }
-            return val[iz + iy*nz + ix*ny*nz];
-        }
-
-  // Force lookup with periodic wrapping of the node indices.
-  // Collects the 4x4x4 block of node values around pos (offsets -1..2 from the
-  // home node, each index folded into the grid with wrap()), hands the block
-  // and the fractional offsets to interpolateDiffX/Y/Z, and returns the
-  // resulting vector passed through basisInv.transpose().transform().
-  inline virtual Vector3 interpolateForce(Vector3 pos) const {
-		const Vector3 l = basisInv.transform(pos - origin);
-
-		// Home node and grid extents, one entry per axis (x, y, z).
-		const int home[3] = { int(floor(l.x)), int(floor(l.y)), int(floor(l.z)) };
-		const int g[3] = { nx, ny, nz };
-
-		// Interpolation coordinates inside the home cell.
-		float w[3];
-		w[0] = l.x - home[0];
-		w[1] = l.y - home[1];
-		w[2] = l.z - home[2];
-
-		// Values at the neighbouring nodes.
-		float g1[4][4][4];
-		//RBTODO parallelize?
-		for (int ix = 0; ix < 4; ix++) {
-			const int jx = wrap(ix-1 + home[0], g[0]);
-			for (int iy = 0; iy < 4; iy++) {
-				const int jy = wrap(iy-1 + home[1], g[1]);
-				for (int iz = 0; iz < 4; iz++) {
-					const int jz = wrap(iz-1 + home[2], g[2]);
-					g1[ix][iy][iz] = val[jz + jy*nz + jx*nz*ny];
-				}
-			}
-		}
-
-		Vector3 f;
-		f.x = interpolateDiffX(pos, w, g1);
-		f.y = interpolateDiffY(pos, w, g1);
-		f.z = interpolateDiffZ(pos, w, g1);
-		return basisInv.transpose().transform(f);
-	}
-
-  // Number of whole periods of length l lying below x, i.e. floor(x/l) as an
-  // int. Device code uses the __fdividef/floorf pair; host code uses floor.
-  HOST DEVICE   inline int quotient(float x, float l) const {
-#if __CUDA_ARCH__ > 0
-	  const float ratio = __fdividef(x,l);
-	  return int(floorf( ratio ));
-#else
-	  const float ratio = x/l;
-	  return int(floor(ratio));
-#endif
-  }
-
-  // Wrap coordinate: subtract floor(x/l) whole periods so that 0 <= x < l.
-  HOST DEVICE inline float wrapFloat(float x, float l) const {
-		const int periods = int(floor(x/l));
-		return x - periods*l;
-  }
-  
-  // Wrap distance: -0.5*l <= x < 0.5*l
-  // The value is first folded into 0 <= x < l; anything in the upper half of
-  // that range is then shifted down by one period.
-  HOST DEVICE static inline float wrapDiff(float x, float l) {
-		const int periods = int(floor(x/l));
-		const float folded = x - periods*l;
-		return (folded >= 0.5f * l) ? folded - l : folded;
-  }
-
-  // TODO: implement simpler approach when basis is diagonal
-  // TODO: implement device version using __fdividef and floorf
-  // TODO: make BaseGrid an abstract class that diagGrid and nonDiagGrid inherit from 
-  // TODO: test wrap and wrapDiff for non-diagonal basis
-  // Wrap vector, 0 <= x < lx  &&  0 <= y < ly  &&  0 <= z < lz
-  // r is expressed in grid units relative to origin, the whole grid periods
-  // along each axis are counted with quotient(), and that many periods are
-  // subtracted from r along the matching basis direction.
-  HOST DEVICE inline Vector3 wrap(Vector3 r) const {
-	const Vector3 cell = basisInv.transform(r - origin);
-	// Whole periods along each axis, expressed in nodes.
-	const int shiftX = quotient(cell.x,nx) * nx;
-	const int shiftY = quotient(cell.y,ny) * ny;
-	const int shiftZ = quotient(cell.z,nz) * nz;
-	if ( !basis.isDiagonal() ) {
-		r = r - shiftX*basis.ex();
-		r = r - shiftY*basis.ey();
-		r = r - shiftZ*basis.ez();
-		return r;
-	}
-	// Diagonal basis: only the diagonal entries are needed.
-	return r - Vector3(shiftX*basis.exx,
-					   shiftY*basis.eyy,
-					   shiftZ*basis.ezz);
-  }
-
-  // Wrap a displacement vector by whole grid periods. Each grid-unit
-  // component is offset by half a period before the periods are counted with
-  // quotient(), and that many periods are subtracted from r along the
-  // matching basis direction. No origin shift is applied.
-  HOST DEVICE inline Vector3 wrapDiff(Vector3 r) const {
-	const Vector3 cell = basisInv.transform(r);
-	// Whole periods along each axis after the half-period offset, in nodes.
-	const int shiftX = quotient(cell.x+0.5f*nx,nx) * nx;
-	const int shiftY = quotient(cell.y+0.5f*ny,ny) * ny;
-	const int shiftZ = quotient(cell.z+0.5f*nz,nz) * nz;
-	if ( !basis.isDiagonal() ) {
-		r = r - shiftX*basis.ex();
-		r = r - shiftY*basis.ey();
-		r = r - shiftZ*basis.ez();
-		return r;
-	}
-	// Diagonal basis: only the diagonal entries are needed.
-	return r - Vector3(shiftX*basis.exx,
-					   shiftY*basis.eyy,
-					   shiftZ*basis.ezz);
-  }
-  
-  // Wrap vector distance, -0.5*l <= x < 0.5*l  && ...
-  /* HOST DEVICE inline Vector3 wrapDiff(Vector3 r) const { */
-  /*   Vector3 l = basisInv.transform(r); */
-  /*   l.x = wrapDiff(l.x, nx); */
-  /*   l.y = wrapDiff(l.y, ny); */
-  /*   l.z = wrapDiff(l.z, nz); */
-  /*   return basis.transform(l); */
-  /* } */
-  // Component-wise variant of wrapDiff: r is converted to grid units, each
-  // component is wrapped with the scalar wrapDiff() using the node count of
-  // its axis as the period, and the result is converted back with basis.
-  HOST DEVICE inline Vector3 wrapDiffOrig(Vector3 r) const {
-    Vector3 cell = basisInv.transform(r);
-    cell.x = wrapDiff(cell.x, nx);
-    cell.y = wrapDiff(cell.y, ny);
-    cell.z = wrapDiff(cell.z, nz);
-    const Vector3 wrapped = basis.transform(cell);
-    return wrapped;
-  }
-  Vector3 wrapDiffNearest(Vector3 r) const;
-
-  // Includes the home node.
-  // indexBuffer must have a size of at least 27.
-  void getNeighbors(int j, int* indexBuffer) const;
-  
-  // Get the values at the neighbors of a node.
-  // Note that homeX, homeY, and homeZ do not need to be wrapped,
-  // since we do it here.
-  void getNeighborValues(NeighborList* neigh, int homeX, int homeY, int homeZ) const;
-  inline void setVal(float* v) { val = v; } // Repoints val at v: nothing is copied and the previous pointer is not released here.
-
-    // Build a device-side copy of this grid and return its device address.
-    // The value array is copied into its own device allocation, a host
-    // temporary is filled with this grid's fields plus that device pointer,
-    // and the temporary is then copied into a device-side BaseGrid.
-    BaseGrid* copy_to_cuda() const {
-	BaseGrid* grid_d = NULL;
-	float* values_d = NULL;
-	const size_t bytes = sizeof(float) * size;
-
-	// Device allocations, then the value array itself.
-	gpuErrchk(cudaMalloc(&grid_d, sizeof(BaseGrid)));
-	gpuErrchk(cudaMalloc(&values_d, bytes));
-	gpuErrchk(cudaMemcpy(values_d, val, bytes, cudaMemcpyHostToDevice));
-
-	// Host-side image of the device object.
-	BaseGrid image;
-	image.origin = origin;
-	image.basis = basis;
-	image.basisInv = basisInv;
-	image.nx = nx;
-	image.ny = ny;
-	image.nz = nz;
-	image.size = size;
-	image.val = values_d;
-	gpuErrchk(cudaMemcpy(grid_d, &image, sizeof(BaseGrid), cudaMemcpyHostToDevice));
-
-	// Detach the device pointer before the temporary goes out of scope
-	// (presumably so its destructor does not touch device memory - confirm).
-	image.val = NULL;
-	return grid_d;
-    }
-
-    /**
-     * Release a device-side grid produced by copy_to_cuda().
-     *
-     * The device struct is copied back to the host to learn the device
-     * address of its value array; that array and then the struct itself are
-     * freed. cudaFree() must be given the device pointer itself: the earlier
-     * code passed the address of the host variables holding those pointers,
-     * so nothing was released.
-     *
-     * @param g_d  device pointer returned by copy_to_cuda(); NULL is ignored
-     */
-    static void remove_from_cuda(BaseGrid* g_d) {
-	if (g_d == NULL) return;
-	BaseGrid g_tmp;
-	gpuErrchk(cudaMemcpy(&g_tmp, g_d, sizeof(BaseGrid), cudaMemcpyDeviceToHost));
-	gpuErrchk(cudaFree(g_tmp.val));
-	// Detach the freed device pointer before the temporary goes out of scope
-	// (presumably so its destructor does not act on it - confirm).
-	g_tmp.val = NULL;
-	// The device struct is released next, so writing the NULL back into it
-	// first would have no observable effect.
-	gpuErrchk(cudaFree(g_d));
-    }
-
-public:
-  Vector3 origin; // subtracted from a position before basisInv is applied
-  Matrix3 basis; // maps grid-unit vectors to positions (see wrapDiffOrig); its ex()/ey()/ez() and exx/eyy/ezz are used for period shifts
-  int nx, ny, nz; // node counts per axis; node (ix,iy,iz) lives at val[iz + iy*nz + ix*ny*nz]
-  int size; // number of floats in val, as used by copy_to_cuda()
-  Matrix3 basisInv; // maps (position - origin) to grid-index coordinates; presumably the inverse of basis - confirm
-public:
-  float* val; // node values in the flat layout described above
-};
-
-#ifndef delgpuErrchk // NOTE(review): as written, the two #undefs below run only when delgpuErrchk is NOT defined,
-#undef  delgpuErrchk // so this #undef is a no-op,
-#undef  gpuErrchk // and gpuErrchk is removed in exactly that case. If delgpuErrchk marks a gpuErrchk defined by this header, #ifdef was probably intended - confirm against the matching #define.
-#endif
-
-#endif
diff --git a/src/BrownParticlesKernel.h b/src/BrownParticlesKernel.h
deleted file mode 100644
index 5475a454d4a7b3ef4c70a904c8ce262a8d71780e..0000000000000000000000000000000000000000
--- a/src/BrownParticlesKernel.h
+++ /dev/null
@@ -1,211 +0,0 @@
-#ifndef KERNEL_H_
-#define KERNEL_H_
-#include "useful.h"
-
-template<const int BlockSize>
-static __global__ void BrownParticlesKineticEnergy(Vector3* P_n, int type[], BrownianParticleType* part[], 
-                                                   float *vec_red, int num, int num_rb_attached_particles, int num_replicas)
-{
-    __shared__ __align__(4) float sdata[BlockSize];
-    
-    Vector3 p1, p2;
-    float mass1, mass2;
-
-    unsigned int tid = threadIdx.x;
-    unsigned int i = blockIdx.x*(BlockSize<<1) + tid;
-    unsigned int gridSize = (BlockSize<<1)*gridDim.x;
-
-    sdata[tid] = 0.f; 
-
-    int n = (num*num_replicas);
-
-    while (i < n) 
-    {
-	const int i1 = (i % num) +  (i/num)*(num+num_rb_attached_particles);
-        const int t1 = type[i1];
-        const BrownianParticleType& pt1 = *part[t1];
-
-        p1    = P_n[i];
-        mass1 = pt1.mass;
-        
-        if(i + BlockSize < n)
-        {
-	    const int i2 = ((i+BlockSize) % num) +  ((i+BlockSize)/num)*(num+num_rb_attached_particles);
-            const int t2 = type[i2];
-            const BrownianParticleType& pt2 = *part[t2];
-
-            p2    = P_n[i+BlockSize];
-            mass2 = pt2.mass;
-
-            sdata[tid] += (p1.length2() / mass1 + p2.length2() / mass2); 
-        }
-        else
-            sdata[tid] += p1.length2() / mass1;
-
-        i += gridSize;
-    }
-
-    sdata[tid] *= 0.50f;
-
-    __syncthreads();
-
-    if (BlockSize == 512) 
-    { 
-        if (tid < 256) 
-            sdata[tid] += sdata[tid + 256]; 
-        __syncthreads();
-       if (tid < 128)
-            sdata[tid] += sdata[tid + 128];
-        __syncthreads();
-       if (tid < 64)
-            sdata[tid] += sdata[tid + 64];
-        __syncthreads();
-       if (tid < 32)
-            sdata[tid] += sdata[tid + 32];
-        __syncthreads();
-       if (tid < 16)
-            sdata[tid] += sdata[tid + 16];
-        __syncthreads();
-       if (tid < 8)
-            sdata[tid] += sdata[tid + 8];
-        __syncthreads();
-       if (tid < 4)
-            sdata[tid] += sdata[tid + 4];
-        __syncthreads();
-       if (tid < 2)
-            sdata[tid] += sdata[tid + 2];
-        __syncthreads();
-       if (tid < 1)
-            sdata[tid] += sdata[tid + 1];
-        __syncthreads();
-    }
-    else if (BlockSize == 256) 
-    {
-        if (tid < 128)
-            sdata[tid] += sdata[tid + 128];
-        __syncthreads();
-       if (tid < 64)
-            sdata[tid] += sdata[tid + 64];
-        __syncthreads();
-       if (tid < 32)
-            sdata[tid] += sdata[tid + 32];
-        __syncthreads();
-       if (tid < 16)
-            sdata[tid] += sdata[tid + 16];
-        __syncthreads();
-       if (tid < 8)
-            sdata[tid] += sdata[tid + 8];
-        __syncthreads();
-       if (tid < 4)
-            sdata[tid] += sdata[tid + 4];
-        __syncthreads();
-       if (tid < 2)
-            sdata[tid] += sdata[tid + 2];
-        __syncthreads();
-       if (tid < 1)
-            sdata[tid] += sdata[tid + 1];
-        __syncthreads(); 
-    }
-    else if (BlockSize == 128) 
-    {
-       if (tid < 64)
-            sdata[tid] += sdata[tid + 64];
-        __syncthreads();
-       if (tid < 32)
-            sdata[tid] += sdata[tid + 32];
-        __syncthreads();
-       if (tid < 16)
-            sdata[tid] += sdata[tid + 16];
-        __syncthreads();
-       if (tid < 8)
-            sdata[tid] += sdata[tid + 8];
-        __syncthreads();
-       if (tid < 4)
-            sdata[tid] += sdata[tid + 4];
-        __syncthreads();
-       if (tid < 2)
-            sdata[tid] += sdata[tid + 2];
-        __syncthreads();
-       if (tid < 1)
-            sdata[tid] += sdata[tid + 1];
-        __syncthreads();
- 
-    }
-    else if (BlockSize == 64)
-    {
-       if (tid < 32)
-            sdata[tid] += sdata[tid + 32];
-        __syncthreads();
-       if (tid < 16)
-            sdata[tid] += sdata[tid + 16];
-        __syncthreads();
-       if (tid < 8)
-            sdata[tid] += sdata[tid + 8];
-        __syncthreads();
-       if (tid < 4)
-            sdata[tid] += sdata[tid + 4];
-        __syncthreads();
-       if (tid < 2)
-            sdata[tid] += sdata[tid + 2];
-        __syncthreads();
-       if (tid < 1)
-            sdata[tid] += sdata[tid + 1];
-        __syncthreads();
-
-    }
-    else if (BlockSize == 32)
-    {
-       if (tid < 16)
-            sdata[tid] += sdata[tid + 16];
-        __syncthreads();
-       if (tid < 8)
-            sdata[tid] += sdata[tid + 8];
-        __syncthreads();
-       if (tid < 4)
-            sdata[tid] += sdata[tid + 4];
-        __syncthreads();
-       if (tid < 2)
-            sdata[tid] += sdata[tid + 2];
-        __syncthreads();
-       if (tid < 1)
-            sdata[tid] += sdata[tid + 1];
-        __syncthreads();
-
-    }
-    __syncthreads();
-    if (tid == 0) 
-        vec_red[blockIdx.x] = sdata[0];
-}
-//The size must be power of 2, otherwise there is error
-//This small kernel is to reduce the small vecotr obtained by
-//the reduction kernel from the above.
-//The grid size should be one.
-//This small routine is to help do further reduction of a small vector
-
-template<int BlockSize>
-static __global__ void Reduction(float* dev_vec, float* result, int Size)
-{
-    __shared__ __align__(4) float data[BlockSize];
-    const unsigned int tid = threadIdx.x;
-    
-    data[tid] = dev_vec[tid];
-    size_t idx = tid + BlockSize;
-    while(idx < Size)
-    {
-        data[tid] += dev_vec[idx];
-        idx += BlockSize;
-    }
-    __syncthreads();
-
-    int n = BlockSize;
-    while(n > 1)
-    {
-        n = (n >> 1);
-        if(tid < n)
-            data[tid] += data[tid+n];
-        __syncthreads();
-    }
-    if(tid == 0) 
-        result[0] = data[0];
-}
-#endif
diff --git a/src/BrownianParticle.cu b/src/BrownianParticle.cu
deleted file mode 100644
index 46f82c22754c4a6e775f3dba0055c4a4935ca4e4..0000000000000000000000000000000000000000
--- a/src/BrownianParticle.cu
+++ /dev/null
@@ -1,14 +0,0 @@
-// BrownianParticle.cu
-// Copyright Justin Dufresne and Terrance Howard, 2013
-
-#include "BrownianParticle.h"
-
-BrownianParticle& BrownianParticle::operator=(const BrownianParticle& src) {
-	id = src.id;
-	type = src.type;
-	pos = src.pos;
-	orientation = src.orientation;
-	has_orientation_ = src.has_orientation_;
-	is_dummy_ = src.is_dummy_;
-	return *this;
-}
diff --git a/src/BrownianParticle.h b/src/BrownianParticle.h
deleted file mode 100644
index 9f9729f5aaeb5fc3f5a262f5c21b6eabed27293a..0000000000000000000000000000000000000000
--- a/src/BrownianParticle.h
+++ /dev/null
@@ -1,73 +0,0 @@
-// BrownianParticle.h
-// Copyright Justin Dufresne and Terrance Howard, 2013
-
-#include <algorithm>
-
-#include <cuda.h>
-
-#include "useful.h"
-#include "JamesBond.h"
-
-class BrownianParticle {
-public:
-	BrownianParticle() :
-			id(-1), type(-1),
-			has_orientation_(false) { }
-
-	BrownianParticle(int id) :
-			id(id), type(0), pos(Vector3(0.0f)),
-			has_orientation_(false) { }
-
-	BrownianParticle(int id, const Vector3& pos, int type) :
-			id(id), type(type), pos(pos),
-			has_orientation_(false) { }
-
-	BrownianParticle(int id, const Vector3& pos, int type,
-									 const Vector3& orientation) :
-			id(id), type(type), pos(pos),
-			orientation(orientation),
-			has_orientation_(true) { }
-
-	HOST DEVICE
-	inline bool has_orientation() const { return has_orientation_; }
-
-	HOST DEVICE
-	inline bool is_dummy() const { return is_dummy_; }
-	
-	HOST DEVICE
-	inline void lose_orientation() { has_orientation_ = false; }
-
-	HOST DEVICE
-	inline void add_orientation(const Vector3& o) {
-		orientation = o;
-		has_orientation_ = true;
-	}
-
-	BrownianParticle& operator=(const BrownianParticle& src);
-
-	// Static comparison functions for sorting
-	static inline bool compareByIndex(const BrownianParticle& a,
-																		const BrownianParticle& b) {
-		return a.id < b.id;
-	}
-
-	static inline bool compareByType(const BrownianParticle& a,
-																	 const BrownianParticle& b) {
-		if (a.type == b.type)
-			return compareByIndex(a, b);
-		return a.type < b.type;
-	}
-
-public:
-	int id;
-	int type; // index into global type array.
-
-	Vector3 pos;
-
-	Vector3 orientation;
-
-private:
-	bool is_dummy_;
-
-	bool has_orientation_;
-};
diff --git a/src/BrownianParticleType.cpp b/src/BrownianParticleType.cpp
deleted file mode 100644
index 226814419a20fef51671d3a17ada2539b445f738..0000000000000000000000000000000000000000
--- a/src/BrownianParticleType.cpp
+++ /dev/null
@@ -1,172 +0,0 @@
-#include "BrownianParticleType.h"
-
-//////////////////////////////////////////
-// BrownianParticleType Implementations //
-//////////////////////////////////////////
-void BrownianParticleType::clear() {
-	if (pmf != NULL) delete [] pmf;
-	if (pmf_scale != NULL) delete [] pmf_scale;
-	if (diffusionGrid != NULL) delete diffusionGrid;
-	if (forceXGrid != NULL) delete forceXGrid;
-	if (forceYGrid != NULL) delete forceYGrid;
-	if (forceZGrid != NULL) delete forceZGrid;
-	if (reservoir != NULL) delete reservoir;
-        if (meanPmf != NULL) delete []  meanPmf;
-	pmf = NULL, diffusionGrid = NULL;
-	pmf_scale = NULL;
-	pmf_boundary_conditions = NULL;
-	forceXGrid = NULL, forceYGrid = NULL, forceZGrid = NULL;
-	reservoir = NULL, meanPmf = NULL;
-}
-
-void BrownianParticleType::copy(const BrownianParticleType& src) {
-	name = src.name;
-	num = src.num;
-	diffusion = src.diffusion;
-        mass      = src.mass;
-	charge = src.charge;
-	radius = src.radius;
-	eps = src.eps;
-        pmf = src.pmf;
-        pmf_scale = src.pmf_scale;
-        pmf_boundary_conditions = src.pmf_boundary_conditions;
-	meanPmf = src.meanPmf;
-        numPartGridFiles = src.numPartGridFiles;
-        //Han-Yi Chou
-        transDamping = src.transDamping;
-        mu = src.mu;
-        diffusionGrid = NULL;
-        forceXGrid = NULL, forceYGrid = NULL, forceZGrid = NULL;
-        reservoir = NULL;
-        //if (src.pmf != NULL) pmf = new BaseGrid(*src.pmf);
-        if (src.diffusionGrid != NULL) diffusionGrid = new BaseGrid(*src.diffusionGrid);
-        if (src.forceXGrid != NULL) forceXGrid = new BaseGrid(*src.forceXGrid);
-        if (src.forceYGrid != NULL) forceYGrid = new BaseGrid(*src.forceYGrid);
-        if (src.forceZGrid != NULL) forceZGrid = new BaseGrid(*src.forceZGrid);
-        if (src.reservoir != NULL) reservoir = new Reservoir(*src.reservoir);
-
-        /*
-	pmf = NULL, diffusionGrid = NULL;
-	forceXGrid = NULL, forceYGrid = NULL, forceZGrid = NULL;
-	reservoir = NULL;
-	if (src.pmf != NULL) pmf = new BaseGrid(*src.pmf);
-	if (src.diffusionGrid != NULL) diffusionGrid = new BaseGrid(*src.diffusionGrid);
-	if (src.forceXGrid != NULL) forceXGrid = new BaseGrid(*src.forceXGrid);
-	if (src.forceYGrid != NULL) forceYGrid = new BaseGrid(*src.forceYGrid);
-	if (src.forceZGrid != NULL) forceZGrid = new BaseGrid(*src.forceZGrid);
-	if (src.reservoir != NULL) reservoir = new Reservoir(*src.reservoir);*/
-}
-
-BrownianParticleType& BrownianParticleType::operator=(const BrownianParticleType& src) {
-        if(&src != this)
-        {
-	    clear();
-	    copy(src);
-        }
-	return *this;
-}
-/*
-bool BrownianParticleType::crop(int x0, int y0, int z0,
-																int x1, int y1, int z1, bool keep_origin) {
-	bool success = true;
-	
-	// Try cropping
-	BaseGrid *new_pmf(NULL), *new_diffusionGrid(NULL);
-	BaseGrid *new_forceXGrid(NULL), *new_forceYGrid(NULL), *new_forceZGrid(NULL);
-	if (pmf != NULL) {
-		new_pmf = new BaseGrid(*pmf);
-		success = new_pmf->crop(x0, y0, z0, x1, y1, z1, keep_origin);
-	}
-	if (success && diffusionGrid != NULL) {
-		new_diffusionGrid = new BaseGrid(*diffusionGrid);
-		success = new_diffusionGrid->crop(x0, y0, z0, x1, y1, z1, keep_origin);
-	}
-	if (success && forceXGrid != NULL) {
-		new_forceXGrid = new BaseGrid(*forceXGrid);
-		success = new_forceXGrid->crop(x0, y0, z0, x1, y1, z1, keep_origin);
-	}
-	if (success && forceYGrid != NULL) {
-		new_forceYGrid = new BaseGrid(*forceYGrid);
-		success = new_forceYGrid->crop(x0, y0, z0, x1, y1, z1, keep_origin); 
-	}
-	if (success && forceZGrid != NULL) {
-		new_forceZGrid = new BaseGrid(*forceZGrid);
-		success = new_forceZGrid->crop(x0, y0, z0, x1, y1, z1, keep_origin); 
-	}
-	
-	// Save results
-	if (success) {
-		if (pmf != NULL) {
-			delete pmf;
-			pmf = new_pmf;
-		}
-		if (diffusionGrid != NULL) {
-			delete diffusionGrid;
-			diffusionGrid = new_diffusionGrid;
-		}
-		if (forceXGrid != NULL) {
-			delete forceXGrid;
-			forceXGrid = new_forceXGrid;
-		}
-		if (forceYGrid != NULL) {
-			delete forceYGrid;
-			forceYGrid = new_forceYGrid;
-		}
-		if (forceZGrid != NULL) {
-			delete forceZGrid;
-			forceZGrid = new_forceZGrid;
-		}
-	} else {
-		if (new_pmf != NULL) delete new_pmf;
-		if (new_diffusionGrid != NULL) delete new_diffusionGrid;
-		if (new_forceXGrid != NULL) delete new_forceXGrid;
-		if (new_forceYGrid != NULL) delete new_forceYGrid;
-		if (new_forceZGrid != NULL) delete new_forceZGrid;
-	}
-		
-	return success;
-}
-*/
-/*
-///////////////////////////////////////
-// TypeDecomposition Implementations //
-///////////////////////////////////////
-TypeDecomposition::TypeDecomposition(const CellDecomposition &decomp,
-		const BrownianParticleType *parts, size_t num_parts) :
-		num_cells_(decomp.size()), num_parts_(num_parts), parts_(decomp.size()) {
-	int cutoff = (int) decomp.getCutoff();
-	
-	for (size_t c = 0; c < num_cells_; c++) {
-		parts_[c] = new BrownianParticleType[num_parts_];
-		int3 pos = decomp.getCellPos(c);
-	
-		printf("pos[%lu] (%d, %d, %d)\n", c, pos.x, pos.y, pos.z);
-		int x0 = cutoff * pos.x;
-		int y0 = cutoff * pos.y;
-		int z0 = cutoff * pos.z;
-		for (size_t type = 0; type < num_parts; type++) {
-			parts_[c][type] = BrownianParticleType(parts[type]);
-			bool success = parts_[c][type].crop(x0, y0, z0,
-					x0 + cutoff, y0 + cutoff, z0 + cutoff, false);
-			if (!success)
-				printf("WARNING: parts[%lu][%lu] was not cropped, %s %d\n",
-						c, type, __FILE__, __LINE__);
-		}
-	}
-}
-
-TypeDecomposition::~TypeDecomposition() {
-	for (size_t c = 0; c < num_cells_; c++) {
-		delete[] parts_[c];
-	}
-}
-
-
-const BrownianParticleType* TypeDecomposition::at(size_t i) const {
-	if (i >= num_cells_) {
-		printf("ERROR: out of bounds [%lu] %s %d\n", i, __FILE__, __LINE__);
-		return NULL;
-	}
-	return parts_[i];
-}
-*/
diff --git a/src/BrownianParticleType.h b/src/BrownianParticleType.h
deleted file mode 100644
index c4d3f336ff12a6ffbb128bcbdb048c15346d9f93..0000000000000000000000000000000000000000
--- a/src/BrownianParticleType.h
+++ /dev/null
@@ -1,113 +0,0 @@
-// BrownianParticleType.h (2002)
-// Contains BrownianParticleType and TypeDecomposition classes
-//
-// Author: Jeff Comer <jcomer2@illinois.edu>
-// Edited (2013) by Terrance Howard <howard33@illinois.edu>,
-//                  Justin Dufresne <jdufres1@friars.providence.edu>
-
-#ifndef BROWNIANPARTICLETYPE_H
-#define BROWNIANPARTICLETYPE_H
-
-#include <vector>
-
-#include "Reservoir.h"
-#include "BaseGrid.h"
-#include "CellDecomposition.h"
-
-// Stores particle type's potential grid and other information
-class BrownianParticleType {
-	private:
-		// clear
-		// Deletes all members
-		void clear();
-
-		// copy
-		// Copies all members
-		// @param BrownianParticleType to copy
-		void copy(const BrownianParticleType& src);
-
-	public:
-		BrownianParticleType(const String& name = "") :
-				name(name), num(0),
-				diffusion(0.0f), radius(1.0f), charge(0.0f), eps(0.0f), meanPmf(NULL),
-				numPartGridFiles(-1), reservoir(NULL), pmf(NULL), pmf_scale(NULL), pmf_boundary_conditions(NULL),
-				diffusionGrid(NULL),
-				forceXGrid(NULL), forceYGrid(NULL), forceZGrid(NULL){ }
-
-		BrownianParticleType(const BrownianParticleType& src) { copy(src); }
-
-		~BrownianParticleType() { clear(); }
-
-		BrownianParticleType& operator=(const BrownianParticleType& src);
-
-    void set_boundary_conditions( int num, BoundaryCondition* bcs ) {
-	if (num <= 0) return;
-
-	if (pmf_boundary_conditions != NULL) {
-	    delete[] pmf_boundary_conditions;
-	}
-
-	pmf_boundary_conditions = new BoundaryCondition[num];
-	for (int i=0; i < num; ++i) {
-	    pmf_boundary_conditions[i] = bcs[i];
-	}
-    }
-
-		// crop
-		// Crops all BaseGrid members
-		// @param  boundries to crop to (x0, y0, z0) -> (x1, y1, z1);
-		//         whether to change the origin
-		// @return success of function (if false nothing was done)
-		//bool crop(int x0, int y0, int z0, int x1, int y1, int z1, bool keep_origin);
-
-public:
-		String name;
-		int num; // number of particles of this type
-                float mass; // mass of brownian particles Han-Yi Chou
-                Vector3 transDamping; // translational damping coefficient Han-Yi Chou
-		float diffusion;
-		float radius;
-		float charge;
-		float eps;
-		//float meanPmf;
-		float *meanPmf;
-                int   numPartGridFiles;
-                float mu; //for Nose-Hoover Langevin dynamics
-
-		Reservoir* reservoir;
-		BaseGrid** pmf;
-    float* pmf_scale;
-    BoundaryCondition* pmf_boundary_conditions;
-		BaseGrid* diffusionGrid;
-		BaseGrid* forceXGrid;
-		BaseGrid* forceYGrid;
-		BaseGrid* forceZGrid;
-};
-
-/*
-// Spatially decomposes BrownianParticleTypes
-class TypeDecomposition {
-	private:
-		size_t num_cells_;
-		size_t num_parts_;
-		std::vector<BrownianParticleType*> parts_; // 2D array; parts_[cell][particle_type]
-
-		TypeDecomposition() {}
-
-	public:
-		TypeDecomposition(const CellDecomposition &decomp,
-				const BrownianParticleType *parts, size_t num_parts);
-
-		~TypeDecomposition();
-
-		// Getters
-		const BrownianParticleType* at(size_t i) const;
-		const BrownianParticleType* operator[](size_t i) const { return at(i); }
-
-		const std::vector<BrownianParticleType*>& parts() const { return parts_; }
-
-		int num_cells() const { return num_cells_; }
-		int num_parts() const { return num_parts_; }
-};
-*/
-#endif
diff --git a/src/CUDA.h b/src/CUDA.h
new file mode 100644
index 0000000000000000000000000000000000000000..d0b072fddd2c86109d07041578b0e9f7bfeba908
--- /dev/null
+++ b/src/CUDA.h
@@ -0,0 +1,7 @@
+#pragma once
+// Declares BDIntegrateCUDA. This header has no includes: Integrator and Patch must already be declared by the includer.
+class BDIntegrateCUDA : public Integrator {
+public:
+    void compute(Patch* patch);  // declaration only; the definition is not in this header
+    int num_patches() const { return 1; };  // always returns 1
+};
diff --git a/src/CellDecomposition.cu b/src/CellDecomposition.cu
deleted file mode 100644
index 45591f789a71f2abc490910dbb29a7bcfb884dce..0000000000000000000000000000000000000000
--- a/src/CellDecomposition.cu
+++ /dev/null
@@ -1,200 +0,0 @@
-// CellDecomposition.cu
-//
-// Terrance Howard <heyterrance@gmail.com>
-
-#include "CellDecomposition.h"
-
-// *****************************************************************************
-// Error Check
-
-inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
-	if (code != cudaSuccess) {
-		fprintf(stderr,"CUDA Error: %s %s %d\n",
-						cudaGetErrorString(code), file, line);
-		if (abort) exit(code);
-	}
-}
-
-#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
-
-#define gpuKernelCheck() {kernelCheck( __FILE__, __LINE__); }
-inline void kernelCheck(const char* file, int line)
-{
-    cudaError_t err = cudaGetLastError();
-    if (err != cudaSuccess)
-    {
-        std::fprintf(stderr,"Error: %s in %s %d\n", cudaGetErrorString(err),file, line);
-        assert(1==2);
-    }
-    //gpuErrchk(cudaDeviceSynchronize());
-}
-
-// *****************************************************************************
-// CUDA Kernel Definitions
-
-__global__ void decomposeKernel(Vector3 pos[],
-																CellDecomposition::cell_t cells[],
-																Vector3 origin, float cutoff,
-																int3 nCells, size_t num, int numReplicas);
-
-__global__
-void make_rangesKernel(CellDecomposition::cell_t cells[], int tmp[],
-											 size_t num, int numCells, int numReplicas);
-
-__global__
-void bind_rangesKernel(CellDecomposition::range_t ranges[], int tmp[],
-											 int numCells, int numReplicas);
-
-// *****************************************************************************
-// CellDecomposition Implementations
-
-CellDecomposition::CellDecomposition(Matrix3 box, Vector3 origin,
-																		 float cutoff, int numReplicas) :
-		BaseGrid(box, origin, cutoff), cutoff(cutoff), numReplicas(numReplicas),
-		cells(NULL), cells_d(NULL), unsorted_cells(NULL), unsorted_cells_d(NULL),
-		ranges(NULL), ranges_d(NULL) {
-	const Vector3 dim = getExtent();
-	nCells.x = int((dim.x - 1) / cutoff) + 1;
-	nCells.y = int((dim.y - 1) / cutoff) + 1;
-	nCells.z = int((dim.z - 1) / cutoff) + 1;
-	numCells = nCells.x * nCells.y * nCells.z;
-	printf("Created Cell Decomposition (%lu, %lu, %lu)\n",
-			nCells.x, nCells.y, nCells.z);
-}
-
-
-CellDecomposition* CellDecomposition::copyToCUDA() {
-	cell_t* tmp_cells = this->cells;
-	cell_t* tmp_unsorted = this->unsorted_cells;
-
-	this->cells = this->cells_d;
-	this->unsorted_cells = this->unsorted_cells_d;
-
-	const size_t sz = sizeof(CellDecomposition);
-	CellDecomposition *c_d = NULL;
-	gpuErrchk(cudaMalloc(&c_d, sz));
-	gpuErrchk(cudaMemcpy(c_d, this, sz, cudaMemcpyHostToDevice));
-
-	this->cells = tmp_cells;
-	this->unsorted_cells = tmp_unsorted;
-
-	return c_d;
-}
-
-void CellDecomposition::decompose_d(Vector3 pos_d[], size_t num) {
-	const size_t cells_sz = sizeof(cell_t) * num * numReplicas;
-	const size_t numCellRep = numCells * numReplicas;
-
-	if (cells_d == NULL) {
-		gpuErrchk(cudaMalloc(&cells_d, cells_sz));
-		gpuErrchk(cudaMalloc(&unsorted_cells_d, cells_sz));
-		gpuErrchk(cudaMalloc(&ranges_d, sizeof(range_t) * numCellRep));
-		unsorted_cells = new cell_t[num * numReplicas];
-		cells = new cell_t[num * numReplicas];
-		ranges = new range_t[numCellRep];
-	}
-
-	// Pair particles with cells.
-	size_t nBlocks = (num * numReplicas) / NUM_THREADS + 1;
-	decomposeKernel<<< nBlocks, NUM_THREADS >>>(pos_d, cells_d, origin, cutoff,
-																							nCells, num, numReplicas);
-	gpuErrchk(cudaDeviceSynchronize());
-	gpuErrchk(cudaMemcpy(unsorted_cells_d, cells_d, cells_sz,
-											 cudaMemcpyDeviceToDevice));
-	gpuErrchk(cudaMemcpyAsync(unsorted_cells, unsorted_cells_d, cells_sz,
-														cudaMemcpyDeviceToHost));
-
-	// Sort cells.
-	thrust::device_ptr<cell_t> c_d(cells_d);
-	thrust::sort(c_d, c_d + num * numReplicas);
-	gpuErrchk(cudaMemcpyAsync(cells, cells_d, cells_sz, cudaMemcpyDeviceToHost));
-	//Han-Yi Chou
-        //gpuErrchk(cudaMemcpy(cells, cells_d, cells_sz, cudaMemcpyDeviceToHost));
-	const size_t nMax = std::max(2lu * numCells, num);
-	nBlocks = (nMax * numReplicas) / NUM_THREADS + 1;
-
-	// Create ranges for cells.
-	int* temp_ranges = NULL;
-	gpuErrchk(cudaMalloc(&temp_ranges, 2 * sizeof(int) * numCellRep));
-	gpuErrchk(cudaMemset(temp_ranges, -1, 2 * sizeof(int) * numCellRep));
-	make_rangesKernel<<< nBlocks, NUM_THREADS >>>(cells_d, temp_ranges,
-																								num, numCells, numReplicas);
-	gpuErrchk( cudaDeviceSynchronize() );
-
-	// Copy temp_ranges to ranges_d
-	bind_rangesKernel<<< nBlocks, NUM_THREADS >>>(ranges_d, temp_ranges,
-																								numCells, numReplicas);
-	gpuErrchk(cudaMemcpy(ranges, ranges_d, numCellRep, cudaMemcpyDeviceToHost));
-	gpuErrchk( cudaFree(temp_ranges) );
-
-
-}
-
-// *****************************************************************************
-// CUDA Kernels
-
-__global__
-void decomposeKernel(Vector3 *pos, CellDecomposition::cell_t *cells,
-										 Vector3 origin, float cutoff, int3 nCells,
-										 size_t num, int numReplicas) {
-	const size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
-	if (idx < num * numReplicas) {
-		const int repID =  idx / num;
-		const Vector3& p = pos[idx];
-		const int id = CellDecomposition::getCellID(p, origin, cutoff, nCells);
-		const int3 r = CellDecomposition::getCellPos(id, nCells);
-		cells[idx] = CellDecomposition::cell_t(idx, id, r, repID);
-	}
-}
-
-__global__
-void make_rangesKernel(CellDecomposition::cell_t cells[], int tmp[],
-											 size_t num, int numCells, int numReplicas) {
-	const size_t idx = blockIdx.x * blockDim.x + threadIdx.x; 
-
-	if (idx < num * numReplicas) {
-		const int repID = cells[idx].repID;
-		assert(repID == idx/num);
-		const int cellID = cells[idx].id + repID * numCells; // cellID in tmp array
-
-		// Get positions in tmp array.
-		const int first = cellID * 2;
-		const int last = first + 1;
-		const int particle = idx % num;
-
-		if (particle == 0)
-			tmp[first] = idx;
-
-		if (particle == num - 1)
-			tmp[last] = idx + 1;
-
-		const int prev_id = idx - 1;
-		if (prev_id >= 0
-		    and cells[prev_id].repID == repID
-		    and cellID != cells[prev_id].id + repID * numCells)
-			tmp[first] = idx;
-		
-		const int next_id = idx + 1;
-		if (next_id < num * numReplicas
-		    and cells[next_id].repID == repID
-		    and cellID != cells[next_id].id + repID * numCells)
-			tmp[last] = idx + 1;
-	}
-}
-
-__global__
-void bind_rangesKernel(CellDecomposition::range_t ranges[], int tmp[],
-											 int numCells, int numReplicas) {
-        const size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
-	if (idx < numCells * numReplicas)
-		ranges[idx] = CellDecomposition::range_t(tmp[2*idx], tmp[2*idx+1]);
-	/* Print range of each cell. Skip over empty cells
-	__syncthreads();
-	if (idx == 0) {
-		for (int i = 0; i < numCells * numReplicas; ++i) {
-			if (ranges[i].first == -1 and ranges[i].last == -1) continue;
-			printf("cell %d : [%d, %d)\n", i, ranges[i].first, ranges[i].last);
-		}
-	}
-	// */
-}
diff --git a/src/CellDecomposition.h b/src/CellDecomposition.h
deleted file mode 100644
index 75fc1c7afcb1dad3430122961d57c9178a03dd3a..0000000000000000000000000000000000000000
--- a/src/CellDecomposition.h
+++ /dev/null
@@ -1,249 +0,0 @@
-// CellDecomposition.h (2013)
-// contains CellDecomposition class and two related structs
-//
-// Authors: Terrance Howard <howard33@illinois.edu>,
-//          Justin Dufresne <jdufres1@friars.providence.edu>
-//
-// "When I wrote this, only God and myself understood what I was thinking.
-//  Now, only God knows."
-
-#ifndef CELL_DECOMPOSITION_H
-#define CELL_DECOMPOSITION_H
-
-#include <vector>
-#include <algorithm> // std::sort
-
-#include <thrust/sort.h>
-#include <thrust/device_ptr.h>
-#include <thrust/partition.h>
-
-#include <cuda.h>	 	// cudaMalloc, cudaMemcpy
-#include <vector_types.h>	// int3
-
-#include "useful.h" // Vector3, Matrix3
-#include "BaseGrid.h"
-
-#if defined(__CUDACC__) // NVCC
-   #define MY_ALIGN(n) __align__(n)
-#elif defined(__GNUC__) // GCC
-  #define MY_ALIGN(n) __attribute__((aligned(n)))
-#elif defined(_MSC_VER) // MSVC
-  #define MY_ALIGN(n) __declspec(align(n))
-#else
-  #error "Please provide a definition for MY_ALIGN macro for your host compiler!"
-#endif
-
-class CellDecomposition : public BaseGrid {
-public:
-	// range_t
-	// Contains first and last exclusive indices in cells array.
-	struct range_t {
-	public:
-		HOST DEVICE
-		inline range_t() : first(-1), last(-1) { }
-
-		HOST DEVICE
-		inline range_t(int first, int last) : first(first), last(last) { }
-
-	public:
-		int first, last; // [first, last)
-	};
-
-	// cell_t
-	// Contains replica id, particle id, cell id, and position of cell
-	struct cell_t {
-	public:
-		HOST DEVICE
-		inline cell_t() : particle(-1), id(-1) { }
-
-		HOST DEVICE
-		inline cell_t(int particle, int id, const int3& r, int repID) :
-				particle(particle), repID(repID), id(id), pos(r) { }
-
-		HOST DEVICE
-		inline bool operator<(const cell_t& p) const {
-				if (repID != p.repID) return repID < p.repID;
-				if (id != p.id) return id < p.id;
-				return particle < p.particle;
-		}
-	
-	public:
-		int particle; // id of particle
-		int repID;
-		int id; // location in CellDecomposition's cells array
-		int3 pos; // position of cell in grid
-	};
-
-public:
-	CellDecomposition(Matrix3 box, Vector3 origin, float cutoff, int numReplicas);
-
-	// Place particles in cells and create a range for each cell.
-	// Decompose on the GPU.
-	void decompose_d(Vector3 *pos_d, size_t num);
-
-	// copyToCUDA
-	// Return a copy of CellDecomposition to GPU.
-	CellDecomposition* copyToCUDA();
-
-	HOST DEVICE
-	inline float getCutoff() const { return cutoff; }
-
-	HOST DEVICE
-	inline size_t size() const { return numCells; }
-
-	HOST DEVICE
-	inline const cell_t& getCell(int ind) const { return cells[ind]; }
-
-	HOST DEVICE
-	inline const cell_t& getCellForParticle(int particle) const {
-		return unsorted_cells[particle];
-	}
-
-	// Return cell array
-	HOST DEVICE
-	inline const cell_t* getCells() const {
-		return cells;
-	}
-        //Han-Yi Chou
-        HOST DEVICE
-        inline const cell_t* getCells_d() const {
-                return cells_d;
-        }
-
-	/*
-	HOST DEVICE
-	inline const range_t& getRange(const cell_t& c) const {
-		return ranges_d[c.id + c.repID * numCells];
-	}
-	*/
-
-	HOST DEVICE
-	inline const range_t& getRange(int ind, int repID) const {
-		return ranges_d[ind + repID * numCells];
-	}
-
-	HOST DEVICE
-	inline int getCellID(const Vector3 &r0) const {
-		const Vector3 r = r0 - origin;
-		//const int x = int(r.x / cutoff);
-		//const int y = int(r.y / cutoff);
-		//const int z = int(r.z / cutoff);
-		const int x = floorf(r.x / cutoff);
-                const int y = floorf(r.y / cutoff);
-                const int z = floorf(r.z / cutoff);
-		return getCellID(x, y, z, nCells);
-	}
-
-	HOST DEVICE
-	static inline int getCellID(const Vector3& r0, const Vector3& origin,
-															float cutoff, int3 nCells) {
-		const Vector3 r = r0 - origin;
-		//const int x = int(r.x / cutoff);
-		//const int y = int(r.y / cutoff);
-		//const int z = int(r.z / cutoff);
-		const int x = floorf(r.x / cutoff);
-                const int y = floorf(r.y / cutoff);
-                const int z = floorf(r.z / cutoff);
-		return getCellID(x, y, z, nCells);
-	}
-
-	// Return position of cell in grid.
-	HOST DEVICE
-	inline int3 getCellPos(int id) const {
-		return getCellPos(id, nCells);
-	}
-
-	HOST DEVICE
-	static inline int3 getCellPos(int id, int3 nCells) {
-		int3 p;
-		p.z = id % nCells.z;
-		p.y = (id / nCells.z) % nCells.y;
-		p.x = id / (nCells.z * nCells.y);
-		return p;
-	}
-
-	// Return ID of cell in position (i, j, k) relative to c.
-	// Return -1 if wrapping to an adjacent cell.
-	HOST DEVICE
-	inline int getNeighborID(const cell_t& c, int i, int j, int k) const {
-		if (i == 0 and j == 0 and k == 0)
-			return c.id;
-		int u = i + c.pos.x;
-		int v = j + c.pos.y;
-		int w = k + c.pos.z;
-		if (nCells.x == 1 and u != 0) return -1;
-		if (nCells.y == 1 and v != 0) return -1;
-		if (nCells.z == 1 and w != 0) return -1;
-		if (nCells.x == 2 and (u < 0 || u > 1)) return -1;
-		if (nCells.y == 2 and (v < 0 || v > 1)) return -1;
-		if (nCells.z == 2 and (w < 0 || w > 1)) return -1;
-		return getCellID(u, v, w, nCells);
-	}
-/*
-        HOST DEVICE
-inline int getNeighborID(int idx, int dx, int dy, int dz) const
-{
-    if(dx == 0 and dy == 0 and dz == 0)
-        return idx;
-    int idx_z = idx % nCells.z;
-    int idx_y = idx / nCells.z % nCells.y;
-    int idx_x = idx / (nCells.z * nCells.y);
-
-    int u = (dx + idx_x + nCells.x) % nCells.x;
-    int v = (dy + idx_y + nCells.y) % nCells.y;
-    int w = (dz + idx_z + nCells.z) % nCells.z;
-    if (nCells.x == 1 and u != 0) return -1;
-    if (nCells.y == 1 and v != 0) return -1;
-    if (nCells.z == 1 and w != 0) return -1;
-    if (nCells.x == 2 and (u < 0 || u > 1)) return -1;
-    if (nCells.y == 2 and (v < 0 || v > 1)) return -1;
-    if (nCells.z == 2 and (w < 0 || w > 1)) return -1;
-    return getCellID(u, v, w, nCells);
-}
-*/
-public:
-	int3 nCells;
-
-private:
-	// Wrap an integer with inclusive lower and upper bounds.
-	HOST DEVICE
-	static inline int wrapInt(int k, int lower, int upper) {
-		int range = upper - lower + 1;
-		if (k < lower)
-			k += range * ((lower - k) / range + 1);
-		return lower + (k - lower) % range;
-	}
-	
-	// Calculate a cell's id given a position in the grid.
-	HOST DEVICE
-	static inline int getCellID(int i, int j, int k, int3 nCells) {
-		i = wrapInt(i, 0, nCells.x - 1);
-		j = wrapInt(j, 0, nCells.y - 1);
-		k = wrapInt(k, 0, nCells.z - 1);
-		return k + nCells.z * (j + (nCells.y * i));
-	}
-
-
-private:
-	static const unsigned int NUM_THREADS = 256;
-
-	int numCells;
-	int numReplicas;
-
-	cell_t* cells;
-	cell_t* cells_d;
-	cell_t* unsorted_cells;
-	cell_t* unsorted_cells_d;
-	range_t* ranges;
-	range_t* ranges_d;
-
-	float cutoff;
-
-	// build_ranges
-	// @param	number of particles
-	// used by decompose()
-	void build_ranges(size_t num);
-
-};
-
-#endif
diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu
deleted file mode 100644
index 7b8c42ba37bdd160bef7526a481839a1c99e0f30..0000000000000000000000000000000000000000
--- a/src/ComputeForce.cu
+++ /dev/null
@@ -1,1115 +0,0 @@
-///////////////////////////////////////////////////////////////////////
-// Brownian dynamics base class
-// Author: Jeff Comer <jcomer2@illinois.edu>
-
-#include "ComputeForce.h"
-#include "ComputeForce.cuh"
-#include "Configuration.h"
-#include <cuda_profiler_api.h>
-#include <fstream>
-#include <iostream>
-
-#ifndef gpuErrchk
-#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
-inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
-   if (code != cudaSuccess) {
-      fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), file, line);
-      if (abort) exit(code);
-   }
-}
-#endif 
-
-#define gpuKernelCheck() {kernelCheck( __FILE__, __LINE__); }
-inline void kernelCheck(const char* file, int line)
-{
-    cudaError_t err = cudaGetLastError();
-    if (err != cudaSuccess)
-    {
-        std::fprintf(stderr,"Error: %s in %s %d\n", cudaGetErrorString(err),file, line);
-        assert(1==2);
-    }
-    //gpuErrchk(cudaDeviceSynchronize());
-}
-
-cudaEvent_t start, stop;
-
-GPUManager ComputeForce::gpuman = GPUManager();
-
-void runSort(int2 *d1, int *d2, float *key,
-				int2 *scratch1, int  *scratch2, float *scratchKey,
-				unsigned int count);
-
-ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) :
-    num(c.num), numParts(c.numParts), num_rb_attached_particles(c.num_rb_attached_particles),
-    sys(c.sys), switchStart(c.switchStart),
-    switchLen(c.switchLen), electricConst(c.coulombConst),
-    cutoff2((c.switchLen + c.switchStart) * (c.switchLen + c.switchStart)),
-    decomp(c.sys->getBox(), c.sys->getOrigin(), c.switchStart + c.switchLen + c.pairlistDistance, numReplicas),
-    numBonds(c.numBonds), numTabBondFiles(c.numTabBondFiles),
-    numExcludes(c.numExcludes), numAngles(c.numAngles),
-    numTabAngleFiles(c.numTabAngleFiles), numDihedrals(c.numDihedrals),
-    numTabDihedralFiles(c.numTabDihedralFiles), numRestraints(c.numRestraints),
-    numBondAngles(c.numBondAngles), numProductPotentials(c.numProductPotentials),
-    numGroupSites(c.numGroupSites),
-    numReplicas(numReplicas) {
-
-	// Grow vectors for per-gpu device pointers
-	for (int i = 0; i < gpuman.gpus.size(); ++i) {
-	    int s = gpuman.gpus.size();
-	    sys_d	= std::vector<BaseGrid*>(s);
-	    tablePot_addr = std::vector<TabulatedPotential**>(s);
-	    tablePot_d	= std::vector<TabulatedPotential**>(s);
-	    pairLists_d = std::vector<int2*>(s);
-	    pairLists_tex = std::vector<cudaTextureObject_t>(s);
-	    pairTabPotType_d = std::vector<int*>(s);
-	    pairTabPotType_tex = std::vector<cudaTextureObject_t>(s);
-	    numPairs_d = std::vector<int*>(s);
-	    pos_d = std::vector<Vector3*>(s);
-	    pos_tex = std::vector<cudaTextureObject_t>(s);
-	    forceInternal_d = std::vector<Vector3*>(s);
-	}
-
-	// Allocate the parameter tables.
-	decomp_d = NULL;
-
-	pairlistdist2 = (sqrt(cutoff2) + c.pairlistDistance);
-	pairlistdist2 *= pairlistdist2;
-
-	int np2     = numParts*numParts;
-	tableEps    = new float[np2];
-	tableRad6   = new float[np2];
-	tableAlpha  = new float[np2];
-
-	const size_t tableSize = sizeof(float) * np2;
-	gpuErrchk(cudaMalloc(&tableEps_d, tableSize));
-	gpuErrchk(cudaMalloc(&tableRad6_d, tableSize));
-	gpuErrchk(cudaMalloc(&tableAlpha_d, tableSize));
-	for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) {
-	    gpuman.use(i);
-	    gpuErrchk(cudaMalloc(&sys_d[i], sizeof(BaseGrid)));
-	    gpuErrchk(cudaMemcpyAsync(sys_d[i], sys, sizeof(BaseGrid), cudaMemcpyHostToDevice));
-	}
-	gpuman.use(0);
-
-	// Build the parameter tables.
-	makeTables(c.part);
-
-	gpuErrchk(cudaMemcpyAsync(tableAlpha_d, tableAlpha, tableSize, cudaMemcpyHostToDevice));
-	gpuErrchk(cudaMemcpyAsync(tableEps_d, tableEps, tableSize, cudaMemcpyHostToDevice));
-	gpuErrchk(cudaMemcpyAsync(tableRad6_d, tableRad6, tableSize, cudaMemcpyHostToDevice));
-
-	// Create the potential table
-	tablePot = new TabulatedPotential*[np2];
-	for (int i = 0; i < np2; ++i) tablePot[i] = NULL;
-
-	for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) {
-	    tablePot_addr[i] = new TabulatedPotential*[np2];
-	    for (int j = 0; j < np2; ++j) tablePot_addr[i][j] = NULL;
-	    gpuman.use(i);
-	    gpuErrchk(cudaMalloc(&tablePot_d[i], sizeof(TabulatedPotential*) * np2));
-	}
-	gpuman.use(0);
-
-	// Create the bond table
-	tableBond = new TabulatedPotential*[numTabBondFiles];
-	tableBond_addr = new TabulatedPotential*[numTabBondFiles];
-	bondList_d = NULL;
-	tableBond_d = NULL;
-	for (int i = 0; i < numTabBondFiles; i++) {
-		tableBond_addr[i] = NULL;
-		tableBond[i] = NULL;
-	}
-	gpuErrchk(cudaMalloc(&tableBond_d, sizeof(TabulatedPotential*) * numTabBondFiles));
-
-	// Create the angle table
-	tableAngle = new TabulatedAnglePotential*[numTabAngleFiles];
-	tableAngle_addr = new TabulatedAnglePotential*[numTabAngleFiles];
-	angleList_d = NULL;
-	tableAngle_d = NULL;
-	for (int i = 0; i < numTabAngleFiles; i++) {
-		tableAngle_addr[i] = NULL;
-		tableAngle[i] = NULL;
-	}
-	gpuErrchk(cudaMalloc(&tableAngle_d, sizeof(TabulatedAnglePotential*) * numTabAngleFiles));
-
-	// Create the dihedral table
-	tableDihedral = new TabulatedDihedralPotential*[numTabDihedralFiles];
-	tableDihedral_addr = new TabulatedDihedralPotential*[numTabDihedralFiles];
-	dihedralList_d = NULL;
-	tableDihedral_d = NULL;
-	for (int i = 0; i < numTabDihedralFiles; i++) {
-		tableDihedral_addr[i] = NULL;
-		tableDihedral[i] = NULL;
-	}
-	gpuErrchk(cudaMalloc(&tableDihedral_d, sizeof(TabulatedDihedralPotential*) * numTabDihedralFiles));
-
-	{	// allocate device for pairlists
-		// RBTODO: select maxpairs in better way; add assertion in kernel to avoid going past this
-		const int maxPairs = MAX_NLIST_PAIRS;
-		for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) {
-		    gpuman.use(i);
-		    gpuErrchk(cudaMalloc(&numPairs_d[i],       sizeof(int)));
-		    gpuErrchk(cudaMalloc(&pairLists_d[i],      sizeof(int2)*maxPairs));
-		    // gpuErrchk(cudaBindTexture(0, pairListsTex, pairLists_d[i], sizeof(int2)*maxPairs)); //Han-Yi
-		    gpuErrchk(cudaMalloc(&pairTabPotType_d[i], sizeof(int)*maxPairs));
-		}
-
-		// create texture object
-		for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) {
-		    gpuman.use(i);
-		    cudaResourceDesc resDesc;
-		    memset(&resDesc, 0, sizeof(resDesc));
-		    resDesc.resType = cudaResourceTypeLinear;
-		    resDesc.res.linear.devPtr = pairLists_d[i];
-		    resDesc.res.linear.desc.f = cudaChannelFormatKindSigned;
-		    resDesc.res.linear.desc.x = 32; // bits per channel
-		    resDesc.res.linear.desc.y = 32; // bits per channel
-		    resDesc.res.linear.sizeInBytes = maxPairs*sizeof(int2);
-
-		    cudaTextureDesc texDesc;
-		    memset(&texDesc, 0, sizeof(texDesc));
-		    texDesc.readMode = cudaReadModeElementType;
-
-		    // create texture object: we only have to do this once!
-		    pairLists_tex[i]=0;
-		    cudaCreateTextureObject(&pairLists_tex[i], &resDesc, &texDesc, NULL);
-		}
-
-		// create texture object
-		for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) {
-		    gpuman.use(i);
-		    cudaResourceDesc resDesc;
-		    memset(&resDesc, 0, sizeof(resDesc));
-		    resDesc.resType = cudaResourceTypeLinear;
-		    resDesc.res.linear.devPtr = pairTabPotType_d[i];
-		    resDesc.res.linear.desc.f = cudaChannelFormatKindSigned;
-		    resDesc.res.linear.desc.x = 32; // bits per channel
-		    resDesc.res.linear.sizeInBytes = maxPairs*sizeof(int);
-
-		    cudaTextureDesc texDesc;
-		    memset(&texDesc, 0, sizeof(texDesc));
-		    texDesc.readMode = cudaReadModeElementType;
-
-		    // create texture object: we only have to do this once!
-		    pairTabPotType_tex[i] = 0;
-		    cudaCreateTextureObject(&pairTabPotType_tex[i], &resDesc, &texDesc, NULL);
-
-		}
-		gpuman.use(0);
-
-
-                //Han-Yi Chou
-                int nCells = decomp.nCells.x * decomp.nCells.y * decomp.nCells.z;
-                //int* nCells_dev;
-		if (nCells < MAX_CELLS_FOR_CELLNEIGHBORLIST) {
-		    int3 *Cells_dev;
-		    size_t sz = 27*nCells*sizeof(int);
-		    gpuErrchk(cudaMalloc(&CellNeighborsList, sz));
-		    //gpuErrchk(cudaMalloc(&nCells_dev,sizeof(int)));
-		    gpuErrchk(cudaMalloc(&Cells_dev,sizeof(int3)));
-		    //gpuErrchk(cudaMemcpy(nCells_dev,&nCells,1,cudaMemcpyHostToDevice);
-		    gpuErrchk(cudaMemcpy(Cells_dev,&(decomp.nCells),sizeof(int3),cudaMemcpyHostToDevice));
-		    createNeighborsList<<<256,256>>>(Cells_dev,CellNeighborsList);
-		    gpuErrchk(cudaFree(Cells_dev));
-
-		    // create texture object
-		    {
-			cudaResourceDesc resDesc;
-			memset(&resDesc, 0, sizeof(resDesc));
-			resDesc.resType = cudaResourceTypeLinear;
-			resDesc.res.linear.devPtr = CellNeighborsList;
-			resDesc.res.linear.desc.f = cudaChannelFormatKindSigned;
-			resDesc.res.linear.desc.x = 32; // bits per channel
-			resDesc.res.linear.sizeInBytes = sz;
-
-			cudaTextureDesc texDesc;
-			memset(&texDesc, 0, sizeof(texDesc));
-			texDesc.readMode = cudaReadModeElementType;
-
-			// create texture object: we only have to do this once!
-			neighbors_tex=0;
-			cudaCreateTextureObject(&neighbors_tex, &resDesc, &texDesc, NULL);
-		    }
-		}
-	}
-	
-	restraintIds_d = NULL;
-	bondAngleList_d = NULL;
-	product_potential_list_d = NULL;
-
-	//Calculate the number of blocks the grid should contain
-	gridSize =  (num+num_rb_attached_particles) / NUM_THREADS + 1;
-
-	// Create and allocate the energy arrays
-	gpuErrchk(cudaMalloc(&energies_d, sizeof(float) * (num+num_rb_attached_particles+numGroupSites) * numReplicas));
-	cudaEventCreate(&start);
-	cudaEventCreate(&stop);
-}
-
-ComputeForce::~ComputeForce() {
-	delete[] tableEps;
-	delete[] tableRad6;
-	delete[] tableAlpha;
-	gpuErrchk(cudaFree(tableEps_d));
-	gpuErrchk(cudaFree(tableAlpha_d));
-	gpuErrchk(cudaFree(tableRad6_d));
-	
-	for (int i = 0; i < numParts; ++i) {
-	    for (int j = i; j < numParts; ++j) {
-		int ind = i+j*numParts;
-		if (tablePot[ind] != NULL) {
-		    for (std::size_t g = 0; g < gpuman.gpus.size(); ++g) {
-			gpuman.use(g);
-			tablePot_addr[g][ind]->free_from_cuda(tablePot_addr[g][ind]);
-			tablePot_addr[g][ind] = NULL;
-		    }
-		    delete tablePot[ind];
-		}
-	    }
-	}
-	delete[] tablePot;
-	for (auto& tpa : tablePot_addr) delete[] tpa;
-
-	for (int j = 0; j < numTabBondFiles; ++j)
-		delete tableBond[j];
-	delete[] tableBond;
-	delete[] tableBond_addr;
-	gpuErrchk(cudaFree(tableBond_d));
-
-	for (int j = 0; j < numTabAngleFiles; ++j)
-		if (tableAngle[j] != NULL)
-			delete tableAngle[j];
-	delete[] tableAngle;
-	delete[] tableAngle_addr;
-
-	if(type_d != NULL)
-	{
-		gpuErrchk(cudaFree(tableAngle_d));
-
-		gpuErrchk(cudaFree(energies_d));
-
-		gpuErrchk( cudaFree(type_d) );
-		if (numBonds > 0) {
-			gpuErrchk( cudaFree(bonds_d) );
-			gpuErrchk( cudaFree(bondMap_d) );
-			gpuErrchk( cudaFree(bondList_d) );
-		}
-		if (numAngles > 0) {
-			gpuErrchk( cudaFree(angles_d) );
-			gpuErrchk( cudaFree(angleList_d) );
-		}
-		if (numDihedrals > 0) {
-			gpuErrchk( cudaFree(dihedrals_d) );
-			gpuErrchk( cudaFree(dihedralList_d) );
-			gpuErrchk( cudaFree(dihedralPotList_d) );
-		}
-		if (numExcludes > 0) {
-			gpuErrchk( cudaFree(excludes_d) );
-			gpuErrchk( cudaFree(excludeMap_d) );
-		}
-		if (numRestraints > 0) {
-			gpuErrchk( cudaFree(restraintIds_d) );
-			gpuErrchk( cudaFree(restraintLocs_d) );
-			gpuErrchk( cudaFree(restraintSprings_d) );
-		}
-	}
-
-	for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) {
-	    gpuErrchk(cudaFree(forceInternal_d[i]) );
-	    gpuErrchk(cudaFree(sys_d[i]));
-	    gpuErrchk(cudaDestroyTextureObject(pos_tex[i]));
-	    gpuErrchk(cudaFree(pos_d[i]) );
-	    gpuErrchk(cudaFree(numPairs_d[i]));
-	    gpuErrchk(cudaDestroyTextureObject(pairLists_tex[i]));
-	    gpuErrchk(cudaFree(pairLists_d[i]));
-	    gpuErrchk(cudaDestroyTextureObject(pairTabPotType_tex[i]));
-	    gpuErrchk(cudaFree(pairTabPotType_d[i]));
-	}
-        gpuErrchk(cudaDestroyTextureObject(neighbors_tex));
-        gpuErrchk(cudaFree( CellNeighborsList));
-
-}
-
-void ComputeForce::updateNumber(int newNum) {
-	if (newNum == num or newNum < 0) return;
-
-	// Set the new number.
-	num = newNum;
-
-	// Reallocate the neighbor list.
-	//delete[] neigh;
-	//neigh = new IndexList[num];
-	decompose();
-
-	printf("updateNumber() called\n");
-	// Reallocate CUDA arrays
-
-	// Recalculate the number of blocks in the grid
-	gridSize = 0;
-	while ((int)sqrt(NUM_THREADS) * gridSize < num+num_rb_attached_particles)
-		++gridSize;
-
-	gpuErrchk(cudaFree(energies_d));
-	gpuErrchk(cudaMalloc(&energies_d, sizeof(float) * gridSize));
-}
-
-void ComputeForce::makeTables(const BrownianParticleType part[]) {
-	for (int i = 0; i < numParts; ++i) {
-		const BrownianParticleType& pi = part[i];
-		for (int j = 0; j < numParts; ++j) {
-			const BrownianParticleType& pj = part[j];
-			int ind = i * numParts + j;
-			tableEps[ind] = sqrtf(pi.eps * pj.eps);
-			float r = pi.radius + pj.radius;
-			tableRad6[ind] = r * r * r * r * r * r;
-			tableAlpha[ind] = electricConst * pi.charge * pj.charge;
-		}
-	}
-}
-
-bool ComputeForce::addTabulatedPotential(String fileName, int type0, int type1) {
-	if (type0 < 0 or type0 >= numParts) return false;
-	if (type1 < 0 or type1 >= numParts) return false;
-
-	int ind = type0 + type1 * numParts;
-	int ind1 = type1 + type0 * numParts;
-
-	// If an entry already exists for this particle type, delete it
-	if (tablePot[ind] != NULL) {
-	    for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) {
-		gpuman.use(i);
-		tablePot_addr[i][ind]->free_from_cuda(tablePot_addr[i][ind]);
-		tablePot_addr[i][ind] = NULL;
-	    }
-	    gpuman.use(0);
-	    delete tablePot[ind];
-	}
-	// if (tablePot[ind1] != NULL) {
-	//     // gpuErrchk(cudaFree(tablePot_addr[ind1]));
-	// 	delete tablePot[ind1];
-	// 	// tablePot[ind1] = NULL;
-	// 	// tablePot_addr[ind1] = NULL;
-	// }
-
-	tablePot[ind] = tablePot[ind1] = new TabulatedPotential(fileName);
-	tablePot[ind]->truncate(switchStart, sqrtf(cutoff2), 0.0f);
-
-	for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) {
-	    gpuman.use(i);
-	    tablePot_addr[i][ind] = tablePot_addr[i][ind1] = tablePot[ind]->copy_to_cuda();
-	    gpuErrchk(cudaMemcpy(tablePot_d[i], tablePot_addr[i],
-				 sizeof(TabulatedPotential*) * numParts * numParts, cudaMemcpyHostToDevice));
-	}
-	gpuman.use(0);
-	return true;
-}
-
-bool ComputeForce::addBondPotential(String fileName, int ind, Bond bonds[], BondAngle bondAngles[])
-{
-    // TODO: see if tableBond_addr can be removed
-    if (tableBond[ind] != NULL) {
-	delete tableBond[ind];
-	// gpuErrchk(cudaFree(tableBond_addr[ind])); //TODO free this a little more cleanly
-    }
-
-    tableBond[ind] = new TabulatedPotential(fileName);
-
-	for (int i = 0; i < numBonds; ++i)
-		if (bonds[i].fileName == fileName)
-			bonds[i].tabFileIndex = ind;
-
-	for (int i = 0; i < numBondAngles; i++)
-	{
-	    if (bondAngles[i].bondFileName == fileName)
-		bondAngles[i].tabFileIndex2 = ind;
-	}
-
-	gpuErrchk(cudaMemcpyAsync(bonds_d, bonds, sizeof(Bond) * numBonds, cudaMemcpyHostToDevice));
-
-	tableBond_addr[ind] = tableBond[ind]->copy_to_cuda();
-	gpuErrchk(cudaMemcpy(tableBond_d, tableBond_addr,
-			     sizeof(TabulatedPotential*) * numTabBondFiles, cudaMemcpyHostToDevice));
-	return true;
-}
-
-bool ComputeForce::addAnglePotential(String fileName, int ind, Angle* angles, BondAngle* bondAngles) {
-	if (tableAngle[ind] != NULL) {
-		delete tableAngle[ind];
-		gpuErrchk(cudaFree(tableAngle_addr[ind]));
-		tableAngle[ind] = NULL;
-		tableAngle_addr[ind] = NULL;
-	}
-
-	tableAngle[ind] = new TabulatedAnglePotential(fileName);
-	TabulatedAnglePotential *t = new TabulatedAnglePotential(*tableAngle[ind]);
-
-	// Copy tableAngle[ind] to the device
-	float *pot;
-	int size = tableAngle[ind]->size;
-	gpuErrchk(cudaMalloc(&pot, sizeof(float) * size));
-	gpuErrchk(cudaMemcpyAsync(pot, tableAngle[ind]->pot, sizeof(float) * size, cudaMemcpyHostToDevice));
-	t->pot = pot;
-	gpuErrchk(cudaMalloc(&tableAngle_addr[ind], sizeof(TabulatedAnglePotential)));
-	gpuErrchk(cudaMemcpy(tableAngle_addr[ind], t, sizeof(TabulatedAnglePotential), cudaMemcpyHostToDevice));
-	t->pot = NULL;
-	delete t;
-
-	gpuErrchk(cudaMemcpyAsync(tableAngle_d, tableAngle_addr,
-			sizeof(TabulatedAnglePotential*) * numTabAngleFiles, cudaMemcpyHostToDevice));
-
-	for (int i = 0; i < numAngles; i++)
-		if (angles[i].fileName == fileName)
-			angles[i].tabFileIndex = ind;
-
-	for (int i = 0; i < numBondAngles; i++) {
-	    if (bondAngles[i].angleFileName1 == fileName)
-		bondAngles[i].tabFileIndex1 = ind;
-	    if (bondAngles[i].angleFileName2 == fileName)
-		bondAngles[i].tabFileIndex3 = ind;
-	}
-	gpuErrchk(cudaMemcpy(angles_d, angles, sizeof(Angle) * numAngles,
-			cudaMemcpyHostToDevice));
-	return true;
-}
-
-bool ComputeForce::addDihedralPotential(String fileName, int ind, Dihedral dihedrals[])
-{
-	for (int i = 0; i < numDihedrals; i++)
-		if (dihedrals[i].fileName == fileName)
-			dihedrals[i].tabFileIndex = ind;
-
-	gpuErrchk(cudaMemcpyAsync(dihedrals_d, dihedrals, sizeof(Dihedral) * numDihedrals,
-			cudaMemcpyHostToDevice));
-
-	if (tableDihedral[ind] != NULL) {
-		delete tableDihedral[ind];
-		gpuErrchk(cudaFree(tableDihedral_addr[ind]));
-		tableDihedral[ind] = NULL;
-		tableDihedral_addr[ind] = NULL;
-	}
-
-	tableDihedral[ind] = new TabulatedDihedralPotential(fileName);
-	TabulatedDihedralPotential t = TabulatedDihedralPotential(*tableDihedral[ind]);
-
-	// Copy tableAngle[ind] to the device
-	float *pot;
-	int size = tableDihedral[ind]->size;
-	gpuErrchk(cudaMalloc(&pot, sizeof(float) * size));
-	gpuErrchk(cudaMemcpyAsync(pot, tableDihedral[ind]->pot,
-			sizeof(float) * size, cudaMemcpyHostToDevice));
-	t.pot = pot;
-
-	gpuErrchk(cudaMalloc(&tableDihedral_addr[ind], sizeof(TabulatedDihedralPotential)));
-	gpuErrchk(cudaMemcpyAsync(tableDihedral_addr[ind], &t,
-			sizeof(TabulatedDihedralPotential), cudaMemcpyHostToDevice));
-	gpuErrchk(cudaMemcpy(tableDihedral_d, tableDihedral_addr,
-			sizeof(TabulatedDihedralPotential*) * numTabDihedralFiles, cudaMemcpyHostToDevice));
-	t.pot = NULL;
-	return true;
-}
-
-void ComputeForce::decompose() {
-	//gpuErrchk( cudaProfilerStart() );
-
-	// Reset the cell decomposition.
-	if (decomp_d != NULL)
-        {
-            cudaFree(decomp_d);
-            decomp_d = NULL;
-	}	
-	decomp.decompose_d(pos_d[0], num+num_rb_attached_particles);
-	decomp_d = decomp.copyToCUDA();
-
-	// Update pairlists using cell decomposition (not sure this is really needed or good) 
-	//RBTODO updatePairlists<<< nBlocks, NUM_THREADS >>>(pos_d[0], num, numReplicas, sys_d[0], decomp_d);	
-
-	/* size_t free, total; */
-	/* { */
-	/* 	cuMemGetInfo(&free,&total); */
-	/* 	printf("Free memory: %zu / %zu\n", free, total); */
-	/* } */
-	
-	// initializePairlistArrays
-	int nCells = decomp.nCells.x * decomp.nCells.y * decomp.nCells.z;
-	
-	/* cuMemGetInfo(&free,&total); */
-	/* printf("Free memory: %zu / %zu\n", free, total); */
-	
-	int tmp = 0;
-	gpuErrchk(cudaMemcpyAsync(numPairs_d[0], &tmp,	sizeof(int), cudaMemcpyHostToDevice));
-	gpuErrchk(cudaDeviceSynchronize());
-
-#ifdef DEBUGEXCLUSIONS
-	initExSum();
-	gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: sync needed here? */
-#endif
-      
-      #if __CUDA_ARCH__ >= 520
-      createPairlists<64,64,8><<<dim3(128,128,numReplicas),dim3(64,1,1)>>>(pos_d[0], num+num_rb_attached_particles, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0],
-                                                                             pairLists_d[0], numParts, type_d, pairTabPotType_d[0], excludes_d,
-									   excludeMap_d, numExcludes, pairlistdist2, pos_tex[0], neighbors_tex);
-      #else //__CUDA_ARCH__ == 300
-      createPairlists<64,64,8><<<dim3(256,256,numReplicas),dim3(64,1,1)>>>(pos_d[0], num+num_rb_attached_particles, numReplicas, sys_d[0], decomp_d, nCells, numPairs_d[0],
-                                                                           pairLists_d[0], numParts, type_d, pairTabPotType_d[0], excludes_d, 
-                                                                           excludeMap_d, numExcludes, pairlistdist2, pos_tex[0], neighbors_tex);
-      #endif
-       
-      gpuKernelCheck();
-      gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: sync needed here? */
-
-      #ifdef USE_NCCL
-      if (gpuman.gpus.size() > 1) {
-	  // Currently we don't use numPairs_d[i] for i > 0... might be able to reduce data transfer with some kind nccl scatter, and in that case we'd prefer to use all numPairs_d[i]
-	  gpuErrchk(cudaMemcpy(&numPairs, numPairs_d[0], sizeof(int), cudaMemcpyDeviceToHost));
-	  gpuman.nccl_broadcast(0, pairTabPotType_d, pairTabPotType_d, numPairs, -1);
-	  gpuman.nccl_broadcast(0, pairLists_d, pairLists_d, numPairs, -1);
-      }
-      gpuman.sync();
-      #endif
-}
-
-IndexList ComputeForce::decompDim() const {
-	IndexList ret;
-	ret.add(decomp.getNx());
-	ret.add(decomp.getNy());
-	ret.add(decomp.getNz());
-	return ret;
-}
-
-CellDecomposition ComputeForce::getDecomp() { return decomp; }
-
-float ComputeForce::decompCutoff() { return decomp.getCutoff(); }
-
-float ComputeForce::computeFull(bool get_energy) {
-	float energy = 0.0f;
-	gridSize = ((num+num_rb_attached_particles) * numReplicas) / NUM_THREADS + 1;
-	dim3 numBlocks(gridSize, 1, 1);
-	dim3 numThreads(NUM_THREADS, 1, 1);
-
-	// Call the kernel to calculate forces
-	computeFullKernel<<< numBlocks, numThreads >>>(forceInternal_d[0], pos_d[0], type_d, tableAlpha_d,
-		tableEps_d, tableRad6_d, num+num_rb_attached_particles, numParts, sys_d[0], energies_d, gridSize,
-		numReplicas, get_energy);
-
-	// Calculate energy based on the array created by the kernel
-	if (get_energy) {
-		gpuErrchk(cudaDeviceSynchronize());
-		thrust::device_ptr<float> en_d(energies_d);
-		energy = thrust::reduce(en_d, en_d + num + num_rb_attached_particles + numGroupSites);
-	}
-
-	return energy;
-}
-
-float ComputeForce::computeSoftcoreFull(bool get_energy) {
-	float energy = 0.0f;
-	gridSize = ((num+num_rb_attached_particles) * numReplicas) / NUM_THREADS + 1;
-	dim3 numBlocks(gridSize, 1, 1);
-	dim3 numThreads(NUM_THREADS, 1, 1);
-
-	// Call the kernel to calculate forces
-	computeSoftcoreFullKernel<<<numBlocks, numThreads>>>(forceInternal_d[0], pos_d[0], type_d,
-			tableEps_d, tableRad6_d, num+num_rb_attached_particles, numParts, sys_d[0], energies_d, gridSize,
-			numReplicas, get_energy);
-
-	// Calculate energy based on the array created by the kernel
-	if (get_energy) {
-		cudaDeviceSynchronize();
-		thrust::device_ptr<float> en_d(energies_d);
-		energy = thrust::reduce(en_d, en_d + num + num_rb_attached_particles);
-	}
-
-	return energy;
-}
-
-float ComputeForce::computeElecFull(bool get_energy) {
-	float energy = 0.0f;
-
-	gridSize = num/NUM_THREADS + 1;
-	dim3 numBlocks(gridSize, 1, 1);
-	dim3 numThreads(NUM_THREADS, 1, 1);
-
-	// Call the kernel to calculate forces
-	computeElecFullKernel<<<numBlocks, numThreads>>>(forceInternal_d[0], pos_d[0], type_d,
-			tableAlpha_d, num, numParts, sys_d[0], energies_d, gridSize, numReplicas,
-			get_energy);
-
-	// Calculate energy based on the array created by the kernel
-	if (get_energy) {
-		gpuErrchk(cudaDeviceSynchronize());
-		thrust::device_ptr<float> en_d(energies_d);
-		energy = thrust::reduce(en_d, en_d + num);
-	}
-
-	return energy;
-}
-
-
-float ComputeForce::compute(bool get_energy) {
-	float energy = 0.0f;
-
-	gridSize = ((num+num_rb_attached_particles) * numReplicas) / NUM_THREADS + 1;
-	dim3 numBlocks(gridSize, 1, 1);
-	dim3 numThreads(NUM_THREADS, 1, 1);
-
-	// Call the kernel to calculate forces
-	computeKernel<<<numBlocks, numThreads>>>(forceInternal_d[0], pos_d[0], type_d,
-			tableAlpha_d, tableEps_d, tableRad6_d, num+num_rb_attached_particles, numParts, sys_d[0],
-			decomp_d, energies_d, switchStart, switchLen, gridSize, numReplicas,
-			get_energy);
-
-	gpuErrchk(cudaDeviceSynchronize());
-	// Calculate the energy based on the array created by the kernel
-	if (get_energy) {
-		gpuErrchk(cudaDeviceSynchronize());
-		thrust::device_ptr<float> en_d(energies_d);
-		energy = thrust::reduce(en_d, en_d + num + num_rb_attached_particles + numGroupSites);
-	}
-
-	return energy;
-}
-
-//MLog: added Bond* bondList to the list of passed in variables.
-/*float ComputeForce::computeTabulated(Vector3* force, Vector3* pos, int* type,
-		Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap,
-		Angle* angles, Dihedral* dihedrals, bool get_energy, Bond* bondList) {*/
-float ComputeForce::computeTabulated(bool get_energy) {
-	float energy = 0.0f;
-
-	gridSize = ((num+num_rb_attached_particles) * numReplicas) / NUM_THREADS + 1;
-	dim3 numBlocks(gridSize, 1, 1);
-	dim3 numThreads(NUM_THREADS, 1, 1);
-	
-	// Call the kernel to calculate the forces
-	// int nb = (decomp.nCells.x * decomp.nCells.y * decomp.nCells.z);
-	// int nb = (1+(decomp.nCells.x * decomp.nCells.y * decomp.nCells.z)) * 75; /* RBTODO: number of pairLists */
-	const int nb = 800;
-	// printf("ComputeTabulated\n");
-
-	// RBTODO: get_energy
-	if (get_energy)
-	//if (false) 
-	{
-		//clearEnergies<<< nb, numThreads >>>(energies_d,num);
-		//gpuErrchk(cudaDeviceSynchronize());
-	        cudaMemset((void*)energies_d, 0, sizeof(float)*(num+num_rb_attached_particles+numGroupSites)*numReplicas);
-		computeTabulatedEnergyKernel<<< nb, numThreads >>>(forceInternal_d[0], pos_d[0], sys_d[0],
-						cutoff2, numPairs_d[0], pairLists_d[0], pairTabPotType_d[0], tablePot_d[0], energies_d);
-	}
-	
-	else
-	{
-	    // Copy positions from device 0 to all others
-
-                //gpuErrchk(cudaBindTexture(0,  PosTex, pos_d[0],sizeof(Vector3)*num*numReplicas));
-		//computeTabulatedKernel<<< nb, numThreads >>>(forceInternal_d[0], pos_d[0], sys_d[0],
-
-	    int ngpu = gpuman.gpus.size();
-	    if (ngpu == 1) {
-		int i = 0;
-		computeTabulatedKernel<64><<< dim3(2048,1,1), dim3(64,1,1), 0, gpuman.gpus[i].get_next_stream() >>>
-		    (forceInternal_d[i], sys_d[i], cutoff2, numPairs_d[i], pairLists_d[i], pairTabPotType_d[i], tablePot_d[i], pairLists_tex[i], pos_tex[i], pairTabPotType_tex[i]);
-
-	    } else {
-	    for (size_t i = 0; i < ngpu; ++i) {
-		gpuman.use(i);
-		int start =            floor( ((float) numPairs*i    )/ngpu );
-		int end   = i < ngpu-1 ? floor( ((float) numPairs*(i+1))/ngpu ) : numPairs;
-		
-		if (i == ngpu-1) assert(end == numPairs);
-		computeTabulatedKernel<64><<< dim3(2048,1,1), dim3(64,1,1), 0, gpuman.gpus[i].get_next_stream() >>>(forceInternal_d[i], sys_d[i],
-														    cutoff2, pairLists_d[i], pairTabPotType_d[i], tablePot_d[i], pairLists_tex[i], pos_tex[i], pairTabPotType_tex[i], start, end-start);
-                  gpuKernelCheck();
-	    }
-	    gpuman.use(0);
-	    }
-                //gpuErrchk(cudaUnbindTexture(PosTex));
-	}
-	/* printPairForceCounter<<<1,32>>>(); */
-
-	//Mlog: the commented function doesn't use bondList, uncomment for testing.
-	//if(bondMap_d != NULL && tableBond_d != NULL)
-
-	if(product_potential_list_d != NULL && product_potentials_d != NULL)
-	{
-	    computeProductPotentials <<<nb, numThreads, 0, gpuman.get_next_stream()>>> ( forceInternal_d[0], pos_d[0], sys_d[0], numReplicas*numProductPotentials, product_potential_particles_d, product_potentials_d, product_potential_list_d, productCount_d, energies_d, get_energy);
-	}
-
-	if(bondAngleList_d != NULL && tableBond_d != NULL && tableAngle_d != NULL)
-	{
-	    computeTabulatedBondAngles <<<nb, numThreads, 0, gpuman.get_next_stream()>>> ( forceInternal_d[0], pos_d[0], sys_d[0], numReplicas*numBondAngles, bondAngleList_d, tableAngle_d, tableBond_d, energies_d, get_energy);
-	}
-
-	if(bondList_d != NULL && tableBond_d != NULL)
-
-	{
-	    //computeTabulatedBonds <<<numBlocks, numThreads>>> ( force, pos, num, numParts, sys_d[0], bonds, bondMap_d, numBonds, numReplicas, energies_d, get_energy, tableBond_d);
-	//computeTabulatedBonds <<<nb, numThreads>>> ( forceInternal_d[0], pos_d[0], sys_d[0], numReplicas*numBonds/2, bondList_d, tableBond_d);
-	  //if(get_energy)
-              //cudaMemset(bond_energy_d, 0, sizeof(float)*num);
-		computeTabulatedBonds <<<nb, numThreads, 0, gpuman.get_next_stream()>>> ( forceInternal_d[0], pos_d[0], sys_d[0], numReplicas*numBonds/2, bondList_d, tableBond_d, energies_d, get_energy);
-	}
-
-	if (angleList_d != NULL && tableAngle_d != NULL)
-        {
-            //if(get_energy)
-		//computeTabulatedAngles<<<nb, numThreads>>>(forceInternal_d[0], pos_d[0], sys_d[0], numAngles*numReplicas, angleList_d, tableAngle_d);
-	    computeTabulatedAngles<<<nb, numThreads, 0, gpuman.get_next_stream()>>>(forceInternal_d[0], pos_d[0], sys_d[0], numAngles*numReplicas, angleList_d, tableAngle_d, energies_d, get_energy);
-        }
-	if (dihedralList_d != NULL && tableDihedral_d != NULL)
-        {
-            //if(get_energy)
-		//computeTabulatedDihedrals<<<nb, numThreads>>>(forceInternal_d[0], pos_d[0], sys_d[0], numDihedrals*numReplicas, dihedralList_d, dihedralPotList_d, tableDihedral_d);
-	    computeTabulatedDihedrals<<<nb, numThreads, 0, gpuman.get_next_stream()>>>(forceInternal_d[0], pos_d[0], sys_d[0], numDihedrals*numReplicas, 
-                dihedralList_d, dihedralPotList_d, tableDihedral_d, energies_d, get_energy);
-        }
-
-	// TODO: Sum energy
-	if (restraintIds_d != NULL )
-	    computeHarmonicRestraints<<<1, numThreads, 0, gpuman.get_next_stream()>>>(forceInternal_d[0], pos_d[0], sys_d[0], numRestraints*numReplicas, restraintIds_d, restraintLocs_d, restraintSprings_d);
-	
-
-	// Calculate the energy based on the array created by the kernel
-	// TODO: return energy
-	/*if (get_energy) 
-        {
-            float e = 0.f;
-	    gpuErrchk(cudaDeviceSynchronize());
-	    thrust::device_ptr<float> en_d(energies_d);
-	    e = (thrust::reduce(en_d, en_d+num*numReplicas)) / numReplicas;
-            std::fstream energy_file;
-            energy_file.open("energy_config.txt", std::fstream::out | std::fstream::app);
-            if(energy_file.is_open())
-            {
-                energy_file << "Configuation Energy: "  << e << " kcal/mol " << std::endl;
-                energy_file.close();
-            }
-            else
-            {
-                std::cout << "Error in opening energ files\n";
-            }
-            energy = e;
-        }*/
-	return energy;
-}
-
-float ComputeForce::computeTabulatedFull(bool get_energy) {
-	energy = 0.0f;
-
-	gridSize = ((num+num_rb_attached_particles) * numReplicas) / NUM_THREADS + 1;
-	dim3 numBlocks(gridSize, 1, 1);
-	dim3 numThreads(NUM_THREADS, 1, 1);
-
-	// Call the kernel to calculate forces
-	computeTabulatedFullKernel<<< numBlocks, numThreads >>>(forceInternal_d[0], pos_d[0], type_d,	tablePot_d[0], tableBond_d, num+num_rb_attached_particles, numParts, sys_d[0], bonds_d, bondMap_d, numBonds, excludes_d, excludeMap_d, numExcludes, energies_d, gridSize, numReplicas, get_energy, angles_d);
-	gpuErrchk(cudaDeviceSynchronize());
-
-	computeAngles<<< numBlocks, numThreads >>>(forceInternal_d[0], pos_d[0], angles_d, tableAngle_d,
-																						 numAngles, num+num_rb_attached_particles, sys_d[0], energies_d,
-																						 get_energy);
-	gpuErrchk(cudaDeviceSynchronize());
-	computeDihedrals<<< numBlocks, numThreads >>>(forceInternal_d[0], pos_d[0], dihedrals_d,
-																							  tableDihedral_d, numDihedrals,
-																								num+num_rb_attached_particles, sys_d[0], energies_d,
-																								get_energy);
-	// Calculate the energy based on the array created by the kernel
-	if (get_energy) {
-		gpuErrchk(cudaDeviceSynchronize());
-		thrust::device_ptr<float> en_d(energies_d);
-		energy = thrust::reduce(en_d, en_d + num + num_rb_attached_particles);
-	}
-
-	return energy;
-}
-
-/**
- * Allocate position and force buffers on every GPU and upload the host data
- * to the first one.
- *
- * For each GPU in gpuman.gpus a position buffer is allocated and a linear
- * texture object (pos_tex[i]) is created over it; a force buffer of the same
- * element count is allocated as well. Only device 0 receives the contents of
- * pos and forceInternal.
- *
- * @param forceInternal  host array of (num + num_rb_attached_particles +
- *                       numGroupSites) * numReplicas forces.
- * @param pos            host array of positions with the same element count.
- */
-void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos)
-{
-    const size_t tot_num = (num+num_rb_attached_particles+numGroupSites) * numReplicas;
-
-	for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) {
-	    gpuman.use(i);
-	    gpuErrchk(cudaMalloc(&pos_d[i], sizeof(Vector3) * tot_num));
-	    //Han-Yi bind to the texture
-	    // NOTE(review): the texture is described as tot_num float4 elements
-	    // while the buffer holds tot_num Vector3; this presumes
-	    // sizeof(Vector3) == sizeof(float4) -- confirm.
-	    cudaResourceDesc resDesc;
-	    memset(&resDesc, 0, sizeof(resDesc));
-	    resDesc.resType = cudaResourceTypeLinear;
-	    resDesc.res.linear.devPtr = pos_d[i];
-	    resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
-	    resDesc.res.linear.desc.x = 32; // bits per channel
-	    resDesc.res.linear.desc.y = 32; // bits per channel
-	    resDesc.res.linear.desc.z = 32; // bits per channel
-	    resDesc.res.linear.desc.w = 32; // bits per channel
-	    resDesc.res.linear.sizeInBytes = tot_num*sizeof(float4);
-	    
-	    cudaTextureDesc texDesc;
-	    memset(&texDesc, 0, sizeof(texDesc));
-	    texDesc.readMode = cudaReadModeElementType;
-	    
-	    // create texture object: we only have to do this once!
-	    // The return code is checked so that a failed creation is reported
-	    // here rather than surfacing later as bad texture fetches.
-	    pos_tex[i] = 0;
-	    gpuErrchk(cudaCreateTextureObject(&pos_tex[i], &resDesc, &texDesc, NULL));
-	    gpuErrchk(cudaDeviceSynchronize());
-	}
-	gpuman.use(0);
-
-	gpuErrchk(cudaMemcpyAsync(pos_d[0], pos, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice));
-
-	for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) {
-	    gpuman.use(i);
-	    gpuErrchk(cudaMalloc(&forceInternal_d[i], sizeof(Vector3) * tot_num));
-	}
-	gpuman.use(0);
-	gpuErrchk(cudaMemcpyAsync(forceInternal_d[0], forceInternal, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice));
-
-	// Make sure both uploads have finished before returning.
-	gpuErrchk(cudaDeviceSynchronize());
-}
-/**
- * Overload that additionally uploads momenta.
- *
- * Allocates mom_d for num * numReplicas entries, copies mom into it, then
- * delegates to copyToCUDA(forceInternal, pos) for positions and forces.
- */
-void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos, Vector3* mom)
-{
-	const size_t momCount = num * numReplicas;
-	const size_t momBytes = sizeof(Vector3) * momCount;
-
-	gpuErrchk(cudaMalloc(&mom_d, momBytes));
-	gpuErrchk(cudaMemcpyAsync(mom_d, mom, momBytes, cudaMemcpyHostToDevice));
-
-	copyToCUDA(forceInternal, pos);
-	gpuErrchk(cudaDeviceSynchronize());
-}
-/**
- * Overload that additionally uploads a per-particle float array.
- *
- * Allocates ran_d for num * numReplicas floats, copies random into it, then
- * delegates to copyToCUDA(forceInternal, pos, mom).
- */
-void ComputeForce::copyToCUDA(Vector3* forceInternal, Vector3* pos, Vector3* mom, float* random)
-{
-	const size_t ranCount = num * numReplicas;
-	const size_t ranBytes = sizeof(float) * ranCount;
-
-	gpuErrchk(cudaMalloc(&ran_d, ranBytes));
-	gpuErrchk(cudaMemcpyAsync(ran_d, random, ranBytes, cudaMemcpyHostToDevice));
-
-	copyToCUDA(forceInternal, pos, mom);
-	gpuErrchk(cudaDeviceSynchronize());
-}
-
-/**
- * Overwrite the first num * numReplicas entries of forceInternal_d[0] with
- * the host array f (blocking copy).
- */
-void ComputeForce::setForceInternalOnDevice(Vector3* f) {
-	// IMD, the only feature using this function, is currently incompatible
-	// with group sites, so group sites are not included in the copy.
-	assert(numGroupSites == 0);
-
-	const size_t count = num * numReplicas;
-	const size_t bytes = sizeof(Vector3) * count;
-	gpuErrchk(cudaMemcpy(forceInternal_d[0], f, bytes, cudaMemcpyHostToDevice));
-}
-
-/**
- * Upload particle types and the bonded / restraint / potential tables to the
- * currently selected device.
- *
- * Each section is skipped when its count is zero. All copies are issued with
- * cudaMemcpyAsync and the function synchronizes the device before returning.
- *
- * @param simNum   number of replicas; asserted equal to numReplicas.
- * @param type     host array of (num + num_rb_attached_particles) * simNum types.
- * @param bonds, bondMap             bond table and per-particle map.
- * @param excludes, excludeMap       exclusion table and per-particle map.
- * @param angles, dihedrals          angle and dihedral tables.
- * @param restraints                 numRestraints entries, split into id / r0 / k arrays.
- * @param bondAngles                 numBondAngles entries.
- * @param simple_potential_map       name -> index into simple_potentials.
- * @param simple_potentials          tables uploaded one by one; device pointers
- *                                   are kept in simple_potential_pots_d.
- * @param product_potential_confs    numProductPotentials configurations.
- */
-void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals, const Restraint* const restraints, const BondAngle* const bondAngles, const XpotMap simple_potential_map, const std::vector<SimplePotential> simple_potentials, const ProductPotentialConf* const product_potential_confs)
-{
-    assert(simNum == numReplicas); // Not sure why we have both of these things
-    int tot_num_with_rb = (num+num_rb_attached_particles) * simNum;
-    int tot_num_with_rb_group = (num+num_rb_attached_particles+numGroupSites) * simNum;
-	// type_d
-	gpuErrchk(cudaMalloc(&type_d, sizeof(int) * tot_num_with_rb));
-	gpuErrchk(cudaMemcpyAsync(type_d, type, sizeof(int) * tot_num_with_rb, cudaMemcpyHostToDevice));
-	
-	if (numBonds > 0)
-	{
-		// bonds_d
-		gpuErrchk(cudaMalloc(&bonds_d, sizeof(Bond) * numBonds));
-		gpuErrchk(cudaMemcpyAsync(bonds_d, bonds, sizeof(Bond) * numBonds, cudaMemcpyHostToDevice));
-		
-		// bondMap_d (sized to include group sites, unlike excludeMap_d)
-		gpuErrchk(cudaMalloc(&bondMap_d, sizeof(int2) * tot_num_with_rb_group));
-		gpuErrchk(cudaMemcpyAsync(bondMap_d, bondMap, sizeof(int2) * tot_num_with_rb_group, cudaMemcpyHostToDevice));
-	}
-
-	if (numExcludes > 0) {
-		// excludes_d
-		gpuErrchk(cudaMalloc(&excludes_d, sizeof(Exclude) * numExcludes));
-		gpuErrchk(cudaMemcpyAsync(excludes_d, excludes, sizeof(Exclude) * numExcludes,
-				cudaMemcpyHostToDevice));
-		
-		// excludeMap_d
-		gpuErrchk(cudaMalloc(&excludeMap_d, sizeof(int2) * tot_num_with_rb));
-		gpuErrchk(cudaMemcpyAsync(excludeMap_d, excludeMap, sizeof(int2) * tot_num_with_rb,
-				cudaMemcpyHostToDevice));
-	}
-
-	if (numAngles > 0) {
-		// angles_d
-		gpuErrchk(cudaMalloc(&angles_d, sizeof(Angle) * numAngles));
-		gpuErrchk(cudaMemcpyAsync(angles_d, angles, sizeof(Angle) * numAngles,
-				cudaMemcpyHostToDevice));
-	}
-
-	if (numDihedrals > 0) {
-		// dihedrals_d
-		gpuErrchk(cudaMalloc(&dihedrals_d, sizeof(Dihedral) * numDihedrals));
-		gpuErrchk(cudaMemcpyAsync(dihedrals_d, dihedrals,
-				sizeof(Dihedral) * numDihedrals,
-				cudaMemcpyHostToDevice));
-	}
-
-	if (numRestraints > 0) {
-	    // Split the restraint structs into one staging array per field.
-	    // std::vector replaces the former variable-length arrays, which are
-	    // a compiler extension in C++.
-	    // NOTE(review): like the delete[] calls in the product-potential
-	    // section, this releases host staging memory before the final
-	    // synchronize; it presumes the async copy from pageable memory has
-	    // consumed the source by the time it returns -- confirm.
-	    std::vector<int>     restraintIds(numRestraints);
-	    std::vector<Vector3> restraintLocs(numRestraints);
-	    std::vector<float>   restraintSprings(numRestraints);
-	    for (int i = 0; i < numRestraints; ++i) {
-		restraintIds[i]     = restraints[i].id;
-		restraintLocs[i]    = restraints[i].r0;
-		restraintSprings[i] = restraints[i].k;
-	    }
-
-	    gpuErrchk(cudaMalloc(&restraintIds_d, sizeof(int) * numRestraints));
-	    gpuErrchk(cudaMalloc(&restraintLocs_d, sizeof(Vector3) * numRestraints));
-	    gpuErrchk(cudaMalloc(&restraintSprings_d, sizeof(float) * numRestraints));
-	    
-	    gpuErrchk(cudaMemcpyAsync(restraintIds_d, restraintIds.data(),
-				      sizeof(int)     * numRestraints, cudaMemcpyHostToDevice));
-	    gpuErrchk(cudaMemcpyAsync(restraintLocs_d, restraintLocs.data(),
-				      sizeof(Vector3) * numRestraints, cudaMemcpyHostToDevice));
-	    gpuErrchk(cudaMemcpyAsync(restraintSprings_d, restraintSprings.data(),
-				      sizeof(float)   * numRestraints, cudaMemcpyHostToDevice));
-	}	    
-
-	if (numBondAngles > 0) {
-		gpuErrchk(cudaMalloc(&bondAngles_d, sizeof(BondAngle) * numBondAngles));
-		gpuErrchk(cudaMemcpyAsync(bondAngles_d, bondAngles, sizeof(BondAngle) * numBondAngles,
-				cudaMemcpyHostToDevice));
-	}
-
-	if (simple_potentials.size() > 0) {
-	    // Host-side array holding one device pointer per potential table.
-	    // Only the tables themselves are uploaded here; the SimplePotential
-	    // structs are not copied to the device in this section.
-	    float **val = simple_potential_pots_d = new float*[simple_potentials.size()];
-	    for (size_t i=0; i < simple_potentials.size(); ++i) {
-		const SimplePotential& sp = simple_potentials[i];
-		gpuErrchk(cudaMalloc(&val[i], sizeof(float)*sp.size));
-		gpuErrchk(cudaMemcpyAsync(val[i], sp.pot, sizeof(float)*sp.size, cudaMemcpyHostToDevice));
-	    }
-	}
-	
-	if (numProductPotentials > 0) {
-	    // First pass: count potentials and participating particles for ONE replica.
-	    int n_pots = 0;
-	    int n_particles = 0;
-	    for (int i=0; i < numProductPotentials; ++i) {
-		const ProductPotentialConf& c = product_potential_confs[i];
-		n_pots += c.indices.size();
-		for (size_t j=0; j < c.indices.size(); ++j) {
-		    n_particles += c.indices[j].size();
-		}
-	    }
-
-	    // Build productPotentialLists on host
-	    int *particle_list = new int[n_particles*numReplicas];
-	    SimplePotential *product_potentials = new SimplePotential[n_pots];
-	    uint2 *product_potential_list = new uint2[numProductPotentials*numReplicas];
-	    unsigned short *productCount = new unsigned short[numProductPotentials*numReplicas];
-
-	    // Second pass: n_particles now runs over ALL replicas, while n_pots
-	    // restarts for every replica because the potentials are shared.
-	    n_particles = 0;
-	    
-	    for (unsigned int r=0; r < numReplicas; ++r) {
-		n_pots = 0;
-		for (int i=0; i < numProductPotentials; ++i) {
-		    const ProductPotentialConf& c = product_potential_confs[i];
-		    // (offset of first potential, offset of first particle) for this product potential
-		    product_potential_list[i+r*numProductPotentials] = make_uint2( n_pots, n_particles );
-
-		    for (size_t j=0; j < c.indices.size(); ++j) {
-			if (r == 0) {
-			    // Resolve the potential by name; the device table pointer replaces the host one.
-			    const auto sp_it = simple_potential_map.find( std::string( c.potential_names[j].val() ) );
-			    assert( sp_it != simple_potential_map.end() ); // an unknown name must not dereference end()
-			    const unsigned int sp_i = sp_it->second;
-			    product_potentials[n_pots] = simple_potentials[sp_i];
-			    product_potentials[n_pots].pot = simple_potential_pots_d[sp_i];
-			}
-			++n_pots;
-			for (size_t k=0; k < c.indices[j].size(); ++k) {
-			    // Shift particle indices into replica r.
-			    particle_list[n_particles++] = c.indices[j][k]+r*num;
-			}
-		    }
-		    productCount[i+r*numProductPotentials] = c.indices.size();
-		}
-	    }
-
-	    // Copy to device
-	    // n_particles already counts every replica after the loop above, so
-	    // it must not be multiplied by numReplicas again: doing so would read
-	    // past the end of particle_list whenever numReplicas > 1.
-	    size_t sz = n_particles * sizeof(int);
-	    gpuErrchk(cudaMalloc(&product_potential_particles_d, sz));
-	    gpuErrchk(cudaMemcpyAsync(product_potential_particles_d, particle_list, sz,
-	    				  cudaMemcpyHostToDevice));
-	    sz = n_pots * sizeof(SimplePotential);
-	    gpuErrchk(cudaMalloc(&product_potentials_d, sz));
-	    gpuErrchk(cudaMemcpyAsync(product_potentials_d, product_potentials, sz,
-	    				  cudaMemcpyHostToDevice));
-	    sz = numProductPotentials*numReplicas * sizeof(uint2);
-	    gpuErrchk(cudaMalloc(&product_potential_list_d, sz));
-	    gpuErrchk(cudaMemcpyAsync(product_potential_list_d, product_potential_list, sz,
-	    				  cudaMemcpyHostToDevice));
-	    sz = numProductPotentials*numReplicas * sizeof(unsigned short);
-	    gpuErrchk(cudaMalloc(&productCount_d, sz));
-	    gpuErrchk(cudaMemcpyAsync(productCount_d, productCount, sz,
-	    				  cudaMemcpyHostToDevice));
-
-	    // Clean up
-	    delete[] particle_list;
-	    delete[] product_potentials;
-	    delete[] product_potential_list;
-	    delete[] productCount;
-	}
-
-	gpuErrchk(cudaDeviceSynchronize());
-}
-
-// void ComputeForce::createBondList(int3 *bondList)
-// {
-// 	size_t size = (numBonds / 2) * numReplicas * sizeof(int3);
-// 	gpuErrchk( cudaMalloc( &bondList_d, size ) );
-// 	gpuErrchk( cudaMemcpyAsync( bondList_d, bondList, size, cudaMemcpyHostToDevice) );
-
-// 	for(int i = 0 ; i < (numBonds / 2) * numReplicas ; i++)
-// 	{
-// 		cout << "Displaying: bondList_d["<< i <<"].x = " << bondList[i].x << ".\n"
-// 			<< "Displaying: bondList_d["<< i <<"].y = " << bondList[i].y << ".\n"
-// 			<< "Displaying: bondList_d["<< i <<"].z = " << bondList[i].z << ".\n";
-
-// 	}
-// }
-
-/**
- * Allocate device buffers for the bonded-interaction index lists and start
- * their host-to-device copies.
- *
- * Each list is handled only when its count (numBonds, numAngles,
- * numDihedrals, numBondAngles) is positive. The copies are asynchronous and
- * this function does not synchronize.
- */
-void ComputeForce::copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *dihedralList, int *dihedralPotList, int4* bondAngleList) {
-	if (numBonds > 0) {
-		// The list holds numBonds / 2 entries per replica.
-		const size_t bytes = (numBonds / 2) * numReplicas * sizeof(int3);
-		gpuErrchk( cudaMalloc( &bondList_d, bytes ) );
-		gpuErrchk( cudaMemcpyAsync( bondList_d, bondList, bytes, cudaMemcpyHostToDevice) );
-	}
-
-	if (numAngles > 0) {
-		const size_t bytes = numAngles * numReplicas * sizeof(int4);
-		gpuErrchk( cudaMalloc( &angleList_d, bytes ) );
-		gpuErrchk( cudaMemcpyAsync( angleList_d, angleList, bytes, cudaMemcpyHostToDevice) );
-	}
-
-	if (numDihedrals > 0) {
-		// Particle quadruples ...
-		const size_t listBytes = numDihedrals * numReplicas * sizeof(int4);
-		gpuErrchk( cudaMalloc( &dihedralList_d, listBytes ) );
-		gpuErrchk( cudaMemcpyAsync( dihedralList_d, dihedralList, listBytes, cudaMemcpyHostToDevice) );
-
-		// ... and one int per dihedral from dihedralPotList.
-		const size_t potBytes = numDihedrals * numReplicas * sizeof(int);
-		gpuErrchk( cudaMalloc( &dihedralPotList_d, potBytes ) );
-		gpuErrchk( cudaMemcpyAsync( dihedralPotList_d, dihedralPotList, potBytes, cudaMemcpyHostToDevice) );
-	}
-
-	if (numBondAngles > 0) {
-		// Two int4 entries per bond-angle.
-		const size_t bytes = 2*numBondAngles * numReplicas * sizeof(int4);
-		gpuErrchk( cudaMalloc( &bondAngleList_d, bytes ) );
-		gpuErrchk( cudaMemcpyAsync( bondAngleList_d, bondAngleList, bytes, cudaMemcpyHostToDevice) );
-	}
-}
diff --git a/src/ComputeForce.cuh b/src/ComputeForce.cuh
deleted file mode 100644
index 63778e529cc2e0f26b65fa31ec2f59267c6845f0..0000000000000000000000000000000000000000
--- a/src/ComputeForce.cuh
+++ /dev/null
@@ -1,1081 +0,0 @@
-// ComputeForce.cuh
-//
-// Terrance Howard <heyterrance@gmail.com>
-#pragma once
-#include <cassert>
-
-#include "CudaUtil.cuh"
-
-#include "TabulatedMethods.cuh"
-
-// From TabulatedMethods.cuh: constexpr float BD_PI = 3.1415927f; 
-constexpr size_t MAX_CELLS_FOR_CELLNEIGHBORLIST = 1<<25;
-constexpr size_t MAX_NLIST_PAIRS = 1<<27; // Reduce if ARBD crashes immediately with GPU memory allocation error
-
-// texture<int,    1, cudaReadModeElementType>      NeighborsTex;
-// texture<int,    1, cudaReadModeElementType> pairTabPotTypeTex;
-//texture<int2,   1, cudaReadModeElementType>      pairListsTex;
-// texture<float4, 1, cudaReadModeElementType>            PosTex;
-
-/**
- * Coulomb-like energy and force with a switching region.
- *
- * @param r      separation vector.
- * @param alpha  interaction strength.
- * @param start  distance at which switching begins.
- * @param len    length of the switching region.
- * @return zero-initialized EnergyForce at or beyond start + len; the shifted
- *         alpha/d form at or below start; the quadratic switching form in
- *         between.
- */
-__host__ __device__
-EnergyForce ComputeForce::coulombForce(Vector3 r, float alpha,
-		float start, float len) {
-	const float dist = r.length();
-
-	// Beyond the end of the switching region.
-	if (dist >= start + len)
-		return EnergyForce();
-
-	if (dist > start) {
-		// Switching.
-		const float c = alpha / (start * start);
-		const float e = 0.5f/len * c * (start + len - dist) * (start + len - dist);
-		const Vector3 f = -c * (1.0f - (dist - start) / len) / dist * r;
-		return EnergyForce(e, f);
-	}
-
-	// Inside the switching start: energy is shifted by constant terms in start and len.
-	const float e = alpha/dist - alpha/start + 0.5f*alpha/(start*start)*len;
-	const Vector3 f = -alpha/(dist*dist*dist)*r;
-	return EnergyForce(e, f);
-}
-
-/**
- * Unswitched Coulomb-like interaction: energy alpha/d and force
- * -alpha/d^3 * r, where d is the length of r.
- */
-__host__ __device__
-EnergyForce ComputeForce::coulombForceFull(Vector3 r, float alpha) {
-	const float dist = r.length();
-	return EnergyForce(alpha/dist, -alpha/(dist*dist*dist)*r);
-}
-
-/**
- * Soft-core interaction that is non-zero only while d^6 < rad6.
- *
- * @param r     separation vector.
- * @param eps   energy scale.
- * @param rad6  sixth power of the interaction radius.
- * @return energy eps*(rad6^2/d^12 - 2*rad6/d^6) + eps and the matching force
- *         when d^6 < rad6; a zero-initialized EnergyForce otherwise.
- */
-__host__ __device__
-EnergyForce ComputeForce::softcoreForce(Vector3 r, float eps, float rad6) {
-	const float r2 = r.length2();
-	const float r6 = r2*r2*r2;
-
-	// Diagnostic only: this value is used solely to report NaN components,
-	// and it is evaluated for every call regardless of the cutoff below.
-	Vector3 probe = -12*eps*(rad6*rad6/(r6*r6*r2) - rad6/(r6*r2))*r;
-	if (isnan(probe.x) or isnan(probe.y) or isnan(probe.z))
-		printf(">>>> Damn.\n");
-
-	// No contribution unless d^6 < rad6 (a NaN comparison also lands here).
-	if (!(r6 < rad6))
-		return EnergyForce();
-
-	const float r12 = r6 * r6;
-	const float rad12 = rad6 * rad6;
-	float e = eps * ((rad12 / (r12)) - (2.0f * rad6 / r6)) + eps;
-	Vector3 f = -12.0f * eps * (rad12 / (r12 * r2) - rad6 / (r6 * r2)) * r;
-	return EnergyForce(e, f);
-}
-
-/**
- * All-pairs Coulomb + soft-core forces within each replica.
- *
- * Thread i handles particle i and loops over every other particle j of the
- * same replica (replica = i / num). The force on i is written to force[i].
- * When get_energy is set, energies are accumulated and stored only for
- * i < num, counting each pair once (j > i).
- *
- * The interaction tables are indexed by typei * numParts + typej. gridSize is
- * not used by this kernel.
- */
-__global__
-void computeFullKernel(Vector3 force[], Vector3 pos[], int type[],
-		float tableAlpha[], float tableEps[], float tableRad6[],
-		int num, int numParts, BaseGrid* sys, float g_energies[],
-		int gridSize, int numReplicas, bool get_energy) {
-	const int i = blockIdx.x * blockDim.x + threadIdx.x;
-
-	// Threads past the last particle have nothing to do.
-	if (i >= num * numReplicas)
-		return;
-
-	const int repID = i / num;
-	const int typei = type[i];
-	const Vector3& posi = pos[i];
-	const bool storeEnergy = get_energy && i < num;
-
-	float alpha, eps, rad6;
-	float energySum = 0.0f;
-	Vector3 forceSum(0.0f);
-
-	// Table values are re-read only when the partner type changes between
-	// consecutive j, to avoid redundant memory loads.
-	int cachedType = -1;
-	for (int j = repID * num; j < (repID + 1) * num; ++j) {
-		if (i == j) continue;
-
-		const int tj = type[j];
-		if (cachedType != tj) {
-			cachedType = tj;
-			alpha = tableAlpha[typei * numParts + cachedType];
-			eps = tableEps[typei * numParts + cachedType];
-			rad6 = tableRad6[typei * numParts + cachedType];
-		}
-
-		Vector3 dr = sys->wrapDiff(pos[j] - posi);
-		EnergyForce fc = ComputeForce::coulombForceFull(dr, alpha);
-		EnergyForce fh = ComputeForce::softcoreForce(dr, eps, rad6);
-
-		// Only particle i is updated here; the thread owning j handles j.
-		forceSum += fc.f + fh.f;
-
-		// Count each pair's energy once.
-		if (storeEnergy && j > i)
-			energySum += fc.e + fh.e;
-	}
-
-	force[i] = forceSum;
-	if (storeEnergy)
-		g_energies[i] = energySum;
-}
-
-
-/**
- * All-pairs soft-core forces within each replica.
- *
- * Thread i handles particle i and loops over every other particle j of the
- * same replica (replica = i / num). The force on i is written to force[i];
- * when get_energy is set, the energy of pairs with j > i is written to
- * g_energies[i]. gridSize is not used by this kernel.
- */
-__global__
-void computeSoftcoreFullKernel(Vector3 force[], Vector3 pos[], int type[],
-		float tableEps[], float tableRad6[],
-		int num, int numParts, BaseGrid* sys,
-		float g_energies[], int gridSize,
-		int numReplicas, bool get_energy) {
-	const int i = blockIdx.x * blockDim.x + threadIdx.x;
-
-	// Threads past the last particle have nothing to do.
-	if (i >= num * numReplicas)
-		return;
-
-	const int repID = i / num;
-	const int typei = type[i];
-	const Vector3& posi = pos[i];
-
-	float eps, rad6;
-	float energySum = 0.0f;
-	Vector3 forceSum(0.0f);
-
-	// Table values are re-read only when the partner type changes.
-	int cachedType = -1;
-	for (int j = repID * num; j < (repID + 1) * num; ++j) {
-		if (i == j) continue;
-
-		const int tj = type[j];
-		if (cachedType != tj) {
-			cachedType = tj;
-			eps = tableEps[typei * numParts + cachedType];
-			rad6 = tableRad6[typei * numParts + cachedType];
-		}
-
-		Vector3 dr = sys->wrapDiff(pos[j] - posi);
-		EnergyForce fh = ComputeForce::softcoreForce(dr, eps, rad6);
-
-		// Only particle i is updated here; the thread owning j handles j.
-		forceSum += fh.f;
-
-		// Count each pair's energy once.
-		if (get_energy && j > i)
-			energySum += fh.e;
-	}
-
-	force[i] = forceSum;
-	if (get_energy)
-		g_energies[i] = energySum;
-}
-
-/**
- * All-pairs Coulomb forces within each replica.
- *
- * Thread i handles particle i and loops over every other particle j of the
- * same replica (replica = i / num). The force on i is written to force[i];
- * when get_energy is set, the energy of pairs with j > i is written to
- * g_energies[i]. gridSize is not used by this kernel.
- */
-__global__
-void computeElecFullKernel(Vector3 force[], Vector3 pos[], int type[],
-													 float tableAlpha[], int num, int numParts,
-													 BaseGrid* sys, float g_energies[],
-													 int gridSize, int numReplicas,
-													 bool get_energy) {
-	// Calculate the ID of each thread.
-	const int i = blockIdx.x * blockDim.x + threadIdx.x;
-	float energy_local = 0.0f;
-	// For all threads representing a valid particle
-	if (i < num * numReplicas) {
-		const int repID = i / num;
-		const int typei = type[i];
-		const Vector3 posi = pos[i];
-		float alpha;
-
-		Vector3 force_local(0.0f);
-		int typej = -1;
-		// Iterate over the particles of this replica: [repID*num, (repID+1)*num).
-		// The upper bound used to be num * (repID-1), which is below the
-		// start for any positive num, so the loop body never ran and all
-		// forces and energies came out as zero.
-		for (int j = repID * num; j < (repID + 1) * num; j++) {
-			if (i == j) continue;
-			int newj = type[j];
-			if (typej != newj) {
-				// Re-read the table only when the partner type changes.
-				typej = newj;
-				alpha = tableAlpha[typei * numParts + typej];
-			}
-			const Vector3 dr = sys->wrapDiff(pos[j] - posi);
-			EnergyForce fc = ComputeForce::coulombForceFull(dr, alpha);
-			// Only update the force in the X particle.
-			// Another thread will handle the Y particle
-			force_local += fc.f;
-			// Only update the energy once. The other thread handling x and y
-			// will not update the energy
-			if (get_energy and j > i)
-				energy_local += fc.e;
-		}
-		force[i] = force_local;
-		if (get_energy)
-			g_energies[i] = energy_local;
-	}
-}
-
-
-/* const __device__ int maxPairs = 1 << 14; */
-
-/* __global__ */
-/* void pairlistTest(Vector3 pos[], int num, int numReplicas, */
-/* 									BaseGrid* sys, CellDecomposition* decomp, */
-/* 									const int nCells, const int blocksPerCell, */
-/* 									int* g_numPairs, int* g_pairI, int* g_pairJ ) { */
-/* 	const int gtid = threadIdx.x + blockIdx.x*blockDim.x; */
-/* 	for (int i = gtid; i < gridDim.x*100; i+=blockDim.x) { */
-/* 		assert( g_numPairs[i] == 0 ); */
-/* 		assert( g_pairI[i] != NULL ); */
-/* 		assert( g_pairJ[i] != NULL ); */
-/* 	} */
-/* } */
-
-// Device-side pointer to a single int counter; it is incremented by the
-// pairlist kernels only when DEBUGEXCLUSIONS is defined.
-__device__ int* exSum;
-
-/**
- * Allocate the counter behind exSum, publish its address to the device
- * symbol, and set the counter to zero. CUDA return codes are not checked.
- */
-void initExSum() {
-    int zero = 0;
-    int* counter_d;
-    cudaMalloc(&counter_d, sizeof(int));
-    cudaMemcpyToSymbol(exSum, &counter_d, sizeof(int*));
-    cudaMemcpy(counter_d, &zero, sizeof(int), cudaMemcpyHostToDevice);
-}
-/**
- * Read back the counter that exSum points to.
- *
- * Fetches the pointer stored in the device symbol, then copies the int it
- * refers to back to the host. CUDA return codes are not checked.
- */
-int getExSum() {
-    int* counter_d;
-    cudaMemcpyFromSymbol(&counter_d, exSum, sizeof(int*));
-
-    int value;
-    cudaMemcpy(&value, counter_d, sizeof(int), cudaMemcpyDeviceToHost);
-    return value;
-}
-//
-/**
- * Linear id of the cell at offset (dx, dy, dz) from cell_idx, or -1 when the
- * offset is rejected.
- *
- * Cell ids are laid out as z + cells.z * (y + cells.y * x). Along an axis
- * with one cell, any nonzero offset coordinate is rejected; along an axis
- * with two cells, coordinates outside [0, 1] are rejected (presumably so that
- * periodic wrapping does not list the same cell twice -- confirm). Otherwise
- * the coordinates are wrapped periodically.
- *
- * @param cells     number of cells along x, y, z.
- * @param cell_idx  (x, y, z) coordinates of the reference cell.
- */
-__device__
-int computeCellNeighbor( const int3 cells, const int3 cell_idx, const int dx, const int dy, const int dz )
-{
-    // Neighbor coordinates before wrapping.
-    int u = cell_idx.x + dx;
-    int v = cell_idx.y + dy;
-    int w = cell_idx.z + dz;
-
-    // Single-cell axes accept only coordinate 0.
-    if (cells.x == 1 && u != 0) return -1;
-    if (cells.y == 1 && v != 0) return -1;
-    if (cells.z == 1 && w != 0) return -1;
-
-    // Two-cell axes accept only coordinates 0 and 1.
-    if (cells.x == 2 && (u < 0 || u > 1)) return -1;
-    if (cells.y == 2 && (v < 0 || v > 1)) return -1;
-    if (cells.z == 2 && (w < 0 || w > 1)) return -1;
-
-    // Periodic wrap, then flatten to a linear cell id.
-    u = (u + cells.x) % cells.x;
-    v = (v + cells.y) % cells.y;
-    w = (w + cells.z) % cells.z;
-    return w + cells.z * (v + cells.y * u);
-}
-
-/**
- * Fill CellNeighborsList with the 27 neighbor ids of every cell.
- *
- * Entry [slot + 27 * cID] holds the result of computeCellNeighbor for cell
- * cID and offset (dx, dy, dz), where slot = 9*(dx+1) + 3*(dy+1) + (dz+1).
- * The cell counts are read from Cells[0]; threads stride over the cells by
- * blockDim.x * gridDim.x.
- */
-__global__ 
-void createNeighborsList(const int3 *Cells,int* __restrict__ CellNeighborsList)
-{
-    const int3 cells  = Cells[0];
-    const int  nCells = cells.x * cells.y * cells.z;
-    const int  first  = threadIdx.x + blockDim.x * blockIdx.x;
-    const int  stride = blockDim.x * gridDim.x;
-
-    for (int cID = first; cID < nCells; cID += stride) {
-        // Decode the linear id into (x, y, z) cell coordinates.
-        const int3 here = make_int3(cID / (cells.z * cells.y),
-                                    cID /  cells.z % cells.y,
-                                    cID %  cells.z);
-
-        // slot enumerates the offsets with dz varying fastest and dx slowest.
-        for (int slot = 0; slot < 27; ++slot) {
-            const int dx =  slot / 9      - 1;
-            const int dy = (slot / 3) % 3 - 1;
-            const int dz =  slot % 3      - 1;
-            CellNeighborsList[size_t(slot+27*cID)] = computeCellNeighbor( cells, here, dx, dy, dz );
-        }
-    }
-}
-/**
- * Build the global pairlist of particle pairs (ai < aj) that lie in
- * neighboring cells, are not excluded, and are within pairlistdist2 (squared
- * distance).
- *
- * Blocks are grouped Size per cell; each block stages up to N particles of
- * cell i (index and position) in shared memory and then scans the 27
- * neighbor cells. Accepted pairs are appended to g_pair / g_pairTabPotType at
- * an index obtained from atomicAggInc on g_numPairs; pairs beyond
- * MAX_NLIST_PAIRS are dropped after a one-time message.
- *
- * Neighbor ids come from NeighborsTex when nCells is below
- * MAX_CELLS_FOR_CELLNEIGHBORLIST and are computed on the fly otherwise.
- * Positions are read through PosTex; the pos argument is not used.
- */
-template<const int BlockSize,const int Size,const int N>
-__global__ void createPairlists(Vector3* __restrict__ pos, const int num, const int numReplicas,
-                                const BaseGrid* __restrict__ sys, const CellDecomposition* __restrict__ decomp,
-                                const int nCells,int* g_numPairs, int2* g_pair, int numParts, const int* __restrict__ type,
-                                int* __restrict__ g_pairTabPotType, const Exclude* __restrict__ excludes,
-                                const int2* __restrict__ excludeMap, const int numExcludes, float pairlistdist2, cudaTextureObject_t PosTex, cudaTextureObject_t NeighborsTex)
-{
-    // Staging area for the particles of cell i handled by this block.
-    __shared__ float4 __align__(16) particle[N];
-    __shared__ int     Index_i[N];
-
-    const int TotalBlocks  = gridDim.x * gridDim.y;
-    const int cells        = TotalBlocks / Size;
-    const int cell_start   = ( blockIdx.x + gridDim.x * blockIdx.y) / Size;
-    const int pid_start    = ((blockIdx.x + gridDim.x * blockIdx.y) % Size) * N;
-    const int tid          =   threadIdx.x + blockDim.x * threadIdx.y
-                                           + blockDim.x *  blockDim.y * threadIdx.z;
-    const int warpLane     = tid % WARPSIZE;
-    const int nReps        = gridDim.z;
-    const int idx_  = tid % N;   // which staged particle of cell i this thread works on
-    const int idx__ = tid / N;   // starting offset into each neighbor cell
-    const int Step1 = Size * N;
-    const int Step2 = Size / N; 
-
-    const CellDecomposition::cell_t* __restrict__ cellInfo = decomp->getCells();
-
-    for(int repID = blockIdx.z; repID < numReplicas; repID += nReps)
-    {
-        for(int cellid_i = cell_start; cellid_i < nCells; cellid_i += cells)
-        {
-            CellDecomposition::range_t rangeI = decomp->getRange(cellid_i,repID);
-            int Ni = rangeI.last-rangeI.first;
-
-            for(int pid_i = pid_start; pid_i < Ni; pid_i += Step1)
-            {
-                // Load the next batch of cell-i particles into shared memory.
-                __syncthreads();
-                if(tid + pid_i < Ni && tid < N)
-                {
-                    Index_i [tid] = cellInfo[rangeI.first+pid_i+tid].particle;
-                    particle[tid] = tex1Dfetch<float4>(PosTex,Index_i[tid]);
-                }
-                __syncthreads();
-
-                if(idx_ + pid_i < Ni)
-                {
-                    int ai = Index_i[idx_];
-                    Vector3 A(particle[idx_]);
-
-                    // [x, y) range of this particle's entries in excludes, or (-1,-1).
-                    int2 ex_pair = make_int2(-1,-1);
-                    if(numExcludes > 0 && excludeMap != NULL)
-                    {
-                        ex_pair = excludeMap[ai -repID * num];
-                    }
-
-                    //loop over neighbor directions
-                    for(int idx = 0; idx < 27; ++idx)
-                    {
-
-			// Restart the exclusion cursor for every neighbor cell.
-			int currEx = ex_pair.x;
-			int nextEx = (ex_pair.x >= 0) ? excludes[currEx].ind2 : -1;
-
-			int neighbor_cell;
-			if (nCells < MAX_CELLS_FOR_CELLNEIGHBORLIST) {
-			    neighbor_cell = tex1Dfetch<int>(NeighborsTex,idx+27*cellid_i);
-			} else {
-			    // Decode the linear cell id as (x, y, z), matching the
-			    // layout id = z + nz * (y + ny * x) that
-			    // computeCellNeighbor produces. The components used to
-			    // be passed in (z, y, x) order, which selected the wrong
-			    // neighbor cells on this fallback path.
-			    const int3 dims = decomp->nCells;
-			    const int3 cell_idx = make_int3(cellid_i / (dims.z * dims.y),
-							    cellid_i /  dims.z % dims.y,
-							    cellid_i %  dims.z);
-
-			    // Same offset ordering as the precomputed list: dz fastest, dx slowest.
-			    int dz = (idx % 3) - 1;
-			    int dy = ((idx/3) % 3) - 1;
-			    int dx = ((idx/9) % 3) - 1;
-			    neighbor_cell = computeCellNeighbor( dims, cell_idx, dx, dy, dz );
-			}
-
-                        if(neighbor_cell < 0)
-                        {
-                            continue;
-                        }
-
-                        CellDecomposition::range_t rangeJ = decomp->getRange(neighbor_cell,repID);
-                        int Nj = rangeJ.last-rangeJ.first;
-
-                        // In each neighbor cell, loop over particles
-                        for(int pid_j = idx__; pid_j < Nj; pid_j += Step2)
-                        {
-                            
-                            int aj  = cellInfo[pid_j+rangeJ.first].particle;
-                            // Each unordered pair is recorded once, with ai < aj.
-                            if( aj <= ai)
-                            {
-                                continue;
-                            }
-
-                            // Advance the exclusion cursor up to aj (replica-local index).
-                            while (nextEx >= 0 && nextEx < ( aj - repID * num))
-                            {
-                                nextEx = (currEx < ex_pair.y - 1) ? excludes[++currEx].ind2 : -1;
-                            }
-
-                            if (nextEx == (aj - repID * num))
-                            {
-                                #ifdef DEBUGEXCLUSIONS
-                                atomicAggInc( exSum, warpLane );
-                                #endif
-                                nextEx = (currEx < ex_pair.y - 1) ? excludes[++currEx].ind2 : -1;
-                                continue;
-                            }
-
-                            float4 b = tex1Dfetch<float4>(PosTex,aj);
-                            Vector3 B(b.x,b.y,b.z);
-
-                            float dr = (sys->wrapDiff(A-B)).length2();
-                            if(dr <= pairlistdist2)
-                            {
-                                int gid = atomicAggInc( g_numPairs, warpLane );
-				if (gid < MAX_NLIST_PAIRS) {
-				    int pairType = type[ai] + type[aj] * numParts;
-
-				    g_pair[gid] = make_int2(ai,aj);
-				    g_pairTabPotType[gid] = pairType;
-				} else {
-				    // Report the overflow once; further pairs are dropped.
-				    if (gid == MAX_NLIST_PAIRS)
-					printf("RAN OUT OF PAIRLIST SPACE\n");
-				}
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-/**
- * Straightforward pairlist builder kept alongside createPairlists.
- *
- * Blocks are split into groups of `split`: block b handles cells
- * b % split, b % split + split, ... and, within a cell, particles starting at
- * offset b / split with stride gridDim.x / split. The threads of a block
- * stride over the particles of each of the 27 neighbor cells. Pairs (ai < aj)
- * that are not excluded and lie within pairlistdist2 (squared distance) are
- * appended to g_pair / g_pairTabPotType at an index obtained from
- * atomicAggInc on g_numPairs.
- */
-__global__
-void createPairlists_debug(Vector3* __restrict__ pos, const int num, const int numReplicas,
-                                const BaseGrid* __restrict__ sys, const CellDecomposition* __restrict__ decomp,
-                                const int nCells,
-                                int* g_numPairs, int2* g_pair,
-                                int numParts, const int* __restrict__ type, int* __restrict__ g_pairTabPotType,
-                                const Exclude* __restrict__ excludes, const int2* __restrict__ excludeMap, const int numExcludes,
-                                float pairlistdist2)
-{
-    // TODO: loop over all cells with edges within pairlistdist2
-    const int tid = threadIdx.x;
-    const int warpLane = tid % WARPSIZE; /* RBTODO: optimize */
-    const int split = 32;                                   /* numblocks should be divisible by split */
-    const CellDecomposition::cell_t* __restrict__ cellInfo = decomp->getCells();
-    for (int cID = 0 + (blockIdx.x % split); cID < nCells; cID += split)
-    {
-        for (int repID = 0; repID < numReplicas; repID++)
-        {
-            const CellDecomposition::range_t rangeI = decomp->getRange(cID, repID);
-            for (int ci = rangeI.first + blockIdx.x/split; ci < rangeI.last; ci += gridDim.x/split)
-            {
-                const int ai = cellInfo[ci].particle;
-                const CellDecomposition::cell_t celli = cellInfo[ci];
-                // [ex_start, ex_end) range of this particle's entries in excludes, or -1.
-                const int ex_start = (numExcludes > 0 && excludeMap != NULL) ? excludeMap[ai -repID*num].x : -1;
-                const int ex_end   = (numExcludes > 0 && excludeMap != NULL) ? excludeMap[ai -repID*num].y : -1;
-                for(int x = -1; x <= 1; ++x) 
-                {
-                    for(int y = -1; y <= 1; ++y) 
-                    {
-                        for (int z = -1; z <= 1; ++z) 
-                        {
-                            const int nID = decomp->getNeighborID(celli, x, y, z);
-                            if (nID < 0) continue;
-                            // Initialize exclusions
-                            // TODO: optimize exclusion code (and entire kernel)
-                            int currEx = ex_start;
-                            int nextEx = (ex_start >= 0) ? excludes[currEx].ind2 : -1;
-                            const CellDecomposition::range_t range = decomp->getRange(nID, repID);
-                            for (int n = range.first + tid; n < range.last; n+=blockDim.x) 
-                            {
-                                const int aj = cellInfo[n].particle;
-                                // Each unordered pair is recorded once, with ai < aj.
-                                if (aj <= ai) continue;
-                                // Skip excludes
-                                // Implementation requires that aj increases monotonically
-                                while (nextEx >= 0 && nextEx < (aj - repID * num)) // TODO get rid of this
-                                    nextEx = (currEx < ex_end - 1) ? excludes[++currEx].ind2 : -1;
-                                if (nextEx == (aj - repID * num))
-                                {
-                                    #ifdef DEBUGEXCLUSIONS
-                                    atomicAggInc( exSum, warpLane );
-                                    #endif
-                                    nextEx = (currEx < ex_end - 1) ? excludes[++currEx].ind2 : -1;
-                                    continue;
-                                }
-                                // TODO: Skip non-interacting types for efficiency
-                                // Skip ones that are too far away
-                                const float dr = (sys->wrapDiff(pos[aj] - pos[ai])).length2();
-                                if (dr > pairlistdist2) continue;
-                                // Add to pairlist
-                                int gid = atomicAggInc( g_numPairs, warpLane );
-                                // Same capacity guard as createPairlists: never write
-                                // past MAX_NLIST_PAIRS entries, and report the overflow once.
-                                // NOTE(review): assumes g_pair / g_pairTabPotType hold
-                                // MAX_NLIST_PAIRS entries here as well -- confirm against the caller.
-                                if (gid >= MAX_NLIST_PAIRS)
-                                {
-                                    if (gid == MAX_NLIST_PAIRS)
-                                        printf("RAN OUT OF PAIRLIST SPACE\n");
-                                    continue;
-                                }
-                                int pairType = type[ai] + type[aj] * numParts;
-                                g_pair[gid] = make_int2(ai,aj);
-                                g_pairTabPotType[gid] = pairType;
-                            }
-                        }                      
-                    }
-                }
-            }                              
-        }
-    }
-}
-
-// TODO: deprecate?
-__global__
-void computeKernel(Vector3 force[], Vector3 pos[], int type[],
-									 float tableAlpha[], float tableEps[], float tableRad6[],
-									 int num, int numParts, BaseGrid* sys,
-									 CellDecomposition* decomp,
-									 float g_energies[], float switchStart, float switchLen,
-									 int gridSize, int numReplicas, bool get_energy) {
-	const int i = blockIdx.x * blockDim.x + threadIdx.x;
-	float energy_local = 0.0f;
-
-	// i - index of the particle in the original, unsorted array
-	if (i < num * numReplicas) {
-		const int repID = i / num;
-		const int typei = type[i];
-		const Vector3 posi = pos[i];
-		// TODO: Fix this: Find correct celli (add a new function to
-		//       CellDecomposition, binary search over cells)
-		CellDecomposition::cell_t celli = decomp->getCellForParticle(i);
-		float alpha(0.0f), eps(0.0f), rad6(0.0f);
-		Vector3 force_local(0.0f);
-
-		const CellDecomposition::cell_t* pairs = decomp->getCells();
-		for (int x = -1; x <= 1; ++x) {
-			for (int y = -1; y <= 1; ++y) {
-				for (int z = -1; z <= 1; ++z) {
-					const int nID = decomp->getNeighborID(celli, x, y, z);
-					// Skip if got wrong or duplicate cell.
-					if (nID < 0) continue;
-
-					const CellDecomposition::range_t range = decomp->getRange(nID, repID);
-
-					int typej = -1;
-					for (int n = range.first; n < range.last; ++n) {
-						const int j = pairs[n].particle;
-//						if (j < 0)
-//							printf("%d -> pairs[%d].particle %d\n", i, n, j);
-						if (j == i) continue;
-						const int newj = type[j];
-						// Update values.
-						if (typej != newj) {
-							typej = newj;
-							alpha = tableAlpha[typei * numParts + typej];
-							eps = tableEps[typei * numParts + typej];
-							rad6 = tableRad6[typei * numParts + typej];
-						}
-
-						const Vector3 dr = sys->wrapDiff(pos[j] - posi);
-						if (dr.length() < 1e-4) {
-//							printf("dr = %g << 1: %d -> %d on [%d, %d)\n",
-//										 dr.length(), i, j, range.first, range.last);
-						}
-
-						const EnergyForce fc =
-								ComputeForce::coulombForce(dr, alpha, switchStart, switchLen);
-						const EnergyForce fh = ComputeForce::softcoreForce(dr, eps, rad6);
-
-						force_local += fc.f + fh.f;
-						energy_local += 0.5f * (fc.e + fh.e);
-					} 	// n
-				} 		// z
-			} 			// y
-		} 				// x
-		force[i] = force_local;
-		if (isnan(force_local.x) or isnan(force_local.y) or isnan(force_local.z)) {
-//			printf("Nan FORCE!\n");
-			force[i] = Vector3(0.0f);
-		}
-		if (get_energy)
-			g_energies[i] = energy_local;
-	}
-}
-
-__device__ int pairForceCounter = 0;
-__global__ void printPairForceCounter() {
-	if (threadIdx.x + blockIdx.x == 0)
-		printf("Computed the force for %d pairs\n", pairForceCounter);
-}
-
-template<const int BlockSize>
-__device__ inline void _computeTabulatedKernel(Vector3* force, const BaseGrid* __restrict__ sys, 
-					       float cutoff2, const int numPairs, const int2* __restrict__ g_pair, 
-					       const int* __restrict__ g_pairTabPotType, TabulatedPotential** __restrict__ tablePot,
-					       cudaTextureObject_t pairListsTex, cudaTextureObject_t PosTex, cudaTextureObject_t pairTabPotTypeTex
-    )
-{
-    const int tid = threadIdx.x + blockDim.x * threadIdx.y
-                                         + blockDim.x *  blockDim.y * threadIdx.z 
-                                         + BlockSize  *( blockIdx.x + gridDim.x * blockIdx.y 
-                                         + gridDim.x  * gridDim.y   * blockIdx.z );
-
-    const int TotalThreads = BlockSize * gridDim.x * gridDim.y * gridDim.z;
-    for (int i = tid; i < numPairs; i += TotalThreads) 
-    {
-        //int2 pair = g_pair[i];
-        int2 pair = tex1Dfetch<int2>(pairListsTex,i);
-        //int  ind  = tex1Dfetch(pairTabPotTypeTex,i); 
-
-        int ai = pair.x;
-        int aj = pair.y;
-                        
-        //int ind = g_pairTabPotType[i];
-
-        Vector3 a(tex1Dfetch<float4>(PosTex, ai));
-        Vector3 b(tex1Dfetch<float4>(PosTex, aj));
-        Vector3 dr = sys->wrapDiff(b-a);
-        
-        float d2 = dr.length2();
-        int  ind  = tex1Dfetch<int>(pairTabPotTypeTex,i);
-        if (tablePot[ind] != NULL && d2 <= cutoff2) 
-        {
-            Vector3 f = tablePot[ind]->computef(dr,d2);
-            atomicAdd( &force[ai],  f );
-            atomicAdd( &force[aj], -f );
-        }
-    }
-}
-
-template<const int BlockSize>
-__global__ void computeTabulatedKernel(Vector3* force, const BaseGrid* __restrict__ sys, 
-                                       float cutoff2, const int* __restrict__ g_numPairs, const int2* __restrict__ g_pair, 
-                                       const int* __restrict__ g_pairTabPotType, TabulatedPotential** __restrict__ tablePot,
-				       cudaTextureObject_t pairListsTex, cudaTextureObject_t PosTex, cudaTextureObject_t pairTabPotTypeTex) {
-    _computeTabulatedKernel<BlockSize>(force,sys,
-				       cutoff2, *g_numPairs, g_pair,
-				       g_pairTabPotType, tablePot,
-				       pairListsTex, PosTex, pairTabPotTypeTex);
-}
-
-template<const int BlockSize>
-__global__ void computeTabulatedKernel(Vector3* force, const BaseGrid* __restrict__ sys, 
-                                       float cutoff2, const int2* __restrict__ g_pair, 
-                                       const int* __restrict__ g_pairTabPotType, TabulatedPotential** __restrict__ tablePot,
-				       cudaTextureObject_t pairListsTex, cudaTextureObject_t PosTex, cudaTextureObject_t pairTabPotTypeTex,
-				       int start, int numPairs) {
-    _computeTabulatedKernel<BlockSize>(force,sys,
-				       cutoff2, numPairs, g_pair+start,
-				       g_pairTabPotType+start, tablePot,
-				       pairListsTex, PosTex, pairTabPotTypeTex);
-} 
-
-__global__ void clearEnergies(float* __restrict__  g_energies, int num) {
-	for (int i = threadIdx.x+blockIdx.x*blockDim.x; i < num; i+=blockDim.x*gridDim.x) {
-		g_energies[i] = 0.0f;
-	}
-}
-
-__global__ void computeTabulatedEnergyKernel(Vector3* force, const Vector3* __restrict__ pos,
-				const BaseGrid* __restrict__ sys, float cutoff2,
-				const int* __restrict__ g_numPairs,	const int2* __restrict__ g_pair, const int* __restrict__ g_pairTabPotType, 	TabulatedPotential** __restrict__ tablePot, float* g_energies) {
-	const int numPairs = *g_numPairs;
-	for (int i = threadIdx.x+blockIdx.x*blockDim.x; i < numPairs; i+=blockDim.x*gridDim.x) {
-		const int2 pair = g_pair[i];
-		const int ai = pair.x;
-		const int aj = pair.y;
-		const int ind = g_pairTabPotType[i];
-
-		// RBTODO: implement wrapDiff2, returns dr2 (???)
-		Vector3 dr = pos[aj] - pos[ai];
-		dr = sys->wrapDiff(dr);
-		float d2 = dr.length2();
-		// RBTODO: order pairs according to distance to reduce divergence // not actually faster
-		
-		if (tablePot[ind] != NULL && d2 <= cutoff2) { 
-			EnergyForce fe = tablePot[ind]->compute(dr,d2);
-			atomicAdd( &force[ai],  fe.f );
-			atomicAdd( &force[aj], -fe.f );
-			// RBTODO: reduce energies
-			atomicAdd( &(g_energies[ai]), fe.e*0.5f );
-			atomicAdd( &(g_energies[aj]), fe.e*0.5f );
-		}
-	}
-}
-
-
-// =============================================================================
-// Kernel computes forces between Brownian particles (ions)
-// NOT using cell decomposition
-//
-__global__
-void computeTabulatedFullKernel(Vector3 force[], Vector3 pos[], int type[], TabulatedPotential* tablePot[], TabulatedPotential* tableBond[], int num, int numParts, BaseGrid *sys, Bond bonds[], int2 bondMap[], int numBonds, Exclude excludes[], int2 excludeMap[], int numExcludes, float g_energies[], int gridSize, int numReplicas, bool get_energy, Angle angles[]) 
-{
-	// Thread's unique ID.
-	const int i = blockIdx.x * blockDim.x + threadIdx.x;
-
-	// Initialize interaction energy (per particle)
-	float energy_local = 0.0f;
-
-	// Loop over ALL particles in ALL replicas
-	if (i < num * numReplicas) {
-		const int repID = i / num;
-
-		// Each particle may have a varying number of bonds.
-		// bondMap is an array with one element for each particle which keeps track
-		// of where a particle's bonds are stored in the bonds array.
-		// bondMap[i].x is the index in the bonds array where the ith particle's
-		// bonds begin.
-		// bondMap[i].y is the index in the bonds array where the ith particle's
-		// bonds end.
-		const int bond_start	= (bondMap != NULL) ? bondMap[i - repID * num].x : -1;
-		const int bond_end 		= (bondMap != NULL) ? bondMap[i - repID * num].y : -1;
-
-		// currBond is the index in the bonds array that we should look at next
-		// currBond is initialized to bond_start because that is the first index of the
-		// bonds array where this particle's bonds are stored
-		int currBond = bond_start;
-
-		// nextBond is the ID number of the next particle that this particle is bonded to
-		// If this particle has at least one bond, then nextBond is initialized to be the
-	 	// first particle that this particle is bonded to
-		int nextBond = (bond_start >= 0) ? bonds[bond_start].ind2 : -1;
-
-		// Same as for bonds, but for exclusions now
-		const int ex_start 	= (excludeMap != NULL) ? excludeMap[i].x : -1;
-		const int ex_end 		= (excludeMap != NULL) ? excludeMap[i].y : -1;
-		int currEx = ex_start;
-		int nextEx = (ex_start >= 0) ? excludes[ex_start].ind2 : -1;
-
-		// Particle's type and position
-		const int typei = type[i];
-		const Vector3& posi = pos[i];
-
-		// Initialize force_local - force on a particle (i)
-		Vector3 force_local(0.0f);
-
-		int typej = -1;
-		int ind = -1;
-
-		// Loop over ALL particles in a replica, where current particle belongs to
-		const size_t first 	= repID * num;
-		const size_t last	= first + num;
-		for (int j = first; j < last; ++j) {
-			if (i == j) continue;
-			
-			int newj = type[j];
-			if (typej != newj) {
-				typej = newj;
-				ind = typei + typej * numParts;
-			}
-
-			Vector3 dr = sys->wrapDiff(pos[j] - posi);
-
-			EnergyForce ft(0.0f, Vector3(0.0f));
-
-			if (nextEx == (j - repID * num))
-				nextEx = (currEx < ex_end - 1) ? excludes[++currEx].ind2 : -1;
-			else if (tablePot[ind] != NULL)
-				ft = tablePot[ind]->compute(dr);
-
-			// If the next bond we want is the same as j, then there is a bond between
-			// particles i and j.
-			if (nextBond == (j - repID * num) and tableBond != NULL) {
-				// If the user has specified the REPLACE option for this bond, then
-				// overwrite the force we calculated from the regular tabulated
-				// potential. If the user has specified the ADD option, then add the bond
-				// force to the tabulated potential value.
-				EnergyForce bond_ef = tableBond[bonds[currBond].tabFileIndex]->compute(dr);
-				switch (bonds[currBond].flag) {
-					case Bond::REPLACE:	ft  = bond_ef; break;
-					case Bond::ADD:     ft += bond_ef; break;
-				}
-
-				// Increment currBond, so that we can find the index of the next particle
-				// that this particle is bonded to
-				if (currBond < bond_end - 1) nextBond = bonds[++currBond].ind2;
-				else nextBond = -1;
-			}
-
-			force_local += ft.f;
-			if (get_energy and j > i)
-				energy_local += ft.e;
-
-		} // Loop over all particles (i != j) in replica repID
-
-		force[i] = force_local;
-		if (get_energy and i < num)
-			g_energies[i] = energy_local;
-	}
-}
-
-__global__
-void computeAngles(Vector3 force[], Vector3 pos[],
-									 Angle angles[], TabulatedAnglePotential* tableAngle[],
-									 int numAngles, int num, BaseGrid* sys,
-									 float g_energies[], bool get_energy) {
-	int idx = blockIdx.x * blockDim.x + threadIdx.x;
-	float energy_local = 0.0f;
-	Vector3 force_local(0.0f);
-	if (idx < num) {
-		for (int i = 0; i < numAngles; ++i) {
-			Angle& a = angles[i];
-			const int ind = a.getIndex(idx);
-			if (ind >= 0) {
-				EnergyForce ef = tableAngle[a.tabFileIndex]->computeOLD(&a, pos, sys, ind);
-				force_local += ef.f;
-				if (ind == 1 and get_energy)
-					energy_local += ef.e;
-			}
-		}
-		force[idx] += force_local;
-		if (get_energy)
-			g_energies[idx] += energy_local;
-	}
-}
-
-// TODO: add kernels for energy calculations
-//__global__ void computeTabulatedBonds(Vector3* force,
-//				Vector3* __restrict__ pos,
-//				BaseGrid* __restrict__ sys,
-//				int numBonds, int3* __restrict__ bondList_d, TabulatedPotential** tableBond) {
-__global__
-void computeTabulatedBonds(Vector3* force, Vector3* __restrict__ pos, BaseGrid* __restrict__ sys, 
-int numBonds, int3* __restrict__ bondList_d, TabulatedPotential** tableBond, float* energy, bool get_energy)
-{
-	// Loop over ALL bonds in ALL replicas
-	for (int bid = threadIdx.x+blockIdx.x*blockDim.x; bid<numBonds; bid+=blockDim.x*gridDim.x) {
-		// Initialize interaction energy (per particle)
-		// float energy_local = 0.0f;
-		
-		int i = bondList_d[bid].x;
-		int j = bondList_d[bid].y;
-
-		// Find the distance between particles i and j,
-		// wrapping this value if necessary
-		const Vector3 dr = sys->wrapDiff(pos[j] - pos[i]);
-
-		//Vector3 force_local = tableBond[ bondList_d[bid].z ]->computef(dr,dr.length2());
-	        EnergyForce fe_local = tableBond[ bondList_d[bid].z ]->compute(dr,dr.length2());	
-		//atomicAdd( &force[i], force_local );
-		//atomicAdd( &force[j], -force_local );
-		atomicAdd( &force[i], fe_local.f );
-                atomicAdd( &force[j], -fe_local.f );
-
-		if (get_energy)
-		{
-		 	//TODO: clarification on energy computation needed, consider changing.
-		 	atomicAdd( &energy[i], fe_local.e*0.5f);
-		        atomicAdd( &energy[j], fe_local.e*0.5f);
-		}
-	}
-}
-
-__global__
-void computeTabulatedAngles(Vector3* force,
-				Vector3* __restrict__ pos,
-				BaseGrid* __restrict__ sys,
-				int numAngles, int4* __restrict__ angleList_d, TabulatedAnglePotential** tableAngle, float* energy, bool get_energy) {
-	// Loop over ALL angles in ALL replicas
-	for (int i = threadIdx.x+blockIdx.x*blockDim.x; i<numAngles; i+=blockDim.x*gridDim.x) {
-		int4& ids = angleList_d[i];
-		computeAngle(tableAngle[ ids.w ], sys, force, pos, ids.x, ids.y, ids.z, energy, get_energy);
-	// if (get_energy)
-	// {
-	//     //TODO: clarification on energy computation needed, consider changing.
-	//     atomicAdd( &g_energies[i], energy_local);
-	//     //atomicAdd( &g_energies[j], energy_local);
-	// }
-	}
-}
-
-__global__
-void computeTabulatedBondAngles(Vector3* force,
-				Vector3* __restrict__ pos,
-				BaseGrid* __restrict__ sys,
-				int numBondAngles, int4* __restrict__ bondAngleList_d, TabulatedAnglePotential** tableAngle,
-				TabulatedPotential** tableBond,
-				float* energy, bool get_energy) {
-	// Loop over ALL angles in ALL replicas
-	for (int i = threadIdx.x+blockIdx.x*blockDim.x; i<numBondAngles; i+=blockDim.x*gridDim.x) {
-		int atom1 = bondAngleList_d[2*i].x;
-		int atom2 = bondAngleList_d[2*i].y;
-		int atom3 = bondAngleList_d[2*i].z;
-		int atom4 = bondAngleList_d[2*i].w;
-
-		int angleInd1 = bondAngleList_d[2*i+1].x;
-		int bondInd   = bondAngleList_d[2*i+1].y;
-		int angleInd2 = bondAngleList_d[2*i+1].z;
-
-		computeBondAngle(tableAngle[ angleInd1 ], tableBond[ bondInd ], tableAngle[ angleInd2 ], sys, force, pos, atom1, atom2, atom3, atom4, energy, get_energy);
-	}
-}
-
-__global__
-void computeProductPotentials(Vector3* force,
-			      Vector3* __restrict__ pos,
-			      BaseGrid* __restrict__ sys,
-			      int numProductPotentials,
-			      int* __restrict__ productPotentialParticles,
-			      SimplePotential* __restrict__ potentialList,
-			      uint2* __restrict__ productPotential_list,
-			      unsigned short* __restrict__ productCount,
-			      float* energy, bool get_energy) {
-    /*
-      productPotential_list[i].x : index of first potential in potentialList for i_th productPotential
-      productPotential_list[i].y : index of first atom in productPotentialParticles for i_th productPotential
-
-      for three potentials, angle, bond, angle, we would have the following atomic indices in productPotentialParticles:
-        pot1 : productPotential_list[i].y, productPotential_list[i].y + 1 , productPotential_list[i].y + 2
-        pot2 : productPotential_list[i].y + 3, productPotential_list[i].y + 4
-	and
-        pot3 : productPotential_list[i].y + 5, productPotential_list[i].y + 6, productPotential_list[i].y + 7
-
-      productCount[i] : number of potentials in the i_th productPotential
-    */
-
-    // CRAPPY NAIVE IMPLEMENTATION
-    constexpr int MAX_XPOTS = 4;
-    float2 energy_and_deriv[MAX_XPOTS];
-    float tmp_force;
-
-    for (int i = threadIdx.x+blockIdx.x*blockDim.x; i<numProductPotentials; i+=blockDim.x*gridDim.x) {
-	unsigned short num_pots = productCount[i];
-
-	unsigned int part_idx = productPotential_list[i].y;
-#pragma unroll
-	for (unsigned short int j = 0; j < MAX_XPOTS; ++j) {
-	    if (j == num_pots) break;
-	    SimplePotential& p = potentialList[ productPotential_list[i].x + j ];
-
-	    // Hidden branch divergence in compute_value => sort potentials by type before running kernel
-	    float tmp = p.compute_value(pos,sys, &productPotentialParticles[part_idx]);
-	    energy_and_deriv[j] = p.compute_energy_and_deriv(tmp);
-	    part_idx += p.type==BOND? 2: p.type==ANGLE? 3: 4;
-	}
-
-	part_idx = productPotential_list[i].y;
-#pragma unroll
-	for (unsigned short int j = 0; j < MAX_XPOTS; ++j) {
-	    if (j == num_pots) break;
-	    tmp_force = energy_and_deriv[j].y;
-#pragma unroll
-	    for (unsigned short int k = 0; k < MAX_XPOTS; ++k) {
-		if (k == num_pots) break;
-		if (j == k) continue;
-		tmp_force *= energy_and_deriv[k].x;
-	    }
-	    SimplePotential& p = potentialList[ productPotential_list[i].x + j ];
-	    if (tmp_force != 0) {
-		// TODO add energy
-		p.apply_force(pos,sys, force, &productPotentialParticles[part_idx], tmp_force);
-	    }
-	    part_idx += p.type==BOND? 2: p.type==ANGLE? 3: 4;
-	}
-    }
-}
-
-
-__global__
-void computeDihedrals(Vector3 force[], Vector3 pos[],
-											Dihedral dihedrals[],
-											TabulatedDihedralPotential* tableDihedral[],
-											int numDihedrals, int num, BaseGrid* sys, float g_energies[],
-											bool get_energy) {
-	int i = blockIdx.x * blockDim.x + threadIdx.x;
-	// float energy_local = 0.0f;
-	Vector3 force_local(0.0f);
-
-	if (i < numDihedrals) {
-		// RBTODO: optimize
-		Dihedral& d = dihedrals[i];
-
-		const Vector3 ab = sys->wrapDiff( pos[d.ind1] - pos[d.ind2] );
-		const Vector3 bc = sys->wrapDiff( pos[d.ind2] - pos[d.ind3] );
-		const Vector3 cd = sys->wrapDiff( pos[d.ind3] - pos[d.ind4] );
-		
-		//const float distab = ab.length();
-		const float distbc = bc.length();
-		//const float distcd = cd.length();
-	
-		Vector3 crossABC = ab.cross(bc);
-		Vector3 crossBCD = bc.cross(cd);
-		Vector3 crossX = bc.cross(crossABC);
-
-		const float cos_phi = crossABC.dot(crossBCD) / (crossABC.length() * crossBCD.length());
-		const float sin_phi = crossX.dot(crossBCD) / (crossX.length() * crossBCD.length());
-		
-		const float angle = -atan2(sin_phi, cos_phi);
-
-	
-		Vector3 f1, f2, f3; // forces
-		f1 = -distbc * crossABC.rLength2() * crossABC;
-		f3 = -distbc * crossBCD.rLength2() * crossBCD;
-		f2 = -(ab.dot(bc) * bc.rLength2()) * f1 - (bc.dot(cd) * bc.rLength2()) * f3;
-	
-		// Shift "angle" by "PI" since    -PI < dihedral < PI
-		// And our tabulated potential data: 0 < angle < 2 PI
-		float& dangleInv = tableDihedral[d.tabFileIndex]->angle_step_inv;
-		float t = (angle + BD_PI) * dangleInv;
-		int home = (int) floorf(t);
-		t = t - home;
-
-		int size = tableDihedral[d.tabFileIndex]->size;
-		home = home % size;
-		int home1 = (home + 1) % size;
-
-		//================================================
-		// Linear interpolation
-		float * pot = tableDihedral[d.tabFileIndex]->pot;
-		float U0 = pot[home];       // Potential
-		float dU = pot[home1] - U0; // Change in potential
-		
-		float energy = dU * t + U0;
-		float f = -dU * dangleInv;
-		//================================================
-		// TODO: add an option for cubic interpolation [Probably not]
-
-		if (crossABC.rLength() > 1.0f || crossBCD.rLength() > 1.0f)
-			// avoid singularity when one angle is straight 
-			f = 0.0f;
-
-		f1 *= f;
-		f2 *= f;
-		f3 *= f;
-
-		atomicAdd( &force[d.ind1], f1 );
-		atomicAdd( &force[d.ind2], f2-f1 );
-		atomicAdd( &force[d.ind3], f3-f2 );
-		atomicAdd( &force[d.ind4], -f3 );
-
-		if (get_energy) {
-			atomicAdd( &g_energies[d.ind1], energy );
-			atomicAdd( &g_energies[d.ind2], energy );
-			atomicAdd( &g_energies[d.ind3], energy );
-			atomicAdd( &g_energies[d.ind4], energy );
-		}
-	}
-}
-
-
-    // void computeTabulatedDihedrals(Vector3* __restrict__ force, Vector3* __restrict__ pos, int num,
-    // 			    int numParts, BaseGrid* __restrict__ sys, int4* __restrict__ dihedralList_d,
-    // 			    int* __restrict__ dihedralPotList_d,
-    // 			    int numDihedrals, int numReplicas, float* __restrict g_energies,
-    // 			    bool get_energy, TabulatedDihedralPotential** __restrict__ tableDihedral) {
-
-__global__
-void computeTabulatedDihedrals(Vector3* force, const Vector3* __restrict__ pos,
-			       const BaseGrid* __restrict__ sys,
-			       int numDihedrals, const int4* const __restrict__ dihedralList_d,
-			       const int* __restrict__ dihedralPotList_d, TabulatedDihedralPotential** tableDihedral, float* energy, bool get_energy) {
-
-	// int currDihedral = blockIdx.x * blockDim.x + threadIdx.x; // first particle ID
-
-    // Loop over ALL dihedrals in ALL replicas
-	for (int i = threadIdx.x+blockIdx.x*blockDim.x; i < numDihedrals; i+=blockDim.x*gridDim.x) {
-		const int4& ids = dihedralList_d[i];
-		const int& id = dihedralPotList_d[i];
-		computeDihedral(tableDihedral[ id ], sys, force, pos, ids.x, ids.y, ids.z, ids.w, energy, get_energy);
-
-	// if (get_energy)
-	// {
-	//     //TODO: clarification on energy computation needed, consider changing.
-	//     atomicAdd( &g_energies[i], energy_local);
-	//     //atomicAdd( &g_energies[j], energy_local);
-	// }
-    }
-}
-
-__global__
-void computeHarmonicRestraints(Vector3* force, const Vector3* __restrict__ pos,
-			       const BaseGrid* __restrict__ sys,
-			       int numRestraints, const int* const __restrict__ particleId,
-			       const Vector3* __restrict__ r0, const float* __restrict__ k) {
-
-    // Loop over ALL dihedrals in ALL replicas
-    for (int i = threadIdx.x+blockIdx.x*blockDim.x; i < numRestraints; i+=blockDim.x*gridDim.x) {
-	const int& id = particleId[i];
-	const Vector3 dr = sys->wrapDiff(pos[id]-r0[i]);
-	Vector3 f = -k[i]*dr;
-	atomicAdd( &force[ id ], f );
-    }
-}
diff --git a/src/ComputeForce.h b/src/ComputeForce.h
deleted file mode 100644
index 39247282970c4b83713afffe9eb3e1a073c9d253..0000000000000000000000000000000000000000
--- a/src/ComputeForce.h
+++ /dev/null
@@ -1,297 +0,0 @@
-///////////////////////////////////////////////////////////////////////
-// Brownian dynamics base class
-// Author: Jeff Comer <jcomer2@illinois.edu>
-#pragma once
-
-#ifdef __CUDACC__
-    #define HOST __host__
-    #define DEVICE __device__
-#else
-    #define HOST 
-    #define DEVICE 
-#endif
-
-#include "BaseGrid.h"
-#include "BrownianParticleType.h"
-#include "CellDecomposition.h"
-
-// Simple classes
-#include "Restraint.h"
-#include "useful.h"
-#include "Exclude.h"
-#include "Angle.h"
-#include "JamesBond.h"
-#include "TabulatedPotential.h"
-#include "TabulatedAngle.h"
-#include "TabulatedDihedral.h"
-#include "ProductPotential.h"
-#include "GPUManager.h"
-
-// #include <map>
-
-#include <cstdio>
-// #include <cuda_runtime.h>
-#include <thrust/transform_reduce.h>	// thrust::reduce
-#include <thrust/functional.h>				// thrust::plus
-
-#ifdef USE_BOOST
-#include <boost/unordered_map.hpp>
-typedef boost::unordered_map<std::string,unsigned int> XpotMap;
-inline std::size_t hash_value(String const& s) {
-    if (s.length() == 0) return 0;
-    return boost::hash_range(s.val(), s.val()+s.length());
-}
-#else
-#include <map>
-typedef std::map<std::string,unsigned int> XpotMap;
-inline std::size_t hash_value(String const& s) {
-    if (s.length() == 0) return 0;
-    return std::hash<std::string>{}( std::string(s.val()) );
-}
-#endif
-
-
-
-const unsigned int NUM_THREADS = 256;
-
-// Configuration
-class Configuration;
-
-class ComputeForce {
-public:
-    ComputeForce(const Configuration &c, const int numReplicas);
-    ~ComputeForce();
-    
-	void updateNumber(int newNum);
-	void makeTables(const BrownianParticleType* part);
-
-	bool addTabulatedPotential(String fileName, int type0, int type1);
-	bool addBondPotential(String fileName, int ind, Bond* bonds, BondAngle* bondAngles);
-	bool addAnglePotential(String fileName, int ind, Angle* angles, BondAngle* bondAngles);
-	bool addDihedralPotential(String fileName, int ind, Dihedral* dihedrals);
-
-	void decompose();
-	
-	CellDecomposition getDecomp();
-	IndexList decompDim() const;
-
-	float decompCutoff();
-
-	// Does nothing
-	int* neighborhood(Vector3 r);
-
-	float computeSoftcoreFull(bool get_energy);
-	float computeElecFull(bool get_energy);
-	
-	float compute(bool get_energy);
-	float computeFull(bool get_energy);
-	
-	//MLog: the commented function doesn't use bondList, uncomment for testing.
-	/*float computeTabulated(Vector3* force, Vector3* pos, int* type,
-			Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap,
-			Angle* angles, Dihedral* dihedrals, bool get_energy);*/
-	float computeTabulated(bool get_energy);
-	float computeTabulatedFull(bool get_energy);
-	
-	//MLog: new copy function to allocate memory required by ComputeForce class.
-	void copyToCUDA(Vector3* forceInternal, Vector3* pos);
-	void copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals, const Restraint* const restraints, const BondAngle* const bondAngles,
-			const XpotMap simple_potential_map,
-			const std::vector<SimplePotential> simple_potentials,
-			const ProductPotentialConf* const product_potential_confs);
-        void copyToCUDA(Vector3* forceInternal, Vector3* pos, Vector3* mom);
-        void copyToCUDA(Vector3* forceInternal, Vector3* pos, Vector3* mom, float* random);
-	
-	// void createBondList(int3 *bondList);
-	void copyBondedListsToGPU(int3 *bondList, int4 *angleList, int4 *dihedralList, int *dihedralPotList, int4 *bondAngleList);
-	    
-	//MLog: because of the move of a lot of private variables, some functions get starved necessary memory access to these variables, below is a list of functions that return the specified private variable.
-    std::vector<Vector3*> getPos_d()
-	{
-		return pos_d;
-	}
-        Vector3* getMom_d() const
-        {
-            return mom_d;
-        }
-        float* getRan_d()
-        {
-            return ran_d;
-        }
-
-    std::vector<Vector3*> getForceInternal_d()
-	{
-		return forceInternal_d;
-	}
-	void setForceInternalOnDevice(Vector3* f);
-
-	int* getType_d()
-	{
-		return type_d;
-	}
-
-	Bond* getBonds_d()
-	{
-		return bonds_d;
-	}
-
-	int2* getBondMap_d()
-	{
-		return bondMap_d;
-	}
-
-	Exclude* getExcludes_d()
-	{
-		return excludes_d;
-	}
-
-	int2* getExcludeMap_d()
-	{
-		return excludeMap_d;
-	}
-
-	Angle* getAngles_d()
-	{
-		return angles_d;
-	}
-
-	Dihedral* getDihedrals_d()
-	{
-		return dihedrals_d;
-	}
-
-	int3* getBondList_d()
-	{
-		return bondList_d;
-	}
-	
-        float* getEnergy()
-        {
-            return energies_d;
-        }
-    
-    void clear_force() { 
-	for (std::size_t i = 0; i < gpuman.gpus.size(); ++i) {
-	    gpuman.use(i);
-	    gpuErrchk(cudaMemsetAsync((void*)(forceInternal_d[i]),0,(num+numGroupSites)*numReplicas*sizeof(Vector3)));
-	}
-	gpuman.use(0);		// TODO move to a paradigm where gpu0 is not preferentially treated 
-    }
-    void clear_energy() { 
-	gpuErrchk(cudaMemsetAsync((void*)(energies_d), 0, sizeof(float)*(num+numGroupSites)*numReplicas)); // TODO make async
-    }
-
-	HOST DEVICE
-	static EnergyForce coulombForce(Vector3 r, float alpha,float start, float len);
-
-	HOST DEVICE
-	static EnergyForce coulombForceFull(Vector3 r, float alpha);
-
-	HOST DEVICE
-	static EnergyForce softcoreForce(Vector3 r, float eps, float rad6);
-
-private:
-	static GPUManager gpuman;
-
-	// Configuration* c;
-	int numReplicas;
-	int num;
-	int numParts;
-	int num_rb_attached_particles;
-	int numBonds;
-	int numExcludes;
-	int numTabBondFiles;
-	int numAngles;
-	int numTabAngleFiles;
-	int numDihedrals;
-	int numTabDihedralFiles;
-
-	int numGroupSites;
-	int* comSiteParticles;
-	int* comSiteParticles_d;
-
-	float *tableEps, *tableRad6, *tableAlpha;
-	TabulatedPotential **tablePot; // 100% on Host 
-	TabulatedPotential **tableBond;
-	TabulatedAnglePotential **tableAngle;
-	TabulatedDihedralPotential **tableDihedral;
-	const BaseGrid* sys;
-	float switchStart, switchLen, electricConst, cutoff2;
-	CellDecomposition decomp;
-	int numTablePots;
-	float energy;
-
-	// Device Variables
-    std::vector<BaseGrid*> sys_d;
-	CellDecomposition* decomp_d;
-	float *energies_d;
-	float *tableEps_d, *tableRad6_d, *tableAlpha_d;
-	int gridSize;
-	// TabulatedPotential **tablePot_d, **tablePot_addr;
-	// We use this ugly approach because the array of tabulatePotentials may be sparse... but it probably won't be large enough to cause problems if we allocate more directly
-	std::vector<TabulatedPotential**> tablePot_addr; // per-gpu vector of host-allocated device pointers
-	std::vector<TabulatedPotential**> tablePot_d; // per-gpu vector of device-allocated device pointers
-
-	TabulatedPotential **tableBond_d, **tableBond_addr;
-	TabulatedAnglePotential **tableAngle_d, **tableAngle_addr;
-	TabulatedDihedralPotential **tableDihedral_d, **tableDihedral_addr;
-
-	// Pairlists
-	float pairlistdist2;
-    std::vector<int2*> pairLists_d;
-    std::vector<cudaTextureObject_t> pairLists_tex;
-
-    std::vector<int*> pairTabPotType_d;
-    std::vector<cudaTextureObject_t> pairTabPotType_tex;
-
-    int numPairs;
-    std::vector<int*> numPairs_d;
-
-        //Han-Yi Chou
-        int *CellNeighborsList;	
-	//MLog: List of variables that need to be moved over to ComputeForce class. Members of this list will be set to static to avoid large alterations in working code, thereby allowing us to access these variables easily.
-	cudaTextureObject_t neighbors_tex;
-
-	//BrownianParticleType* part;
-	//float electricConst;
-	//int fullLongRange;
-        std::vector<Vector3*> pos_d;
-	std::vector<cudaTextureObject_t> pos_tex;
-        Vector3* mom_d;
-        float*   ran_d;
-
-	std::vector<Vector3*> forceInternal_d; // vector for multigpu
-	int* type_d; 
-
-	Bond* bonds_d; 
-	int2* bondMap_d; 
-
-	Exclude* excludes_d; 
-	int2* excludeMap_d; 
-
-	Angle* angles_d;
-	Dihedral* dihedrals_d;
-
-	int numBondAngles;
-	BondAngle* bondAngles_d;
-	int4* bondAngleList_d;
-
-    int numProductPotentials;
-    float** simple_potential_pots_d;
-    SimplePotential* simple_potentials_d;
-    int* product_potential_particles_d;
-    SimplePotential* product_potentials_d;
-    uint2* product_potential_list_d;
-    unsigned short* productCount_d;
-
-	int3* bondList_d;
-	int4* angleList_d;
-	int4* dihedralList_d;
-	int* dihedralPotList_d;
-
-	int numRestraints;
-	int* restraintIds_d;
-	Vector3* restraintLocs_d;
-	float* restraintSprings_d;
-
-};
diff --git a/src/ComputeGridGrid.cu b/src/ComputeGridGrid.cu
deleted file mode 100644
index f2eb6fde0bd9802b89537f39af10efd9f32618df..0000000000000000000000000000000000000000
--- a/src/ComputeGridGrid.cu
+++ /dev/null
@@ -1,203 +0,0 @@
-// Included in RigidBodyController.cu
-#include "ComputeGridGrid.cuh"
-#include "RigidBodyGrid.h"
-#include "CudaUtil.cuh"
-//RBTODO: add __restrict__, benchmark (Q: how to restrict member data?)
-
-class GridPositionTransformer {
-public:
-    __device__ GridPositionTransformer(const Vector3 o, const Vector3 c, BaseGrid* s) :
-	o(o), c(c), s(s) { }
-    __device__ inline Vector3 operator() (Vector3 pos) const {
-	return s->wrapDiff(pos + o) + c;
-    }
-private:
-    const Vector3 o;
-    const Vector3 c;
-    const BaseGrid* s;
-};
-
-//class PmfPositionTransformer : public BasePositionTransformer {
-class PmfPositionTransformer {
-public:
-    __device__ PmfPositionTransformer(const Vector3 o) : o(o) { }
-    __device__ inline Vector3 operator() (Vector3 pos) const {
-	return pos + o;
-    }
-private:
-    const Vector3 o;
-};
-
-template <typename T>
-__device__
-inline void common_computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u, const Matrix3 basis_rho, const Matrix3 basis_u_inv, const T& transformer,
-					ForceEnergy* retForce, Vector3 * retTorque, int scheme)
-{
-
-	extern __shared__ ForceEnergy s[];
-	ForceEnergy *force = s;
-	//Vector3 *torque = &s[NUMTHREADS];
-        ForceEnergy *torque = &s[NUMTHREADS];
-
-  // RBTODO: http://devblogs.nvidia.com/parallelforall/cuda-pro-tip-write-flexible-kernels-grid-stride-loops
-	const int tid = threadIdx.x;
-	const int r_id = blockIdx.x * blockDim.x + threadIdx.x;
-
-	force[tid] = ForceEnergy(0.f,0.f);
-	torque[tid] = ForceEnergy(0.f,0.f);
-	if (r_id < rho->getSize()) { // skip threads with nothing to do
-		// RBTODO: reduce registers used;
-		//   commenting out interpolateForceD still uses ~40 registers
-		//   -- the innocuous-looking fn below is responsible; consumes ~17 registers!
-	    Vector3 r_pos= rho->getPosition(r_id); /* i,j,k value of voxel */
-
-	    r_pos = basis_rho.transform( r_pos );
-	    const Vector3 u_ijk_float = basis_u_inv.transform( transformer( r_pos ) );
-		// RBTODO: Test for non-unit delta
-		/* Vector3 tmpf  = Vector3(0.0f); */
-		/* float tmpe = 0.0f; */
-		/* const ForceEnergy fe = ForceEnergy( tmpf, tmpe); */
-
-                ForceEnergy fe;
-                if(!scheme)
-		    fe = u->interpolateForceDLinearly( u_ijk_float ); /* in coord frame of u */
-                else
-                    fe = u->interpolateForceD( u_ijk_float );
-
-		force[tid] = fe;
-                //force[tid].e = fe.e;
-
-		const float r_val = rho->val[r_id]; /* maybe move to beginning of function?  */
-		force[tid].f = basis_u_inv.transpose().transform( r_val*(force[tid].f) ); /* transform to lab frame, with correct scaling factor */
-                force[tid].e = r_val;
-
-		// Calculate torque about origin_rho in the lab frame
-		torque[tid].f = r_pos.cross(force[tid].f);
-	}
-
-	// Reduce force and torques
-	// http://www.cuvilib.com/Reduction.pdf
-	// RBTODO optimize further, perhaps
-	// assert( NUMTHREADS==32 || NUMTHREADS==64 || NUMTHREADS==128 || NUMTHREADS==256 || NUMTHREADS==512 );
-	__syncthreads();
-	for (int offset = blockDim.x/2; offset > 0; offset >>= 1) {
-		if (tid < offset) {
-			int oid = tid + offset;
-                        //if(get_energy)
-                            //force[tid].e = force[tid].e + force[oid].e;
-			force[tid] = force[tid] + force[oid];
-			torque[tid] = torque[tid] + torque[oid];
-		}
-		__syncthreads();
-	}
-
-	if (tid == 0) {
-		atomicAdd( retForce, force[0] ); // apply force to particle
-		atomicAdd( retTorque, torque[0].f ); // apply force to particle
-	}
-}
-
-__global__
-void computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u, const Matrix3 basis_rho, const Matrix3 basis_u_inv, const Vector3 origin_rho_minus_center_u, const Vector3 center_u_minus_origin_u,
-			ForceEnergy* retForce, Vector3 * retTorque, int scheme, BaseGrid* sys_d)
-{
-    GridPositionTransformer transformer = GridPositionTransformer(origin_rho_minus_center_u, center_u_minus_origin_u, sys_d);
-    common_computeGridGridForce<GridPositionTransformer>(rho, u, basis_rho, basis_u_inv, transformer, retForce, retTorque, scheme);
-}
-
-__global__
-void computePmfGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u, const Matrix3 basis_rho, const Matrix3 basis_u_inv, const Vector3 origin_rho_minus_origin_u,
-			 ForceEnergy* retForce, Vector3 * retTorque, int scheme)
-{
-    PmfPositionTransformer transformer = PmfPositionTransformer(origin_rho_minus_origin_u);
-    common_computeGridGridForce<PmfPositionTransformer>(rho, u, basis_rho, basis_u_inv, transformer, retForce, retTorque, scheme);
-}
-
-__global__
-void computePartGridForce(const Vector3* __restrict__ pos, Vector3* particleForce,
-				const int num, const int* __restrict__ particleIds, 
-				const RigidBodyGrid* __restrict__ u,
-				const Matrix3 basis_u_inv, const Vector3 center_u, const Vector3 origin_u,
-				ForceEnergy* __restrict__ retForceTorque, float* __restrict__ energy, bool get_energy, int scheme, BaseGrid* sys_d) {
-
-	extern __shared__ ForceEnergy s[];
-	ForceEnergy *force  = s;
-	ForceEnergy *torque = &s[NUMTHREADS];
-  	
-	const int tid = threadIdx.x;
-	const int i = blockIdx.x * blockDim.x + threadIdx.x;
-
-	force[tid]  = ForceEnergy(0.f, 0.f);
-	torque[tid] = ForceEnergy(0.f,0.f);
-	if (i < num) {
-		const int id = particleIds[i];
-		Vector3 p = sys_d->wrapDiff(pos[id]-center_u) + center_u - origin_u;
-		const Vector3 u_ijk_float = basis_u_inv.transform( p );
-
-                ForceEnergy fe;
-                if(!scheme)                       
-		    fe = u->interpolateForceDLinearly( u_ijk_float ); /* in coord frame of u */
-                else
-                    fe = u->interpolateForceD( u_ijk_float );
-                
-		force[tid] = fe;
-                //force[tid].e = fe.e;
-                if(get_energy)
-                    atomicAdd(&energy[id], fe.e);
-		force[tid].f = basis_u_inv.transpose().transform( force[tid].f ); /* transform to lab frame */
-		atomicAdd( &particleForce[id], force[tid].f ); // apply force to particle
-		
-		// Calculate torque about origin_u in the lab frame
-		torque[tid].f = p.cross(force[tid].f);				// RBTODO: test sign
-	}
-
-	// Reduce force and torques
-	// assert( NUMTHREADS==32 || NUMTHREADS==64 || NUMTHREADS==128 || NUMTHREADS==256 || NUMTHREADS==512 );
-	__syncthreads();
-	for (int offset = blockDim.x/2; offset > 0; offset >>= 1) {
-		if (tid < offset) {
-			int oid = tid + offset;
-                        //if(get_energy)
-                            //force[tid].e = force[tid].e + force[oid].e;
-			force[tid] = force[tid] + force[oid];
-			torque[tid] = torque[tid] + torque[oid];
-		}
-		__syncthreads();
-	}
-	
-	if (tid == 0) {
-		retForceTorque[2*blockIdx.x] = force[0];
-		retForceTorque[2*blockIdx.x+1] = torque[0];
-	}
-}
-
-
-__global__
-void createPartlist(const Vector3* __restrict__ pos,
-				const int numTypeParticles, const int* __restrict__ typeParticles_d,
-		    const int attached_particle_start, const int attached_particle_end,
-				int* numParticles_d, int* particles_d,
-				const Vector3 gridCenter, const float radius2, BaseGrid* sys_d) {
-	const int tid = threadIdx.x;
-	const int warpLane = tid % WARPSIZE; /* RBTODO: optimize */
-	
-	const int i = blockIdx.x * blockDim.x + threadIdx.x;
-	if (i < numTypeParticles) {
-		int aid = typeParticles_d[i];
-		if (aid < attached_particle_start || aid >= attached_particle_end) { 
-		    float dist = (sys_d->wrapDiff(pos[aid] - gridCenter)).length2();
-		
-		    if (dist <= radius2) {
-			int tmp = atomicAggInc(numParticles_d, warpLane);
-			particles_d[tmp] = aid;
-		    }
-		}
-	}
-}		
-
-__global__
-void printRigidBodyGrid(const RigidBodyGrid* rho) {
-  printf("Printing an RB of size %d\n",rho->size);
-  for (int i=0; i < rho->size; i++)
-	printf("  val[%d] = %f\n", i, rho->val[i]);
-}
diff --git a/src/ComputeGridGrid.cuh b/src/ComputeGridGrid.cuh
deleted file mode 100644
index 27fc32ccb6b3b3fd43dd005fd9eda7eb1689c8f8..0000000000000000000000000000000000000000
--- a/src/ComputeGridGrid.cuh
+++ /dev/null
@@ -1,37 +0,0 @@
-// Included in RigidBodyController.cu
-#pragma once
-#include "useful.h"
-#define NUMTHREADS 128
-#define WARPSIZE 32
-
-class RigidBodyGrid;
-class BaseGrid;
-
-extern __global__
-void computeGridGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u,
-				const Matrix3 basis_rho, const Matrix3 basis_u_inv,
-				const Vector3 origin_rho_minus_center_u, const Vector3 center_u_minus_origin_u,
-				ForceEnergy* retForce, Vector3 * retTorque, int scheme, BaseGrid* sys_d);
-
-extern __global__
-void computePmfGridForce(const RigidBodyGrid* rho, const RigidBodyGrid* u,
-			 const Matrix3 basis_rho, const Matrix3 basis_u_inv,
-			 const Vector3 origin_rho_minus_origin_u,
-			 ForceEnergy* retForce, Vector3 * retTorque, int scheme);
-
-extern __global__
-void computePartGridForce(const Vector3* __restrict__ pos, Vector3* particleForce,
-				const int num, const int* __restrict__ particleIds,
-				const RigidBodyGrid* __restrict__ u,
-				const Matrix3 basis_u_inv, const Vector3 center_u, const Vector3 origin_u,
-				ForceEnergy* __restrict__ retForceTorque, float* energy, bool get_energy, int scheme, BaseGrid* sys_d);
-
-extern __global__
-void createPartlist(const Vector3* __restrict__ pos,
-				const int numTypeParticles, const int* __restrict__ typeParticles_d,
-		    const int attached_particle_start, const int attached_particle_end,
-				int* numParticles_d, int* particles_d,
-				const Vector3 gridCenter, const float radius2, BaseGrid* sys_d);
-	
-extern __global__
-void printRigidBodyGrid(const RigidBodyGrid* rho);
diff --git a/src/ComputeJustForce.h b/src/ComputeJustForce.h
deleted file mode 100644
index 8397e9b24e9e0c3d41722617794a39875bbdf3ea..0000000000000000000000000000000000000000
--- a/src/ComputeJustForce.h
+++ /dev/null
@@ -1,256 +0,0 @@
-///////////////////////////////////////////////////////////////////////
-// Brownian dynamics base class
-// Author: Jeff Comer <jcomer2@illinois.edu>
-#ifndef COMPUTEFORCE_H
-#define COMPUTEFORCE_H
-
-#include "BaseGrid.h"
-#include "BrownianParticleType.h"
-#include "CellDecomposition.h"
-#include "TabulatedPotential.h"
-
-class ComputeForce {
-public:
-  ComputeForce(int num0, const BrownianParticleType* part, int numParts0, const BaseGrid* g, float switchStart0, float switchLen0, float electricConst0) : 
-    num(num0),
-    numParts(numParts0),
-    sys(g), switchStart(switchStart0),
-    switchLen(switchLen0), electricConst(electricConst0),
-    cutoff2((switchLen0+switchStart0)*(switchLen0+switchStart0)), 
-    decomp(g->getBox(), g->getOrigin(), switchStart0+switchLen0) {
-    
-    // Allocate the parameter tables.
-    tableEps = new float*[numParts];
-    tableRad6 = new float*[numParts];
-    tableAlpha = new float*[numParts];
-    for (int i = 0; i < numParts; i++) {
-      tableEps[i] = new float[numParts];
-      tableRad6[i] = new float[numParts];
-      tableAlpha[i] = new float[numParts];
-    }    
-
-    // Form the parameter tables.
-    makeTables(part);
-    tablePot = new TabulatedPotential*[numParts*numParts];
-    for (int i = 0; i < numParts*numParts; i++) tablePot[i] = NULL;
-    
-    // Make the cell decomposition.
-    neigh = new IndexList[num];
-  }
-
-  ~ComputeForce() {
-    for (int i = 0; i < numParts; i++) {
-      delete[] tableEps[i];
-      delete[] tableRad6[i];
-      delete[] tableAlpha[i];
-    }
-    delete[] tableEps;
-    delete[] tableRad6;
-    delete[] tableAlpha;
-
-    for (int j = 0; j < numParts*numParts; j++) {
-      if (tablePot[j] != NULL) {
-	delete tablePot[j];
-	tablePot[j] = NULL;
-      }
-    }
-    delete[] tablePot;
-
-    delete[] neigh;
-  }
-
-  void updateNumber(const Vector3* pos, int newNum) {
-    if (newNum == num || newNum < 0) return;
-
-    // Set the new number.
-    num = newNum;
-
-    // Reallocate the neighbor list.
-    delete[] neigh;
-    neigh = new IndexList[num];
-    decompose(pos);
-  }
-
-  void makeTables(const BrownianParticleType* part) {
-    for (int i = 0; i < numParts; i++) {
-      for (int j = 0; j < numParts; j++) {
-	tableEps[i][j] = sqrtf(part[i].eps*part[j].eps);
-	float r = part[i].radius + part[j].radius;
-	tableRad6[i][j] = r*r*r*r*r*r;
-	tableAlpha[i][j] = electricConst*part[i].charge*part[j].charge;
-      }
-    }
-  }
-
-  bool addTabulatedPotential(String fileName, int type0, int type1) {
-    if (type0 < 0 || type0 >= numParts) return false;
-    if (type1 < 0 || type1 >= numParts) return false;
-
-    int ind = type0 + type1*numParts;
-    int ind1 = type1 + type0*numParts;
-
-    if (tablePot[ind] != NULL) {
-      delete tablePot[ind];
-      tablePot[ind] = NULL;
-    }
-    if (tablePot[ind1] != NULL) delete tablePot[ind1];
-    
-    tablePot[ind] = new TabulatedPotential(fileName);
-    tablePot[ind1] = new TabulatedPotential(*tablePot[ind]);
-
-    return true;
-  }
-
-  void decompose(const Vector3* pos) {
-    // Reset the cell decomposition.
-    decomp.clearCells();
-    decomp.decompose(pos, num);
-
-    // Regenerate the neighbor lists.
-    for (int i = 0; i < num; i++) neigh[i] = decomp.neighborhood(pos[i]);
-  }
-
-  IndexList decompDim() const {
-    IndexList ret;
-    ret.add(decomp.getNx());
-    ret.add(decomp.getNy());
-    ret.add(decomp.getNz());
-    return ret;
-  }
-
-  float decompCutoff() const {
-    return decomp.getCutoff();
-  }
-
-  IndexList neighborhood(Vector3 r) const {
-    return decomp.neighborhood(r);
-  }
-
-  void computeFull(Vector3* force, const Vector3* pos, const int* type) const {
-    // Zero the force.
-    for (int i = 0; i < num; i++) force[i] = Vector3(0.0);
-    
-    // Compute the force for all pairs.
-    for (int i = 0; i < num-1; i++) {
-      for (int j = i + 1; j < num; j++) {
-	float alpha = tableAlpha[type[i]][type[j]];
-	float eps = tableEps[type[i]][type[j]];
-	float rad6 = tableRad6[type[i]][type[j]];
-	Vector3 dr = sys->wrapDiff(pos[j] - pos[i]);
-
-	Vector3 fc = coulombForceFull(dr, alpha);
-	Vector3 fh = softcoreForce(dr, eps, rad6);
-
-	force[i] += fc + fh;
-	force[j] -= fc + fh;
-      }
-    }
-  }
-
-  void compute(Vector3* force, const Vector3* pos, const int* type) const {
-    for (int i = 0; i < num; i++) {
-      // Zero the force.
-      force[i] = Vector3(0.0);
-
-      // Loop through the neighbors.
-      for (int n = 0; n < neigh[i].length(); n++) {
-	int j = neigh[i].get(n);
-	if (j == i) continue;
-
-	float alpha = tableAlpha[type[i]][type[j]];
-	float eps = tableEps[type[i]][type[j]];
-	float rad6 = tableRad6[type[i]][type[j]];
-	Vector3 dr = sys->wrapDiff(pos[j] - pos[i]);
-
-	Vector3 fc = coulombForceFull(dr, alpha);
-	Vector3 fh = softcoreForce(dr, eps, rad6);
-
-	force[i] += fc + fh;
-      }
-    }
-  }
-
-  void computeTabulated(Vector3* force, const Vector3* pos, const int* type) {
-    for (int i = 0; i < num; i++) {
-      // Zero the force.
-      force[i] = Vector3(0.0);
-      
-      // Loop through the neighbors.
-      for (int n = 0; n < neigh[i].length(); n++) {
-	int j = neigh[i].get(n);
-	if (j == i) continue;
-	int ind = type[i] + type[j]*numParts;
-	if (tablePot[ind] == NULL) continue;
-	Vector3 dr = sys->wrapDiff(pos[j] - pos[i]);
-	
-	if (dr.length2() > cutoff2) continue;
-	Vector3 ft = tablePot[ind]->computeForce(dr);
-
-	force[i] += ft;
-      }
-    }
-  }
-
-  void computeTabulatedFull(Vector3* force, const Vector3* pos, const int* type) {
-    // Zero the force.
-    for (int i = 0; i < num; i++) force[i] = Vector3(0.0);
-    
-    // Compute the force for all pairs.
-    for (int i = 0; i < num-1; i++) {
-      for (int j = i + 1; j < num; j++) {
-	int ind = type[i] + type[j]*numParts;
-	if (tablePot[ind] == NULL) continue;
-	Vector3 dr = sys->wrapDiff(pos[j] - pos[i]);
-
-	Vector3 ft = tablePot[ind]->computeForce(dr);
-
-	force[i] += ft;
-	force[j] -= ft;
-      }
-    }
-  }
-
-  static Vector3 coulombForce(Vector3 r, float alpha, float start, float len) {
-    float d = r.length();
-
-    if (d >= start + len) return Vector3(0.0);
-    if (d <= start) {
-      Vector3 force = -alpha/(d*d*d)*r;
-      return force;
-    }
-
-    // Switching.
-    float c = alpha/(start*start);
-    Vector3 force = -c*(1.0 - (d - start)/len)/d*r;
-    return force;
-  }
-
-  static Vector3 coulombForceFull(Vector3 r, float alpha) {
-    float d = r.length();
-    
-    return -alpha/(d*d*d)*r;
-  }
-
-  static Vector3 softcoreForce(Vector3 r, float eps, float rad6) {
-    const float d2 = r.length2();
-    const float d6 = d2*d2*d2;
-  
-    if (d6 < rad6) return (-12*eps*(rad6*rad6/(d6*d6*d2) - rad6/(d6*d2)))*r;
-    return Vector3(0.0);
-  }
-
-private:
-  int num;
-  int numParts;
-  float** tableEps;
-  float** tableRad6;
-  float** tableAlpha;
-  const BaseGrid* sys;
-  IndexList* neigh;
-  float switchStart, switchLen, electricConst, cutoff2;
-  CellDecomposition decomp;
-  TabulatedPotential** tablePot;
-  int numTablePots;
-  float energy;
-};
-#endif
diff --git a/src/Configuration.cpp b/src/Configuration.cpp
deleted file mode 100644
index 0df5f36bc5b567354d634450903ee9f51c22186a..0000000000000000000000000000000000000000
--- a/src/Configuration.cpp
+++ /dev/null
@@ -1,2759 +0,0 @@
-#include "Configuration.h"
-#include "Angle.h"
-#include "Dihedral.h"
-#include "Restraint.h"
-#include "ProductPotential.h"
-#include <cmath>
-#include <cassert>
-#include <stdlib.h>     /* srand, rand */
-#include <time.h>       /* time */
-#include <string>
-#include <iostream>
-using namespace std;
-
-#ifndef gpuErrchk
-#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
-inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
-   if (code != cudaSuccess) {
-      fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), __FILE__, line);
-      if (abort) exit(code);
-   }
-}
-#endif
-
-namespace
-{
-    template<class T> 
-    void convertString(const String& token, void* data)
-    {
-        exit(1);
-    }
-
-    template<> 
-    void convertString<float>(const String& token, void* data)
-    {
-        float* tmp = (float*)data;
-        *tmp = atof(token);
-    }
-
-    template<>
-    void convertString<String>(const String& token, void* data)
-    {
-        String* tmp = (String*)data;
-        *tmp = token;
-    }
-
-    template<class T>
-    void stringToArray(String* str, int& size, T** array)
-    {
-        register int num;
-        String *token;
-        num =  str->tokenCount();
-        size = num;
-        *array = new T[num];
-        token  = new String[num];
-        str->tokenize(token);
-
-        for(int i = 0; i < num; ++i)
-            convertString<T>(token[i], (*array)+i);
-        delete [] token;
-    }
-}
-Configuration::Configuration(const char* config_file, int simNum, bool debug) :
-		simNum(simNum) {
-	// Read the parameters.
-	//type_d = NULL;
-	kTGrid_d = NULL;
-	//bonds_d = NULL;
-	//bondMap_d = NULL;
-	//excludes_d = NULL;
-	//excludeMap_d = NULL;
-	//angles_d = NULL;
-	//dihedrals_d = NULL;
-	setDefaults();
-	readParameters(config_file);
-
-	// Get the number of particles
-	// printf("\nCounting particles specified in the ");
-	if (restartCoordinates.length() > 0) {
-    // Read them from the restart file.
-	    // printf("restart file.\n");
-		num = countRestart(restartCoordinates.val());
-		if (copyReplicaCoordinates <= 0) {
-		    num /= simNum;
-		}
-  } else {
-    if (readPartsFromFile) readAtoms();
-    if (numPartsFromFile > 0) {
-      // Determine number of particles from input file (PDB-style)
-	// printf("input file.\n");
-      num = numPartsFromFile;
-    } else {
-      // Sum up all particles in config file
-	// printf("configuration file.\n");
-      //int num0 = 0;
-      num = 0;
-      for (int i = 0; i < numParts; i++) num += part[i].num;
-      //num = num0;
-    }
-  } // end result: variable "num" is set
-
-	// Count particles associated with rigid bodies
-	num_rb_attached_particles = 0;
-	if (numRigidTypes > 0) {
-	    // grow list of rbs
-	    for (int i = 0; i < numRigidTypes; i++) {
-		RigidBodyType &rbt = rigidBody[i];
-		rbt.attach_particles();
-		num_rb_attached_particles += rbt.num * rbt.num_attached_particles();
-	    }
-	}
-	assert( num_rb_attached_particles == 0 || simNum == 1 ); // replicas not yet implemented
-	// num = num+num_rb_attached_particles;
-
-
-	// Set the number capacity
-	printf("\n%d particles\n", num);
-	printf("%d particles attached to RBs\n", num_rb_attached_particles);
-
-	if (numCap <= 0) numCap = numCapFactor*num; // max number of particles
-	if (numCap <= 0) numCap = 20;
-
-	if (readGroupSitesFromFile) readGroups();
-	printf("%d groups\n", numGroupSites);
-
-	// Allocate particle variables.
-	// Each replica works with num+num_rb_attached_particles in array
-	pos = new Vector3[ (num+num_rb_attached_particles) * simNum];
-
-        //Han-Yi Chou
-        if (ParticleDynamicType == String("Langevin") || ParticleDynamicType == String("NoseHooverLangevin"))
-            momentum = new Vector3[(num+num_rb_attached_particles) * simNum];
-
-	type   = new int[(num+num_rb_attached_particles) * simNum];
-	serial = new int[(num+num_rb_attached_particles) * simNum];
-	posLast = new Vector3[(num+num_rb_attached_particles) * simNum];
-
-	{
-	    int pidx = 0;
-	    for (int i = 0; i < numRigidTypes; i++) { // Loop over RB types
-		RigidBodyType &rbt = rigidBody[i];
-		for (int j = 0; j < rbt.num; ++j) { // Loop over RBs
-		    for (const int& t: rbt.get_attached_particle_types()) {
-			type[num+pidx] = t;
-			serial[num+pidx] = num+pidx;
-			pidx++;
-		    }
-		}
-	    }
-	}	
-	
-        //Han-Yi Chou
-        if(ParticleDynamicType == String("Langevin") || ParticleDynamicType == String("NoseHooverLangevin"))
-           momLast = new Vector3[(num+num_rb_attached_particles) * simNum];
-	name = new String[(num+num_rb_attached_particles) * simNum];
-	currSerial = 0;
-
-
-  // Now, load the coordinates
-	loadedCoordinates = false;
-        loadedMomentum    = false; //Han-Yi Chou
-
-  //I need kT here Han-Yi Chou
-  kT = temperature * 0.0019872065f; // `units "k K" "kcal_mol"`
-  //kT = temperature * 0.593f;
- // If we have a restart file - use it
-	if (restartCoordinates.length() > 0) {
-		loadRestart(restartCoordinates.val()); 
-		printf("Loaded %d restart coordinates from `%s'.\n", num, restartCoordinates.val());
-		printf("Particle numbers specified in the configuration file will be ignored.\n");
-		loadedCoordinates = true;
-                //Han-Yi Chou Langevin dynamic
-                if (ParticleDynamicType == String("Langevin") || ParticleDynamicType == String("NoseHooverLangevin"))
-                {
-                    if (restartMomentum.length() > 0)
-                    {
-                        loadRestartMomentum(restartMomentum.val());
-                        printf("Loaded %d restart momentum from `%s'.\n", num, restartMomentum.val());
-                        printf("Particle numbers specified in the configuration file will be ignored.\n");
-                        loadedMomentum = true;
-                    }
-                    else
-                    {
-                        printf("Warning: There is no restart momentum file when using restart coordinates in Langevin Dynamics\n");
-                        printf("Initialize with Boltzmann distribution\n");
-                        loadedMomentum = Boltzmann(COM_Velocity, num * simNum);
-                    }
-               }
-	} 
-        else 
-        {
-		// Load coordinates from a file?
-		if (numPartsFromFile > 0) {
-			loadedCoordinates = true;
-			for (int i = 0; i < num; i++) {
-				int numTokens = partsFromFile[i].tokenCount();
-
-				// Break the line down into pieces (tokens) so we can process them individually
-				String* tokenList = new String[numTokens];
-				partsFromFile[i].tokenize(tokenList);
-
-				int currType = find_particle_type(tokenList[2]);
-				if (currType == -1) {
-				    printf("Error: Unable to find particle type %s\n", tokenList[2].val());
-				    exit(1);
-
-				}
-				for (int j = 0; j < numParts; j++)
-					if (tokenList[2] == part[j].name)
-						currType = j;
-
-				for (int s = 0; s < simNum; ++s)
-				    type[i + s*num] = currType;
-
-				serial[i] = currSerial++;
-
-				pos[i] = Vector3(atof(tokenList[3].val()), atof(tokenList[4].val()), atof(tokenList[5].val()));
-                                //Han-Yi Chou
-                                if (ParticleDynamicType == String("Langevin") || ParticleDynamicType == String("NoseHooverLangevin"))
-                                {
-                                    loadedMomentum = true;
-                                    if(numTokens == 9)
-                                        momentum[i] = Vector3(atof(tokenList[6].val()), atof(tokenList[7].val()), atof(tokenList[8].val()));
-                                    else
-                                    {
-                                        printf("Error occurs in %s at line %d. Please specify momentum\n", __FILE__, __LINE__);
-                                        assert(1==2);
-                                    }
-                                }
-			}
-			delete[] partsFromFile;
-			partsFromFile = NULL;
-                        //Han-Yi Chou
-                        for(int i = 1; i < simNum; ++i)
-                            for(int j = 0; j < num; ++j)
-                                serial[j + num * i] = currSerial++;
-                }
-                else 
-                {
-	            // Not loading coordinates from a file
-	            populate();
-	            if (inputCoordinates.length() > 0) 
-                    {
-		        printf("Loading coordinates from %s ... ", inputCoordinates.val());
-			loadedCoordinates = loadCoordinates(inputCoordinates.val());
-			if (loadedCoordinates)
-			    printf("done!\n");
-	            }
-                    if(ParticleDynamicType == String("Langevin") || ParticleDynamicType == String("NoseHooverLangevin"))
-                    {
-                        if (inputMomentum.length() > 0) 
-                        {
-                            printf("Loading momentum from %s ... ", inputMomentum.val());
-                            loadedMomentum = loadMomentum(inputMomentum.val());
-                            if (loadedMomentum)
-                                printf("done!\n");
-                        }
-                        else
-                            loadedMomentum = Boltzmann(COM_Velocity, (num * simNum));
-                    }
-                }
-            }
-        //Check initialize momentum
-        //if(ParticleDynamicType == String("Langevin"))
-            //PrintMomentum();
-	/* Initialize exclusions */
-	excludeCapacity = 256;
-	numExcludes = 0;
-	excludes = new Exclude[excludeCapacity];
-
-	if (readExcludesFromFile) readExcludes();
-	if (readBondsFromFile) readBonds();
-	if (readAnglesFromFile) readAngles();
-	if (readDihedralsFromFile) readDihedrals();
-	if (readRestraintsFromFile) readRestraints();
-	if (readBondAnglesFromFile) readBondAngles();
-	if (readProductPotentialsFromFile) readProductPotentials();
-
-
-	if (temperatureGridFile.length() != 0) {
-		printf("\nFound temperature grid file: %s\n", temperatureGridFile.val());
-		tGrid = new BaseGrid(temperatureGridFile.val());
-		printf("Loaded `%s'.\n", temperatureGridFile.val());
-		printf("Grid size %s.\n", tGrid->getExtent().toString().val());
-
-		// TODO: ask Max Belkin what this is about and how to remove hard-coded temps
-		float ToSo = 1.0f / (295.0f * 4.634248239f); // 1 / (To * sigma(To))
-		sigmaT = new BaseGrid(*tGrid);
-		sigmaT->shift(-122.8305f);
-		sigmaT->scale(0.0269167f);
-		sigmaT->mult(*tGrid);
-		sigmaT->scale(ToSo);
-
-		kTGrid = new BaseGrid(*tGrid);
-		float factor = 0.0019872065f; // `units "k K" "kcal_mol"`
-		kTGrid->scale(factor);
-		// char outFile[256];
-		// char comment[256]; sprintf(comment,"KTGrid");
-		// sprintf(outFile,"kTGrid.dx");
-		// kTGrid->write(outFile, comment);
-	}
-
-	printf("\nFound %d particle types.\n", numParts);
-
-	printf("Loading the potential grids...\n");
-	// First load a single copy of each grid
-	for (int i = 0; i < numParts; i++) 
-        {
-	    for(int j = 0; j < part[i].numPartGridFiles; ++j)
-	    {
-		std::string fname(partGridFile[i][j].val(), partGridFile[i][j].length());
-		if (part_grid_dictionary.count( fname ) == 0)
-		{
-		    int len = fname.length();
-		    if (len >= 3 && fname[len-3]=='.' && fname[len-2]=='d' && fname[len-1]=='x')
-		    {
-			part_grid_dictionary.insert({fname, BaseGrid(fname.c_str())});
-		    }
-		    else if  (len >= 4 && fname[len-4]=='.' && fname[len-3]=='d' && fname[len-2]=='e' && fname[len-1]=='f')
-		    {
-			assert(1==2); // Throw exception because this implementation needs to be revisited
-/*                OverlordGrid* over = new OverlordGrid[part[i].numPartGridFiles];
-		  part[i].meanPmf = new float[part[i].numPartGridFiles];
-		  for(int j = 0; j < part[i].numPartGridFiles; ++j)
-		  {
-		  map = partGridFile[i][j];
-		  len = map.length();
-		  if (!(len >= 4 && map[len-4]=='.' && map[len-3]=='d' && map[len-2]=='e' && map[len-1]=='f'))
-		  {
-		  cout << "currently do not support different format " << endl;
-		  exit(1);
-		  }
-
-		  String rootGrid = OverlordGrid::readDefFirst(map);
-		  over[j] = OverlordGrid(rootGrid.val());
-		  int count = over->readDef(map);
-		  printf("Loaded system def file `%s'.\n", map.val());
-		  printf("Found %d unique grids.\n", over->getUniqueGridNum());
-		  printf("Linked %d subgrids.\n", count);
-		  part[i].meanPmf[j] = part[i].pmf[j].mean();
-		  }
-		  part[i].pmf = static_cast<BaseGrid*>(over);
-*/
-		    } else {
-			printf("WARNING: Unrecognized gridFile extension. Must be *.def or *.dx.\n");
-			exit(-1);
-		    }
-		}
-	    }
-	}
-
-	// Then assign grid addresses to particles
-	for (int i = 0; i < numParts; i++)
-        {
-	    part[i].pmf     = new BaseGrid*[part[i].numPartGridFiles];
-	    part[i].pmf_scale = new float[part[i].numPartGridFiles];
-	    part[i].meanPmf = new float[part[i].numPartGridFiles];
-	    for(int j = 0; j < part[i].numPartGridFiles; ++j)
-	    {
-		part[i].pmf[j] = &(part_grid_dictionary.find( std::string(partGridFile[i][j]) )->second);
-		part[i].pmf_scale[j] = partGridFileScale[i][j];
-		part[i].meanPmf[j] = part[i].pmf[j]->mean(); // TODO: review how this is used and decide whether to scale
-	    }
-		if (partForceXGridFile[i].length() != 0) {
-			part[i].forceXGrid = new BaseGrid(partForceXGridFile[i].val());
-			printf("Loaded `%s'.\n", partForceXGridFile[i].val());
-			printf("Grid size %s.\n", part[i].forceXGrid->getExtent().toString().val());
-		}
-
-		if (partForceYGridFile[i].length() != 0) {
-			part[i].forceYGrid = new BaseGrid(partForceYGridFile[i].val());
-			printf("Loaded `%s'.\n", partForceYGridFile[i].val());
-			printf("Grid size %s.\n", part[i].forceYGrid->getExtent().toString().val());
-		}
-
-		if (partForceZGridFile[i].length() != 0) {
-			part[i].forceZGrid = new BaseGrid(partForceZGridFile[i].val());
-			printf("Loaded `%s'.\n", partForceZGridFile[i].val());
-			printf("Grid size %s.\n", part[i].forceZGrid->getExtent().toString().val());
-		}
-
-		if (partDiffusionGridFile[i].length() != 0) {
-			part[i].diffusionGrid = new BaseGrid(partDiffusionGridFile[i].val());
-			printf("Loaded `%s'.\n", partDiffusionGridFile[i].val());
-			printf("Grid size %s.\n", part[i].diffusionGrid->getExtent().toString().val());
-		}
-
-		if (temperatureGridFile.length() != 0) {
-			if (partDiffusionGridFile[i].length() != 0) {
-				part[i].diffusionGrid->mult(*sigmaT);
-			} else {
-				part[i].diffusionGrid = new BaseGrid(*sigmaT);
-				part[i].diffusionGrid->scale(part[i].diffusion);
-				// char outFile[256];
-				// char comment[256]; sprintf(comment,"Diffusion for particle type %d", i);
-				// sprintf(outFile,"diffusion%d.dx",i);
-				// part[i].diffusionGrid->write(outFile, comment);
-			}
-		}
-           
-	}
-
-    // Load reservoir files if any
-    for (int i = 0; i < numParts; i++) {
-        if (partReservoirFile[i].length() != 0) {
-            printf("\nLoading the reservoirs for %s... \n", part[i].name.val());
-            part[i].reservoir = new Reservoir(partReservoirFile[i].val());
-            int nRes = part[i].reservoir->length();
-            printf("\t -> %d reservoir(s) found in `%s'.\n", nRes, partReservoirFile[i].val());
-        }
-    }
-
-    // Get the system dimensions
-    // from the dimensions of supplied 3D potential maps
-    if (size.length2() > 0) {	// use size if it's defined
-	if (basis1.length2() > 0 || basis2.length2() > 0 || basis3.length2() > 0)
-	    printf("WARNING: both 'size' and 'basis' were specified... using 'size'\n"); 
-	basis1 = Vector3(size.x,0,0);
-	basis2 = Vector3(0,size.y,0);
-	basis3 = Vector3(0,0,size.z);
-    }
-    if (basis1.length2() > 0 && basis2.length2() > 0 && basis3.length2() > 0) {
-	sys = new BaseGrid( Matrix3(basis1,basis2,basis3), origin, 1, 1, 1 );
-    } else {
-	// TODO: use largest system in x,y,z
-	sys = *part[0].pmf;
-    }
-    sysDim = sys->getExtent();
-
-// RBTODO: clean this mess up
-	/* // RigidBodies... */
-	/* if (numRigidTypes > 0) { */
-	/* 	printf("\nCounting rigid bodies specified in the configuration file.\n"); */
-	/* 	numRB = 0; */
-
-	/* 	// grow list of rbs */
-	/* 	for (int i = 0; i < numRigidTypes; i++) {			 */
-	/* 		numRB += rigidBody[i].num; */
-
-	/* 		std::vector<RigidBody> tmp; */
-	/* 		for (int j = 0; j < rigidBody[i].num; j++) { */
-	/* 			tmp.push_back( new RigidBody( this, rigidBody[i] ) ); */
-	/* 		} */
-
-	/* 		rbs.push_back(tmp); */
-	/* 	} */
-		// // state data
-		// rbPos = new Vector3[numRB * simNum];
-		// type = new int[numRB * simNum];
-
-	/* } */
-	/* printf("Initial RigidBodies: %d\n", numRB); */
-
-
-	// Create exclusions from the exclude rule, if it was specified in the config file
-	if (excludeRule != String("")) {
-		int oldNumExcludes = numExcludes;
-		Exclude* newExcludes = makeExcludes(bonds, bondMap, num, numBonds, excludeRule, numExcludes);
-		if (excludes == NULL) {
-			excludes = new Exclude[numExcludes];
-		} else if (numExcludes >= excludeCapacity) {
-			Exclude* tempExcludes = excludes;
-			excludes = new Exclude[numExcludes];
-			for (int i = 0; i < oldNumExcludes; i++)
-				excludes[i] = tempExcludes[i];
-			delete [] tempExcludes;
-		}
-		for (int i = oldNumExcludes; i < numExcludes; i++)
-			excludes[i] = newExcludes[i - oldNumExcludes];
-		printf("Built %d exclusions.\n",numExcludes);
-	}
-
-	{ // Add exclusions for RB attached particles
-	    std::vector<Exclude> ex;
-	    int start = num;
-	    for (int i = 0; i < numRigidTypes; i++) { // Loop over RB types
-		RigidBodyType &rbt = rigidBody[i];
-		const int nap = rbt.num_attached_particles();
-		for (int j = 0; j < rbt.num; ++j) { // Loop over RBs
-		    for (int ai = 0; ai < nap-1; ++ai) {
-			for (int aj = ai+1; aj < nap; ++aj) {
-			    ex.push_back( Exclude( ai+start, aj+start ) );
-			}
-		    }
-		    start += nap;
-		}
-	    }
-	    // copy
-	    int oldNumExcludes = numExcludes;
-	    numExcludes = numExcludes + ex.size();
-	    if (excludes == NULL) {
-		excludes = new Exclude[numExcludes];
-	    } else if (numExcludes >= excludeCapacity) {
-		Exclude* tempExcludes = excludes;
-		excludes = new Exclude[numExcludes];
-		for (int i = 0; i < oldNumExcludes; i++)
-		    excludes[i] = tempExcludes[i];
-		delete [] tempExcludes;
-	    }
-	    for (int i = oldNumExcludes; i < numExcludes; i++)
-		excludes[i] = ex[i - oldNumExcludes];
-	}
-
-	printf("Built %d exclusions.\n",numExcludes);		
-	buildExcludeMap();
-
-	// Count number of particles of each type
-	numPartsOfType = new int[numParts];
-	for (int i = 0; i < numParts; ++i) {
-		numPartsOfType[i] = 0;
-	}
-	for (int i = 0; i < num+num_rb_attached_particles; ++i) {
-		++numPartsOfType[type[i]];
-	}
-
-	// Some geometric stuff that should be gotten rid of.
-	Vector3 buffer = (sys->getCenter() + 2.0f*sys->getOrigin())/3.0f;
-	initialZ = buffer.z;
-
-	// Set the initial conditions.
-	// Do the initial conditions come from restart coordinates?
-	// inputCoordinates are ignored if restartCoordinates exist.
-	/*
-	if (restartCoordinates.length() > 0) {
-		loadRestart(restartCoordinates.val());
-		printf("Loaded %d restart coordinates from `%s'.\n", num, restartCoordinates.val());
-		printf("Particle numbers specified in the configuration file will be ignored.\n");
-	} else {
-		// Set the particle types.
-
-		// Load coordinates from a file?
-		if (numPartsFromFile > 0) {
-			for (int i = 0; i < num; i++) {
-				int numTokens = partsFromFile[i].tokenCount();
-
-				// Break the line down into pieces (tokens) so we can process them individually
-				String* tokenList = new String[numTokens];
-				partsFromFile[i].tokenize(tokenList);
-				int currType = 0;
-				for (int j = 0; j < numParts; j++)
-					if (tokenList[2] == part[j].name)
-						currType = j;
-				type[i] = currType;
-				serial[i] = currSerial;
-				currSerial++;
-
-				pos[i] = Vector3(atof(tokenList[3].val()), atof(tokenList[4].val()), atof(tokenList[5].val()));
-			}
-			if (partsFromFile != NULL) {
-				delete[] partsFromFile;
-				partsFromFile = NULL;
-			}
-		} else if (inputCoordinates.length() > 0) {
-			populate();
-			printf("Loading coordinates from %s.\n", inputCoordinates.val());
-			bool loaded = loadCoordinates(inputCoordinates.val());
-			if (loaded) 
-				printf("Loaded initial coordinates from %s.\n", inputCoordinates.val());
-		}
-	}
-	*/
-	
-
-	// Get the maximum particle radius.
-	minimumSep = 0.0f;
-	for (int i = 0; i < numParts; ++i)
-		minimumSep = std::max(minimumSep, part[i].radius);
-	minimumSep *= 2.5f; // Make it a little bigger.
-
-	// Default outputEnergyPeriod
-	if (outputEnergyPeriod < 0)
-		outputEnergyPeriod = 10 * outputPeriod;
-	
-	// If we are running with debug ON, ask the user which force computation to use
-	if (debug)
-		getDebugForce();
-
-	printf("\n");
-	switchStart = cutoff - switchLen;
-	if (fullLongRange == 0)
-		printf("Cutting off the potential from %.10g to %.10g.\n", switchStart, switchStart+switchLen);
-	
-	if (fullLongRange != 0)
-		printf("No cell decomposition created.\n");
-
-}
-
-Configuration::~Configuration() {
-	// System state
-	delete[] pos;
-        //Han-Yi Chou
-        if (ParticleDynamicType == String("Langevin") || ParticleDynamicType == String("NoseHooverLangevin"))
-            delete[] momentum;
-
-	delete[] posLast;
-        //Han-Yi Chou
-        if (ParticleDynamicType == String("Langevin") || ParticleDynamicType == String("NoseHooverLangevin"))
-            delete[] momLast;
-
-	delete[] type;
-	delete[] name;
-	
-	// Particle parameters
-	delete[] part;
-	//delete[] partGridFile;
-	//delete[] partGridFileScale;
-	for(int i = 0; i < numParts; ++i)
-        {
-            if(partGridFile[i] != NULL) 
-            {
-                delete[] partGridFile[i];
-                partGridFile[i] = NULL;
-            }
-            if(partGridFileScale[i] != NULL)
-            {
-                delete[] partGridFileScale[i];
-                partGridFileScale[i] = NULL;
-            }
-        }
-        delete [] partGridFile;
-        delete [] partGridFileScale;
-        //delete numPartGridFiles;
-	delete[] partForceXGridFile;
-	delete[] partForceYGridFile;
-	delete[] partForceZGridFile;
-	delete[] partDiffusionGridFile;
-	delete[] partReservoirFile;
-	partRigidBodyGrid.clear();
-	
-	// TODO: plug memory leaks
-	if (partsFromFile != NULL) delete[] partsFromFile;
-	if (bonds != NULL) delete[] bonds;
-	if (bondMap != NULL) delete[] bondMap;
-	if (excludes != NULL) delete[] excludes;
-	if (excludeMap != NULL) delete[] excludeMap;
-	if (angles != NULL) delete[] angles;
-	if (dihedrals != NULL) delete[] dihedrals;
-	if (bondAngles != NULL) delete[] bondAngles;
-	if (productPotentials != NULL) delete[] productPotentials;
-
-	delete[] numPartsOfType;
-	  
-	// Table parameters
-	delete[] partTableFile;
-	delete[] partTableIndex0;
-	delete[] partTableIndex1;
-
-	delete[] bondTableFile;
-
-	delete[] angleTableFile;
-
-	delete[] dihedralTableFile;
-
-	//if (type_d != NULL) {
-		//gpuErrchk(cudaFree(type_d));
-		gpuErrchk(cudaFree(sys_d));
-		gpuErrchk(cudaFree(kTGrid_d));
-		gpuErrchk(cudaFree(part_d));
-		//gpuErrchk(cudaFree(bonds_d));
-		//gpuErrchk(cudaFree(bondMap_d));
-		//gpuErrchk(cudaFree(excludes_d));
-		//gpuErrchk(cudaFree(excludeMap_d));
-		//gpuErrchk(cudaFree(angles_d));
-		//gpuErrchk(cudaFree(dihedrals_d));
-	//}
-}
-
-void Configuration::copyToCUDA() {
-    printf("Copying particle grids to GPU %d\n", GPUManager::current());
-    for (const auto& pair : part_grid_dictionary)
-    {
-	// Copy PMF
-	const BaseGrid& g = pair.second;
-	BaseGrid *g_d = g.copy_to_cuda();
-	part_grid_dictionary_d.insert({pair.first, g_d});
-	// printf("Assigning grid for %s to %p (originally %p)\n", pair.first.c_str(), (void *) part_grid_dictionary_d[pair.first], (void *) g_d);
-    }
-
-    printf("Copying particle data to GPU %d\n", GPUManager::current());
-
-	BrownianParticleType **part_addr = new BrownianParticleType*[numParts];
-
-	// Copy the BaseGrid objects and their member variables/objects
-	gpuErrchk(cudaMalloc(&part_d, sizeof(BrownianParticleType*) * numParts));
-	// TODO: The above line fails when there is not enough memory. If it fails, stop.
-	
-	for (int i = 0; i < numParts; i++) 
-        {
-		BrownianParticleType *b = new BrownianParticleType(part[i]);
-		// Copy PMF pointers
-		if (part[i].pmf != NULL) 
-                {
-		    {
-			BaseGrid** tmp_d = new BaseGrid*[part[i].numPartGridFiles];
-			BaseGrid** tmp   = new BaseGrid*[part[i].numPartGridFiles];
-			for(int j = 0; j < part[i].numPartGridFiles; ++j) {
-			    // printf("Retrieving grid for %s (at %p)\n", partGridFile[i][j].val(), (void *) part_grid_dictionary_d[std::string(partGridFile[i][j])]);
-			    tmp[j] = part_grid_dictionary_d[std::string(partGridFile[i][j])];
-			}
-			gpuErrchk(cudaMalloc(&tmp_d, sizeof(BaseGrid*)*part[i].numPartGridFiles));
-			gpuErrchk(cudaMemcpy(tmp_d, tmp, sizeof(BaseGrid*)*part[i].numPartGridFiles,
-					     cudaMemcpyHostToDevice));
-			b->pmf = tmp_d;
-		    }
-
-		    {
-			float *tmp;
-			gpuErrchk(cudaMalloc(&tmp, sizeof(float)*part[i].numPartGridFiles));
-			gpuErrchk(cudaMemcpy(tmp, part[i].pmf_scale, sizeof(float)*part[i].numPartGridFiles,
-					     cudaMemcpyHostToDevice));
-			b->pmf_scale = tmp;
-		    }
-
-		    {
-			float *tmp;
-			gpuErrchk(cudaMalloc(&tmp, sizeof(float)*part[i].numPartGridFiles));
-			gpuErrchk(cudaMemcpy(tmp, part[i].meanPmf, sizeof(float)*part[i].numPartGridFiles, 
-					     cudaMemcpyHostToDevice));
-			b->meanPmf = tmp;
-		    }
-
-		    {
-			BoundaryCondition *tmp;
-			size_t s = sizeof(BoundaryCondition)*part[i].numPartGridFiles;
-			gpuErrchk(cudaMalloc(&tmp, s));
-			gpuErrchk(cudaMemcpy(tmp, part[i].pmf_boundary_conditions, s, cudaMemcpyHostToDevice));
-			b->pmf_boundary_conditions = tmp;
-		    }
-                    
-		}
-
-		// Copy the diffusion grid
-		if (part[i].diffusionGrid != NULL) {
-		    b->diffusionGrid = part[i].diffusionGrid->copy_to_cuda();
-		} else {
-		    b->diffusionGrid = NULL;
-		}
-		
-		//b->pmf = pmf;
-		gpuErrchk(cudaMalloc(&part_addr[i], sizeof(BrownianParticleType)));
-		gpuErrchk(cudaMemcpyAsync(part_addr[i], b, sizeof(BrownianParticleType),
-				cudaMemcpyHostToDevice));
-	}
-	// RBTODO: moved this out of preceding loop; was that correct?
-	gpuErrchk(cudaMemcpyAsync(part_d, part_addr, sizeof(BrownianParticleType*) * numParts,
-				cudaMemcpyHostToDevice));
-
-	// kTGrid_d
-	kTGrid_d = NULL;
-	if (temperatureGridFile.length() > 0) {
-		gpuErrchk(cudaMalloc(&kTGrid_d, sizeof(BaseGrid)));
-		gpuErrchk(cudaMemcpyAsync(kTGrid_d, kTGrid, sizeof(BaseGrid), cudaMemcpyHostToDevice));
-	}
-
-	// type_d and sys_d
-	gpuErrchk(cudaMalloc(&sys_d, sizeof(BaseGrid)));
-	gpuErrchk(cudaMemcpyAsync(sys_d, sys, sizeof(BaseGrid), cudaMemcpyHostToDevice));
-	/*gpuErrchk(cudaMalloc(&type_d, sizeof(int) * num * simNum));
-	gpuErrchk(cudaMemcpyAsync(type_d, type, sizeof(int+num_rb_attached_particles) * num * simNum, cudaMemcpyHostToDevice));
-	
-	if (numBonds > 0) {
-		// bonds_d
-		gpuErrchk(cudaMalloc(&bonds_d, sizeof(Bond) * numBonds));
-		gpuErrchk(cudaMemcpyAsync(bonds_d, bonds, sizeof(Bond) * numBonds, cudaMemcpyHostToDevice));
-		
-		// bondMap_d
-		gpuErrchk(cudaMalloc(&bondMap_d, sizeof(int2) * num));
-		gpuErrchk(cudaMemcpyAsync(bondMap_d, bondMap, sizeof(int2) * num, cudaMemcpyHostToDevice));
-	}
-
-	if (numExcludes > 0) {
-		// excludes_d
-		gpuErrchk(cudaMalloc(&excludes_d, sizeof(Exclude) * numExcludes));
-		gpuErrchk(cudaMemcpyAsync(excludes_d, excludes, sizeof(Exclude) * numExcludes,
-				cudaMemcpyHostToDevice));
-		
-		// excludeMap_d
-		gpuErrchk(cudaMalloc(&excludeMap_d, sizeof(int2) * (num));
-		gpuErrchk(cudaMemcpyAsync(excludeMap_d, excludeMap, sizeof(int2) * num,
-				cudaMemcpyHostToDevice));
-	}
-
-	if (numAngles > 0) {
-		// angles_d
-		gpuErrchk(cudaMalloc(&angles_d, sizeof(Angle) * numAngles));
-		gpuErrchk(cudaMemcpyAsync(angles_d, angles, sizeof(Angle) * numAngles,
-				cudaMemcpyHostToDevice));
-	}
-
-	if (numDihedrals > 0) {
-		// dihedrals_d
-		gpuErrchk(cudaMalloc(&dihedrals_d, sizeof(Dihedral) * numDihedrals));
-		gpuErrchk(cudaMemcpyAsync(dihedrals_d, dihedrals,
-												 		  sizeof(Dihedral) * numDihedrals,
-														 	cudaMemcpyHostToDevice));
-	}*/
-	gpuErrchk(cudaDeviceSynchronize());
-}
-
-void Configuration::setDefaults() {
-    // System parameters
-	outputName = "out";
-	timestep = 1e-5f;
-	rigidBodyGridGridPeriod = 1;
-	steps = 100;
-
-	unsigned long int r0 = clock();
-	for (int i = 0; i < 4; i++)
-	    r0 *= r0 + 1;
-	seed = time(NULL) + r0;
-
-	origin = Vector3(0,0,0);
-	size = Vector3(0,0,0);
-	basis1 = Vector3(0,0,0);
-	basis2 = Vector3(0,0,0);
-	basis3 = Vector3(0,0,0);
-	
-	inputCoordinates = "";
-	restartCoordinates = "";
-        //Han-Yi Chou
-        inputMomentum = "";
-        restartMomentum = "";
-	copyReplicaCoordinates = 1;
-	numberFluct = 0;
-	numberFluctPeriod = 200;
-	interparticleForce = 1;
-	tabulatedPotential = 0;
-	fullLongRange = 0;
-	//	kTGridFile = ""; // Commented out for an unknown reason
-	temperature = 295.0f;
-	temperatureGridFile = "";
-	coulombConst = 566.440698f/92.0f;
-	electricField = 0.0f;
-	cutoff = 10.0f;
-	switchLen = 2.0f;
-	pairlistDistance = 2.0f;
-	imdForceScale = 1.0f;
-	outputPeriod = 200;
-	outputEnergyPeriod = -1;
-	outputFormat = TrajectoryWriter::formatDcd;
-	currentSegmentZ = -1.0f;
-	numCap = 0;
-	decompPeriod = 10;
-	readPartsFromFile = 0;
-	numPartsFromFile = 0;
-	partsFromFile = NULL;
-	readBondsFromFile = false;
-	numGroupSites = 0;
-	readGroupSitesFromFile = false;
-	
-
-	numBonds = 0;
-	bonds = NULL;
-	bondMap = NULL;
-	numTabBondFiles = 0;
-	readExcludesFromFile = false;
-	numExcludes = 0;
-	excludeCapacity = 256;
-	excludes = NULL;
-	excludeMap = NULL;
-	excludeRule = "";
-	readAnglesFromFile = false;
-	numAngles = 0;
-	angles = NULL;
-	numTabAngleFiles = 0;
-	readDihedralsFromFile = false;
-	numDihedrals = 0;
-	dihedrals = NULL;
-	numTabDihedralFiles = 0;
-
-	readBondAnglesFromFile = false;
-	numBondAngles = 0;
-	bondAngles = NULL;
-
-	readProductPotentialsFromFile = false;
-	numProductPotentials = 0;
-	productPotentials = NULL;
-	simple_potential_ids = XpotMap();
-	simple_potentials = std::vector<SimplePotential>();
-
-	readRestraintsFromFile = false;
-	numRestraints = 0;
-	restraints = NULL;
-
-        //Han-Yi Chou default values
-        ParticleDynamicType  = String("Brown");
-        RigidBodyDynamicType = String("Brown");
-        COM_Velocity = Vector3(0.f,0.f,0.f);
-        ParticleLangevinIntegrator = String("BAOAB"); //The default is BAOAB
-
-	// Hidden parameters
-	// Might be parameters later
-	numCapFactor = 5;
-
-        ParticleInterpolationType = 0;
-        RigidBodyInterpolationType = 0;
-}
-
-int Configuration::readParameters(const char * config_file) {
-	Reader config(config_file);
-	printf("Read config file %s\n", config_file);
-
-	// Get the number of particles.
-	const int numParams = config.length();
-	numParts = config.countParameter("particle");
-	numRigidTypes = config.countParameter("rigidBody");
-
-	// Allocate the particle variables.
-	part = new BrownianParticleType[numParts];
-	//partGridFile = new String[numParts];
-	//partGridFileScale = new float[numParts];
-	partGridFile       = new String*[numParts];
-        //partGridFileScale = new float[numParts];
-        partGridFileScale  = new float*[numParts];
-        //int numPartGridFiles = new int[numParts];
-
-	partForceXGridFile = new String[numParts];
-	partForceYGridFile = new String[numParts];
-	partForceZGridFile = new String[numParts];
-	partDiffusionGridFile = new String[numParts];
-	partReservoirFile = new String[numParts];
-	partRigidBodyGrid.resize(numParts);
-	
-	// Allocate the table variables.
-	partTableFile = new String[numParts*numParts];
-	partTableIndex0 = new int[numParts*numParts];
-	partTableIndex1 = new int[numParts*numParts];
-
-	// Allocate rigid body types
-	rigidBody = new RigidBodyType[numRigidTypes];
-	
-	// Set a default
-	/*
-	for (int i = 0; i < numParts; ++i) {
-	    partGridFileScale[i] = 1.0f;
-	}*/
-
-        for(int i = 0; i < numParts; ++i)
-        {
-            partGridFile[i] = NULL;
-            partGridFileScale[i] = NULL;
-            //part[i].numPartGridFiles = -1;
-        }
-        //for(int i = 0; i < numParts; ++i)
-          //  cout << part[i].numPartGridFiles << endl;
-
-	int btfcap = 10;
-	bondTableFile = new String[btfcap];
-
-	int atfcap = 10;
-	angleTableFile = new String[atfcap];
-
-	int dtfcap = 10;
-	dihedralTableFile = new String[dtfcap];
-
-	int currPart = -1;
-	int currTab = -1;
-	int currBond = -1;
-	int currAngle = -1;
-	int currDihedral = -1;
-	int currRB = -1;
-
-	int partClassPart =  0;
-	int partClassRB   =  1;
-	int currPartClass = -1;				// 0 => particle, 1 => rigidBody
-
-
-
-	for (int i = 0; i < numParams; i++) {
-		String param = config.getParameter(i);
-		String value = config.getValue(i);
-		// printf("Parsing %s: %s\n", param.val(), value.val());
-		if (param == String("outputName"))
-			outputName = value;
-		else if (param == String("timestep"))
-			timestep = (float) strtod(value.val(), NULL);
-		else if (param == String("rigidBodyGridGridPeriod"))
-			rigidBodyGridGridPeriod = atoi(value.val());
-		else if (param == String("steps"))
-			steps = atol(value.val());
-		else if (param == String("seed"))
-			seed = atoi(value.val());
-		else if (param == String("origin"))
-		    origin = stringToVector3( value );
-		else if (param == String("systemSize"))
-		    size = stringToVector3( value );
-		else if (param == String("basis1"))
-		    basis1 = stringToVector3( value );
-		else if (param == String("basis2"))
-		    basis2 = stringToVector3( value );
-		else if (param == String("basis3"))
-		    basis3 = stringToVector3( value );
-		else if (param == String("inputCoordinates"))
-			inputCoordinates = value;
-		else if (param == String("restartCoordinates"))
-			restartCoordinates = value;
-                //Han-Yi Chou
-                else if (param == String("inputMomentum"))
-                        inputMomentum = value;
-                else if (param == String("restartMomentum"))
-                        restartMomentum = value;
-		else if (param == String("copyReplicaCoordinates"))
-		        copyReplicaCoordinates = atoi(value.val());
-		else if (param == String("temperature"))
-			temperature =  (float) strtod(value.val(),NULL);
-		else if (param == String("temperatureGrid"))
-			temperatureGridFile = value;
-		else if (param == String("numberFluct"))
-			numberFluct = atoi(value.val());
-		else if (param == String("numberFluctPeriod"))
-			numberFluctPeriod = atoi(value.val());
-		else if (param == String("interparticleForce"))
-			interparticleForce = atoi(value.val());
-		else if (param == String("fullLongRange") || param == String("fullElect") )
-			fullLongRange = atoi(value.val());
-		else if (param == String("coulombConst"))
-			coulombConst = (float) strtod(value.val(), NULL);
-		else if (param == String("electricField"))
-			electricField = (float) strtod(value.val(), NULL);
-		else if (param == String("cutoff"))
-			cutoff = (float) strtod(value.val(), NULL);
-		else if (param == String("switchLen"))
-			switchLen = (float) strtod(value.val(), NULL);
-		else if (param == String("pairlistDistance"))
-			pairlistDistance = (float) strtod(value.val(), NULL);
-		else if (param == String("scaleIMDForce"))
-			imdForceScale = (float) strtod(value.val(), NULL);		
-		else if (param == String("outputPeriod"))
-			outputPeriod = atoi(value.val());
-		else if (param == String("outputEnergyPeriod"))
-			outputEnergyPeriod = atoi(value.val());
-		else if (param == String("outputFormat"))
-			outputFormat = TrajectoryWriter::getFormatCode(value);
-		else if (param == String("currentSegmentZ"))
-			currentSegmentZ = (float) strtod(value.val(), NULL);
-		else if (param == String("numCap"))
-			numCap = atoi(value.val());
-		else if (param == String("decompPeriod"))
-			decompPeriod = atoi(value.val());
-
-                //Han-Yi Chou
-                else if (param == String("ParticleDynamicType"))
-                    ParticleDynamicType = value;
-                else if (param == String("RigidBodyDynamicType"))
-                    RigidBodyDynamicType = value;
-                else if (param == String("ParticleLangevinIntegrator"))
-                    ParticleLangevinIntegrator = value;
-                else if (param == String("ParticleInterpolationType"))
-                    ParticleInterpolationType = atoi(value.val());
-                else if (param == String("RigidBodyInterpolationType"))
-                    RigidBodyInterpolationType = atoi(value.val());
-		// PARTICLES
-		else if (param == String("particle")) {
-		    part[++currPart] = BrownianParticleType(value);
-		    currPartClass = partClassPart;
-		}
-                else if (param == String("mu")) { // for Nose-Hoover Langevin
-		    if (currPart < 0) exit(1);
-		    part[currPart].mu = (float) strtod(value.val(), NULL);
-		} else if (param == String("forceXGridFile")) {
-		    if (currPart < 0) exit(1);
-		    partForceXGridFile[currPart] = value;
-		} else if (param == String("forceYGridFile")) {
-		    if (currPart < 0) exit(1);
-		    partForceYGridFile[currPart] = value;
-		} else if (param == String("forceZGridFile")) {
-		    if (currPart < 0) exit(1);
-		    partForceZGridFile[currPart] = value;
-		} else if (param == String("diffusionGridFile")) {
-		    if (currPart < 0) exit(1);
-		    partDiffusionGridFile[currPart] = value;
-		} else if (param == String("diffusion")) {
-		    if (currPart < 0) exit(1);
-		    part[currPart].diffusion = (float) strtod(value.val(), NULL);
-		} else if (param == String("charge")) {
-		    if (currPart < 0) exit(1);
-		    part[currPart].charge = (float) strtod(value.val(), NULL);
-		} else if (param == String("radius")) {
-		    if (currPart < 0) exit(1);
-		    part[currPart].radius = (float) strtod(value.val(), NULL);
-		} else if (param == String("eps")) {
-		    if (currPart < 0) exit(1);
-		    part[currPart].eps = (float) strtod(value.val(), NULL);
-		} else if (param == String("reservoirFile")) {
-		    if (currPart < 0) exit(1);
-		    partReservoirFile[currPart] = value;
-		}
-		else if (param == String("tabulatedPotential"))
-			tabulatedPotential = atoi(value.val());
-		else if (param == String("tabulatedFile"))
-			readTableFile(value, ++currTab);
-		else if (param == String("tabulatedBondFile")) {
-			if (numTabBondFiles >= btfcap) {
-				String* temp = bondTableFile;
-				btfcap *= 2;	
-				bondTableFile = new String[btfcap];
-				for (int j = 0; j < numTabBondFiles; j++)
-					bondTableFile[j] = temp[j];
-				delete[] temp;
-			}
-			if (readBondFile(value, ++currBond))
-				numTabBondFiles++;
-		} else if (param == String("inputParticles")) {
-			if (readPartsFromFile) {
-				printf("WARNING: More than one particle file specified. Ignoring new file.\n");
-			} else {
-				partFile = value;
-				readPartsFromFile = true;
-				loadedCoordinates = true;
-			}
-		} else if (param == String("inputGroups")) {
-			if (readGroupSitesFromFile) {
-				printf("WARNING: More than one group file specified. Ignoring new file.\n");
-			} else {
-				groupSiteFile = value;
-				readGroupSitesFromFile = true;
-			}
-		} else if (param == String("inputBonds")) {
-			if (readBondsFromFile) {
-				printf("WARNING: More than one bond file specified. Ignoring new bond file.\n");
-			} else {
-				bondFile = value;				
-				readBondsFromFile = true;
-			}
-		} else if (param == String("inputExcludes")) {
-			if (readExcludesFromFile) {
-				printf("WARNING: More than one exclude file specified. Ignoring new exclude file.\n");
-			} else {
-			    printf("inputExclude %s\n", value.val());
-				excludeFile = value;				
-				readExcludesFromFile = true;
-			}
-		} else if (param == String("exclude") or param == String("exclusion")) {
-			excludeRule = value; 
-		} else if (param == String("inputAngles")) {
-			if (readAnglesFromFile) {
-				printf("WARNING: More than one angle file specified. Ignoring new angle file.\n");
-			} else {
-				angleFile = value;
-				readAnglesFromFile = true;
-			}
-		} else if (param == String("inputBondAngles")) {
-			if (readBondAnglesFromFile) {
-				printf("WARNING: More than one bondangle file specified. Ignoring new bondangle file.\n");
-			} else {
-			        bondAngleFile = value;
-				readBondAnglesFromFile = true;
-			}
-		} else if (param == String("inputProductPotentials")) {
-			if (readBondAnglesFromFile) {
-				printf("WARNING: More than one product potential file specified. Ignoring new file.\n");
-			} else {
-			        productPotentialFile = value;
-				readProductPotentialsFromFile = true;
-			}
-		} else if (param == String("tabulatedAngleFile")) {
-			if (numTabAngleFiles >= atfcap) {
-				String* temp = angleTableFile;
-				atfcap *= 2;	
-				angleTableFile = new String[atfcap];
-				for (int j = 0; j < numTabAngleFiles; j++)
-					angleTableFile[j] = temp[j];
-				delete[] temp;
-			}
-			if (readAngleFile(value, ++currAngle))
-				numTabAngleFiles++;
-		} else if (param == String("inputDihedrals")) {
-			if (readDihedralsFromFile) {
-				printf("WARNING: More than one dihedral file specified. Ignoring new dihedral file.\n");
-			} else {
-				dihedralFile = value;
-				readDihedralsFromFile = true;
-			}
-		} else if (param == String("tabulatedDihedralFile")) {
-			if (numTabDihedralFiles >= dtfcap) {
-				String * temp = dihedralTableFile;
-				dtfcap *= 2;
-				dihedralTableFile = new String[dtfcap];
-				for (int j = 0; j < numTabDihedralFiles; j++)
-					dihedralTableFile[j] = temp[j];
-				delete[] temp;
-			}
-			if (readDihedralFile(value, ++currDihedral))
-				numTabDihedralFiles++;
-		} else if (param == String("inputRestraints")) {
-			if (readRestraintsFromFile) {
-				printf("WARNING: More than one restraint file specified. Ignoring new restraint file.\n");
-			} else {
-				restraintFile = value;
-				readRestraintsFromFile = true;
-			}
-		} else if (param == String("gridFileScale")) {
-		    if (currPart < 0) exit(1);
-			//partGridFileScale[currPart] = (float) strtod(value.val(), NULL);
-			  stringToArray<float>(&value, part[currPart].numPartGridFiles, 
-                                                      &partGridFileScale[currPart]);
-		} else if (param == String("gridFileBoundaryConditions")) {
-		    if (currPart < 0) exit(1);
-		    register size_t num = value.tokenCount();
-		    if (num > 0) {
-			String *tokens  = new String[num];
-			BoundaryCondition *data = new BoundaryCondition[num];
-			value.tokenize(tokens);
-			for(size_t i = 0; i < num; ++i) {
-			    tokens[i].lower();
-			    if (tokens[i] == "dirichlet")
-				data[i] = dirichlet;
-			    else if (tokens[i] == "neumann")
-				data[i] = neumann;
-			    else if (tokens[i] == "periodic")
-				data[i] = periodic;
-			    else {
-				fprintf(stderr,"WARNING: Unrecognized gridFile boundary condition \"%s\". Using Dirichlet.\n", tokens[i].val() );
-				data[i] = dirichlet;
-			    }
-			}
-			delete[] tokens;
-			part[currPart].set_boundary_conditions(num, data);
-			delete[] data;
-		    }
-		} else if (param == String("rigidBodyPotential")) {
-		    if (currPart < 0) exit(1);
-		    partRigidBodyGrid[currPart].push_back(value);
-		}
-                //Han-Yi Chou initial COM velocity for total particles
-                else if (param == String("COM_Velocity"))
-                    COM_Velocity = stringToVector3(value);
-
-		// RIGID BODY
-		else if (param == String("rigidBody")) {
-			// part[++currPart] = BrownianParticleType(value);
-			rigidBody[++currRB] = RigidBodyType(value, this);
-			currPartClass = partClassRB;
-		}
-		else if (param == String("inertia")) {
-		    if (currRB < 0) exit(1);
-			rigidBody[currRB].inertia = stringToVector3( value );
-		} else if (param == String("rotDamping")) {
-		    if (currRB < 0) exit(1);
-			rigidBody[currRB].rotDamping = stringToVector3( value );
-		} else if (param == String("attachedParticles")) {
-			rigidBody[currRB].append_attached_particle_file(value);
-		} else if (param == String("densityGrid")) {
-		    if (currRB < 0) exit(1);
-			rigidBody[currRB].addDensityGrid(value);
-		} else if (param == String("potentialGrid")) {
-		    if (currRB < 0) exit(1);
-			rigidBody[currRB].addPotentialGrid(value);
-		} else if (param == String("densityGridScale")) {
-		    if (currRB < 0) exit(1);
-			rigidBody[currRB].scaleDensityGrid(value);
-		} else if (param == String("potentialGridScale")) {
-		    if (currRB < 0) exit(1);
-			rigidBody[currRB].scalePotentialGrid(value);
-		} else if (param == String("pmfScale")) {
-		    if (currRB < 0) exit(1);
-			rigidBody[currRB].scalePMF(value);
-		} else if (param == String("position")) {
-		    if (currRB < 0) exit(1);
-			rigidBody[currRB].initPos = stringToVector3( value );
-		} else if (param == String("orientation")) {
-		    if (currRB < 0) exit(1);
-			rigidBody[currRB].initRot = stringToMatrix3( value );
-                } else if (param == String("momentum")) {
-                        rigidBody[currRB].initMomentum = stringToVector3(value);
-                } else if (param == String("angularMomentum")) {
-		    if (currRB < 0) exit(1);
-                        rigidBody[currRB].initAngularMomentum = stringToVector3(value);
-		}
-		else if (param == String("inputRBCoordinates"))
-			inputRBCoordinates = value;
-		else if (param == String("restartRBCoordinates"))
-		        restartRBCoordinates = value;
-		
-		// COMMON
-		else if (param == String("num")) {
-		    if (currPartClass == partClassPart) {
-			if (currPart < 0) exit(1);
-			part[currPart].num = atoi(value.val());
-		    } else if (currPartClass == partClassRB) {
-			if (currRB < 0) exit(1);
-			rigidBody[currRB].num = atoi(value.val());
-		    }
-		}
-                //set mass here Han-Yi Chou
-                else if (param == String("mass"))
-                {
-                    if (currPartClass == partClassPart) {
-			if (currPart < 0) exit(1);
-                        part[currPart].mass    = (float) strtod(value.val(),NULL);
-                    } else if (currPartClass == partClassRB) {
-			if (currRB < 0) exit(1);
-                        rigidBody[currRB].mass = (float) strtod(value.val(),NULL);
-		    }
-                }
-                //set damping here, using anisotropic damping, i.e. data type Vector3 Han-Yi Chou
-                else if (param == String("transDamping"))
-                {
-                    if (currPartClass == partClassPart) {
-			if (currPart < 0) exit(1);
-                        part[currPart].transDamping    = stringToVector3(value);
-		    } else if (currPartClass == partClassRB) {
-			if (currRB < 0) exit(1);
-                        rigidBody[currRB].transDamping = stringToVector3(value);
-		    }
-                }
-		else if (param == String("gridFile")) {
-			if (currPartClass == partClassPart)
-                        {
-			    if (currPart < 0) exit(1);
-                                printf("Applying grid file '%s'\n", value.val());
-				stringToArray<String>(&value, part[currPart].numPartGridFiles, 
-                                                             &partGridFile[currPart]);
-				const int& num = part[currPart].numPartGridFiles;
-				partGridFileScale[currPart] = new float[num];
-                                for(int i = 0; i < num; ++i) {
-                                    // printf("%s ", partGridFile[currPart]->val());
-				    partGridFileScale[currPart][i] = 1.0f;
-				}
-
-				// Set default boundary conditions for grids
-				BoundaryCondition *bc = part[currPart].pmf_boundary_conditions;
-				if (bc == NULL) {
-				    bc = new BoundaryCondition[num];
-				    for(int i = 0; i < num; ++i) {
-					bc[i] = dirichlet;
-				    }
-				    part[currPart].pmf_boundary_conditions = bc;
-				}
-                        }
-			else if (currPartClass == partClassRB) {
-			    if (currRB < 0) exit(1);
-				rigidBody[currRB].addPMF(value);
-			}
-		}
-		// UNKNOWN
-		else {
-			printf("ERROR: Unrecognized keyword `%s'.\n", param.val());
-			exit(1);
-		}
-	}
-
-	// extra configuration for RB types
-	for (int i = 0; i < numRigidTypes; i++)
-		rigidBody[i].setDampingCoeffs(timestep);
-
-        //For debugging purpose Han-Yi Chou
-        //Print();
-	return numParams;
-}
-//Han-Yi Chou
-void Configuration::Print()
-{
-    printf("The dynamic type for particle is %s \n", ParticleDynamicType.val());
-    for(int i = 0; i < numParts; ++i)
-    {
-        printf("The type %d has mass %f \n", i,part[i].mass);
-        printf("The diffusion coefficient is %f \n", part[i].diffusion);
-        printf("The translational damping is %f %f %f \n", part[i].transDamping.x, part[i].transDamping.y, part[i].transDamping.z);
-    }
-    printf("Done with check for Langevin");
-    //assert(1==2);
-}
-
-void Configuration::PrintMomentum()
-{
-    for(int i = 0; i < num; ++i)
-    {
-        printf("%f %f %f\n", momentum[i].x, momentum[i].y, momentum[i].z);
-    }
-    //assert(1==2);
-}
-Vector3 Configuration::stringToVector3(String s) {
-	// tokenize and return
-	int numTokens = s.tokenCount();
-	if (numTokens != 3) {
-		printf("ERROR: could not convert input to Vector3.\n"); // TODO improve this message
-		exit(1);
-	}
-	String* token = new String[numTokens];
-	s.tokenize(token);
-	Vector3 v( (float) strtod(token[0], NULL),
-						 (float) strtod(token[1], NULL),
-						 (float) strtod(token[2], NULL) );
-	return v;
-}
-Matrix3 Configuration::stringToMatrix3(String s) {
-	// tokenize and return
-	int numTokens = s.tokenCount();
-	if (numTokens != 9) {
-		printf("ERROR: could not convert input to Matrix3.\n"); // TODO improve this message
-		exit(1);
-	}
-	String* token = new String[numTokens];
-	s.tokenize(token);
-	Matrix3 m( (float) strtod(token[0], NULL),
-						 (float) strtod(token[1], NULL),
-						 (float) strtod(token[2], NULL),
-						 (float) strtod(token[3], NULL),
-						 (float) strtod(token[4], NULL),
-						 (float) strtod(token[5], NULL),
-						 (float) strtod(token[6], NULL),
-						 (float) strtod(token[7], NULL),
-						 (float) strtod(token[8], NULL) );
-	return m;
-}
-
-void Configuration::readAtoms() {
-	// Open the file
-	FILE* inp = fopen(partFile.val(), "r");
-	char line[256];
-
-	// If the particle file cannot be found, exit the program
-	if (inp == NULL) {
-		printf("ERROR: Could not open `%s'.\n", partFile.val());
-		bool found = true;
-		for (int i = 0; i < numParts; i++)
-			if (part[i].num == 0)
-				found = false;
-		// assert(false); // TODO probably relax constraint that particle must be found; could just be in RB
-		if (!found) {
-			printf("ERROR: Number of particles not specified in config file.\n");
-			exit(1);
-		}
-		printf("Using default coordinates file\n");
-		return;
-	}
-
-	// Our particle array has a starting capacity of 256
-	// We will expand this later if we need to.
-	int capacity = 256;
-	numPartsFromFile = 0;
-	partsFromFile = new String[capacity];
-	indices = new int[capacity];
-	indices[0] = 0;
-
-	// Get and process all lines of input
-	while (fgets(line, 256, inp) != NULL) {
-		// Lines in the particle file that begin with # are comments
-		if (line[0] == '#') continue;
-		      
-		String s(line);
-		int numTokens = s.tokenCount();
-		      
-		// Break the line down into pieces (tokens) so we can process them individually
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-
-		// Legitimate ATOM input lines have 6 tokens: 
-		// ATOM | Index | Name | X-coord | Y-coord | Z-coord
-		// A line without exactly six tokens should be discarded.
-                if (ParticleDynamicType == String("Langevin") || ParticleDynamicType == String("NoseHooverLangevin")) {
-		    if (numTokens != 9) {
-			printf("Error: Invalid particle file line: %s\n", line);
-			exit(-1);
-		    }
-		} else {
-		    if (numTokens != 6) {
-			printf("Error: Invalid particle file line: %s\n", line);
-			exit(-1);
-		    }
-		}
-
-		// Ensure that this particle's type was defined in the config file.
-		// If not, discard this line.
-		bool found;
-		for (int j = 0; j < numParts; j++) {
-			// If this particle type exists, add a new one to the list
-			if (part[j].name == tokenList[2]) {
-				found = true;
-				part[j].num++;
-			}
-		}
-
-		// If the particle's type does not exist according to the config file, discard it.
-		if (!found) {
-			printf("WARNING Unknown particle type %s found and discarded.\n", tokenList[2].val());
-			continue;
-		}
-
-		// If we don't have enough room in our particle array, we need to expand it.
-		if (numPartsFromFile >= capacity) {
-			// Temporary pointers to the old arrays
-			String* temp = partsFromFile;	
-			int* temp2 = indices;
-
-			// Double the capacity
-			capacity *= 2;
-
-			// Create pointers to new arrays which are twice the size of the old ones
-			partsFromFile = new String[capacity];
-			indices = new int[capacity];
-		
-			// Copy the old values into the new arrays
-			for (int j = 0; j < numPartsFromFile; j++) {
-				partsFromFile[j] = temp[j];
-				indices[j] = temp2[j];
-			}
-
-			// delete the old arrays
-			delete[] temp;
-			delete[] temp2;
-		}
-		// Make sure the index of this particle is unique.
-		// NOTE: The particle list is sorted by index. 
-		bool uniqueID = true;		
-		int key = atoi(tokenList[1].val());
-		int mid = 0;
-
-		// If the index is greater than the last index in the list, 
-		// this particle belongs at the end of the list. Since the 
-		// list is kept sorted, we know this is okay.
-		if (numPartsFromFile == 0 || key > indices[numPartsFromFile - 1]) {
-			indices[numPartsFromFile] = key;
-			partsFromFile[numPartsFromFile++] = line;
-		}
-		// We need to do a binary search to figure out if
-		// the index already exists in the list. 
-		// The assumption is that input files SHOULD have their indices sorted in 
-		// ascending order, so we shouldn't actually use the binary search 
-		// or the sort (which is pretty time consuming) very often.
-		else {
-			int low = 0, high = numPartsFromFile - 1;
-			
-			while (low <= high) {
-				mid = (int)((high - low) / 2 + low);
-				int curr = indices[mid];
-				if (curr < key) {
-					low = mid + 1;
-				} else if (curr > key) {
-					high = mid - 1;
-				} else {
-					// For now, particles with non-unique IDs are simply not added to the array
-					// Other possible approaches which are not yet implemented:
-					// 1: Keep track of these particles and assign them new IDs after you have
-					//    already added all of the other particles. 	
-					// 2: Get rid of ALL particles with that ID, even the ones that have already 
-					//    been added.
-					printf("WARNING: Non-unique ID found: %s\n", line);
-					uniqueID = false;
-					break;
-				}
-			}
-			if (uniqueID) {
-				// Add the particle to the end of the array, then sort it. 
-				indices[numPartsFromFile] = key;
-				partsFromFile[numPartsFromFile++] = line;
-				std::sort(indices, indices + numPartsFromFile);
-				std::sort(partsFromFile, partsFromFile + numPartsFromFile, compare());		
-			}
-		}
-	}
-}
-void Configuration::readGroups() {
-	// Open the file
-    const size_t line_char = 16384;
-	FILE* inp = fopen(groupSiteFile.val(), "r");
-	char line[line_char];
-
-	// If the particle file cannot be found, exit the program
-	if (inp == NULL) {
-		printf("ERROR: Could not open `%s'.\n", partFile.val());
-		exit(1);
-	}
-
-	// Our particle array has a starting capacity of 256
-	// We will expand this later if we need to.
-	// int capacity = 256;
-	numGroupSites = 0;
-
-	// partsFromFile = new String[capacity];
-	// indices = new int[capacity];
-	// indices[0] = 0;
-
-	// Get and process all lines of input
-	while (fgets(line, line_char, inp) != NULL) {
-		// Lines in the particle file that begin with # are comments
-		if (line[0] == '#') continue;
-		      
-		String s(line);
-		int numTokens = s.tokenCount();
-		      
-		// Break the line down into pieces (tokens) so we can process them individually
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-
-		// Legitimate GROUP input lines have at least 3 tokens: 
-		// GROUP | Atom_1_idx | Atom_2_idx | ...
-		// A line without exactly six tokens should be discarded.
-		if (numTokens < 3) {
-		    printf("Error: Invalid group file line: %s\n", line);
-		    exit(-1);
-		}
-
-		// Make sure the index of this particle is unique.
-		// NOTE: The particle list is sorted by index. 
-		std::vector<int> tmp;
-		for (int i=1; i < numTokens; ++i) {
-		    const int ai = atoi(tokenList[i].val());
-		    if (ai >= num+num_rb_attached_particles) {
-			printf("Error: Attempted to include invalid particle in group: %s\n", line);
-			exit(-1);
-		    } else if (ai >= num) {
-			printf("WARNING: including RB particles in group with line: %s\n", line);
-		    }
-		    tmp.push_back( ai );
-		}
-		groupSiteData.push_back(tmp);
-		numGroupSites++;
-	}
-}
-
-void Configuration::readBonds() {
-	// Open the file
-	FILE* inp = fopen(bondFile.val(), "r");
-	char line[256];
-
-	// If the particle file cannot be found, exit the program
-	if (inp == NULL) {
-		printf("WARNING: Could not open `%s'.\n", bondFile.val());
-		printf("         This simulation will not use particle bonds.\n");
-		return;
-	}
-
-	// Our particle array has a starting capacity of 256
-	// We will expand this later if we need to.
-	int capacity = 256;
-	numBonds = 0;
-	bonds = new Bond[capacity];
-
-	// Get and process all lines of input
-	while (fgets(line, 256, inp) != NULL) {
-		
-		// Lines in the particle file that begin with # are comments
-		if (line[0] == '#') continue;
-		      
-		String s(line);
-		int numTokens = s.tokenCount();
-
-		// Break the line down into pieces (tokens) so we can process them individually
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-
-		// Legitimate BOND input lines have 4 tokens: 
-		// BOND | OPERATION_FLAG | INDEX1 | INDEX2 | FILENAME 
-		// A line without exactly five tokens should be discarded.
-		if (numTokens != 5) {
-			printf("WARNING: Invalid bond file line: %s\n", line);
-			continue;
-		}
-
-		String op = tokenList[1];
-		int ind1 = atoi(tokenList[2].val());
-		int ind2 = atoi(tokenList[3].val());
-		String file_name = tokenList[4];
-
-		if (ind1 == ind2) {
-			printf("WARNING: Invalid bond file line: %s\n", line);
-			continue;
-		}
-
-		if (ind1 < 0 || ind1 >= num+num_rb_attached_particles+numGroupSites ||
-		    ind2 < 0 || ind2 >= num+num_rb_attached_particles+numGroupSites) {
-			printf("ERROR: Bond file line '%s' includes invalid index\n", line);
-			exit(1);
-		}
-
-		
-		// If we don't have enough room in our bond array, we need to expand it.
-		if (numBonds+1 >= capacity) { // "numBonds+1" because we are adding two bonds to array
-			// Temporary pointer to the old array
-			Bond* temp = bonds;	
-
-			// Double the capacity
-			capacity *= 2;
-
-			// Create pointer to new array which is twice the size of the old one
-			bonds = new Bond[capacity];
-		
-			// Copy the old values into the new array
-			for (int j = 0; j < numBonds; j++)
-				bonds[j] = temp[j];
-
-			// delete the old array
-			delete[] temp;
-		}
-		// Add the bond to the bond array
-		// We must add it twice: Once for (ind1, ind2) and once for (ind2, ind1)
-		
-		// RBTODO: add ind1/2 to exclusion list here iff op == REPLACE
-
-		if (op == "REPLACE")
-		    addExclusion(ind1, ind2);
-
-		Bond* b = new Bond(op, ind1, ind2, file_name);
-		bonds[numBonds++] = *b;
-		b = new Bond(op, ind2, ind1, file_name);
-		bonds[numBonds++] = *b;
-		delete[] tokenList;
-	}	
-	// Call compareBondIndex with qsort to sort the bonds by BOTH ind1 AND ind2
-	std::sort(bonds, bonds + numBonds, compare());
-
-	/* Each particle may have a varying number of bonds
-	 * bondMap is an array with one element for each particle
-	 * which keeps track of where a particle's bonds are stored
-	 * in the bonds array.
-	 * bondMap[i].x is the index in the bonds array where the ith particle's bonds begin
-	 * bondMap[i].y is the index in the bonds array where the ith particle's bonds end
-	 */
-	bondMap = new int2[num+num_rb_attached_particles+numGroupSites];
-	for (int i = 0; i < num+num_rb_attached_particles+numGroupSites; i++) {
-		bondMap[i].x = -1;
-		bondMap[i].y = -1;
-	}
-	int currPart = -1;
-	int lastPart = -1;
-	for (int i = 0; i < numBonds; i++) {
-		if (bonds[i].ind1 != currPart) {
-			currPart = bonds[i].ind1;
-			bondMap[currPart].x = i;
-			if (lastPart >= 0) bondMap[lastPart].y = i;
-			lastPart = currPart;
-		}
-	}
-	if (bondMap[lastPart].x > 0)
-		bondMap[lastPart].y = numBonds;
-}
-
-void Configuration::readExcludes()
-{
-	// Open the file
-	FILE* inp = fopen(excludeFile.val(), "r");
-	char line[256];
-
-	// If the exclusion file cannot be found, exit the program
-	if (inp == NULL) {
-		printf("WARNING: Could not open `%s'.\n", excludeFile.val());
-		printf("This simulation will not use exclusions.\n");
-		return;
-	}
-
-
-	// Get and process all lines of input
-	while (fgets(line, 256, inp) != NULL) {
-		// Lines in the particle file that begin with # are comments
-		if (line[0] == '#') continue;
-		      
-		String s(line);
-		int numTokens = s.tokenCount();
-		      
-		// Break the line down into pieces (tokens) so we can process them individually
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-
-		// Legitimate EXCLUDE input lines have 3 tokens: 
-		// BOND | INDEX1 | INDEX2
-		// A line without exactly three tokens should be discarded.
-		if (numTokens != 3) {
-			printf("WARNING: Invalid exclude file line: %s\n", line);
-			continue;
-		}
-		int ind1 = atoi(tokenList[1].val());
-		int ind2 = atoi(tokenList[2].val());
-		addExclusion(ind1, ind2);
-		delete[] tokenList;
-	}
-}
-void Configuration::addExclusion(int ind1, int ind2) {
-    if (ind1 >= num+num_rb_attached_particles || ind2 >= num+num_rb_attached_particles) {
-	printf("WARNING: Attempted to add an exclusion for an out-of-range particle index (%d or %d >= %d).\n", ind1, ind2, num+num_rb_attached_particles);
-	return;
-    }
-		
-    // If we don't have enough room in our bond array, we need to expand it.
-    if (numExcludes >= excludeCapacity) {
-	// Temporary pointer to the old array
-	Exclude* temp = excludes;	
-
-	// Double the capacity
-	excludeCapacity *= 2;
-
-	// Create pointer to new array which is twice the size of the old one
-	excludes = new Exclude[excludeCapacity];
-		
-	// Copy the old values into the new array
-	for (int j = 0; j < numExcludes; j++)
-	    excludes[j] = temp[j];
-
-	// delete the old array
-	delete[] temp;
-    }
-
-    // Add the bond to the exclude array
-    // We must add it twice: Once for (ind1, ind2) and once for (ind2, ind1)
-    Exclude ex1(ind1, ind2);
-    excludes[numExcludes++] = ex1;
-    Exclude ex2(ind2, ind1);
-    excludes[numExcludes++] = ex2;
-    
-}    
-
-void Configuration::buildExcludeMap() {
-    // Call compareExcludeIndex with qsort to sort the excludes by BOTH ind1 AND ind2
-    std::sort(excludes, excludes + numExcludes, compare());
-
-    /* Each particle may have a varying number of excludes
-     * excludeMap is an array with one element for each particle
-     * which keeps track of where a particle's excludes are stored
-     * in the excludes array.
-     * excludeMap[i].x is the index in the excludes array where the ith particle's excludes begin
-     * excludeMap[i].y is the index in the excludes array where the ith particle's excludes end
-     */
-    excludeMap = new int2[num+num_rb_attached_particles];
-    for (int i = 0; i < num+num_rb_attached_particles; i++) {
-	excludeMap[i].x = -1;
-	excludeMap[i].y = -1;
-    }
-    int currPart = -1;
-    int lastPart = -1;
-    for (int i = 0; i < numExcludes; i++) {
-	if (excludes[i].ind1 != currPart) {
-	    currPart = excludes[i].ind1;
-	    assert(currPart < num+num_rb_attached_particles);
-	    excludeMap[currPart].x = i;
-	    if (lastPart >= 0)
-		excludeMap[lastPart].y = i;
-	    lastPart = currPart;
-	}
-    }
-    if (excludeMap[lastPart].x > 0)
-	excludeMap[lastPart].y = numExcludes;
-}
-
-void Configuration::readAngles() {
-	FILE* inp = fopen(angleFile.val(), "r");
-	char line[256];
-	int capacity = 256;
-	numAngles = 0;
-	angles = new Angle[capacity];
-
-	// If the angle file cannot be found, exit the program
-	if (inp == NULL) {
-		printf("WARNING: Could not open `%s'.\n", angleFile.val());
-		printf("This simulation will not use angles.\n");
-		return;
-	}
-
-	while(fgets(line, 256, inp)) {
-		if (line[0] == '#') continue;
-		String s(line);
-		int numTokens = s.tokenCount();
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-		
-		// Legitimate ANGLE inputs have 5 tokens
-		// ANGLE | INDEX1 | INDEX2 | INDEX3 | FILENAME
-		// Any angle input line without exactly 5 tokens should be discarded
-		if (numTokens != 5) {
-			printf("WARNING: Invalid angle input line: %s\n", line);
-			continue;
-		}		
-		
-		// Discard any empty line
-		if (tokenList == NULL) 
-			continue;
-		
-		int ind1 = atoi(tokenList[1].val());
-		int ind2 = atoi(tokenList[2].val());
-		int ind3 = atoi(tokenList[3].val());
-		String file_name = tokenList[4];
-		//printf("file_name %s\n", file_name.val());
-		if (ind1 >= num+num_rb_attached_particles+numGroupSites or ind2 >= num+num_rb_attached_particles+numGroupSites or ind3 >= num+num_rb_attached_particles+numGroupSites)
-			continue;
-
-		if (numAngles >= capacity) {
-			Angle* temp = angles;
-			capacity *= 2;
-			angles = new Angle[capacity];
-			for (int i = 0; i < numAngles; i++)
-				angles[i] = temp[i];
-			delete[] temp;
-		}
-
-		Angle a(ind1, ind2, ind3, file_name);
-		angles[numAngles++] = a;
-		delete[] tokenList;
-	}
-	std::sort(angles, angles + numAngles, compare());	
-
-	// for(int i = 0; i < numAngles; i++)
-	// 	angles[i].print();
-}
-
-void Configuration::readDihedrals() {
-	FILE* inp = fopen(dihedralFile.val(), "r");
-	char line[256];
-	int capacity = 256;
-	numDihedrals = 0;
-	dihedrals = new Dihedral[capacity];
-
-	// If the dihedral file cannot be found, exit the program
-	if (inp == NULL) {
-		printf("WARNING: Could not open `%s'.\n", dihedralFile.val());
-		printf("This simulation will not use dihedrals.\n");
-		return;
-	}
-
-	while(fgets(line, 256, inp)) {
-		if (line[0] == '#') continue;
-		String s(line);
-		int numTokens = s.tokenCount();
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-		
-		// Legitimate DIHEDRAL inputs have 6 tokens
-		// DIHEDRAL | INDEX1 | INDEX2 | INDEX3 | INDEX4 | FILENAME
-		// Any angle input line without exactly 6 tokens should be discarded
-		if (numTokens != 6) {
-			printf("WARNING: Invalid dihedral input line: %s\n", line);
-			continue;
-		}		
-		
-		// Discard any empty line
-		if (tokenList == NULL) 
-			continue;
-		
-		int ind1 = atoi(tokenList[1].val());
-		int ind2 = atoi(tokenList[2].val());
-		int ind3 = atoi(tokenList[3].val());
-		int ind4 = atoi(tokenList[4].val());
-		String file_name = tokenList[5];
-		//printf("file_name %s\n", file_name.val());
-		if (ind1 >= num+num_rb_attached_particles+numGroupSites or
-		    ind2 >= num+num_rb_attached_particles+numGroupSites or
-		    ind3 >= num+num_rb_attached_particles+numGroupSites or
-		    ind4 >= num+num_rb_attached_particles+numGroupSites)
-			continue;
-
-		if (numDihedrals >= capacity) {
-			Dihedral* temp = dihedrals;
-			capacity *= 2;
-			dihedrals = new Dihedral[capacity];
-			for (int i = 0; i < numDihedrals; ++i)
-				dihedrals[i] = temp[i];
-			delete[] temp;
-		}
-
-		Dihedral d(ind1, ind2, ind3, ind4, file_name);
-		dihedrals[numDihedrals++] = d;
-		delete[] tokenList;
-	}
-	std::sort(dihedrals, dihedrals + numDihedrals, compare());	
-
-	// for(int i = 0; i < numDihedrals; i++)
-	// 	dihedrals[i].print();
-}
-
-void Configuration::readBondAngles() {
-	FILE* inp = fopen(bondAngleFile.val(), "r");
-	char line[256];
-	int capacity = 256;
-	numBondAngles = 0;
-	bondAngles = new BondAngle[capacity];
-
-	// If the angle file cannot be found, exit the program
-	if (inp == NULL) {
-		printf("WARNING: Could not open `%s'.\n", bondAngleFile.val());
-		printf("This simulation will not use angles.\n");
-		return;
-	}
-
-	while(fgets(line, 256, inp)) {
-		if (line[0] == '#') continue;
-		String s(line);
-		int numTokens = s.tokenCount();
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-
-		// Legitimate BONDANGLE inputs have 8 tokens
-		// BONDANGLE | INDEX1 | INDEX2 | INDEX3 | INDEX4 | ANGLE_FILENAME | BOND_FILENAME1 | BOND_FILENAME2
-		if (numTokens != 8) {
-			printf("WARNING: Invalid bond_angle input line: %s\n", line);
-			continue;
-		}
-
-		// Discard any empty line
-		if (tokenList == NULL)
-			continue;
-
-		int ind1 = atoi(tokenList[1].val());
-		int ind2 = atoi(tokenList[2].val());
-		int ind3 = atoi(tokenList[3].val());
-		int ind4 = atoi(tokenList[4].val());
-		String file_name1 = tokenList[5];
-		String file_name2 = tokenList[6];
-		String file_name3 = tokenList[7];
-		//printf("file_name %s\n", file_name.val());
-		if (ind1 >= num or ind2 >= num or ind3 >= num or ind4 >= num)
-			continue;
-
-		if (numBondAngles >= capacity) {
-			BondAngle* temp = bondAngles;
-			capacity *= 2;
-			bondAngles = new BondAngle[capacity];
-			for (int i = 0; i < numBondAngles; i++)
-				bondAngles[i] = temp[i];
-			delete[] temp;
-		}
-
-		BondAngle a(ind1, ind2, ind3, ind4, file_name1, file_name2, file_name3);
-		bondAngles[numBondAngles++] = a;
-		delete[] tokenList;
-	}
-	std::sort(bondAngles, bondAngles + numBondAngles, compare());
-
-	// for(int i = 0; i < numAngles; i++)
-	// 	angles[i].print();
-}
-
-void Configuration::readProductPotentials() {
-	FILE* inp = fopen(productPotentialFile.val(), "r");
-	char line[256];
-	int capacity = 256;
-	numProductPotentials = 0;
-	productPotentials = new ProductPotentialConf[capacity];
-
-	// If the angle file cannot be found, exit the program
-	if (inp == NULL) {
-		printf("WARNING: Could not open `%s'.\n", productPotentialFile.val());
-		printf("This simulation will not use product potentials.\n");
-		return;
-	}
-	printf("DEBUG: READING PRODUCT POTENTAL FILE\n");
-	std::vector<std::vector<int>> indices;
-	std::vector<int> tmp;
-	std::vector<String> pot_names;
-
-	while(fgets(line, 256, inp)) {
-		if (line[0] == '#') continue;
-		String s(line);
-		int numTokens = s.tokenCount();
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-
-		indices.clear();
-		tmp.clear();
-		pot_names.clear();		    
-
-		printf("\rDEBUG: reading line %d",numProductPotentials+1);
-
-		// Legitimate ProductPotential inputs have at least 7 tokens
-		// BONDANGLE | INDEX1 | INDEX2 | INDEX3 | [TYPE1] | POT_FILENAME1 | INDEX4 | INDEX5 | [TYPE2] POT_FILENAME2 ...
-		if (numTokens < 5) {
-		    printf("WARNING: Invalid product potential input line (too few tokens %d): %s\n", numTokens, line);
-			continue;
-		}
-
-		// Discard any empty line
-		if (tokenList == NULL)
-			continue;
-
-		SimplePotentialType type = BOND; // initialize to suppress warning
-		bool type_specified = false;
-		for (int i = 1; i < numTokens; ++i) {
-		    char *end;
-		    // printf("DEBUG: Working on token %d '%s'\n", i, tokenList[i].val());
-
-		    // Try to convert token to integer
-		    int index = (int) strtol(tokenList[i].val(), &end, 10);
-		    if (tokenList[i].val() == end || *end != '\0' || errno == ERANGE) {
-			// Failed to convert token to integer; therefore it must be a potential name or type
-
-			// Try to match a type
-			String n = tokenList[i];
-			n.lower();
-			if (n == "bond") { type = DIHEDRAL; type_specified = true; }
-			else if (n == "angle")  { type = DIHEDRAL; type_specified = true; }
-			else if (n == "dihedral")  { type = DIHEDRAL; type_specified = true; }
-			else if (n == "vecangle") { type = VECANGLE; type_specified = true; }
-			else { // Not a type, therefore a path to a potential
-			    n = tokenList[i];
-			    indices.push_back(tmp);
-			    pot_names.push_back( n );
-			    // TODO: Key should be tuple of (type,n)
-			    std::string n_str = std::string(n.val());
-			    if ( simple_potential_ids.find(n_str) == simple_potential_ids.end() ) {
-				// Could not find fileName in dictionary, so read and add it
-				unsigned int s = tmp.size();
-				if (s < 2 || s > 4) {
-				    printf("WARNING: Invalid product potential input line (indices of potential %d == %d): %s\n", i, s, line);
-				    continue;
-				}
-				simple_potential_ids[ n_str ] = simple_potentials.size();
-				if (not type_specified) type = s==2? BOND: s==3? ANGLE: DIHEDRAL;
-				simple_potentials.push_back( SimplePotential(n.val(), type) );
-			    }
-			    tmp.clear();
-			    type_specified = false;
-
-			}
-		    } else {
-			if (index >= num) {
-			    continue;
-			}
-			tmp.push_back(index);
-		    }
-		}
-
-		if (numProductPotentials >= capacity) {
-			ProductPotentialConf* temp = productPotentials;
-			capacity *= 2;
-			productPotentials = new ProductPotentialConf[capacity];
-			for (int i = 0; i < numProductPotentials; i++)
-				productPotentials[i] = temp[i];
-			delete[] temp;
-		}
-
-		ProductPotentialConf a(indices, pot_names);
-		productPotentials[numProductPotentials++] = a;
-		delete[] tokenList;
-	}
-	printf("\nDEBUG: Sorting\n");
-	std::sort(productPotentials, productPotentials + numProductPotentials, compare());
-
-	// for(int i = 0; i < numAngles; i++)
-	// 	angles[i].print();
-}
-
-
-void Configuration::readRestraints() {
-	FILE* inp = fopen(restraintFile.val(), "r");
-	char line[256];
-	int capacity = 16;
-	numRestraints = 0;
-	restraints = new Restraint[capacity];
-
-	// If the restraint file cannot be found, exit the program
-	if (inp == NULL) {
-		printf("WARNING: Could not open `%s'.\n", restraintFile.val());
-		printf("  This simulation will not use restraints.\n");
-		return;
-	}
-
-	while(fgets(line, 256, inp)) {
-		if (line[0] == '#') continue;
-		String s(line);
-		int numTokens = s.tokenCount();
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-
-		// inputs have 6 tokens
-		// RESTRAINT | INDEX1 | k | x0 | y0 | z0
-		if (numTokens != 6) {
-			printf("WARNING: Invalid restraint input line: %s\n", line);
-			continue;
-		}
-
-		// Discard any empty line
-		if (tokenList == NULL) continue;
-
-		int   id = atoi(tokenList[1].val());
-		float k  = (float) strtod(tokenList[2].val(), NULL);
-		float x0 = (float) strtod(tokenList[3].val(), NULL);
-		float y0 = (float) strtod(tokenList[4].val(), NULL);
-		float z0 = (float) strtod(tokenList[5].val(), NULL);
-
-		if (id >= num + num_rb_attached_particles + numGroupSites) continue;
-
-		if (numRestraints >= capacity) {
-			Restraint* temp = restraints;
-			capacity *= 2;
-			restraints = new Restraint[capacity];
-			for (int i = 0; i < numRestraints; ++i)
-				restraints[i] = temp[i];
-			delete[] temp;
-		}
-
-		Restraint tmp(id, Vector3(x0,y0,z0), k);
-		restraints[numRestraints++] = tmp;
-		delete[] tokenList;
-	}
-	// std::sort(restraints, restraints + numRestraints, compare());
-}
-
-//populate the type list and serial list
-void Configuration::populate() {
-    for (int repID = 0; repID < simNum; ++repID) {
-                const int offset = repID * num;
-                int pn = 0;
-                int p = 0;
-                for (int i = 0; i < num; ++i) {
-                        type[i + offset] = p;
-                        serial[i + offset] = currSerial++;
-
-                        if (++pn >= part[p].num) {
-                                p++;
-                                pn = 0;
-                        }
-                }
-        }
-}
-
-bool Configuration::readBondFile(const String& value, int currBond) {
-	int numTokens = value.tokenCount();
-	if (numTokens != 1) {
-		printf("ERROR: Invalid tabulatedBondFile: %s, numTokens = %d\n", value.val(), numTokens);
-		return false;
-	}
-
-	String* tokenList = new String[numTokens];
-	value.tokenize(tokenList);
-	if (tokenList == NULL) {
-		printf("ERROR: Invalid tabulatedBondFile: %s; tokenList is NULL\n", value.val());
-		return false;
-	}
-
-	bondTableFile[currBond] = tokenList[0];
-
-	// printf("Tabulated Bond Potential: %s\n", bondTableFile[currBond].val() );
-
-	return true;
-}
-
-bool Configuration::readAngleFile(const String& value, int currAngle) {
-	int numTokens = value.tokenCount();
-	if (numTokens != 1) {
-		printf("ERROR: Invalid tabulatedAngleFile: %s, numTokens = %d\n", value.val(), numTokens);
-		return false;
-	}
-
-	String* tokenList = new String[numTokens];
-	value.tokenize(tokenList);
-	if (tokenList == NULL) {
-		printf("ERROR: Invalid tabulatedAngleFile: %s; tokenList is NULL\n", value.val());
-		return false;
-	}
-
-	angleTableFile[currAngle] = tokenList[0];
-
-	// printf("Tabulated Angle Potential: %s\n", angleTableFile[currAngle].val() );
-
-	return true;
-}
-
-bool Configuration::readDihedralFile(const String& value, int currDihedral) {
-	int numTokens = value.tokenCount();
-	if (numTokens != 1) {
-		printf("ERROR: Invalid tabulatedDihedralFile: %s, numTokens = %d\n", value.val(), numTokens);
-		return false;
-	}
-
-	String* tokenList = new String[numTokens];
-	value.tokenize(tokenList);
-	if (tokenList == NULL) {
-		printf("ERROR: Invalid tabulatedDihedralFile: %s; tokenList is NULL\n", value.val());
-		return false;
-	}
-
-	dihedralTableFile[currDihedral] = tokenList[0];
-
-	// printf("Tabulated Dihedral Potential: %s\n", dihedralTableFile[currDihedral].val() );
-
-	return true;
-}
-// Load particle types and coordinates from a restart file.
-//
-// Every line that is not a comment (leading '#') and is at least two
-// characters long must hold four tokens: `type x y z`.  For each such line the
-// next slot of pos[], type[] and serial[] is filled and currSerial advances.
-// Any malformed line or out-of-range type prints a message and exits.
-// NOTE(review): `count` is not checked against the capacity of pos/type/serial;
-// presumably the caller sized them with countRestart() -- confirm.
-void Configuration::loadRestart(const char* file_name) {
-	char line[STRLEN];
-	FILE* inp = fopen(file_name, "r");
-
-	if (inp == NULL) {
-		printf("GrandBrownTown:loadRestart File `%s' does not exist\n", file_name);
-		exit(-1);
-	}
-
-	int count = 0;
-	while (fgets(line, STRLEN, inp) != NULL) {
-		// Ignore comments and lines too short to hold a record.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-
-		String s(line);
-		int numTokens = s.tokenCount();
-		if (numTokens != 4) {
-			printf("GrandBrownTown:loadRestart Invalid coordinate file line: %s\n", line);
-			fclose(inp);
-			exit(-1);
-		}
-
-		// new[] throws on failure, so the array is always valid here.
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-
-		int typ = atoi(tokenList[0]);
-		float x = (float) strtod(tokenList[1],NULL);
-		float y = (float) strtod(tokenList[2],NULL);
-		float z = (float) strtod(tokenList[3],NULL);
-		delete[] tokenList;
-
-		// Validate the type before anything is stored, and report the error
-		// under this function's name (it used to be labelled countRestart).
-		if (typ < 0 || typ >= numParts) {
-			printf("GrandBrownTown:loadRestart Invalid particle type: %d\n", typ);
-			fclose(inp);
-			exit(-1);
-		}
-
-		pos[count] = Vector3(x,y,z);
-		type[count] = typ;
-		serial[count] = currSerial;
-		currSerial++;
-
-		count++;
-	}
-
-	fclose(inp);
-}
-//Han-Yi Chou
-// Load per-particle momenta from a restart file.
-//
-// The restart coordinates must already be loaded (loadedCoordinates == true),
-// because the type on every momentum line is compared with type[] at the same
-// index.  Each data line must hold four tokens: `type px py pz`.  Any
-// inconsistency prints a message and exits.
-void Configuration::loadRestartMomentum(const char* file_name) 
-{
-    char line[STRLEN];
-    FILE* inp = fopen(file_name, "r");
-
-    if (inp == NULL) 
-    {
-        printf("GrandBrownTown:loadRestart File `%s' does not exist\n", file_name);
-        exit(-1);
-    }
-    if(!loadedCoordinates)
-    {
-        // This used to be assert(1==2), which is compiled out under NDEBUG and
-        // would let the loop below compare against an unloaded type[] array.
-        printf("First load the restart coordinates\n");
-        fclose(inp);
-        exit(-1);
-    }
-    int count = 0;
-    while (fgets(line, STRLEN, inp) != NULL) 
-    {
-        // Ignore comments and lines too short to hold a record.
-        int len = strlen(line);
-        if (line[0] == '#') continue;
-        if (len < 2) continue;
-
-        String s(line);
-        int numTokens = s.tokenCount();
-        if (numTokens != 4) 
-        {
-            printf("GrandBrownTown:loadRestart Invalid momentum file line: %s\n", line);
-            fclose(inp);
-            exit(-1);
-        }
-
-        // new[] throws on failure, so the array is always valid here.
-        String* tokenList = new String[numTokens];
-        s.tokenize(tokenList);
-
-        int typ = atoi(tokenList[0]);
-        float x = (float) strtod(tokenList[1],NULL);
-        float y = (float) strtod(tokenList[2],NULL);
-        float z = (float) strtod(tokenList[3],NULL);
-        delete[] tokenList;
-
-        if (typ < 0 || typ >= numParts) 
-        {
-            printf("GrandBrownTown:countRestart Invalid particle type : %d\n", typ);
-            fclose(inp);
-            exit(-1);
-        }
-
-        // The momentum file must list particles in the same order as the
-        // coordinate restart file.
-        if(typ != type[count])
-        {
-            printf("Inconsistent in momentum file with the position file\n");
-            fclose(inp);
-            exit(-1);
-        }
-        momentum[count] = Vector3(x,y,z);
-        ++count;
-    }
-    fclose(inp);
-}
-
-// Read particle coordinates (three tokens `x y z` per data line) into pos[].
-//
-// Lines starting with '#' or shorter than two characters are skipped.
-// Returns false on a malformed line or when fewer than `num` coordinates were
-// read.  Returns true (with a warning) as soon as more than num*simNum
-// coordinates are encountered.  Exits the process if the file cannot be opened.
-bool Configuration::loadCoordinates(const char* file_name) {
-	char line[STRLEN];
-	FILE* inp = fopen(file_name, "r");
-
-	if (inp == NULL) {
-	    printf("ERROR: Could not open file for reading: %s\n", file_name);
-	    exit(-1);
-	    return false;
-	}
-
-	int count = 0;
-	while (fgets(line, STRLEN, inp) != NULL) {
-		// Ignore comments and lines too short to hold a record.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-
-		String s(line);
-		int numTokens = s.tokenCount();
-		if (numTokens != 3) {
-			printf("ERROR: Invalid coordinate file line: %s\n", line);
-			fclose(inp);	
-			return false;
-		}
-
-		// Capacity is checked before the token array is allocated; the old
-		// order leaked the array on this early return.
-		if (count >= num*simNum) {
-			printf("WARNING: Too many coordinates in coordinate file %s.\n", file_name);
-			fclose(inp);
-			return true;
-		}
-
-		// new[] throws on failure, so the array is always valid here.
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-
-		float x = (float) strtod(tokenList[0],NULL);
-		float y = (float) strtod(tokenList[1],NULL);
-		float z = (float) strtod(tokenList[2],NULL);
-		pos[count] = Vector3(x,y,z);
-		count++;
-
-		delete[] tokenList;
-	}
-	fclose(inp);
-
-	// NOTE(review): the lower bound is `num` while the upper bound above is
-	// num*simNum -- confirm this asymmetry is intended.
-	if (count < num) {
-		printf("ERROR: Too few coordinates in coordinate file.\n");
-		return false;
-	}
-	return true;
-}
-//Han-Yi Chou The function populate should be called before entering this function
-// Read particle momenta (three tokens `px py pz` per data line) into momentum[].
-//
-// Lines starting with '#' or shorter than two characters are skipped.
-// Returns true only when exactly `num` momenta were read; returns false when
-// the file cannot be opened, a line is malformed, or there are too many or too
-// few entries.
-bool Configuration::loadMomentum(const char* file_name) 
-{
-    char line[STRLEN];
-    FILE* inp = fopen(file_name, "r");
-
-    if (inp == NULL) 
-        return false;
-
-    int count = 0;
-    while (fgets(line, STRLEN, inp) != NULL) 
-    {
-        // Ignore comments and lines too short to hold a record.
-        int len = strlen(line);
-        if (line[0] == '#') 
-            continue;
-        if (len < 2) 
-            continue;
-
-        String s(line);
-        int numTokens = s.tokenCount();
-        if (numTokens != 3) 
-        {
-            printf("ERROR: Invalid momentum file line: %s\n", line);
-            fclose(inp);
-            return false;
-        }
-
-        // Capacity is checked before the token array is allocated; the old
-        // order leaked the array on this early return.
-        if (count >= num) 
-        {
-            printf("WARNING: Too many momentum in momentum file %s.\n", file_name);
-            fclose(inp);
-            return false;
-        }
-
-        // new[] throws on failure, so the array is always valid here.
-        String* tokenList = new String[numTokens];
-        s.tokenize(tokenList);
-
-        float x = (float) strtod(tokenList[0],NULL);
-        float y = (float) strtod(tokenList[1],NULL);
-        float z = (float) strtod(tokenList[2],NULL);
-        momentum[count] = Vector3(x,y,z);
-        ++count;
-        delete[] tokenList;
-    }
-    fclose(inp);
-
-    if (count < num) 
-    {
-        printf("ERROR: Too few momentum in momentum file.\n");
-        return false;
-    }
-    return true;
-}
-
-// Return how many particle records a restart file contains.
-//
-// A record is any line that does not start with '#' and is at least two
-// characters long; it must hold four tokens whose first is a particle type in
-// [0, numParts).  A missing file, malformed record or bad type prints an error
-// and exits the process.
-int Configuration::countRestart(const char* file_name) {
-	FILE* restartFile = fopen(file_name, "r");
-	if (restartFile == NULL) {
-		printf("ERROR: countRestart File `%s' does not exist\n", file_name);
-		exit(-1);
-	}
-
-	char buf[STRLEN];
-	int nRecords = 0;
-	while (fgets(buf, STRLEN, restartFile) != NULL) {
-		// Comments and near-empty lines carry no record.
-		if (buf[0] == '#' || strlen(buf) < 2)
-			continue;
-
-		String record(buf);
-		const int nTok = record.tokenCount();
-		if (nTok != 4) {
-			printf("ERROR: countRestart Invalid coordinate file line: %s\n", buf);
-			fclose(restartFile);
-			exit(-1);
-		}
-
-		String* tokens = new String[nTok];
-		record.tokenize(tokens);
-		if (tokens == NULL) {
-			printf("ERROR: countRestart Invalid coordinate file line: %s\n", buf);
-			fclose(restartFile);
-			exit(-1);
-		}
-
-		// Only the type column is inspected; the coordinates are ignored here.
-		const int particleType = atoi(tokens[0]);
-		const bool typeOk = (particleType >= 0) && (particleType < numParts);
-		if (!typeOk) {
-			printf("ERROR: countRestart Invalid particle type: %d\n", particleType);
-			fclose(restartFile);
-			exit(-1);
-		}
-
-		delete[] tokens;
-		++nRecords;
-	}
-
-	fclose(restartFile);
-	return nRecords;
-}
-
-// Parse a `tabulatedFile` value of the form `index0@index1@file` and store the
-// three fields in slot currTab of partTableIndex0, partTableIndex1 and
-// partTableFile.
-//
-// Returns false (after printing an error) when the value does not split into
-// three '@'-separated tokens.  Exits the process when currTab is not below
-// numParts*numParts.
-bool Configuration::readTableFile(const String& value, int currTab) {
-	const int fieldCount = value.tokenCount('@');
-	if (fieldCount != 3) {
-		printf("ERROR: Invalid tabulatedFile: %s\n", value.val());
-		return false;
-	}
-
-	String* fields = new String[fieldCount];
-	value.tokenize(fields, '@');
-	if (fields == NULL) {
-		printf("ERROR: Invalid tabulatedFile: %s\n", value.val());
-		return false;
-	}
-
-	// At most one table per ordered pair of particle types.
-	const bool slotAvailable = currTab < numParts*numParts;
-	if (!slotAvailable) {
-	    printf("ERROR: Number of tabulatedFile entries exceeded %d*%d particle types.\n", numParts,numParts);
-	    exit(1);
-	}
-
-	partTableFile[currTab] = fields[2];
-	partTableIndex1[currTab] = atoi(fields[1]);
-	partTableIndex0[currTab] = atoi(fields[0]);
-
-	delete[] fields;
-	return true;
-}
-
-// Interactively ask on stdin which force computation to use and set
-// tabulatedPotential / fullLongRange accordingly.
-//
-// The menu and the choice currently implied by the config file are printed
-// first.  The prompt repeats until a number in 1..6 is entered.  If stdin
-// reaches end-of-file or fails, option (1), the menu's default, is selected
-// instead of prompting forever.
-void Configuration::getDebugForce() {
-	// Allow the user to choose which force computation to use
-	printf("\n");
-	printf("(1) ComputeFull [Default]          (2) ComputeSoftcoreFull\n");
-	printf("(3) ComputeElecFull                (4) Compute (Decomposed)\n");
-	printf("(5) ComputeTabulated (Decomposed)  (6) ComputeTabulatedFull\n");
-
-	printf("WARNING: ");
-	if (tabulatedPotential) {
-		if (fullLongRange) printf("(6) was specified by config file\n");
-		else printf("(5) was specified by config file\n");
-	} else {
-		if (fullLongRange != 0) printf("(%d) was specified by config file\n", fullLongRange);
-		else printf("(4) was specified by config file\n");
-	}
-
-	char buffer[256];
-	int choice = 1;
-	while (true) {
-		printf("Choose a force computation (1 - 6): ");
-		if (fgets(buffer, sizeof(buffer), stdin) == NULL) {
-			// No more input: fall back to the default rather than spinning.
-			choice = 1;
-			break;
-		}
-		// sscanf returns EOF (-1, which is truthy) on an empty buffer, so the
-		// result must be compared with 1 instead of being used as a boolean.
-		bool good = (sscanf(buffer, "%d", &choice) == 1) && (choice >= 1 && choice <= 6);
-		if (good)
-			break;
-	}
-	switch(choice) {
-		case 1:
-			tabulatedPotential = 0;
-			fullLongRange = 1;
-			break;
-		case 2:
-			tabulatedPotential = 0;
-			fullLongRange = 2;
-			break;
-		case 3:
-			tabulatedPotential = 0;
-			fullLongRange = 3;
-			break;
-		case 4:
-			tabulatedPotential = 0;
-			fullLongRange = 0;
-			break;
-		case 5:
-			tabulatedPotential = 1;
-			fullLongRange = 0;
-			break;
-		case 6:
-			tabulatedPotential = 1;
-			fullLongRange = 1;
-			break;
-		default:
-			tabulatedPotential = 0;
-			fullLongRange = 1;
-			break;
-	}
-	printf("\n");
-}
-//Han-Yi Chou
-// Draw a momentum for each of the first N particles from a Gaussian whose
-// width is sqrt(kT * mass) times a fixed conversion factor, then (for N > 1)
-// subtract the mean drawn momentum and add mass * v_com to every particle.
-// type[] must already be initialised for those N particles.  Always returns true.
-bool Configuration::Boltzmann(const Vector3& v_com, int N)
-{
-    Vector3 meanMomentum(0.);
-
-    RandomCPU rng(seed + 2); /* +2 to avoid using same seed elsewhere */
-
-    for (int p = 0; p < N; ++p)
-    {
-        const double mass = part[type[p]].mass;
-        const double width = sqrt(kT * mass) * 2.046167337e4;
-
-        Vector3 draw = rng.gaussian_vector() * width;
-        draw = draw * 1e-4;
-
-        momentum[p] = draw;
-        meanMomentum += draw;
-    }
-
-    // With a single particle there is no drift to remove; it keeps its raw draw.
-    if (N <= 1)
-        return true;
-
-    meanMomentum = meanMomentum / (double)N;
-    for (int p = 0; p < N; ++p)
-    {
-        const double mass = part[type[p]].mass;
-        momentum[p] = momentum[p] - meanMomentum + mass * v_com;
-    }
-
-    return true;
-} 
-
-//////////////////////////
-// Comparison operators //
-//////////////////////////
-// Order two strings by the integer value of their second token.
-// NOTE(review): both strings are assumed to hold at least two tokens; index 1
-// is read without a check.
-bool Configuration::compare::operator()(const String& lhs, const String& rhs) {
-	String* lhsTokens = new String[lhs.tokenCount()];
-	lhs.tokenize(lhsTokens);
-	const int lhsKey = atoi(lhsTokens[1].val());
-
-	String* rhsTokens = new String[rhs.tokenCount()];
-	rhs.tokenize(rhsTokens);
-	const int rhsKey = atoi(rhsTokens[1].val());
-
-	delete[] lhsTokens;
-	delete[] rhsTokens;
-	return lhsKey < rhsKey;
-}
-
-// Lexicographic ordering of bonds by (ind1, ind2).
-bool Configuration::compare::operator()(const Bond& lhs, const Bond& rhs) {
-	if (lhs.ind1 == rhs.ind1)
-		return lhs.ind2 < rhs.ind2;
-	return lhs.ind1 < rhs.ind1;
-}
-
-// Lexicographic ordering of exclusions by (ind1, ind2).
-bool Configuration::compare::operator()(const Exclude& lhs, const Exclude& rhs) {
-	if (lhs.ind1 == rhs.ind1)
-		return lhs.ind2 < rhs.ind2;
-	return lhs.ind1 < rhs.ind1;
-}
-
-// Lexicographic ordering of angles by (ind1, ind2, ind3).
-bool Configuration::compare::operator()(const Angle& lhs, const Angle& rhs) {
-	if (lhs.ind1 != rhs.ind1)
-		return lhs.ind1 < rhs.ind1;
-	if (lhs.ind2 != rhs.ind2)
-		return lhs.ind2 < rhs.ind2;
-	return lhs.ind3 < rhs.ind3;
-}
-
-// Lexicographic ordering of dihedrals by (ind1, ind2, ind3, ind4).
-bool Configuration::compare::operator()(const Dihedral& lhs, const Dihedral& rhs) {
-	if (lhs.ind1 != rhs.ind1)
-		return lhs.ind1 < rhs.ind1;
-	if (lhs.ind2 != rhs.ind2)
-		return lhs.ind2 < rhs.ind2;
-	if (lhs.ind3 != rhs.ind3)
-		return lhs.ind3 < rhs.ind3;
-	return lhs.ind4 < rhs.ind4;
-}
-
-// Lexicographic ordering of bond-angles by (ind1, ind2, ind3, ind4).
-bool Configuration::compare::operator()(const BondAngle& lhs, const BondAngle& rhs) {
-	if (lhs.ind1 != rhs.ind1)
-		return lhs.ind1 < rhs.ind1;
-	if (lhs.ind2 != rhs.ind2)
-		return lhs.ind2 < rhs.ind2;
-	if (lhs.ind3 != rhs.ind3)
-		return lhs.ind3 < rhs.ind3;
-	return lhs.ind4 < rhs.ind4;
-}
-
-// Strict ordering of product potentials: first by the number of index groups,
-// then by the size of each group in turn, then element by element.
-//
-// Returns true only when lhs orders strictly before rhs.  Equivalent arguments
-// yield false; the previous `return true` for that case broke the strict weak
-// ordering that std::sort and the ordered containers require of a comparator.
-bool Configuration::compare::operator()(const ProductPotentialConf& lhs, const ProductPotentialConf& rhs) {
-    // Sizes are compared directly rather than through an int difference of
-    // unsigned values.
-    if (lhs.indices.size() != rhs.indices.size())
-	return lhs.indices.size() < rhs.indices.size();
-
-    for (unsigned int i = 0; i < lhs.indices.size(); ++i) {
-	if (lhs.indices[i].size() != rhs.indices[i].size())
-	    return lhs.indices[i].size() < rhs.indices[i].size();
-    }
-
-    // All group sizes match from here on, so rhs can be indexed with lhs bounds.
-    for (unsigned int i = 0; i < lhs.indices.size(); ++i) {
-	for (unsigned int j = 0; j < lhs.indices[i].size(); ++j) {
-	    if (lhs.indices[i][j] != rhs.indices[i][j])
-		return lhs.indices[i][j] < rhs.indices[i][j];
-	}
-    }
-    return false;
-}
diff --git a/src/Configuration.h b/src/Configuration.h
deleted file mode 100644
index 7fb589df2b6cca944941d864921b44c4433257b7..0000000000000000000000000000000000000000
--- a/src/Configuration.h
+++ /dev/null
@@ -1,289 +0,0 @@
-// Configuration.h (2013)
-// Loads .brown config file that can be shared between simulations
-// To be used by GrandBrownTown to initialize its members
-//
-// Authors: Terrance Howward <howard33@illinois.edu>
-//          Justin Dufresne <jdufres1@friars.providence.edu>
-//
-
-#ifndef CONFIGURATION_H
-#define CONFIGURATION_H
-
-#include <algorithm> // sort
-#include <vector>
-#include <map>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-
-#include "useful.h" // String, Vector3
-#include "BrownianParticleType.h"
-#include "BaseGrid.h"
-#include "OverlordGrid.h"
-#include "ComputeForce.h"
-#include "Reader.h"
-#include "TrajectoryWriter.h"
-#include "TabulatedPotential.h"
-#include "TabulatedAngle.h"
-#include "ProductPotential.h"
-#include "GPUManager.h"
-#include "RigidBodyType.h"
-#include "RigidBody.h"
-
-// Units:
-//    Energy: kcal/mol (6.947694e-24 kJ)
-//    Temperature: Kelvin
-//    Time: nanoseconds
-//    Length: angstroms
-//    Momentum: Da * \mu m / ns
-
-// Forward declarations
-class Angle;
-class Dihedral;
-struct Restraint;
-
-// Holds everything read from a .brown configuration file: system parameters,
-// particle types, per-particle arrays and the bonded-term tables, plus the
-// device-side pointers filled by copyToCUDA().  Parsing helpers are private;
-// the parsed data members are public.
-class Configuration {
-	// Comparators used when ordering the parsed records.
-	struct compare {
-		bool operator()(const String& lhs, const String& rhs);
-		bool operator()(const Bond& lhs, const Bond& rhs);
-		bool operator()(const Exclude& lhs, const Exclude& rhs);
-		bool operator()(const Angle& lhs, const Angle& rhs);
-		bool operator()(const Dihedral& lhs, const Dihedral& rhs);
-		bool operator()(const BondAngle& lhs, const BondAngle& rhs);
-		bool operator()(const ProductPotentialConf& lhs, const ProductPotentialConf& rhs);
-	};
-
-	void setDefaults();
-	Vector3 stringToVector3(String s);
-	Matrix3 stringToMatrix3(String s);
-
-	int readParameters(const char* config_file);
-	void readAngles();
-	void readAtoms();
-	void readGroups();
-	void readBonds();
-	void readExcludes();
-	void addExclusion(int ind1, int ind2);
-	void buildExcludeMap();
-	void readDihedrals();
-	void readRestraints();
-	void readBondAngles();
-
-
-	// Each of these parses one configuration value into slot curr* of the
-	// matching table-file array and returns false on a malformed value.
-	bool readTableFile(const String& value, int currTab);
-	bool readBondFile(const String& value, int currBond);
-	bool readAngleFile(const String& value, int currAngle);
-	bool readDihedralFile(const String& value, int currDihedral);
-
-	bool readBondAngleFile(const String& value, const String& bondfile1, const String& bondfile2, int currBondAngle);
-
-	// Given the numbers of each particle, populate the type list.
-	void populate();
-
-	void loadRestart(const char* file_name);
-	bool loadCoordinates(const char* file_name);
-	int countRestart(const char* file_name);
-
-	void getDebugForce();
-
-        //Han-Yi Chou
-        bool Boltzmann(const Vector3& com_v,int N);
-        bool loadMomentum(const char* file_name);
-        void loadRestartMomentum(const char* file_name);
-        void Print();
-        void PrintMomentum();
-public:
-	Configuration(const char * config_file, int simNum = 0, bool debug=false);
-	~Configuration();
-
-    // Return the index in part[] of the particle type named s, or -1 if no
-    // type has that name.
-    int find_particle_type(const char* s) const {
-	for (int j = 0; j < numParts; j++) {
-	    // printf("Searching particle %d (%s) =? %s\n", j, part[j].name.val(), s);
-	    if (strcmp(s,part[j].name.val()) == 0)
-		return j;
-	}
-	return -1;
-    }
-
-	void copyToCUDA();
-
-	// Output variables
-	Vector3 sysDim;
-	BaseGrid* sys;
-	// temporary variables
-	Vector3 origin, size, basis1, basis2, basis3;
-
-
-	bool loadedCoordinates;
-        bool loadedMomentum;
-
-	// Device Variables
-	//int *type_d;
-	BrownianParticleType **part_d;
-	BaseGrid *sys_d, *kTGrid_d;
-	//Bond* bonds_d;
-	//int2* bondMap_d;
-	//Exclude* excludes_d;
-	//int2* excludeMap_d;
-	//Angle* angles_d;
-	//Dihedral* dihedrals_d;
-
-	// number of simulations
-	int simNum;
-
-	// Particle variables
-	String* partsFromFile;
-	int* indices;
-	int numPartsFromFile;
-	Bond* bonds;
-	int numCap; // max number of particles
-	int num; // current number of particles
-    int num_rb_attached_particles;
-        Vector3* pos; //  position of each particle
-        Vector3* momentum; //momentum of each brownian particles Han-Yi Chou
-        Vector3  COM_Velocity; //center of mass velocity Han-Yi Chou
-	int* type; // type of each particle
-	int* serial; // serial number of each particle
-	int currSerial; // the serial number of the next new particle
-	String* name; // name of each particle
-	Vector3* posLast; // used for current computation
-        Vector3* momLast; //used for Langevin dynamics
-	float timeLast; // used with posLast
-	float minimumSep; // minimum separation allowed with placing new particles
-
-
-	// RigidBody variables
-	/* int numRB; */
-	/* std::vector< std::vector<RigidBody> > rbs; */
-	
-	// System parameters
-	String outputName;
-	float timestep;
-	long int steps;
-	long int seed;
-	// String kTGridFile;
-	String temperatureGridFile;
-	String inputCoordinates;
-        String inputMomentum; //Han-Yi Chou
-	String inputRBCoordinates;
-	String restartRBCoordinates;
-	int copyReplicaCoordinates;
-	String restartCoordinates;
-        String restartMomentum; //Han-Yi Chou
-	int numberFluct;
-	int interparticleForce;
-	int tabulatedPotential;
-	int fullLongRange;
-	float kT;
-	float temperature;
-	float coulombConst;
-	float electricField;
-	float cutoff;
-	float pairlistDistance;
-	float switchLen;
-	float imdForceScale;
-	int outputPeriod;
-	int outputEnergyPeriod;
-	int outputFormat;
-	float currentSegmentZ;
-	int numberFluctPeriod;
-	int decompPeriod;
-	int numCapFactor;
-	BaseGrid* kTGrid;
-	BaseGrid* tGrid;
-	BaseGrid* sigmaT;
-	unsigned long randoSeed;
-
-	// Other parameters.
-	int rigidBodyGridGridPeriod;
-	float switchStart;
-	float maxInitialPot;
-	float initialZ;
-
-	// Particle parameters.
-	BrownianParticleType* part;
-	int numParts;
-	int numBonds;
-	int numExcludes;
-	int numAngles;
-	int numDihedrals;
-	int numBondAngles;
-	int numRestraints;
-	int* numPartsOfType;
-	String partFile;
-	String bondFile;
-	String excludeFile;
-	String angleFile;
-	String dihedralFile;
-	String restraintFile;
-	String bondAngleFile;
-	bool readPartsFromFile;
-	bool readGroupSitesFromFile;
-	bool readBondsFromFile;
-	bool readExcludesFromFile;
-	bool readAnglesFromFile;
-	bool readDihedralsFromFile;
-	bool readBondAnglesFromFile;
-	bool readRestraintsFromFile;
-	//String* partGridFile;
-	String **partGridFile;
-	//float* partGridFileScale;
-	float **partGridFileScale;
-        //int *numPartGridFiles;
-    std::map<std::string,BaseGrid> part_grid_dictionary;
-    std::map<std::string,BaseGrid*> part_grid_dictionary_d;
-	std::vector< std::vector<String> > partRigidBodyGrid;
-	String* partDiffusionGridFile;
-	String* partForceXGridFile;
-	String* partForceYGridFile;
-	String* partForceZGridFile;
-	String* partTableFile;
-	String* partReservoirFile;
-	int* partTableIndex0;
-	int* partTableIndex1;
-
-	String groupSiteFile;
-	int numGroupSites;
-	std::vector<std::vector<int>> groupSiteData;
-
-	String* bondTableFile;
-	int numTabBondFiles;
-	int2* bondMap;
-	
-	Exclude* excludes;
-	int2* excludeMap;
-	String excludeRule;
-	int excludeCapacity;
-
-	Angle* angles;
-	String* angleTableFile;
-	int numTabAngleFiles;
-
-	Dihedral* dihedrals;
-	String* dihedralTableFile;
-	int numTabDihedralFiles;
-
-	BondAngle* bondAngles;
-
-	Restraint* restraints;
-
-	void readProductPotentials();
-	String productPotentialFile;
-	int numProductPotentials;
-	bool readProductPotentialsFromFile;
-        ProductPotentialConf* productPotentials;
-	XpotMap simple_potential_ids;
-        std::vector<SimplePotential> simple_potentials;
-
-        //Han-Yi Chou
-        String ParticleDynamicType;
-        String RigidBodyDynamicType;
-        String ParticleLangevinIntegrator;
-	// RigidBody parameters.
-	RigidBodyType* rigidBody;
-	int numRigidTypes;
-        int ParticleInterpolationType;
-        int RigidBodyInterpolationType;
-};
-
-#endif
diff --git a/src/CudaUtil.cu b/src/CudaUtil.cu
deleted file mode 100644
index 8a023397f9f7a705eb8a16ca15259fe23d0f0c5a..0000000000000000000000000000000000000000
--- a/src/CudaUtil.cu
+++ /dev/null
@@ -1,63 +0,0 @@
-#include "CudaUtil.cuh"
-#include <cuda_runtime_api.h>
-
-#ifndef CUDART_VERSION
-#error CUDART_VERSION Undefined!
-#elif (CUDART_VERSION < 9000)
-
-// warp_bcast(v, leader): every calling lane receives the value of `v` held by
-// lane `leader` of its warp.  Three implementations are selected by target
-// architecture.
-#if __CUDA_ARCH__ < 300
-// No shuffle instruction: stage the value through dynamic shared memory, one
-// int slot per warp of the block.
-volatile extern __shared__ int sh[];
-__device__ int warp_bcast(int v, int leader) {
-	// WARNING: might not be safe to call in divergent branches 
-	const int tid = threadIdx.x;
-	const int warpLane = tid % WARPSIZE;
-	if (warpLane == leader)
-		sh[tid/WARPSIZE] = v;
-	return sh[tid/WARPSIZE];		
-}	
-#elif __CUDA_ARCH__ < 700
-__device__ int warp_bcast(int v, int leader) {return __shfl(v, leader); }
-#else
-// __shfl_sync takes the participating-lane mask as its FIRST argument; the
-// previous call passed (v, leader), i.e. used the payload as the mask and the
-// lane index as the payload.
-__device__ int warp_bcast(int v, int leader) {return __shfl_sync(__activemask(), v, leader); }
-#endif
-
-// Warp-aggregated atomic increment: the active lanes of a warp elect a leader
-// that performs one atomicAdd for all of them, and each lane gets back its own
-// distinct slot (old counter value + its rank among the active lanes).
-//
-// ctr:      counter to increment
-// warpLane: the calling thread's lane index within its warp (0..31)
-__device__ int atomicAggInc(int *ctr, int warpLane) {
-	// https://devblogs.nvidia.com/parallelforall/cuda-pro-tip-optimized-filtering-warp-aggregated-atomics/
-	// The ballot result is a 32-bit lane mask; keep it unsigned so bit 31 is
-	// an ordinary bit rather than a sign bit.
-	const unsigned int mask = __ballot(1);
-	const int leader = __ffs(mask)-1;
-
-	// Initialised so that non-leader lanes do not pass an indeterminate value
-	// into warp_bcast.
-	int res = 0;
-	if ( warpLane == leader )
-		res = atomicAdd(ctr, __popc(mask));
-	res = warp_bcast(res,leader);
-	// 1u avoids signed overflow in (1 << 31) - 1 when warpLane == 31.
-	return res + __popc( mask & ((1u << warpLane) - 1u) );
-}
-#endif
-
-// Sum the `num` entries of vector[] into *netVector.
-//
-// Each block reduces one blockDim.x-sized tile at a time in dynamic shared
-// memory (the launch must supply blockDim.x * sizeof(Vector3) bytes) and
-// thread 0 adds the tile total to *netVector atomically.
-// NOTE(review): the halving reduction only covers every slot when blockDim.x
-// is a power of two -- confirm at the launch sites.
-__global__
-void reduceVector(const int num, Vector3* __restrict__ vector, Vector3* netVector) {
-	extern __shared__ Vector3 blockVector[];
-	const int tid = threadIdx.x;
-
-	// Iterate over tiles using a block-uniform bound so that every thread of
-	// the block executes the same number of iterations.  The old per-thread
-	// bound (i < num) let threads of a partial tile skip the loop body, which
-	// put __syncthreads() in divergent code and left their shared-memory
-	// slots stale for the reduction.
-	for (int base = blockIdx.x*blockDim.x; base < num; base += blockDim.x*gridDim.x) {
-		const int i = base + tid;
-		// Threads past the end contribute zero.
-		blockVector[tid] = (i < num) ? vector[i] : Vector3(0.0f);
-		__syncthreads();
-
-		// Reduce vectors in shared memory
-		// http://www.cuvilib.com/Reduction.pdf
-		for (int offset = blockDim.x/2; offset > 0; offset >>= 1) {
-			if (tid < offset) {
-				int oid = tid + offset;
-				blockVector[tid] = blockVector[tid] + blockVector[oid];
-			}
-			__syncthreads();
-		}
-
-		if (tid == 0)
-			atomicAdd( netVector, blockVector[0] );
-	}
-}
diff --git a/src/CudaUtil.cuh b/src/CudaUtil.cuh
deleted file mode 100644
index 61f1811b85ad320d373d1c36b305855675dc8c30..0000000000000000000000000000000000000000
--- a/src/CudaUtil.cuh
+++ /dev/null
@@ -1,96 +0,0 @@
-#pragma once
-#include "useful.h"
-#define WARPSIZE 32
-
-// Broadcast `v` from lane `leader` to the calling lanes of a warp.
-// NOTE(review): CudaUtil.cu only defines this when CUDART_VERSION < 9000.
-extern __device__ int warp_bcast(int v, int leader);
-
-// atomicAggInc(ctr, warpLane): increment *ctr and return the value it held.
-// With CUDA runtimes older than 9.0 the out-of-line version from CudaUtil.cu
-// is used; otherwise it is a plain atomicAdd and warpLane is ignored.
-#ifndef CUDART_VERSION
-#error CUDART_VERSION Undefined!
-#elif (CUDART_VERSION < 9000)
-extern __device__ int atomicAggInc(int *ctr, int warpLane);
-#else
-__device__ inline int atomicAggInc(int *ctr, int warpLane) {
-    return atomicAdd(ctr, 1);
-}
-#endif
-
-// Kernel that sums the `num` entries of vector[] into *netVector.
-extern __global__
-void reduceVector(const int num, Vector3* __restrict__ vector, Vector3* netVector);
-
-// In-place exclusive cumulative sum of in[0..n-1], computed cooperatively by
-// the threads of a block: an up-sweep builds partial sums with doubling
-// stride, the last element is zeroed, and a down-sweep with halving stride
-// swaps and accumulates.  Every thread of the block must call this, since it
-// contains __syncthreads().
-__device__ inline void exclIntCumSum(int* in, const int n) {
-	// 1) int* in must point to shared memory
-	// 2) int n must be power of 2
-	const int tid = threadIdx.x;
-	// RBTODO: worry about possible bank conflicts http://www.eecs.umich.edu/courses/eecs570/hw/parprefix.pdf
-	
-	// build tree of sums
-	int stride = 1;
-	for (int d = n>>1; d > 0; d >>= 1) {
-		// wait for the writes of the previous level
-		__syncthreads();
-		if (tid < d) {
-			int id = 2*stride*(tid+1)-1;
-			in[id] += in[id-stride];
-		}
-		stride *= 2;
-	}
-	if (tid == 0) in[n-1] = 0;		/* exclusive cumsum (starts at 0) */
-
-	// traverse down tree and build 'scan'
-	for (int d = 1; d < n; d*= 2) {
-		stride >>= 1;
-		__syncthreads();
-
-		if (tid < d) { // RBTODO: this could be incorrect ==> test
-			int id = 2*stride*(tid+1)-1;
-			// the left child receives the parent's value; the parent
-			// becomes parent + old left child
-			int t = in[id];
-			in[id] += in[id-stride];
-			in[id-stride] = t;
-		}
-	}
-	// make the final values visible to all threads before returning
-	__syncthreads();
-}
-
-// In-place inclusive cumulative sum of in[0..n-1], computed cooperatively by
-// the threads of a block.  An up-sweep builds partial sums with doubling
-// stride; a second pass with halving stride adds each completed partial sum
-// into the element `stride` positions to its right.  Every thread of the block
-// must call this, since it contains __syncthreads().
-__device__ inline void inclIntCumSum(int* in, const int n) {
-	// 1) int* in must point to shared memory
-	// 2) int n must be power of 2
-	const int tid = threadIdx.x;
-	
-	// RBTODO: worry about possible bank conflicts http://www.eecs.umich.edu/courses/eecs570/hw/parprefix.pdf
-	
-	// build tree of sums
-	int stride = 1;
-	for (int d = n>>1; d > 0; d >>= 1) {
-		__syncthreads();
-		if (tid < d) {
-			int id = 2*stride*(tid+1)-1;
-			in[id] += in[id-stride];
-		}
-		stride *= 2;
-	}
-
-	// traverse down tree and build 'scan'
-	for (int d = 1; d < n; d*= 2) {
-		stride >>= 1;
-		__syncthreads();
-
-		if (tid < d) {
-			int id = 2*stride*(tid+1)-1;
-			// The last active thread of each level has id == n-1, so
-			// id+stride lies past the array; the unguarded write corrupted
-			// whatever shared memory followed in[].  Elements inside the
-			// array are updated exactly as before.
-			if (id + stride < n)
-				in[id+stride] += in[id];
-		}
-	}
-	__syncthreads();
-}
-
-// Add val to *address one component at a time.  Each component update is
-// atomic on its own; the three together are not a single atomic operation.
-__device__ inline void atomicAdd(Vector3* address, const Vector3 val) {
-	atomicAdd( &(address->x), val.x);
-	atomicAdd( &(address->y), val.y);
-	atomicAdd( &(address->z), val.z);
-}
-// Add val to *address member by member: f first, then e, each through its own
-// atomicAdd overload.
-__device__ inline void atomicAdd(ForceEnergy* address, const ForceEnergy val) {
-    atomicAdd( &(address->f), val.f );
-    atomicAdd( &(address->e), val.e);
-}
diff --git a/src/DcdWriter.h b/src/DcdWriter.h
deleted file mode 100644
index 20dcd78948a9a4f01ba8e068077ef5e5b5857a31..0000000000000000000000000000000000000000
--- a/src/DcdWriter.h
+++ /dev/null
@@ -1,326 +0,0 @@
-///////////////////////////////////////////////////////////////////////  
-// Modified dcd reader from NAMD.
-// Author: Jeff Comer <jcomer2@illinois.edu>
-
-/**
-***  Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by
-***  The Board of Trustees of the University of Illinois.
-***  All rights reserved.
-**/
-
-/*
-  dcdlib contains C routines for reading and writing binary DCD
-  files.  The output format of these files is based on binary FORTRAN
-  output, so it's pretty ugly.  If you are squeamish, don't look!
-*/
-
-#pragma once
-#include <unistd.h>
-#include <cstring>
-#include <fstream>
-#include <fcntl.h>
-#include <sys/stat.h>
-
-// using namespace std;
-
-// Byte offsets of header fields within the DCD file; writeStep() seeks to
-// these to update the frame and step counters after each frame.
-#define NFILE_POS (off_t) 8
-#define NPRIV_POS (off_t) 12
-#define NSAVC_POS (off_t) 16
-#define NSTEP_POS (off_t) 20
-
-// Platforms without O_LARGEFILE get a no-op flag so open() calls still compile.
-#ifndef O_LARGEFILE
-#define O_LARGEFILE 0x0
-#endif
-
-/*  DEFINE ERROR CODES THAT MAY BE RETURNED BY DCD ROUTINES		*/
-#define DCD_DNE		-2	/*  DCD file does not exist		*/
-#define DCD_OPENFAILED	-3	/*  Open of DCD file failed		*/
-#define DCD_BADREAD 	-4	/*  read call on DCD file failed	*/
-#define DCD_BADEOF	-5	/*  premature EOF found in DCD file	*/
-#define DCD_BADFORMAT	-6	/*  format of DCD file is wrong		*/
-#define DCD_FILEEXISTS  -7	/*  output file already exists		*/
-#define DCD_BADMALLOC   -8	/*  malloc failed			*/
-
-// Just use write instead of NAMD_write --JRC
-// NOTE(review): plain write() may write fewer bytes than requested, and the
-// call sites do not check its return value.
-#define NAMD_write write
-
-// Writes trajectory frames to a binary DCD file.  The constructor opens the
-// file, writeHeader() emits the header once, and writeStep() appends one frame
-// and updates the counters stored in the header.  The layout mimics
-// unformatted FORTRAN output: every record is bracketed by its byte length.
-class DcdWriter {
-public:
-  // Open fileName for writing (an existing file is first renamed to
-  // fileName.BAK).  Prints a message and exits the process on failure.
-  DcdWriter(const char* fileName) {
-    fd = openDcd(fileName);    
-    
-    if (fd == DCD_OPENFAILED) {
-      printf("DcdWriter::DcdWriter Failed to open dcd file %s.", fileName);
-      exit(-1);
-    }
-  }
-
-  ~DcdWriter() {
-    closeDcd();
-  }
-private:
-  int fd;
-
-private:
-
-  // Write one int to the file in native byte order.
-  void writeInt(int value) {
-    NAMD_write(fd, (char *) &value, sizeof(int));
-  }
-
-  // Write `count` zero ints to the file.
-  void writeZeros(int count) {
-    for (int k = 0; k < count; k++)
-      writeInt(0);
-  }
-
-  // Make s exactly len characters long: truncate it if longer, otherwise
-  // right-pad with spaces.  s must have room for len+1 bytes.
-  void pad(char *s, int len)
-  {
-    int curlen = strlen(s);
-
-    if (curlen > len) {
-      s[len] = '\0';
-      return;
-    }
-
-    int i;
-    for (i = curlen; i < len; i++)
-      s[i] = ' ';
-    s[i] = '\0';
-  }
-
-  // Open a dcd file for writing.
-  //   dcdname - name of the dcd file
-  // Returns an open file descriptor, or DCD_OPENFAILED if the open (or the
-  // backup rename) fails.  If the file already exists it is renamed by
-  // appending .BAK to its name.
-  int openDcd(const char* dcdname)
-  {
-    struct stat sbuf;
-
-    if (stat(dcdname, &sbuf) == 0) {
-      // +5 covers ".BAK" and the terminator.  new[] throws on failure, so no
-      // NULL check is needed.
-      char *newdcdname = new char[strlen(dcdname)+5];
-      strcpy(newdcdname, dcdname);
-      strcat(newdcdname, ".BAK");
-      const int renameFailed = rename(dcdname, newdcdname);
-      // Release the buffer on both outcomes; it used to leak when the rename
-      // failed.
-      delete [] newdcdname;
-      if (renameFailed)
-        return DCD_OPENFAILED;
-    }
-
-    // O_EXCL: the path must not exist at this point (it was moved aside above).
-    int dcdfd = open(dcdname, O_RDWR|O_CREAT|O_EXCL|O_LARGEFILE,
-                     S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
-    if (dcdfd < 0)
-      return DCD_OPENFAILED;
-
-    return dcdfd;
-  }
-
-  // Close the dcd file that was opened for writing.
-  void closeDcd()
-  {	
-    close(fd);
-  }
-
-public:
-  // Write the DCD header.
-  //   filename      - file name recorded in the title
-  //   N             - number of atoms
-  //   NFILE         - number of coordinate sets (ignored: 0 is stored and
-  //                   writeStep() counts frames as they are written)
-  //   NPRIV         - starting timestep of the DCD file - NOT ZERO
-  //   NSAVC         - timesteps between DCD saves
-  //   NSTEP         - number of timesteps (ignored: NPRIV - NSAVC is stored and
-  //                   writeStep() advances it)
-  //   DELTA         - length of a timestep
-  //   with_unitcell - nonzero if frames will carry unit-cell records
-  // Always returns 0; write errors are not detected.
-  int writeHeader(const char *filename, int N, int NFILE, int NPRIV, 
-		  int NSAVC, int NSTEP, float DELTA, int with_unitcell)
-  {
-    char title_string[200];
-
-    // First record: 84 bytes.
-    writeInt(84);
-    NAMD_write(fd, "CORD", 4);
-    writeInt(0);               /* NFILE, located at fpos 8 */
-    writeInt(NPRIV);           /* located at fpos 12 */
-    writeInt(NSAVC);           /* located at fpos 16 */
-    writeInt(NPRIV - NSAVC);   /* NSTEP, located at fpos 20 */
-    writeZeros(5);
-    float out_float = DELTA;
-    NAMD_write(fd, (char *) &out_float, sizeof(float));
-    writeInt(with_unitcell ? 1 : 0);
-    writeZeros(8);
-    writeInt(24);  // PRETEND TO BE CHARMM24 -JCP
-    writeInt(84);
-
-    // Second record: 164 bytes = title count + two 80-character title lines.
-    writeInt(164);
-    writeInt(2);
-
-    // snprintf bounds the write: a long filename used to overflow
-    // title_string.  pad() then fixes the line at 80 characters.
-    snprintf(title_string, sizeof(title_string),
-             "REMARKS FILENAME=%s CREATED BY NAMD", filename);
-    pad(title_string, 80);
-    NAMD_write(fd, title_string, 80);
-
-    const char *username = "BrownTown";
-
-    char time_str[11];
-    time_t cur_time = time(NULL);
-    struct tm *tmbuf = localtime(&cur_time);
-    strftime(time_str, 10, "%m/%d/%y", tmbuf);
-
-    snprintf(title_string, sizeof(title_string),
-             "REMARKS DATE: %s CREATED BY USER: %s", time_str, username);
-    pad(title_string, 80);
-    NAMD_write(fd, title_string, 80);
-    writeInt(164);
-
-    // Third record: the atom count.
-    writeInt(4);
-    writeInt(N);
-    writeInt(4);
-
-    return(0);
-  }
-
-  
-  // Append the coordinates for one timestep to the DCD file.
-  //   N       - number of atoms
-  //   X, Y, Z - coordinate arrays of N floats each
-  //   cell    - six doubles describing the unit cell (a, b, c, alpha, beta,
-  //             gamma), or NULL to omit the unit-cell record
-  // Returns 0 on success, or DCD_BADREAD if the header counters could not be
-  // read back (the header is then left unchanged).
-  int writeStep(int N, const float *X, const float *Y, const float *Z, const double *cell)
-  {
-    int NSAVC = 0, NSTEP = 0, NFILE = 0;
-    int out_integer;
-
-    /* Unit cell */
-    if (cell) {
-      out_integer = 6*8;
-      writeInt(out_integer);
-      NAMD_write(fd, (char *) cell, out_integer);
-      writeInt(out_integer);
-    }
-
-    /* Coordinates: three records, each bracketed by its byte length */
-    out_integer = N*4;
-    writeInt(out_integer);
-    NAMD_write(fd, (char *) X, out_integer);
-    writeInt(out_integer);
-    writeInt(out_integer);
-    NAMD_write(fd, (char *) Y, out_integer);
-    writeInt(out_integer);
-    writeInt(out_integer);
-    NAMD_write(fd, (char *) Z, out_integer);
-    writeInt(out_integer);
-
-    /* don't update header until after write succeeds */
-    bool readOk = true;
-    lseek(fd,NSAVC_POS,SEEK_SET);
-    readOk = (read(fd,(void*) &NSAVC,sizeof(int)) == (ssize_t) sizeof(int)) && readOk;
-    lseek(fd,NSTEP_POS,SEEK_SET);
-    readOk = (read(fd,(void*) &NSTEP,sizeof(int)) == (ssize_t) sizeof(int)) && readOk;
-    lseek(fd,NFILE_POS,SEEK_SET);
-    readOk = (read(fd,(void*) &NFILE,sizeof(int)) == (ssize_t) sizeof(int)) && readOk;
-    if (!readOk) {
-      // Do not write counters derived from values that were never read;
-      // restore the append position and report the failure.
-      lseek(fd,0,SEEK_END);
-      return DCD_BADREAD;
-    }
-
-    NSTEP += NSAVC;
-    NFILE += 1;
-    lseek(fd,NSTEP_POS,SEEK_SET);
-    NAMD_write(fd,(char*) &NSTEP,sizeof(int));
-    lseek(fd,NFILE_POS,SEEK_SET);
-    NAMD_write(fd,(char*) &NFILE,sizeof(int));
-    lseek(fd,0,SEEK_END);
-
-    return(0);
-  }
-};
-
diff --git a/src/Debug.h b/src/Debug.h
deleted file mode 100644
index a77f2b83133300b628bd633060a7859ab4c540f6..0000000000000000000000000000000000000000
--- a/src/Debug.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/**
-***  Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by
-***  The Board of Trustees of the University of Illinois.
-***  All rights reserved.
-**/
-
-#pragma once
-
-#ifndef MIN_DEBUG_LEVEL
-  #define MIN_DEBUG_LEVEL 0
-#endif
-#ifndef MAX_DEBUG_LEVEL
-  #define MAX_DEBUG_LEVEL 10
-#endif
-#ifndef STDERR_LEVEL
-  /* anything >= this error level goes to stderr */
-  #define STDERR_LEVEL 5
-#endif
-
-
-/*****************************************************************
- *  DebugM(): function to display a debug message.
- *  Messages have different levels.  The low numbers are low severity
- *  while the high numbers are really important.  Very high numbers
- *  are sent to stderr rather than stdout.
- *  The default severity scale is from 0 to 10.
- *     0 = plain message
- *     4 = important message
- *     5 = warning (stderr)
- *     10 = CRASH BANG BOOM error (stderr)
- *  The remaining args are like printf: a format string and some args.
- *  This function can be turned off by compiling without the DEBUGM flag
- *  No parameters to this function should have a side effect!
- *  No functions should be passed as parameters!  (including inline)
- *****************************************************************/
-
-#ifdef DEBUGM
-
-/* #include "InfoStream.h" */
-
-  #define Debug(x) (x)
-  #define DebugM(level,format) \
-	{ \
-	  if ((level >= MIN_DEBUG_LEVEL) && (level <= MAX_DEBUG_LEVEL)) \
-	  { \
-	    infostream Dout; \
-	    if (level >= STDERR_LEVEL)	Dout << "[ERROR " << level << "] "; \
-	    else if (level > 0) Dout << "[Debug " << level << "] "; \
-	    Dout << iPE << ' ' << iFILE; \
-	    Dout << format << endi; \
-	  } \
-	}
-
- #else
-  /* make a void function. */
-  /* parameters with side effects will be removed! */
-  #define Debug(x) ;
-  #define DebugM(x,y)	;
-
- #endif /* DEBUGM */
-
diff --git a/src/Dihedral.cu b/src/Dihedral.cu
deleted file mode 100644
index 2565a5aee052b3e288707e5a059c928032d58be4..0000000000000000000000000000000000000000
--- a/src/Dihedral.cu
+++ /dev/null
@@ -1,18 +0,0 @@
-// Dihedral.cu 
-// Authors: Justin Dufresne and Terrance Howard, 2013
-
-#include "Dihedral.h"
-
-Dihedral::Dihedral(int ind1, int ind2, int ind3, int ind4, String fileName) : 
-		ind1(ind1), ind2(ind2), ind3(ind3), ind4(ind4), fileName(fileName) {}
-
-Dihedral::Dihedral(const Dihedral& d) : ind1(d.ind1), ind2(d.ind2), ind3(d.ind3), ind4(d.ind4),
-		tabFileIndex(d.tabFileIndex), fileName(d.fileName) {}
-
-String Dihedral::toString() {
-	return String("DIHEDRAL ") + ind1 + " " + ind2 + " " + ind3 + " " + ind4 + " " + fileName;
-}
-
-void Dihedral::print() {
-	printf("DIHEDRAL (%d %d %d %d) %s\n", ind1, ind2, ind3, ind4, fileName.val());
-}
diff --git a/src/Dihedral.h b/src/Dihedral.h
deleted file mode 100644
index a6bb70738ae65fbdc54f0c96da7b585aa081f0be..0000000000000000000000000000000000000000
--- a/src/Dihedral.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// Dihedral.h
-// Authors: Justin Dufresne and Terrance Howard, 2013
-
-#ifndef DIHEDRAL_H
-#define DIHEDRAL_H
-
-#include "useful.h"
-#include <cuda.h>
-
-class Dihedral {
-public:
-	int ind1, ind2, ind3, ind4;
-	int tabFileIndex;
-		// This will be assigned after ComputeForce.cu loads the TabulatedDihedralPotential objects.
-		// The tabFileIndex is used by ComputeForce to discern which TabDiPot this Dihedral object uses.
-	String fileName;
-	Dihedral() : ind1(-1), ind2(-1), ind3(-1), ind4(-1), tabFileIndex(-1) {}
-	Dihedral(int ind1, int ind2, int ind3, int ind4, String fileName);
-	Dihedral(const Dihedral& d);
-	HOST DEVICE inline int getIndex(int index) const {
-		if (index == ind1) return 1;
-		if (index == ind2) return 2;
-		if (index == ind3) return 3;
-		if (index == ind4) return 4;
-		return -1;
-	}
-	String toString();
-	void print();
-};
-
-#endif
diff --git a/src/Exclude.cu b/src/Exclude.cu
deleted file mode 100644
index 7ee075ea65f889c127f4ffa25be209e3f579a79b..0000000000000000000000000000000000000000
--- a/src/Exclude.cu
+++ /dev/null
@@ -1,150 +0,0 @@
-#include "Exclude.h"
-#include <cuda.h>
-
-Exclude* makeExcludes(Bond* bonds, int2* bondMap, int num, int numBonds, String exList, int& numExcludes) {	
-	int oldNumExcludes = numExcludes;
-	numExcludes = 0;
-	int resCap = numBonds;
-	Exclude* result = new Exclude[numBonds];
-	Node** particles = new Node*[num];	// an array of linked lists
-	Node** trees;
-	int cap = 16;
-	trees = new Node*[cap];
-	int numTrees = 0;
-	for (int i = 0; i < num; i++)
-		particles[i] = new Node(i);
-	for (int i = 0; i < num; i++) {
-		if (!particles[i]->inTree) {
-			if (numTrees >= cap) {
-				Node** temp = trees;
-				cap *= 2;
-				trees = new Node*[cap];	
-				for (int j = 0; j < numTrees; j++)
-					trees[j] = temp[j];
-				delete temp;
-			}
-			
-			int bondstart = bondMap[i].x;
-			int bondend = bondMap[i].y;
-			int nextsize;
-			nextsize = particles[i]->makeTree(particles, bonds, bondMap, bondstart, bondend);
-			if (nextsize > 1)
-				trees[numTrees++] = particles[i];
-		}
-	}
-	printf("exList %s\n", exList.val());
-	int depth = atoi(exList.val());
-
-	Node** newTree;
-	int treeCap = 100;
-	int numNodes = 0;
-	for (int i = 0; i < num; i++) {	
-		Node* p = particles[i];
-		if (p->numBonds < 1) continue;
-		newTree = new Node*[treeCap];
-		for (int j = 0; j < num; j++)
-			particles[j]->inTree = false;
-		newTree[0] = p;
-		numNodes = 1;
-		int oldNumNodes = 0;
-		for (int j = 0; j < depth; j++) {
-			int tempNum = numNodes;
-			for (int k = oldNumNodes; k < tempNum; k++) {
-				oldNumNodes = numNodes;
-				Node* p2 = particles[newTree[k]->index];
-				p2->inTree = true;
-				for (int m = 0; m < p2->numBonds; m++) {
-					Node* p3 = p2->bonds[m];
-					if (!p3->inTree) {
-						p3->inTree = true;
-						if (numExcludes >= resCap) {
-							printf("Expanding result\n");
-							Exclude* tempResult = result;	
-							resCap *= 2;
-							result = new Exclude[resCap];
-							for (int n = 0; n < numExcludes; n++)
-								result[n] = tempResult[n];
-							delete tempResult;
-						}
-						Exclude ex(i, p3->index);
-						result[numExcludes++] = ex;
-
-						if (numNodes >= treeCap) {
-							printf("Expanding newTree\n");
-							Node** tempTree = newTree;
-							treeCap *= 2;
-							newTree = new Node*[treeCap];
-							for (int n = 0; n < numNodes; n++)
-								newTree[n] = tempTree[n];
-							delete tempTree;
-						}
-						newTree[numNodes++] = p3; 
-					}
-				}
-			}
-		}
-		delete[] newTree;
-	}
-
-	delete[] particles;
-	delete[] trees;
-	numExcludes += oldNumExcludes;
-	return result;
-}
-
-void Exclude::print() {
-	printf("EXCLUDE %d %d\n", ind1, ind2);
-}
-
-bool Exclude::operator==(const Exclude& e) const {
-	return (ind1 == e.ind1) && (ind2 == e.ind2);
-}
-
-bool Exclude::operator!=(const Exclude& e) const {
-	return !(*this == e);
-}
-
-//////////////////////////
-// Node Implementations //
-//////////////////////////
-
-Node::Node(int index) : index(index) {
-	inTree = false;
-	cap = 4;
-	numBonds = 0;
-	bonds = new Node*[cap];
-}
-
-void Node::clearTree() {
-	printf("index %d cleared\n", index);
-	inTree = false;
-	for (int i = 0; i < numBonds; i++)
-		if (bonds[i]->inTree) 
-			bonds[i]->clearTree();
-}
-
-int Node::makeTree(Node** particles, Bond* bonds, int2* bondMap, int bondstart, int bondend) {
-	inTree = true;
-	int sum = 1;
-	for (int i = bondstart; i < bondend; i++)
-		add(particles[bonds[i].ind2]);
-
-	for (int i = bondstart; i < bondend; i++) {
-		Node* p = particles[bonds[i].ind2];
-		if (!p->inTree)
-			sum += p->makeTree(particles, bonds, bondMap, bondMap[p->index].x, bondMap[p->index].y);
-	}
-	return sum;
-}
-
-void Node::add(Node* n) {
-	if (numBonds >= cap) {
-		Node** temp = bonds;
-		cap *= 2;	
-		bonds = new Node*[cap];
-		for (int i = 0; i < numBonds; i++)
-			bonds[i] = temp[i];
-		delete temp;
-	}
-	bonds[numBonds++] = n;
-}
diff --git a/src/Exclude.h b/src/Exclude.h
deleted file mode 100644
index 35051af16777a289a6168cf492dc14b9ff598cd4..0000000000000000000000000000000000000000
--- a/src/Exclude.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// Exclude.h
-// Copyright Justin Dufresne and Terrance Howard, 2013
-
-#ifndef EXCLUDE_H
-#define EXCLUDE_H
-
-#include "JamesBond.h"
-#include <limits.h>
-
-class Exclude {
-public:
-	Exclude() : ind1(-1), ind2(-1) {}
-	Exclude(int ind1, int ind2) : ind1(ind1), ind2(ind2) {}
-	bool operator==(const Exclude& e) const;
-	bool operator!=(const Exclude& e) const;
-	void print();
-	int ind1;
-	int ind2;
-};
-
-class Node {
-public:
-	Node(int index);
-	void clearTree();
-	int makeTree(Node** particles, Bond* bonds, int2* bondMap, int bondstart, int bondend);
-	void add(Node* n);
-	bool inTree;
-	int index;
-	int cap;
-	int numBonds;
-	Node** bonds;
-};
-
-// makeExcludes(Bond* bonds, int* bondMap, int num, int numBonds, String exList)
-// @param    list of sorted cell bonds; corresponding bond map; number of particles; number of bonds;
-//           string formated like so "EXCLUDE 1-2 1-3 1-4"; number of excludes
-// @return   Array of Excludes
-// This algorithm finds the central particle in every bond tree,
-// then creates a list of exclusions for the particle pairs 
-// defined in exList. For example, 1-2 means that there should
-// be an exclusion between the central particle and every 
-// particle it is directly bonded to. 1-3 means that there should
-// be an exclusion between the central particle and every particle
-// it is two bonds away from
-Exclude* makeExcludes(Bond* bonds, int2* bondMap, int num, int numBonds,
-		String exList, int& numExcludes);
-void getExcludes(int root, Node* curr, Exclude* result, int depth, int& capacity,
-		int& numExcludes, bool sentinel, bool* done);
-
-#endif
diff --git a/src/FlowForce.cpp b/src/FlowForce.cpp
deleted file mode 100644
index b9baf3c9f2c1174020d5a6e64da324af5de0313e..0000000000000000000000000000000000000000
--- a/src/FlowForce.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-#include "FlowForce.h"
-
-
-FlowForce::FlowForce(float v) {
-	// Parameters
-	float chanLength = 1000.0f;
-	float chanWidth = 100.0f;
-	float buffArea = 300.0f * 100.0f;
-	chanVel0 = v;
-
-	chanHalfLen = 0.5f *chanLength;
-	chanHalfWidth = 0.5f * chanWidth;
-
-	// Compute the buffer velocity to have equal flow rates.
-	buffVel = 4.0f / 3.0f * chanVel0 * chanWidth * chanWidth / buffArea;
-}
-
-Vector3 FlowForce::force(Vector3 r, float diffusion) const {
-	if (fabs(r.x) < chanHalfLen) {		// A poiseille flow
-		if (fabs(r.y) > chanHalfWidth)
-			return Vector3(0.0f);
-		float ratio = r.y/chanHalfWidth;
-		float vx = chanVel0*(1.0f - ratio*ratio);
-		return Vector3(vx / diffusion, 0.0f, 0.0f);
-	}
-	return Vector3(buffVel/diffusion, 0.0f, 0.0f);
-}
-
diff --git a/src/FlowForce.h b/src/FlowForce.h
deleted file mode 100644
index 8ffe930af173b908201ec9de7fe046d310e0b768..0000000000000000000000000000000000000000
--- a/src/FlowForce.h
+++ /dev/null
@@ -1,23 +0,0 @@
-///////////////////////////////////////////////////////////////////////
-// Author: Jeff Comer <jcomer2@illinois.edu>
-#ifndef FLOWFORCE_H
-#define FLOWFORCE_H
-
-#include <cmath>
-#include "useful.h"
-// using namespace std;
-
-class FlowForce {
-public:
-	FlowForce(float v);
-
-	Vector3 force(Vector3 r, float diffusion) const;
-
-private:
-	float chanHalfLen;
-	float chanHalfWidth;
-	float chanVel0;
-	float buffVel;
-};
-
-#endif
diff --git a/src/GPUController.h b/src/GPUController.h
deleted file mode 100644
index 088a3ddc2564ce0a81bbb1d89575b571b026b0bc..0000000000000000000000000000000000000000
--- a/src/GPUController.h
+++ /dev/null
@@ -1,44 +0,0 @@
-// provides interface between main CPU loop and various GPUs
-//   -- holds data for each GPU
-
-#pragma once
-#include "useful.h"
-
-class GPUcontroller {
-public:
-	GPUcontroller(const Configuration& c, const long int randomSeed,
-			bool debug, int numReplicas = 0);
-	~GPUcontroller();
-
-	static bool DEBUG;
-
-private:  
-
-	void copyToCUDA();
-
-	
-private:
-	const Configuration& conf;
-	int numReplicas;
-
-	// Integrator variables
-	BaseGrid* sys;
-	ComputeForce* internal;
-	Vector3* forceInternal;
-
-	// CUDA device variables
-	Vector3 *pos_d, *forceInternal_d, *force_d;
-	int *type_d;
-	BrownianParticleType **part_d;
-	BaseGrid *sys_d, *kTGrid_d;
-	Random *randoGen_d;
-	Bond* bonds_d;
-	int2* bondMap_d;
-	Exclude* excludes_d;
-	int2* excludeMap_d;
-	Angle* angles_d;
-	Dihedral* dihedrals_d;
-	
-}
-	
-	
diff --git a/src/GPUManager.cpp b/src/GPUManager.cpp
index 0a7d89c4075ab7d47d6c16a7e5a66e1429baed89..45a9faec474903e7534dee762f49ac3c037fadd2 100644
--- a/src/GPUManager.cpp
+++ b/src/GPUManager.cpp
@@ -1,4 +1,5 @@
 #include "GPUManager.h"
+#ifdef USE_CUDA
 
 #ifndef gpuErrchk
 #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
@@ -184,3 +185,5 @@ void GPUManager::init_comms() {
     NCCLCHECK(ncclCommInitAll(comms, gpus.size(), gpu_ids));
 }
 #endif
+
+#endif
diff --git a/src/GPUManager.h b/src/GPUManager.h
index 5847c03cf03cd7d98823d27213f34c42f328fc0c..2086361712b1ddc837091702c16db9a94faf63b9 100644
--- a/src/GPUManager.h
+++ b/src/GPUManager.h
@@ -1,6 +1,6 @@
-#ifndef GPU_MANAGER_H
-#define GPU_MANAGER_H
+#pragma once
 
+#ifdef USE_CUDA
 #include <cstdio>
 #include <vector>
 #include <cuda.h>
diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu
deleted file mode 100644
index ef6ef5dfd8eb5599b87e32b800a915f649d1ead1..0000000000000000000000000000000000000000
--- a/src/GrandBrownTown.cu
+++ /dev/null
@@ -1,1755 +0,0 @@
-#include "GrandBrownTown.h"
-#include "GrandBrownTown.cuh"
-/* #include "ComputeGridGrid.cuh" */
-#include "WKFUtils.h"
-#include "BrownParticlesKernel.h"
-#include "nvtx_defs.h"
-#include <stdlib.h>     /* srand, rand */
-#include <time.h>       /* time */
-#include <thrust/device_ptr.h>
-#include <fstream>
-#include <cuda_profiler_api.h>
-
-#ifdef _OPENMP
-#include <omp.h>
-//#else
-//typedef int omp_int_t;
-//inline omp_int_t omp_get_thread_num() { return 0; }
-//inline omp_int_t omp_get_max_threads() { return 1; }
-#endif
-
-#ifndef gpuErrchk
-#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
-inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
-	if (code != cudaSuccess) {
-		fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), file, line);
-	if (abort) exit(code);
-	}
-}
-#endif
-
-bool GrandBrownTown::DEBUG;
-
-cudaEvent_t START, STOP;
-
-GPUManager GrandBrownTown::gpuman = GPUManager();
-
-GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg,
-		bool debug, bool imd_on, unsigned int imd_port, int numReplicas) :
-	imd_on(imd_on), imd_port(imd_port), numReplicas(numReplicas),
-	//conf(c), RBC(RigidBodyController(c,outArg)) {
-	conf(c) {
-
-        RBC.resize(numReplicas);      
-        for(int i = 0; i < numReplicas; ++i)
-        {
-            RigidBodyController* rb = new RigidBodyController(c, outArg, seed, i);
-            RBC[i] = rb;
-        }
-
-        //printf("%d\n",__LINE__);
-        //Determine which dynamic. Han-Yi Chou
-        particle_dynamic  = c.ParticleDynamicType;
-        rigidbody_dynamic = c.RigidBodyDynamicType;
-        ParticleInterpolationType = c.ParticleInterpolationType;
-        RigidBodyInterpolationType = c.RigidBodyInterpolationType;
-        //particle_langevin_integrator = c.ParticleLangevinIntegrator;
-        printf("%d\n",__LINE__);
-	for (int i = 0; i < numReplicas; ++i) 
-        {
-		std::stringstream curr_file, restart_file, out_prefix;
-
-		if (numReplicas > 1) {
-		    curr_file << outArg << '.' << i << ".curr";
-		    restart_file   << outArg << '.' << i << ".restart";
-		    out_prefix << outArg << '.' << i;
-		} else {
-		    curr_file << outArg << ".curr";
-		    restart_file   << outArg << ".restart";
-		    out_prefix << outArg;
-		}
-
-                outCurrFiles.push_back(curr_file.str());
-                restartFiles.push_back(restart_file.str());
-                outFilePrefixes.push_back(out_prefix.str());
-
-                //Han-Yi Chou for flush out the momentum
-                if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin"))
-                {
-                    std::stringstream restart_file_p, out_momentum_prefix, out_force_prefix;
-                    restart_file_p << outArg << '.' << i << ".momentum.restart";
-                    out_momentum_prefix << outArg << '.' << i << ".momentum";
-                    //out_force_prefix << outArg << ".force." << i;
-
-                    restartMomentumFiles.push_back(restart_file_p.str());//Han-Yi Chou
-                    outMomentumFilePrefixes.push_back(out_momentum_prefix.str());
-                    //outForceFilePrefixes.push_back(out_force_prefix.str());
-                }           
-	}
-
-	GrandBrownTown::DEBUG = debug;
-	sysDim = c.sysDim;
-	sys = c.sys;
-
-	// Particle variables
-	partsFromFile = c.partsFromFile;
-	indices = c.indices;
-	numPartsFromFile = c.numPartsFromFile;  // number of particle types
-	bonds = c.bonds;
-	numCap = c.numCap;                      // max number of particles
-	num = c.num;                            // current number of particles
-	num_rb_attached_particles = c.num_rb_attached_particles;
-	numGroupSites = c.numGroupSites;
-
-	// Allocate arrays of positions, types and serial numbers
-	pos    = new Vector3[(num+num_rb_attached_particles+numGroupSites) * numReplicas];  // [HOST] array of particles' positions.
-        // Allocate arrays of momentum Han-Yi Chou
-        if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin"))
-        {
-            momentum = new Vector3[num * numReplicas]; //[HOST] array of particles' momentum
-            if(particle_dynamic == String("NoseHooverLangevin"))
-                random = new float[num * numReplicas];
-        }
-        //printf("%d\n",__LINE__);
-        //for debug
-        //force = new Vector3[num * numReplicas];
-
-	type   = new     int[(num+num_rb_attached_particles) * numReplicas];  // [HOST] array of particles' types.
-	serial = new     int[(num+num_rb_attached_particles) * numReplicas];  // [HOST] array of particles' serial numbers.
-
-	// Allocate things for rigid body
-	// RBC = RigidBodyController(c);
-  // printf("About to devicePrint\n");
-	// devicePrint<<<1,1>>>(&(c.rigidBody[0]));
-	// devicePrint<<<1,1>>>(RBC.rbType_d);
-	cudaDeviceSynchronize();
-	// printf("Done with devicePrint\n");
-
-
-	
-	// Replicate identical initial conditions across all replicas
-	for (int r = 0; r < numReplicas; ++r) {
-	    std::copy(c.type, c.type + num+num_rb_attached_particles,
-		      type + r*(num+num_rb_attached_particles));
-	    std::copy(c.serial, c.serial + num + num_rb_attached_particles,
-		      serial + r*(num+num_rb_attached_particles));
-	  if (c.copyReplicaCoordinates > 0)
-	    std::copy(c.pos, c.pos + num, pos + r*num);
-	}
-        if (c.copyReplicaCoordinates <= 0)
-          std::copy(c.pos, c.pos + numReplicas*num, pos);
-
-        //printf("%d\n",__LINE__); 
-        //Han-Yi Chou
-        if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin"))
-            std::copy(c.momentum,c.momentum+num*numReplicas,momentum);
-
-        //printf("%d\n",__LINE__);
-
-	currSerial = c.currSerial;  // serial number of the next new particle
-	name = c.name;              // list of particle types! useful when 'numFluct == 1'
-	posLast = c.posLast;        // previous positions of particles  (used for computing ionic current)
-        momLast = c.momLast;
-	timeLast = c.timeLast;      // previous time (used with posLast)
-	minimumSep = c.minimumSep;  // minimum separation allowed when placing new particles
-
-	// System parameters
-	outputName = c.outputName;
-	timestep = c.timestep;
-	steps = c.steps;
-	seed = c.seed;
-	temperatureGridFile = c.temperatureGridFile;
-	inputCoordinates = c.inputCoordinates;
-	restartCoordinates = c.restartCoordinates;
-	numberFluct = c.numberFluct;
-	interparticleForce = c.interparticleForce;
-	tabulatedPotential = c.tabulatedPotential;
-	readBondsFromFile = c.readBondsFromFile;
-	fullLongRange = c.fullLongRange;
-	kT = c.kT;
-	temperature = c.temperature;
-	coulombConst = c.coulombConst;
-	electricField = c.electricField;
-	cutoff = c.cutoff;
-	switchLen = c.switchLen;
-	outputPeriod = c.outputPeriod;
-	outputEnergyPeriod = c.outputEnergyPeriod;
-	outputFormat = c.outputFormat;
-	currentSegmentZ = c.currentSegmentZ;
-	numberFluctPeriod = c.numberFluctPeriod;
-	decompPeriod = c.decompPeriod;
-	numCapFactor = c.numCapFactor;
-	kTGrid = c.kTGrid;
-	tGrid = c.tGrid;
-	sigmaT = c.sigmaT;
-
-	// Parameter files
-	partTableFile = c.partTableFile;
-	bondTableFile = c.bondTableFile;
-	angleTableFile = c.angleTableFile;
-	dihedralTableFile = c.dihedralTableFile;
-
-	// Other parameters.
-	switchStart = c.switchStart;
-	maxInitialPot = c.maxInitialPot;
-	initialZ = c.initialZ;
-
-	// Particle parameters.
-	part = c.part;
-	numParts = c.numParts;
-	numBonds = c.numBonds;
-	numExcludes = c.numExcludes;
-	numAngles = c.numAngles;
-	numDihedrals = c.numDihedrals;
-	partTableIndex0 = c.partTableIndex0;
-	partTableIndex1 = c.partTableIndex1;
-
-	numBondAngles = c.numBondAngles;
-
-	numTabBondFiles = c.numTabBondFiles;
-	bondMap = c.bondMap;
-	// TODO: bondList = c.bondList;
-
-	excludes = c.excludes;
-        part = c.part;
-	excludeMap = c.excludeMap;
-	excludeRule = c.excludeRule;
-	excludeCapacity = c.excludeCapacity;
-
-	angles = c.angles;
-	numTabAngleFiles = c.numTabAngleFiles;
-
-	dihedrals = c.dihedrals;
-	numTabDihedralFiles = c.numTabDihedralFiles;
-
-	bondAngles = c.bondAngles;
-
-	// Device parameters
-	//type_d = c.type_d;
-	part_d = c.part_d;
-	sys_d = c.sys_d;
-	kTGrid_d = c.kTGrid_d;
-	//bonds_d = c.bonds_d;
-	//bondMap_d = c.bondMap_d;
-	//excludes_d = c.excludes_d;
-	//excludeMap_d = c.excludeMap_d;
-	//angles_d = c.angles_d;
-	//dihedrals_d = c.dihedrals_d;
-
-	printf("Setting up random number generator with seed %lu\n", seed);
-	randoGen = new Random(num * numReplicas, seed);
-	copyRandToCUDA();
-
-        if(particle_dynamic == String("NoseHooverLangevin"))
-            InitNoseHooverBath(num * numReplicas);
-
-	// "Some geometric stuff that should be gotten rid of." -- Jeff Comer
-	Vector3 buffer = (sys->getCenter() + 2.0f * sys->getOrigin())/3.0f;
-	initialZ = buffer.z;
-
-	// Load random coordinates if necessary Han-Yi Chou
-	if (!c.loadedCoordinates) {
-		//printf("Populating\n");
-		//populate(); Han-Yi Chou, Actually the list is already populated 
-	    initialCond();
-		printf("Setting random initial conditions.\n");
-	}
-
-	// Prepare internal force computation
-	 //internal = new ComputeForce(num, part, numParts, sys, switchStart, switchLen, coulombConst,
-	 //			    fullLongRange, numBonds, numTabBondFiles, numExcludes, numAngles, numTabAngleFiles,
-	 //			    numDihedrals, numTabDihedralFiles, c.pairlistDistance, numReplicas);
-	internal = new ComputeForce(c, numReplicas);
-
-	//MLog: I did the other halve of the copyToCUDA function from the Configuration class here, keep an eye on any mistakes that may occur due to the location.
-	internal -> copyToCUDA(c.simNum, c.type, c.bonds, c.bondMap, c.excludes, c.excludeMap, c.angles, c.dihedrals, c.restraints, c.bondAngles, c.simple_potential_ids, c.simple_potentials, c.productPotentials );
-	if (numGroupSites > 0) init_cuda_group_sites();
-
-	// TODO: check for duplicate potentials 
-	if (c.tabulatedPotential) {
-		printf("Loading %d tabulated non-bonded potentials...\n", numParts*numParts);
-		for (int p = 0; p < numParts*numParts; p++) {
-			if (partTableFile[p].length() > 0) {
-				int type0 = partTableIndex0[p];
-				int type1 = partTableIndex1[p];
-
-				internal->addTabulatedPotential(partTableFile[p].val(), type0, type1);
-				// printf("  Loaded %s for types %s and %s.\n", partTableFile[p].val(),
-				// 		part[type0].name.val(), part[type1].name.val());
-			}
-		}
-	}
-	printf("Using %d non-bonded exclusions\n",c.numExcludes/2);
-
-	if (c.readBondsFromFile) {
-		printf("Loading %d tabulated bond potentials...\n", numTabBondFiles);
-		for (int p = 0; p < numTabBondFiles; p++)
-			if (bondTableFile[p].length() > 0) {
-				//MLog: make sure to add to all GPUs
-			    // printf("...loading %s\n",bondTableFile[p].val());
-			    internal->addBondPotential(bondTableFile[p].val(), p, bonds, bondAngles);
-				// printf("%s\n",bondTableFile[p].val());
-			} else {
-			    printf("...skipping %s (\n",bondTableFile[p].val());
-			    internal->addBondPotential(bondTableFile[p].val(), p, bonds, bondAngles);
-			}
-			    
-	}
-
-	if (c.readAnglesFromFile) {
-		printf("Loading %d tabulated angle potentials...\n", numTabAngleFiles);
-		for (int p = 0; p < numTabAngleFiles; p++)
-			if (angleTableFile[p].length() > 0)
-			{
-				//MLog: make sure to do this for every GPU
-			    internal->addAnglePotential(angleTableFile[p].val(), p, angles, bondAngles);
-			}
-	}
-
-	if (c.readDihedralsFromFile) {
-		printf("Loading %d tabulated dihedral potentials...\n", numTabDihedralFiles);
-		for (int p = 0; p < numTabDihedralFiles; p++)
-			if (dihedralTableFile[p].length() > 0)
-				internal->addDihedralPotential(dihedralTableFile[p].val(), p, dihedrals);
-	}
-
-	auto _get_index = [this](int idx, int replica) {
-	    // Convenient lambda function to deal with increasingly complicated indexing
-	    auto num = this->num;
-	    auto numReplicas = this->numReplicas;
-	    auto num_rb_attached_particles = this->num_rb_attached_particles;
-	    auto numGroupSites = this->numGroupSites;
-	    idx = (idx < num+num_rb_attached_particles) ? idx + replica*(num+num_rb_attached_particles)
-		: (idx-num-num_rb_attached_particles) + numReplicas*(num+num_rb_attached_particles) + replica * numGroupSites;
-	    return idx;
-	};
-
-	//Mlog: this is where we create the bondList.
-	if (numBonds > 0) {
-		bondList = new int3[ (numBonds / 2) * numReplicas ];
-		int j = 0;
-
-		for(int k = 0 ; k < numReplicas; k++)
-		{
-			for(int i = 0; i < numBonds; ++i)
-			{
-				if(bonds[i].ind1 < bonds[i].ind2)
-				{
-					if (bonds[i].tabFileIndex == -1) {
-						fprintf(stderr,"Error: bondfile '%s' was not read with tabulatedBondFile command.\n", bonds[i].fileName.val());
-						exit(1);
-					}
-					bondList[j] = make_int3( _get_index(bonds[i].ind1, k), _get_index(bonds[i].ind2, k), bonds[i].tabFileIndex );
-					// cout << "Displaying: bondList["<< j <<"].x = " << bondList[j].x << ".\n"
-					// << "Displaying: bondList["<< j <<"].y = " << bondList[j].y << ".\n"
-					// << "Displaying: bondList["<< j <<"].z = " << bondList[j].z << ".\n";
-					++j;
-				}
-			}
-		}
-	}
-	// internal->createBondList(bondList);
-
-	if (numAngles > 0) {
-	angleList = new int4[ (numAngles) * numReplicas ];
-	for(int k = 0 ; k < numReplicas; k++) {
-	    for(int i = 0; i < numAngles; ++i) {
-			if (angles[i].tabFileIndex == -1) {
-				fprintf(stderr,"Error: anglefile '%s' was not read with tabulatedAngleFile command.\n", angles[i].fileName.val());
-				exit(1);
-			}
-			angleList[i+k*numAngles] = make_int4( _get_index(angles[i].ind1,k), _get_index(angles[i].ind2,k), _get_index(angles[i].ind3,k), angles[i].tabFileIndex );
-	    }
-	}
-	}
-	
-	if (numDihedrals > 0) {
-	dihedralList = new int4[ (numDihedrals) * numReplicas ];
-	dihedralPotList = new  int[ (numDihedrals) * numReplicas ];
-	for(int k = 0 ; k < numReplicas; k++) {
-	    for(int i = 0; i < numDihedrals; ++i) {
-			if (dihedrals[i].tabFileIndex == -1) {
-				fprintf(stderr,"Error: dihedralfile '%s' was not read with tabulatedDihedralFile command.\n", dihedrals[i].fileName.val());
-				exit(1);
-			}
-			dihedralList[i+k*numDihedrals] = make_int4( _get_index(dihedrals[i].ind1,k), _get_index(dihedrals[i].ind2,k), _get_index(dihedrals[i].ind3,k), _get_index(dihedrals[i].ind4,k) );
-		dihedralPotList[i+k*numDihedrals] = dihedrals[i].tabFileIndex;
-	    }
-	}
-	}
-
-	if (numBondAngles > 0) {
-	bondAngleList = new int4[ (numBondAngles*2) * numReplicas ];
-	for(int k = 0 ; k < numReplicas; k++) {
-	    for(int i = 0; i < numBondAngles; ++i) {
-			if (bondAngles[i].tabFileIndex1 == -1) {
-				fprintf(stderr,"Error: bondanglefile '%s' was not read with tabulatedAngleFile command.\n", bondAngles[i].angleFileName1.val());
-				exit(1);
-			}
-			if (bondAngles[i].tabFileIndex2 == -1) {
-				fprintf(stderr,"Error: bondanglefile1 '%s' was not read with tabulatedBondFile command.\n", bondAngles[i].bondFileName.val());
-				exit(1);
-			}
-			if (bondAngles[i].tabFileIndex3 == -1) {
-				fprintf(stderr,"Error: bondanglefile2 '%s' was not read with tabulatedBondFile command.\n", bondAngles[i].angleFileName2.val());
-				exit(1);
-			}
-			int idx = i+k*numBondAngles;
-			bondAngleList[idx*2]   = make_int4( bondAngles[i].ind1+k*num, bondAngles[i].ind2+k*num,
-							    bondAngles[i].ind3+k*num, bondAngles[i].ind4+k*num );
-			bondAngleList[idx*2+1] = make_int4( bondAngles[i].tabFileIndex1, bondAngles[i].tabFileIndex2, bondAngles[i].tabFileIndex3, -1 );
-	    }
-	}
-	}
-
-	internal->copyBondedListsToGPU(bondList,angleList,dihedralList,dihedralPotList,bondAngleList);
-	
-	forceInternal = new Vector3[(num+num_rb_attached_particles+numGroupSites)*numReplicas];
-	if (fullLongRange != 0)
-	    printf("No cell decomposition created.\n");
-
-	// Prepare the trajectory output writer.
-	for (int repID = 0; repID < numReplicas; ++repID) {
-		TrajectoryWriter *w = new TrajectoryWriter(outFilePrefixes[repID].c_str(), TrajectoryWriter::getFormatName(outputFormat),
-							   sys->getBox(), num, timestep, outputPeriod);
-                
-		writers.push_back(w);
-	}
-
-        //Preparing the writers for momentum if necessary Han-Yi Chou
-        if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin"))
-        {
-            for (int repID = 0; repID < numReplicas; ++repID) 
-            {
-
-                TrajectoryWriter *w = new TrajectoryWriter(outMomentumFilePrefixes[repID].c_str(), TrajectoryWriter::getFormatName(outputFormat),
-                                                           sys->getBox(), num, timestep, outputPeriod);
-                momentum_writers.push_back(w);
-            }
-        }
-	updateNameList();
-}
-
-// Destructor: frees the host arrays, the random-number generator buffers
-// (host and device), the rigid-body controllers, the force-computation
-// object and the trajectory/momentum writers owned by this object.
-GrandBrownTown::~GrandBrownTown() {
-	// Which optional buffers exist depends on the particle integrator.
-	const bool isLangevin   = (particle_dynamic == String("Langevin"));
-	const bool isNoseHoover = (particle_dynamic == String("NoseHooverLangevin"));
-	const bool hasMomentum  = isLangevin || isNoseHoover;
-
-	// Host-side particle state.
-	delete[] forceInternal;
-	forceInternal = nullptr;
-	delete[] pos;
-	pos = nullptr;
-
-	if (hasMomentum) {
-		delete[] momentum;
-		momentum = nullptr;
-		// The extra `random` array is only released for Nose-Hoover Langevin.
-		if (isNoseHoover) {
-			delete[] random;
-			random = nullptr;
-		}
-	}
-
-	delete[] type;
-	delete[] serial;
-
-	// Bonded-interaction lists are only released when their count is positive.
-	if (numBonds > 0) {
-		delete[] bondList;
-	}
-	if (numAngles > 0) {
-		delete[] angleList;
-	}
-	if (numDihedrals > 0) {
-		delete[] dihedralList;
-		delete[] dihedralPotList;
-	}
-
-	// Random generator: `*_h` buffers are released with delete[], `states`
-	// and `*_d` buffers with cudaFree.
-	if (randoGen->states) {
-		gpuErrchk(cudaFree(randoGen->states));
-		randoGen->states = nullptr;
-	}
-	if (randoGen->integer_h) {
-		delete[] randoGen->integer_h;
-		randoGen->integer_h = nullptr;
-	}
-	if (randoGen->integer_d) {
-		gpuErrchk(cudaFree(randoGen->integer_d));
-		randoGen->integer_d = nullptr;
-	}
-	if (randoGen->uniform_h) {
-		delete[] randoGen->uniform_h;
-		randoGen->uniform_h = nullptr;
-	}
-	if (randoGen->uniform_d) {
-		gpuErrchk(cudaFree(randoGen->uniform_d));
-		randoGen->uniform_d = nullptr;
-	}
-	delete randoGen;
-	gpuErrchk(cudaFree(randoGen_d));
-
-	// Rigid-body controllers (one is indexed per replica elsewhere in this file).
-	for (size_t k = 0; k < RBC.size(); ++k) {
-		delete RBC[k];
-	}
-	RBC.clear();
-
-	// Auxiliary objects.
-	delete internal;
-	internal = nullptr;
-
-	for (int rep = 0; rep < numReplicas; ++rep) {
-		delete writers[rep];
-		writers[rep] = nullptr;
-	}
-
-	if (hasMomentum) {
-		for (int rep = 0; rep < numReplicas; ++rep) {
-			delete momentum_writers[rep];
-			momentum_writers[rep] = nullptr;
-		}
-	}
-
-	// imdForces is only released when IMD is enabled.
-	if (imd_on) {
-		delete[] imdForces;
-	}
-}
-
-// Run the simulation.
-//
-// Sequence, as implemented below:
-//   1. Start a new trajectory file per replica (plus a momentum file for the
-//      "Langevin" / "NoseHooverLangevin" particle integrators).
-//   2. Copy state to the GPU and, if imd_on, block until an IMD client connects.
-//   3. Loop over s = 1 .. steps-1. Each iteration:
-//        - (s == 1 only) evaluates the initial forces,
-//        - launches the particle integration kernel selected by particle_dynamic,
-//        - integrates the rigid bodies,
-//        - refreshes positions of particles attached to rigid bodies,
-//        - every outputPeriod steps copies positions to the host and services IMD,
-//        - clears and recomputes particle and rigid-body forces,
-//        - launches the final BAOAB kernel for the Langevin-type integrators,
-//        - every outputPeriod steps appends to the trajectory files,
-//        - every outputEnergyPeriod steps prints progress and writes energy and
-//          restart files.
-//   4. Prints the total run time.
-//
-// Nose-Hoover Langevin dynamics is implemented for particles.
-void GrandBrownTown::run()
-{
-
-    // Start a new trajectory file (and momentum file, if applicable) for every replica
-    for (int repID = 0; repID < numReplicas; ++repID) 
-    {
-        writers[repID]->newFile(pos + repID*(num+num_rb_attached_particles), name, 0.0f, num); // 'pos + (repID*num)' == array-to-pointer decay
-        if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin"))
-            momentum_writers[repID]->newFile((momentum + repID * num), name, 0.0f, num); // 'pos + (repID*num)' == array-to-pointer decay
-    }
-
-    // Initialize timers (util.*): timer0 spans the whole run, timerS spans one energy-output interval
-    wkf_timerhandle timer0, timerS;
-    timer0 = wkf_timer_create();
-    timerS = wkf_timer_create();
-
-    #ifdef USE_NCCL
-    // One stream slot per GPU; only slot 0 is assigned inside the main loop.
-    cudaStream_t* nccl_broadcast_streams = new cudaStream_t[gpuman.gpus.size()];
-    for (int i=0; i< gpuman.gpus.size(); ++i) nccl_broadcast_streams[i] = 0;
-    #endif
-
-    copyToCUDA();
-
-    // The set of arrays uploaded depends on the particle integrator
-    if(particle_dynamic == String("Langevin"))
-        internal -> copyToCUDA(forceInternal, pos,momentum);
-    else if(particle_dynamic == String("NoseHooverLangevin"))
-        internal -> copyToCUDA(forceInternal, pos, momentum, random);
-    else
-        internal -> copyToCUDA(forceInternal, pos);
-
-    // IMD Stuff
-    void* sock = NULL;
-    void* clientsock = NULL;
-    int length;
-    if (imd_on) 
-    {
-        printf("Setting up incoming socket\n");
-        vmdsock_init();
-        sock = vmdsock_create();
-        clientsock = NULL;
-        vmdsock_bind(sock, imd_port);
-
-        printf("Waiting for IMD connection on port %d...\n", imd_port);
-        vmdsock_listen(sock);
-        // Busy-wait until a client connects and completes the handshake
-        while (!clientsock) 
-        {
-            if (vmdsock_selread(sock, 0) > 0) 
-            {
-                clientsock = vmdsock_accept(sock);
-                if (imd_handshake(clientsock))
-                    clientsock = NULL;
-            }
-        }
-        sleep(1);
-        // The client must follow the handshake with IMD_GO, otherwise it is ignored
-        if (vmdsock_selread(clientsock, 0) != 1 || imd_recv_header(clientsock, &length) != IMD_GO) 
-        {
-            clientsock = NULL;
-        }
-        // The buffer holds num*numReplicas entries, so clear all of them rather
-        // than only the first `num`.
-        const size_t numImdForces = (size_t) num * numReplicas;
-        imdForces = new Vector3[numImdForces];
-        for (size_t i = 0; i < numImdForces; ++i) // clear old forces
-            imdForces[i] = Vector3(0.0f);
-
-    } // endif (imd_on)
-
-    // Start timers
-    wkf_timer_start(timer0);
-    wkf_timer_start(timerS);
-
-    float t; // simulation time
-
-    // Ceiling division: one thread per particle (free + rigid-body-attached) per replica
-    int numBlocks = ((num+num_rb_attached_particles) * numReplicas) / NUM_THREADS + (((num+num_rb_attached_particles) * numReplicas) % NUM_THREADS == 0 ? 0 : 1);
-    int tl = temperatureGridFile.length();
-    // NOTE(review): force_d is allocated here and freed at the end, but is not
-    // otherwise referenced in this function.
-    Vector3 *force_d;
-    gpuErrchk(cudaMalloc((void**)&force_d, sizeof(Vector3)*(num+num_rb_attached_particles+numGroupSites) * numReplicas));
-
-    printf("Configuration: %d particles | %d replicas\n", num, numReplicas);
-    for (int i=0; i< gpuman.gpus.size(); ++i) {
-	gpuman.use(i);
-	gpuErrchk( cudaProfilerStart() );
-    }
-    gpuman.use(0);
-
-    // Main loop over Brownian dynamics steps
-    for (long int s = 1; s < steps; s++)
-    {
-      PUSH_NVTX("Main loop timestep",0)
-        bool get_energy = ((s % outputEnergyPeriod) == 0);
-        //At the very first time step, the force is computed
-        if(s == 1)
-        {
-            // 'interparticleForce' - determines whether particles interact with each other
-	    internal->clear_force();
-	    internal->clear_energy();
-	    const std::vector<Vector3*>& _pos = internal->getPos_d();
-
-	    if (num_rb_attached_particles > 0) {
-		#pragma omp parallel for
-		for(int i = 0; i < numReplicas; ++i) {
-		    RBC[i]->update_attached_particle_positions(
-			internal->getPos_d()[0]+num+i*(num+num_rb_attached_particles),
-			internal->getForceInternal_d()[0]+num+i*(num+num_rb_attached_particles),
-			internal->getEnergy()+num+i*(num+num_rb_attached_particles),
-			sys_d, num, num_rb_attached_particles, numReplicas);
-		}
-	    }
-
-	    if (numGroupSites > 0) updateGroupSites<<<(numGroupSites/32+1),32>>>(_pos[0], groupSiteData_d, num + num_rb_attached_particles, numGroupSites, numReplicas);
-
-	    #ifdef USE_NCCL
-	    if (gpuman.gpus.size() > 1) {
-		gpuman.nccl_broadcast(0, _pos, _pos, (num+num_rb_attached_particles+numGroupSites)*numReplicas, -1);
-	    }
-	    #endif
-	    gpuman.sync();
-
-            #ifdef _OPENMP
-            omp_set_num_threads(4);
-            #endif
-            #pragma omp parallel for
-            for(int i = 0; i < numReplicas; ++i)
-                RBC[i]->clearForceAndTorque(); //Han-Yi Chou
-            
-            if (interparticleForce)
-            {
-                if (tabulatedPotential)
-                {
-                    switch (fullLongRange)
-                    {
-                        case 0: // [ N*log(N) ] interactions, + cutoff | decomposition
-                            {
-                                 internal -> decompose();
-                                #ifdef _OPENMP
-                                omp_set_num_threads(4);
-                                #endif
-                                #pragma omp parallel for
-                                for(int i = 0; i < numReplicas; ++i)
-                                    RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*(num+num_rb_attached_particles), sys_d);
-                            }
-                            internal -> computeTabulated(get_energy);
-                            break;
-                        default:
-                            internal->computeTabulatedFull(get_energy);
-                            break;
-                    }
-                }
-                else
-                {
-                    // Not using tabulated potentials.
-                    switch (fullLongRange)
-                    {
-                        case 0: // Use cutoff | cell decomposition.
-                            // NOTE(review): unlike the tabulated branch above, the first-step
-                            // decomposition here only happens when decompPeriod divides 1 - confirm intent.
-                            if (s % decompPeriod == 0)
-                            {
-                                internal->decompose();
-                                #ifdef _OPENMP
-                                omp_set_num_threads(4);
-                                #endif
-                                #pragma omp parallel for
-                                for(int i = 0; i < numReplicas; ++i)
-                                    RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*(num+num_rb_attached_particles), sys_d);
-                            }
-                            internal->compute(get_energy);
-                            break;
-
-                        case 1: // Do not use cutoff
-                            internal->computeFull(get_energy);
-                            break;
-
-                        case 2: // Compute only softcore forces.
-                            internal->computeSoftcoreFull(get_energy);
-                            break;
-
-                        case 3: // Compute only electrostatic forces.
-                            internal->computeElecFull(get_energy);
-                            break;
-                    }
-                }
-            }//if inter-particle force
-
-	    // NOTE(review): the energy buffer is passed when get_energy is false and omitted
-	    // when it is true; this looks inverted - confirm against the kernel overloads.
-	    if (get_energy) {
-		compute_position_dependent_force_for_rb_attached_particles
-		    <<< numBlocks, NUM_THREADS >>> (
-			internal -> getPos_d()[0], internal -> getForceInternal_d()[0],
-			internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType);
-	    } else {
-		compute_position_dependent_force_for_rb_attached_particles
-		    <<< numBlocks, NUM_THREADS >>> (
-			internal -> getPos_d()[0],
-			internal -> getForceInternal_d()[0], internal -> getEnergy(),
-			internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType);
-	    }
-
-
-            #ifdef _OPENMP
-            omp_set_num_threads(4);
-            #endif
-            #pragma omp parallel for
-            for(int i = 0; i < numReplicas; ++i)
-                RBC[i]->updateForces(internal->getPos_d()[0]+i*(num+num_rb_attached_particles),
-				     internal->getForceInternal_d()[0]+i*(num+num_rb_attached_particles),
-				     s,
-				     internal->getEnergy()+i*(num+num_rb_attached_particles),
-				     get_energy,
-				     RigidBodyInterpolationType, sys, sys_d, num, num_rb_attached_particles);
-            if(rigidbody_dynamic == String("Langevin"))
-            {
-                #ifdef _OPENMP
-                omp_set_num_threads(4);
-                #endif
-                #pragma omp parallel for
-                for(int i = 0; i < numReplicas; ++i)
-                {
-                    RBC[i]->SetRandomTorques();
-                    RBC[i]->AddLangevin();
-                }
-            }
-	    #ifdef USE_NCCL
-	    if (gpuman.gpus.size() > 1) {
-		const std::vector<Vector3*>& _f = internal->getForceInternal_d();
-		gpuman.nccl_reduce(0, _f, _f, (num+num_rb_attached_particles+numGroupSites)*numReplicas, -1);
-	    }
-	    #endif
-
-	    if (numGroupSites > 0) distributeGroupSiteForces<false><<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], groupSiteData_d, num+num_rb_attached_particles, numGroupSites, numReplicas);
-
-        }//if step == 1
-
-	PUSH_NVTX("Clear particle energy data",1)
-	internal->clear_energy();
-	gpuman.sync();
-	POP_NVTX
-
-	PUSH_NVTX("Integrate particles",2)
-        if(particle_dynamic == String("Langevin"))
-            updateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal->getPos_d()[0], internal->getMom_d(), internal->getForceInternal_d()[0], internal->getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d, randoGen_d, numReplicas, ParticleInterpolationType);
-        else if(particle_dynamic == String("NoseHooverLangevin"))
-            //kernel for Nose-Hoover Langevin dynamic
-            updateKernelNoseHooverLangevin<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d()[0], internal -> getMom_d(), 
-            internal -> getRan_d(), internal -> getForceInternal_d()[0], internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d,
-            randoGen_d, numReplicas, ParticleInterpolationType);
-        ////For Brownian motion
-        else
-            updateKernel<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d()[0], internal -> getForceInternal_d()[0], internal -> getType_d(),
-                                                       part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d, randoGen_d, numReplicas,
-                                                       internal->getEnergy(), get_energy, ParticleInterpolationType);
-
-	POP_NVTX
-
-	PUSH_NVTX("Integrate rigid bodies",2)
-        if(rigidbody_dynamic == String("Langevin"))
-        {
-            #ifdef _OPENMP
-            omp_set_num_threads(4);
-            #endif
-            #pragma omp parallel for
-            for(int i = 0; i < numReplicas; ++i)
-            {
-                RBC[i]->integrateDLM(sys, 0);
-                RBC[i]->integrateDLM(sys, 1);
-            }
-        }
-        else
-	{
-            #ifdef _OPENMP
-            omp_set_num_threads(4);
-            #endif
-            // 'ordered' keeps the per-replica print() calls in replica order
-            #pragma omp parallel for ordered
-            for(int i = 0; i < numReplicas; ++i)
-            {
-                RBC[i]->integrate(sys, s);
-                #pragma omp ordered
-                RBC[i]->print(s);
-            }
-        }
-	POP_NVTX
-
-	PUSH_NVTX("Update rigid body attached particle positions",3)
-	if (num_rb_attached_particles > 0) {
-	    #pragma omp parallel for
-	    for(int i = 0; i < numReplicas; ++i) {
-		RBC[i]->update_attached_particle_positions(
-		    internal->getPos_d()[0]+num+i*(num+num_rb_attached_particles),
-		    internal->getForceInternal_d()[0]+num+i*(num+num_rb_attached_particles),
-		    internal->getEnergy()+num+i*(num+num_rb_attached_particles),
-		    sys_d, num, num_rb_attached_particles, numReplicas);
-	    }
-	}
-	POP_NVTX
-
-        if (s % outputPeriod == 0) {
-	    PUSH_NVTX("Copy particle positions to host for output",7)
-            // Copy particle positions back to CPU
-	    gpuErrchk(cudaDeviceSynchronize());
-            gpuErrchk(cudaMemcpy(pos, internal ->getPos_d()[0], sizeof(Vector3) * (num+num_rb_attached_particles) * numReplicas, cudaMemcpyDeviceToHost));
-	    POP_NVTX
-	}
-        if (imd_on && clientsock && s % outputPeriod == 0)
-        {
-	    assert(gpuman.gpus.size()==1); // TODO: implement IMD with multiple gpus
-	    gpuErrchk(cudaDeviceSynchronize());
-            float* coords = new float[num*3]; // TODO: move allocation out of run loop
-            int* atomIds = new int[num]; // TODO: move allocation out of run loop
-            int length;
-
-            bool paused = false;
-            // clientsock is reset to NULL by the disconnect/kill cases below, so it must be
-            // checked before the socket is polled again.
-            while (clientsock && (vmdsock_selread(clientsock, 0) > 0 || paused))
-            {
-                switch (imd_recv_header(clientsock, &length))
-                {
-                        case IMD_DISCONNECT:
-                            printf("[IMD] Disconnecting...\n");
-                            imd_disconnect(clientsock);
-                            clientsock = NULL;
-                            sleep(5);
-                            break;
-                        case IMD_KILL:
-                            printf("[IMD] Killing...\n");
-                            imd_disconnect(clientsock);
-                            clientsock = NULL;
-                            steps = s; // Stop the simulation at this step
-                            sleep(5);
-                            break;
-                        case IMD_PAUSE:
-                            paused = !paused;
-                            break;
-                        case IMD_GO:
-                            printf("[IMD] Caught IMD_GO\n");
-                            break;
-                        case IMD_MDCOMM:
-                            for (size_t i = 0; i < num; ++i) // clear old forces
-                                imdForces[i] = Vector3(0.0f);
-
-                            if (length < 0 || length > num)
-                            {
-                                // coords/atomIds only hold `num` entries; receiving more would
-                                // overrun them, so treat this as a protocol violation.
-                                printf("[IMD] Invalid force count %d. Disconnecting...\n", length);
-                                imd_disconnect(clientsock);
-                                clientsock = NULL;
-                            }
-                            else if (imd_recv_mdcomm(clientsock, length, atomIds, coords))
-                            {
-                                printf("[IMD] Error receiving forces\n");
-                            }
-                            else
-                            {
-                                for (int j = 0; j < length; ++j)
-                                {
-                                    const int i = atomIds[j];
-                                    // Atom ids come from the client; skip any that fall outside the force buffer
-                                    if (i < 0 || i >= num)
-                                        continue;
-                                    imdForces[i] = Vector3(coords[j*3], coords[j*3+1], coords[j*3+2]) * conf.imdForceScale;
-                                }
-                            }
-                            break;
-                        default:
-                            printf("[IMD] Something weird happened. Disconnecting..\n");
-                            break;
-                }
-            }
-            if (clientsock)
-            {
-                    // Send the first `num` host positions to the client
-                    for (size_t i = 0; i < num; i++)
-                    {
-                        const Vector3& p = pos[i];
-                        coords[3*i] = p.x;
-                        coords[3*i+1] = p.y;
-                        coords[3*i+2] = p.z;
-                    }
-                    imd_send_fcoords(clientsock, num, coords);
-            }
-                delete[] coords;
-                delete[] atomIds;
-        }
-
-        #ifdef _OPENMP
-        omp_set_num_threads(4);
-        #endif
-
-	PUSH_NVTX("Clear rigid body forces",2)
-        #pragma omp parallel for
-        for(int i = 0; i < numReplicas; ++i) 
-            RBC[i]->clearForceAndTorque();
-	POP_NVTX
-
-	
-	if (numGroupSites > 0) {
- 	  PUSH_NVTX("Update collective coordinates",2)
-	    gpuman.sync();
-	    updateGroupSites<<<(numGroupSites/32+1),32>>>(internal->getPos_d()[0], groupSiteData_d, num + num_rb_attached_particles, numGroupSites, numReplicas);
-	    gpuman.sync();
-	  POP_NVTX
-	}
-
-        // With an IMD client connected, the forces start from the client-supplied values instead of zero
-        if (imd_on && clientsock)
-            internal->setForceInternalOnDevice(imdForces); // TODO ensure replicas are mutually exclusive with IMD // TODO add multigpu support with IMD
-	else {
-	  PUSH_NVTX("Clear particle forces",2)
-            internal->clear_force();
-	    #ifdef USE_NCCL
-	    if (gpuman.gpus.size() > 1) {
-		const std::vector<Vector3*>& _p = internal->getPos_d();
-		nccl_broadcast_streams[0] = gpuman.gpus[0].get_next_stream();
-		gpuman.nccl_broadcast(0, _p, _p, (num+num_rb_attached_particles+numGroupSites)*numReplicas, nccl_broadcast_streams);
-	    }
-	    #endif
-	    POP_NVTX
-    	}
-
-        if (interparticleForce)
-        {
-            // 'tabulatedPotential' - determines whether interaction is described with tabulated potentials or formulas
-            if (tabulatedPotential)
-            {
-                switch (fullLongRange)
-                {
-                    case 0: // [ N*log(N) ] interactions, + cutoff | decomposition
-                        if (s % decompPeriod == 0)
-                        {
-			  PUSH_NVTX("Decompose particles",5)
-                            internal -> decompose();
-			  POP_NVTX
-                            #ifdef _OPENMP
-                            omp_set_num_threads(4);
-                            #endif
-			    PUSH_NVTX("Update rigid body particle lists",6)
-                            #pragma omp parallel for
-                            for(int i = 0; i < numReplicas; ++i)
-                                RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*(num+num_rb_attached_particles), sys_d);
-			    POP_NVTX
-                        }
-			PUSH_NVTX("Calculate particle-particle forces",7)
-                        internal -> computeTabulated(get_energy);
-			POP_NVTX
-			#ifdef USE_NCCL
-			if (gpuman.gpus.size() > 1) {
-			  PUSH_NVTX("Reduce particle forces",6)
-			    const std::vector<Vector3*>& _f = internal->getForceInternal_d();
-			    gpuman.nccl_reduce(0, _f, _f, (num+num_rb_attached_particles)*numReplicas, -1);
-			  POP_NVTX
-			}
-			#endif
-                        break;
-                    default: // [ N^2 ] interactions, no cutoff | decompositions
-                        internal->computeTabulatedFull(get_energy);
-                        break;
-                }
-            }
-            else
-            { 
-                // Not using tabulated potentials.
-                switch (fullLongRange)
-                {
-                        case 0: // Use cutoff | cell decomposition.
-                            if (s % decompPeriod == 0)
-                            {
-                               internal->decompose();
-                               #ifdef _OPENMP
-                               omp_set_num_threads(4);
-                               #endif
-                               // Per-replica stride is num+num_rb_attached_particles, matching every
-                               // other updateParticleLists call in this function.
-                               #pragma omp parallel for
-                               for(int i = 0; i < numReplicas; ++i)
-                                   RBC[i]->updateParticleLists( (internal->getPos_d()[0])+i*(num+num_rb_attached_particles), sys_d);
-                            }
-                            internal->compute(get_energy);
-                            break;
-                        case 1: // Do not use cutoff
-                            internal->computeFull(get_energy);
-                            break;
-
-                        case 2: // Compute only softcore forces.
-                            internal->computeSoftcoreFull(get_energy);
-                            break;
-
-                        case 3: // Compute only electrostatic forces.
-                            internal->computeElecFull(get_energy);
-                            break;
-                }
-            }
-        }
-
-	PUSH_NVTX("Compute RB attached particle forces",4)
-	// NOTE(review): same branch layout as in the s == 1 block - the energy buffer is
-	// passed only when get_energy is false; confirm against the kernel overloads.
-	if (get_energy) {
-	    compute_position_dependent_force_for_rb_attached_particles
-		<<< numBlocks, NUM_THREADS >>> (
-		    internal -> getPos_d()[0], internal -> getForceInternal_d()[0],
-		    internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType);
-	} else {
-	    compute_position_dependent_force_for_rb_attached_particles
-		<<< numBlocks, NUM_THREADS >>> (
-		    internal -> getPos_d()[0],
-		    internal -> getForceInternal_d()[0], internal -> getEnergy(),
-		    internal -> getType_d(), part_d, electricField, num, num_rb_attached_particles, numReplicas, ParticleInterpolationType);
-	}
-	POP_NVTX
-
-
-        //compute the force for rigid bodies
-        #ifdef _OPENMP
-        omp_set_num_threads(4);
-        #endif
-	PUSH_NVTX("Compute RB-RB forces forces",5)
-        #pragma omp parallel for
-        for(int i = 0; i < numReplicas; ++i) // TODO: Use different buffer for RB particle forces to avoid race condition
-            RBC[i]->updateForces((internal->getPos_d()[0])+i*(num+num_rb_attached_particles), (internal->getForceInternal_d()[0])+i*(num+num_rb_attached_particles), s, (internal->getEnergy())+i*(num+num_rb_attached_particles), get_energy,
-				 RigidBodyInterpolationType, sys, sys_d, num, num_rb_attached_particles);
-	POP_NVTX
-
-	if (numGroupSites > 0) {
-	  PUSH_NVTX("Spread collective coordinate forces to constituent particles",4)
-	    gpuman.sync();
-	    distributeGroupSiteForces<true><<<(numGroupSites/32+1),32>>>(internal->getForceInternal_d()[0], groupSiteData_d, num+num_rb_attached_particles, numGroupSites, numReplicas);
-	    gpuman.sync();
-	  POP_NVTX
-	}
-
-	PUSH_NVTX("Update particle coordinates",2)
-        // Both Langevin-type integrators finish the step with the same kernel
-        if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin"))
-            LastUpdateKernelBAOAB<<< numBlocks, NUM_THREADS >>>(internal -> getPos_d()[0], internal -> getMom_d(), internal -> getForceInternal_d()[0], 
-            internal -> getType_d(), part_d, kT, kTGrid_d, electricField, tl, timestep, num, num_rb_attached_particles, sys_d, randoGen_d, numReplicas, internal->getEnergy(), get_energy,
-            ParticleInterpolationType);
-	POP_NVTX
-  
-	PUSH_NVTX("Update RB coordinates",3)
-        if(rigidbody_dynamic == String("Langevin"))
-        {
-            #ifdef _OPENMP
-            omp_set_num_threads(4);
-            #endif
-            #pragma omp parallel for ordered
-            for(int i = 0; i < numReplicas; ++i)
-            {
-                RBC[i]->SetRandomTorques();
-                RBC[i]->AddLangevin();
-                RBC[i]->integrateDLM(sys, 2);
-                #pragma omp ordered
-                RBC[i]->print(s);
-            }
-        }
-	POP_NVTX
-
-        if (s % outputPeriod == 0)
-        {
-	  PUSH_NVTX("Copy and write particle and RB coordinates for output",3)
-            if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin"))
-            {
-                gpuErrchk(cudaMemcpy(momentum, internal ->  getMom_d(), sizeof(Vector3) * (num) * numReplicas, cudaMemcpyDeviceToHost));
-            }
-            t = s*timestep;
-            // Loop over all replicas
-            for (int repID = 0; repID < numReplicas; ++repID)
-            {
-
-                if (numberFluct == 1)
-                    updateNameList(); // no need for it here if particles stay the same
-
-                // Write the trajectory.
-                writers[repID]->append(pos + repID*(num+num_rb_attached_particles), name, serial, t, num);
-
-                if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin"))
-                {
-                    // NOTE(review): this stride is num+num_rb_attached_particles, while newFile() above
-                    // and the device-to-host copies of momentum use num per replica - confirm the layout.
-                    momentum_writers[repID]->append(momentum + repID * (num+num_rb_attached_particles), name, serial, t, num);
-                }
-            }
-            // TODO: Currently, not compatible with replicas. Needs a fix.
-            if (numberFluct == 1)
-                updateReservoirs();
-
-           remember(t);
-	  // Close the range opened at the top of this block, so push/pop stay balanced even
-	  // when outputPeriod and outputEnergyPeriod differ.
-	  POP_NVTX
-        }
-        if (get_energy)
-        {
-                wkf_timer_stop(timerS);
-                t = s * timestep;
-                // Simulation progress and statistics.
-                float percent = (100.0f * s) / steps;
-                float msPerStep = wkf_timer_time(timerS) * 1000.0f / outputEnergyPeriod;
-                float nsPerDay = numReplicas * timestep / msPerStep * 864E5f;
-
-                // Nice thousand separator
-                setlocale(LC_NUMERIC, "");
-
-                // Do the output
-                printf("\rStep %ld [%.2f%% complete | %.3f ms/step | %.3f ns/day]",s, percent, msPerStep, nsPerDay);
-
-                float e = 0.f;
-                float V = 0.f;
-                // Sum the per-particle energy buffer on the device and average over replicas
-                thrust::device_ptr<float> en_d(internal->getEnergy());
-                V = (thrust::reduce(en_d, en_d+(num+num_rb_attached_particles)*numReplicas)) / numReplicas;
-                if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin"))
-                {
-                    gpuErrchk(cudaMemcpy(momentum, internal->getMom_d(), sizeof(Vector3)*num*numReplicas,cudaMemcpyDeviceToHost));
-                    e = KineticEnergy();
-                }   
-                // Energies are appended to files named after the first output prefix
-                std::fstream energy_file;
-                energy_file.open( (outFilePrefixes[0]+".energy.dat").c_str(), std::fstream::out | std::fstream::app);
-                if(energy_file.is_open())
-                {
-                    energy_file << "Kinetic Energy: " << e*num*0.5f*(2.388458509e-1) << " (kT) "<< std::endl;
-                    energy_file << "Potential Energy: " << V << " (kcal/mol) " << std::endl;
-                    energy_file.close();
-                }
-                else
-                {
-                    std::cout << "Error in opening energy files\n";
-                }
-                
-                if(rigidbody_dynamic == String("Langevin"))
-                {
-                    #ifdef _OPENMP
-                    omp_set_num_threads(4);
-                    #endif
-                    #pragma omp parallel for
-                    for(int i = 0; i < numReplicas; ++i)
-                        RBC[i]->KineticEnergy();
-                }
-                std::fstream rb_energy_file;
-                rb_energy_file.open( (outFilePrefixes[0]+".rb_energy.dat").c_str(), std::fstream::out | std::fstream::app);
-                if(rb_energy_file.is_open())
-                {
-                    float k_tol = 0.f;
-                    float v_tol = 0.f;
-                    float (RigidBody::*func_ptr)();
-                    #ifdef _OPENMP
-                    omp_set_num_threads(4);
-                    #endif
-                    // Accumulate rigid-body kinetic and potential energy over replicas
-                    #pragma omp parallel for private(func_ptr) reduction(+:k_tol,v_tol)
-                    for(int i = 0; i < numReplicas; ++i)
-                    {
-                        func_ptr = &RigidBody::getKinetic;
-                        k_tol += RBC[i]->getEnergy(func_ptr);
-                        func_ptr = &RigidBody::getEnergy;
-                        v_tol += RBC[i]->getEnergy(func_ptr);
-                    }
-                    rb_energy_file << "Kinetic Energy "   << k_tol/numReplicas << " (kT)" << std::endl;
-                    rb_energy_file << "Potential Energy " << v_tol/numReplicas << " (kcal/mol)" << std::endl;
-                    rb_energy_file.close();
-                }
-                else
-                {
-                    std::cout << "Error in opening rb energy files\n"; 
-                }
-
-                // Write restart files for each replica.
-                for (int repID = 0; repID < numReplicas; ++repID)
-                    writeRestart(repID);
-
-                wkf_timer_start(timerS);
-         } // s % outputEnergyPeriod
-     POP_NVTX
-     } // done with all Brownian dynamics steps
-
-     // Service one last pending IMD message, if any
-     if (imd_on and clientsock)
-     {
-            if (vmdsock_selread(clientsock, 0) == 1)
-            {
-                int length;
-                switch (imd_recv_header(clientsock, &length))
-                {
-                    case IMD_DISCONNECT:
-                        printf("\n[IMD] Disconnecting...\n");
-                        imd_disconnect(clientsock);
-                        clientsock = NULL;
-                        sleep(5);
-                        break;
-                    case IMD_KILL:
-                        printf("\n[IMD] Killing...\n");
-                        imd_disconnect(clientsock);
-                        clientsock = NULL;
-                        sleep(5);
-                        break;
-                    default:
-                        printf("\n[IMD] Something weird happened. Disconnecting..\n");
-                        break;
-                }
-            }
-     }
-     // Stop the main timer.
-     wkf_timer_stop(timer0);
-
-     // Compute performance data.
-     const float elapsed = wkf_timer_time(timer0); // seconds
-     int tot_hrs = (int) (elapsed / 3600.0f); // whole hours; not wrapped, so runs longer than 60 h report correctly
-     int tot_min = (int) std::fmod(elapsed / 60.0f, 60.0f);
-     float tot_sec   = std::fmod(elapsed, 60.0f);
-
-     printf("\nFinal Step: %d\n", (int) steps);
-
-     printf("Total Run Time: ");
-     if (tot_hrs > 0) printf("%dh%dm%.1fs\n", tot_hrs, tot_min, tot_sec);
-     else if (tot_min > 0) printf("%dm%.1fs\n", tot_min, tot_sec);
-     else printf("%.2fs\n", tot_sec);
-
-     #ifdef USE_NCCL
-     delete[] nccl_broadcast_streams;
-     #endif
-     gpuErrchk(cudaFree(force_d));
-} // GrandBrownTown::run()
-
-// --------------------------------------------
-// Fill the `type` and `serial` arrays for every replica.
-//
-// Within a replica, particle types are assigned in blocks: the type index
-// advances once part[typeIndex].num particles have received it. Serial
-// numbers are taken from the running counter currSerial, which keeps
-// increasing across replicas.
-void GrandBrownTown::populate() {
-	for (int rep = 0; rep < numReplicas; ++rep) {
-		// First slot of this replica in the per-replica arrays.
-		const int base = rep * (num+num_rb_attached_particles);
-		int typeIndex = 0;  // type currently being handed out
-		int assigned = 0;   // particles that have received typeIndex so far
-
-		for (int i = 0; i < num; ++i) {
-			const int slot = base + i;
-			type[slot] = typeIndex;
-			serial[slot] = currSerial;
-			++currSerial;
-
-			++assigned;
-			if (assigned >= part[typeIndex].num) {
-				// This type's quota is used up; move to the next one.
-				++typeIndex;
-				assigned = 0;
-			}
-		}
-	}
-}
-
-
-
-void GrandBrownTown::writeRestart(int repID) const 
-{
-    FILE* out   = fopen(restartFiles[repID].c_str(), "w");
-    const int offset = repID * (num+num_rb_attached_particles);
-
-    for (int i = 0; i < num; ++i) 
-    {
-        const int ind = i + offset;
-        const Vector3& p = pos[ind];
-	fprintf(out, "%d %.10g %.10g %.10g\n", type[ind], p.x, p.y, p.z); 
-    }
-    fclose(out);
-
-    if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin"))
-    {
-        out = fopen(restartMomentumFiles[repID].c_str(), "w");
-        
-        for (int i = 0; i < num; ++i) 
-        {
-            const int ind = i + offset;
-            const Vector3& p = momentum[ind];
-            fprintf(out, "%d %.10g %.10g %.10g\n", type[ind], p.x, p.y, p.z); 
-        }
-        fclose(out);
-    }
-   
-}
-
-/*the center is defined by the first pmf*/
-void GrandBrownTown::initialCondCen() {
-	for (int i = 0; i < num; i++)
-		pos[i] = part[ type[i] ].pmf[0]->getCenter();
-}
-
-
-// Set random initial positions for all particles and replicas
-void GrandBrownTown::initialCond() {
-	for (int repID = 0; repID < numReplicas; repID++) {
-	    const int offset = repID * (num+num_rb_attached_particles);
-		for (int i = 0; i < num; i++) {
-			pos[i + offset] = findPos(type[i + offset]);
-		}
-	}
-}
-
-// A couple old routines for getting particle positions.
-Vector3 GrandBrownTown::findPos(int typ) {
-    // TODO: sum over grids
-	Vector3 r;
-	const BrownianParticleType& pt = part[typ];
-	do {
-		const float rx = sysDim.x * randoGen->uniform(); 
-		const float ry = sysDim.y * randoGen->uniform();
-		const float rz = sysDim.z * randoGen->uniform();
-		r = sys->wrap( Vector3(rx, ry, rz) );
-	} while (pt.pmf[0]->interpolatePotential(r) > *pt.meanPmf);
-	return r;
-}
-
-
-Vector3 GrandBrownTown::findPos(int typ, float minZ) {
-	Vector3 r;
-	const BrownianParticleType& pt = part[typ];
-	do {
-		const float rx = sysDim.x * randoGen->uniform();
-		const float ry = sysDim.y * randoGen->uniform();
-		const float rz = sysDim.z * randoGen->uniform();
-		r = sys->wrap( Vector3(rx, ry, rz) );
-	} while (pt.pmf[0]->interpolatePotential(r) > *pt.meanPmf and fabs(r.z) > minZ);
-	return r;
-}
-
-//Compute the kinetic energy of particle and rigid body Han-Yi Chou
-float GrandBrownTown::KineticEnergy()
-{
-    float *vec_red, *energy;
-    float particle_energy;
-
-    gpuErrchk(cudaMalloc((void**)&vec_red, 512*sizeof(float)));
-    gpuErrchk(cudaMalloc((void**)&energy, sizeof(float)));
-    gpuErrchk(cudaMemset((void*)energy,0,sizeof(float)));
-
-    BrownParticlesKineticEnergy<64><<<dim3(512),dim3(64)>>>(internal->getMom_d(), internal -> getType_d(), part_d, vec_red, num, num_rb_attached_particles, numReplicas);
-    gpuErrchk(cudaDeviceSynchronize());
-
-    Reduction<64><<<dim3(1),dim3(64)>>>(vec_red, energy, 512);
-
-    gpuErrchk(cudaMemcpy((void*)&particle_energy, energy, sizeof(float), cudaMemcpyDeviceToHost));
-
-    gpuErrchk(cudaFree(vec_red));
-    gpuErrchk(cudaFree(energy));
-
-    return 2. * particle_energy / kT / num / numReplicas; //In the unit of 0.5kT
-}
-/*
-void GrandBrownTown::RotKineticEnergy()
-{
-    RBC.KineticEnergy();
-
-    return 2. * e / numReplicas / kT; //In the unit of 0.5kT
-}
-*/
-void GrandBrownTown::InitNoseHooverBath(int N)
-{
-    printf("Entering Nose-Hoover Langevin\n");
-    int count = 0;
-
-    for(int i = 0; i < N; ++i)
-    {
-        int typ = type[i];
-        double mu = part[typ].mu;
-        
-        double sigma = sqrt(kT / mu);
-
-        float tmp = sigma * randoGen->gaussian();
-        random[(size_t)count] = tmp;
-        ++count;
-    }
-    printf("Done in nose-hoover bath\n");
-}
-
-void GrandBrownTown::init_cuda_group_sites()
-{
-    // Count the number of particles that form groups
-    int num_particles = 0;
-    for (auto it = conf.groupSiteData.begin(); it != conf.groupSiteData.end(); ++it) {
-	num_particles += it->size();
-    }
-
-    // Create GPU-friendly data structure
-    assert(numReplicas == 1);    // TODO make this work for replicas
-    int* tmp = new int[numGroupSites+1+num_particles];
-    num_particles = 0;
-    int i = 0;
-    for (auto it = conf.groupSiteData.begin(); it != conf.groupSiteData.end(); ++it) {
-	tmp[i] = num_particles+numGroupSites+1;
-	// printf("DEBUG: tmp[%d] = %d\n", i, tmp[i]);
-	for (auto it2 = it->begin(); it2 != it->end(); ++it2) {
-	    tmp[num_particles+numGroupSites+1] = *it2;
-	    // printf("DEBUG: tmp[%d] = %d\n", num_particles+numGroupSites+1, *it2);
-	    num_particles++;
-	}
-	i++;
-    }
-    assert(i == numGroupSites);
-    tmp[i] = num_particles+numGroupSites+1;
-    // printf("DEBUG: tmp[%d] = %d\n", i, tmp[i]);
-
-    // printf("DEBUG: Finally:\n");
-    // for (int j = 0; j < numGroupSites+1+num_particles; j++) {
-    //         printf("DEBUG: tmp[%d] = %d\n", j, tmp[j]);
-    // }
-
-    // Copy data structure to GPU
-    gpuErrchk(cudaMalloc((void**) &groupSiteData_d, sizeof(int)*(numGroupSites+1+num_particles)));
-    gpuErrchk(cudaMemcpy(groupSiteData_d, tmp, sizeof(int)*(numGroupSites+1+num_particles), cudaMemcpyHostToDevice));
-    // TODO deallocate CUDA
-    delete[] tmp;
-
-}
-
-// -----------------------------------------------------------------------------
-// Initialize file for recording ionic current
-void GrandBrownTown::newCurrent(int repID) const {
-    /*
-	FILE* out = fopen(outCurrFiles[repID].c_str(), "w");
-	fclose(out);
-    */
-}
-
-
-// -----------------------------------------------------------------------------
-// Record the ionic current flowing through the entire system
-void GrandBrownTown::writeCurrent(int repID, float t) const {
-    return;
-    /*
-	FILE* out = fopen(outCurrFiles[repID].c_str(), "a");
-	fprintf(out, "%.10g %.10g %d\n", 0.5f*(t+timeLast), current(t), num);
-	fclose(out);
-    */
-}
-
-
-// -----------------------------------------------------------------------------
-// Record the ionic current in a segment -segZ < z < segZ
-void GrandBrownTown::writeCurrentSegment(int repID, float t, float segZ) const {
-    return;
-    /*
-	FILE* out = fopen(outCurrFiles[repID].c_str(), "a");
-	int i;
-	fprintf(out, "%.10g ", 0.5f * (t + timeLast));
-	for (i = -1; i < numParts; i++)
-		fprintf(out, "%.10g ", currentSegment(t,segZ,i));
-	fprintf(out, "%d\n", num);
-	fclose(out);
-    */
-}
-
-
-// ----------------------------------------------------
-// Compute the current in nanoamperes for entire system
-//
-float GrandBrownTown::current(float t) const {
-	float curr = 0.0f;
-	float dt = timeLast - t;
-
-	for (int i = 0; i < num; i++) {
-		Vector3 d = sys->wrapDiff(pos[i]-posLast[i]);
-		curr += part[type[i]].charge*d.z/(sysDim.z*dt)*1.60217733e-1f;
-	}
-	return curr;
-}
-
-
-// -----------------------------------------------------
-// Compute the current in nanoamperes for a restricted segment (-segZ < z < segZ).
-//
-float GrandBrownTown::currentSegment(float t, float segZ, int carrier) const {
-	float curr = 0.0f;
-	float dt = t - timeLast;
-
-	for (int i = 0; i < num; i++) {
-		float z0 = posLast[i].z;
-		float z1 = pos[i].z;
-
-		// Ignore carriers outside the range for both times.
-		if (fabs(z0) > segZ && fabs(z1) > segZ) continue;
-
-		// Cut the pieces outside the range.
-		if (z0 < -segZ) z0 = -segZ;
-		if (z1 < -segZ) z1 = -segZ;
-		if (z0 > segZ) z0 = segZ;
-		if (z1 > segZ) z1 = segZ;
-
-		float dz = sys->wrapDiff(z1 - z0, sysDim.z);
-		if ( carrier == type[i] || carrier == -1) {
-			curr += part[type[i]].charge*dz/(2.0f*segZ*dt)*1.60217733e-1f;
-		}
-	}
-	return curr;
-}
-
-
-int GrandBrownTown::getReservoirCount(int partInd, int resInd) const {
-	int count = 0;
-	const Reservoir* res = part[partInd].reservoir;
-	for (int i = 0; i < num; ++i)
-		if (type[i] == partInd and res->inside(i, pos[i]))
-			count++;
-	return count;
-}
-
-IndexList GrandBrownTown::getReservoirList(int partInd, int resInd) const {
-	IndexList ret;
-	const Reservoir* res = part[partInd].reservoir;
-	for (int i = 0; i < num; ++i)
-		if (type[i] == partInd and res->inside(resInd, pos[i]))
-			ret.add(i);
-	return ret;
-}
-
-Vector3 GrandBrownTown::freePosition(Vector3 r0, Vector3 r1, float minDist) {
-	const int maxTries = 1000;
-	bool tooClose = true;
-	Vector3 r;
-	Vector3 d = r1 - r0;
-	float minDist2 = minDist*minDist;
-
-	const CellDecomposition& decomp = internal->getDecomp();
-	const CellDecomposition::cell_t *cells = decomp.getCells();
-
-	int tries = 0;
-	while (tooClose) {
-		r.x = r0.x + d.x*randoGen->uniform();
-		r.y = r0.y + d.y*randoGen->uniform();
-		r.z = r0.z + d.z*randoGen->uniform();
-
-		tooClose = false;
-		// Check to make sure we are not too near another particle.
-		const CellDecomposition::cell_t cell = decomp.getCell(decomp.getCellID(r));
-		for (int i = -1; i <= 1; ++i) {
-			for (int j = -1; j <= 1; ++j) {
-				for (int k = -1; k <= 1; ++k) {
-					int nID = decomp.getNeighborID(cell, i, j, k);
-					// TODO: Determine which replica to use to look for free position.
-					const CellDecomposition::range_t range = decomp.getRange(nID, 0);
-					for (int n = range.first; n < range.last; ++n) {
-						Vector3 dj = pos[ cells[n].particle ];
-						if (dj.length2() < minDist2) {
-							tooClose = true;
-							break;
-						}
-					}
-				}
-			}
-		}
-
-		// Don't try too many times.
-		if (++tries > maxTries) {
-			printf("WARNING: freePosition too many tries to find free position.\n");
-			break;
-		}
-	}
-
-	return r;
-}
-
-// -----------------------------------------------------------------------------
-// Update the list of particle names[] for simulations with varying number of
-// particles
-void GrandBrownTown::updateNameList() {
-	if (outputFormat == TrajectoryWriter::formatTraj) {
-		char typeNum[64];
-		for (int i = 0; i < num; ++i) {
-			sprintf(typeNum, "%d", type[i]);
-			name[i] = typeNum;
-		}
-	} else {
-		for (int i = 0; i < num; ++i)
-			name[i] = part[ type[i] ].name;
-	}
-}
-
-// -----------------------------------------------------------------------------
-// Save particle positions for analysis purposes.
-// TODO: Fix for multiple replicas.
-void GrandBrownTown::remember(float t) {
-	timeLast = t;
-	std::copy(pos, pos + num * numReplicas, posLast);
-        if(particle_dynamic == String("Langevin") || particle_dynamic == String("NoseHooverLangevin"))
-            std::copy(momentum, momentum + num * numReplicas, momLast);
-}
-
-// -----------------------------------------------------------------------------
-// Delete particles listed in the list 'p'
-void GrandBrownTown::deleteParticles(IndexList& p) {
-	int n = 0;
-	for (int i = 0; i < num; ++i) {
-		pos[n] = pos[i];
-		type[n] = type[i];
-		serial[n] = serial[i];
-		if (p.find(i) == -1) n++;
-	}
-	num = n;
-}
-
-
-// -----------------------------------------------------------------------------
-// Add particles, obey numCap limit
-void GrandBrownTown::addParticles(int n, int typ) {
-	if (num + n > numCap) n = numCap - num;
-
-	for (int i = num; i < num + n; i++) {
-		pos[i] = findPos(typ, initialZ);
-		type[i] = typ;
-		serial[i] = currSerial;
-		currSerial++;
-	}
-	num += n;
-}
-
-// -----------------------------------------------------------------------------
-// Add particles randomly within a region between r0 and r1.
-// TODO: Fix for CUDA.
-void GrandBrownTown::addParticles(int n, int typ, Vector3 r0, Vector3 r1) {
-	if (num + n > numCap) n = numCap - num;
-
-	Vector3 d = r1 - r0;
-	for (int i = num; i < num + n; ++i) {
-		Vector3 r;
-		r.x = r0.x + d.x * randoGen->uniform();
-		r.y = r0.y + d.y * randoGen->uniform();
-		r.z = r0.z + d.z * randoGen->uniform();
-
-		pos[i] = r;
-		type[i] = typ;
-		serial[i] = currSerial++;
-	}
-	num += n;
-}
-
-// -----------------------------------------------------------------------------
-// Add particles randomly within the region defined by r0 and r1. Maintain a
-// minimum distance of minDist between particles.
-// TODO: Fix for CUDA.
-void GrandBrownTown::addParticles(int n, int typ, Vector3 r0, Vector3 r1, float minDist) {
-	if (num + n > numCap) n = numCap - num;
-	const int n0 = num;
-	for (int i = n0; i < n0 + n; i++) {
-		// Generate a position for the new particle.
-		pos[i] = freePosition(r0, r1, minDist);
-		type[i] = typ;
-		num++;
-		// Update the cell decomposition
-		internal->updateNumber(num); /* RBTODO: unsure if type arg is ok */
-	}
-}
-
-// -----------------------------------------------------------------------------
-// Add or delete particles in the reservoirs. Reservoirs are not wrapped.
-void GrandBrownTown::updateReservoirs() {
-	bool numberChange = false;
-	for (int p = 0; p < numParts; ++p) {
-		if (part[p].reservoir == NULL) continue;
-
-		const int n = part[p].reservoir->length();
-
-		for (int res = 0; res < n; res++) {
-			// Get the current number of particles in the reservoir.
-			IndexList resPart = getReservoirList(p, res);
-			int numberCurr = resPart.length();
-
-			// Determine the new number for this particle from a Poisson distribution.
-			float number0 = part[p].reservoir->getMeanNumber(res);
-			int number = randoGen->poisson(number0);
-
-			// If the number is the same nothing needs to be done.
-			if (number == numberCurr) continue;
-
-			if (number < numberCurr) {
-				int dn = numberCurr - number;
-
-				// We need to delete particles.  Choose them at random.
-				IndexList delPart;
-				int pick = static_cast<int>(randoGen->uniform() *numberCurr) % numberCurr;
-				if (pick + dn >= numberCurr) {
-					int dn0 = dn - (numberCurr - pick);
-					delPart = resPart.range(pick, numberCurr-1);
-					delPart.add(resPart.range(0, dn0-1));
-				} else {
-					delPart = resPart.range(pick, pick + dn-1);
-				}
-
-				deleteParticles(delPart);
-				numberChange = true;
-			} else {
-				// We need to add particles.
-				Vector3 r0 = part[p].reservoir->getOrigin(res);
-				Vector3 r1 = part[p].reservoir->getDestination(res);
-				addParticles(number - numberCurr, p, r0, r1, minimumSep);
-				numberChange = true;
-			}
-		} // end reservoir loop
-	} // end particle loop
-
-	if (numberChange)
-		internal->updateNumber(num);
-}
-
-void GrandBrownTown::copyRandToCUDA() {
-	gpuErrchk(cudaMalloc((void**)&randoGen_d, sizeof(Random)));
-        gpuErrchk(cudaMemcpy(&(randoGen_d->states), &(randoGen->states), sizeof(curandState_t*),cudaMemcpyHostToDevice));
-}
-
-
-// -----------------------------------------------------------------------------
-// Allocate memory on GPU(s) and copy to device
-void GrandBrownTown::copyToCUDA() {
-	/* const size_t tot_num = num * numReplicas;
-	gpuErrchk(cudaMalloc(&pos_d, sizeof(Vector3) * tot_num));
-	gpuErrchk(cudaMemcpyAsync(pos_d, pos, sizeof(Vector3) * tot_num,
-														cudaMemcpyHostToDevice));
-
-	gpuErrchk(cudaMalloc(&forceInternal_d, sizeof(Vector3) * num * numReplicas));
-	gpuErrchk(cudaMemcpyAsync(forceInternal_d, forceInternal, sizeof(Vector3) * tot_num,
-														cudaMemcpyHostToDevice));*/
-
-	// gpuErrchk(cudaDeviceSynchronize());
-}
-
-/*void GrandBrownTown::createBondList()
-{
-	size_t size = (numBonds / 2) * numReplicas * sizeof(int3);
-	gpuErrchk( cudaMalloc( &bondList_d, size ) );
-	gpuErrchk( cudaMemcpyAsync( bondList_d, bondList, size, cudaMemcpyHostToDevice) );
-
-	for(int i = 0 ; i < (numBonds / 2) * numReplicas ; i++)
-	{
-		cout << "Displaying: bondList_d["<< i <<"].x = " << bondList[i].x << ".\n"
-			<< "Displaying: bondList_d["<< i <<"].y = " << bondList[i].y << ".\n"
-			<< "Displaying: bondList_d["<< i <<"].z = " << bondList[i].z << ".\n";
-
-	}
-}*/
diff --git a/src/GrandBrownTown.cuh b/src/GrandBrownTown.cuh
deleted file mode 100644
index 44ffe6f82c1c3dde464d8039ac6fef26dd97f84f..0000000000000000000000000000000000000000
--- a/src/GrandBrownTown.cuh
+++ /dev/null
@@ -1,539 +0,0 @@
-// GrandBrownTown.cuh
-//
-// Terrance Howard <heyterrance@gmail.com>
-#pragma once
-//#define MDSTEP
-#define Unit1 4.18679994e4
-#define Unit2 2.046167337
-//#define Debug
-#include "CudaUtil.cuh"
-#include "RigidBodyType.h"
-#include "RigidBodyGrid.h"
-
-__device__
-Vector3 step(Vector3& r0, float kTlocal, Vector3 force, float diffusion, Vector3 diffGrad,
-						 float timestep, BaseGrid *sys, Random *randoGen, int num);
-
-inline __device__
-ForceEnergy compute_position_dependent_force(
-    const Vector3* __restrict__ pos, Vector3* __restrict__ forceInternal,
-    const int* __restrict__ type, BrownianParticleType** part,
-    const float electricField, const int scheme, const int idx)
-{
-    int t = type[idx];
-    Vector3 r0 = pos[idx];
-    const BrownianParticleType& pt = *part[t];
-    Vector3 forceExternal = Vector3(0.0f, 0.0f, pt.charge * electricField);
-
-    ForceEnergy fe(0.f, 0.f);
-    for(int i = 0; i < pt.numPartGridFiles; ++i)
-    {
-	ForceEnergy tmp(0.f, 0.f);
-	if(!scheme) {
-	    BoundaryCondition bc = pt.pmf_boundary_conditions[i];
-	    INTERPOLATE_FORCE(tmp, pt.pmf[i]->interpolateForceDLinearly, bc, r0)
-		} else
-	    tmp = pt.pmf[i]->interpolateForceD(r0);
-	fe.f += tmp.f * pt.pmf_scale[i];
-	fe.e += tmp.e * pt.pmf_scale[i];
-    }
-    // if(get_energy)
-    // 	energy[idx] += fe.e;
-
-#ifndef FORCEGRIDOFF
-    // Add a force defined via 3D FORCE maps (not 3D potential maps)
-    if(!scheme)
-    {
-	if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotentialLinearly(r0);
-	if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotentialLinearly(r0);
-	if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotentialLinearly(r0);
-    }
-    else
-    {
-	if (pt.forceXGrid != NULL) fe.f.x += pt.forceXGrid->interpolatePotential(r0);
-	if (pt.forceYGrid != NULL) fe.f.y += pt.forceYGrid->interpolatePotential(r0);
-	if (pt.forceZGrid != NULL) fe.f.z += pt.forceZGrid->interpolatePotential(r0);
-    }
-#endif
-    fe.f = fe.f + forceExternal;
-    return fe;
-}
-
-
-////The kernel is for Nose-Hoover Langevin dynamics
-__global__ void 
-updateKernelNoseHooverLangevin(Vector3* __restrict__ pos, Vector3* __restrict__ momentum, float* random, 
-                               Vector3* __restrict__ forceInternal, int type[], BrownianParticleType* part[], 
-                               float kT, BaseGrid* kTGrid, float electricField, int tGridLength, float timestep, 
-                               int num, int num_rb_attached_particles, BaseGrid* sys, Random* randoGen, int numReplicas, int scheme)
-{
-    int idx = blockIdx.x * blockDim.x + threadIdx.x;
-    if (idx < num * numReplicas)
-    {
-	idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles);
-
-	ForceEnergy fe = compute_position_dependent_force(
-	    pos, forceInternal, type, part, electricField, scheme, idx );
-
-        int t = type[idx];
-        Vector3 r0  = pos[idx];
-        Vector3 p0  = momentum[idx];
-        float   ran = random[idx];
-
-        const BrownianParticleType& pt = *part[t];
-
-        Vector3 force = forceInternal[idx] + fe.f;
-        #ifdef Debug
-        forceInternal[idx] = -force;
-        #endif
-        // Get local kT value
-        float kTlocal;
-        if(!scheme)
-            kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotentialLinearly(r0); /* periodic */
-        else
-            kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotential(r0); /* periodic */
-
-        // Update the particle's position using the calculated values for time, force, etc.
-        float mass  = pt.mass;
-        float mu    = pt.mu;
-        Vector3 gamma   = pt.transDamping;
-        float rando = (*randoGen).gaussian(idx, num * numReplicas);
-
-        float tmp   = sqrtf(kTlocal * (1.f - expf(-2.f * gamma.x * timestep)) / mu);
-
-        if (pt.diffusionGrid != NULL)
-        {
-
-            Vector3 gridCenter = pt.diffusionGrid->origin +
-            pt.diffusionGrid->basis.transform( Vector3(0.5*pt.diffusionGrid->nx, 0.5*pt.diffusionGrid->ny,
-                                                       0.5*pt.diffusionGrid->nz));
-            Vector3 p2 = r0 - gridCenter;
-            p2 = sys->wrapDiff( p2 ) + gridCenter;
-            ForceEnergy diff;
-            if(!scheme)
-                diff = pt.diffusionGrid->interpolateForceDLinearly<periodic>(p2);
-            else
-                diff = pt.diffusionGrid->interpolateForceD(p2);
-            gamma = Vector3(kTlocal / (mass * diff.e));
-        }
-
-        #ifdef MDSTEP
-        force = Vector3(-r0.x, -r0.y, -r0.z);
-        #endif
-
-        p0  = p0  + 0.5f * timestep * force * Unit1;
-
-        r0  = r0  + 0.5f * timestep * p0 * 1e4 / mass;
-        //r0 = sys->wrap(r0);
-
-        ran = ran + 0.5f * (p0.length2() / mass * 0.238845899f - 3.f * kTlocal) / mu;
-
-        p0  = expf(-0.5f * ran) * p0;
-
-        ran = expf(-gamma.x * timestep) * ran + tmp * rando;
-
-        p0  = expf(-0.5f * ran) * p0;
-
-        ran = ran + 0.5f * (p0.length2() / mass * 0.238845899 - 3.f * kTlocal) / mu;
- 
-        r0  = r0  + 0.5f * timestep * p0 * 1e4 / mass;
-        r0 = sys->wrap(r0);
-
-        pos[idx] = r0;
-        momentum[idx] = p0;
-        random[idx]= ran;     
-    }
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//This is the kernel for BAOAB (velocity verlet algorithm) which is symplectic. For more information,
-//please infer http://www.MolecularDynamics.info
-//The original BBK kernel is no longer used since the random numbers should be reused 
-//which is not possible in GPU code.
-//Han-Yi Chou
-__global__ void updateKernelBAOAB(Vector3* pos, Vector3* momentum, Vector3* __restrict__ forceInternal,
-                                  int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, 
-                                  float electricField,int tGridLength, float timestep,
-				  int num, int num_rb_attached_particles, BaseGrid* sys,
-                                  Random* randoGen, int numReplicas, int scheme)
-{
-    int idx = blockIdx.x * blockDim.x + threadIdx.x;
-
-
-    if (idx < num * numReplicas)
-    {
-	idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles);
-
-	ForceEnergy fe = compute_position_dependent_force(
-	    pos, forceInternal, type, part, electricField, scheme, idx );
-	// if (get_energy) energy[idx] += fe.e;
-
-        int t = type[idx];
-        Vector3 r0 = pos[idx];
-        Vector3 p0 = momentum[idx];
-        const BrownianParticleType& pt = *part[t];
-
-        Vector3 force = forceInternal[idx] + fe.f;
-#ifdef Debug
-        forceInternal[idx] = -force;
-#endif
-
-        // Get local kT value
-        float kTlocal;
-        if(!scheme)
-            kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotentialLinearly(r0); /* periodic */
-        else
-            kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotential(r0); /* periodic */
-
-        // Update the particle's position using the calculated values for time, force, etc.
-        float mass      = pt.mass;
-        Vector3 gamma   = pt.transDamping;
-        Vector3 rando = (*randoGen).gaussian_vector(idx, num * numReplicas);
-        //printf("%f %f %f\n", rando.x, rando.y, rando.z);
-        if (pt.diffusionGrid != NULL) 
-        {
-
-            Vector3 gridCenter = pt.diffusionGrid->origin +
-            pt.diffusionGrid->basis.transform( Vector3(0.5*pt.diffusionGrid->nx, 0.5*pt.diffusionGrid->ny,
-                                                       0.5*pt.diffusionGrid->nz));
-            Vector3 p2 = r0 - gridCenter;
-            p2 = sys->wrapDiff( p2 ) + gridCenter;
-            ForceEnergy diff;
-            if(!scheme)
-                diff = pt.diffusionGrid->interpolateForceDLinearly<periodic>(p2);
-            else
-                diff = pt.diffusionGrid->interpolateForceD(p2);
-            gamma = Vector3(kTlocal / (mass * diff.e));
-        }
-
-        #ifdef MDSTEP
-        force = Vector3(-r0.x, -r0.y, -r0.z);
-        #endif
-
-        p0 = p0 + 0.5f * timestep * force * Unit1;
-        r0 = r0 + 0.5f * timestep / mass * p0 * 1e4;
-
-        p0.x = expf(-timestep * gamma.x) * p0.x + sqrtf(mass * kTlocal * (1.f-expf(-2.f*timestep*gamma.x))) * rando.x * Unit2;
-        p0.y = expf(-timestep * gamma.y) * p0.y + sqrtf(mass * kTlocal * (1.f-expf(-2.f*timestep*gamma.y))) * rando.y * Unit2;
-        p0.z = expf(-timestep * gamma.z) * p0.z + sqrtf(mass * kTlocal * (1.f-expf(-2.f*timestep*gamma.z))) * rando.z * Unit2;
-
-        r0 = r0 + 0.5f * timestep * p0 * 1e4 / mass;
-        r0 = sys->wrap(r0);
-        
-        pos[idx]      = r0;
-        momentum[idx] = p0;
-
-        //if(idx == 0)
-          //  printf("%f %f %f\n", pos[idx].x,pos[idx].y,pos[idx].z);
-    }
-}
-
-//update momentum in the last step of BAOAB integrator for the Langevin dynamics. Han-Yi Chou
-__global__ void LastUpdateKernelBAOAB(Vector3* pos,Vector3* momentum, Vector3* __restrict__ forceInternal,
-                                      int type[], BrownianParticleType* part[], float kT, BaseGrid* kTGrid, 
-                                      float electricField, int tGridLength, float timestep, int num, int num_rb_attached_particles,
-                                      BaseGrid* sys, Random* randoGen, int numReplicas, float* __restrict__ energy, bool get_energy,int scheme)
-{
-    int idx  = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x);
-
-    if (idx < num * numReplicas)
-    {
-	idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles);
-
-	ForceEnergy fe = compute_position_dependent_force(
-	    pos, forceInternal, type, part, electricField, scheme, idx );
-	if (get_energy) energy[idx] += fe.e;
-
-        Vector3 r0 = pos[idx];
-        Vector3 p0 = momentum[idx];
-
-        Vector3 force = forceInternal[idx] + fe.f;
-#ifdef Debug
-        forceInternal[idx] = -force;
-#endif
-
-        #ifdef MDSTEP
-        force = Vector3(-r0.x, -r0.y, -r0.z);
-        #endif
-
-        p0 = p0 + 0.5f * timestep * force * Unit1;
-        momentum[idx] = p0;
-    }
-}
-
-//Update kernel for Brownian dynamics
-__global__
-void updateKernel(Vector3* pos, Vector3* __restrict__ forceInternal, int type[], 
-                  BrownianParticleType* part[],float kT, BaseGrid* kTGrid, float electricField, 
-                  int tGridLength, float timestep, int num, int num_rb_attached_particles, BaseGrid* sys,
-		  Random* randoGen, int numReplicas, float* energy, bool get_energy, int scheme) 
-{
-	// Calculate this thread's ID
-	int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x);
-        
-	// TODO: Make this a grid-stride loop to make efficient reuse of RNG states 
-	// Loop over ALL particles in ALL replicas
-	if (idx < num * numReplicas) 
-        {
-	    idx = (idx % num) + (idx/num) * (num+num_rb_attached_particles);
-		const int t = type[idx];
-		Vector3   p = pos[idx];
-
-		const BrownianParticleType& pt = *part[t];
-                
-	 	/* printf("atom %d: forceInternal: %f %f %f\n", idx, forceInternal[idx].x, forceInternal[idx].y, forceInternal[idx].z);  */
-
-		ForceEnergy fe = compute_position_dependent_force(
-		    pos, forceInternal, type, part, electricField, scheme, idx );
-
-		// Compute total force:
-		//	  Internal:  interaction between particles
-		//	  External:  electric field (now this is basically a constant vector)
-		//	  forceGrid: ADD force due to PMF or other potentials defined in 3D space
-		Vector3 force = forceInternal[idx] + fe.f;
-                #ifdef Debug
-                forceInternal[idx] = -force;
-                #endif
-
-
-		// Get local kT value
-		float kTlocal;
-                if(!scheme)
-                    kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotentialLinearly(p); /* periodic */
-                else
-                    kTlocal = (tGridLength == 0) ? kT : kTGrid->interpolatePotential(p); /* periodic */
-
-		// Update the particle's position using the calculated values for time, force, etc.
-		float diffusion = pt.diffusion;
-		Vector3 diffGrad = Vector3(0.0f);
-		// printf("force: %f %f %f %f %f %f\n", p.x, p.y, p.z,
-		//        fe.f.x, fe.f.y, fe.f.z);
-		//        // force.x, force.y, force.z);
-
-		if (pt.diffusionGrid != NULL) 
-                {
-			// printf("atom %d: pos: %f %f %f\n", idx, p.x, p.y, p.z);
-			// p = pt.diffusionGrid->wrap(p); // illegal mem access; no origin/basis?
-
-			Vector3 gridCenter = pt.diffusionGrid->origin +
-				pt.diffusionGrid->basis.transform( Vector3(0.5*pt.diffusionGrid->nx,
-									   0.5*pt.diffusionGrid->ny,
-									   0.5*pt.diffusionGrid->nz)); 
-			Vector3 p2 = p - gridCenter;
-			p2 = sys->wrapDiff( p2 ) + gridCenter;			
-			/* p2 = sys->wrap( p2 ); */
-			/* p2 = p2 - gridCenter; */
-			/* printf("atom %d: ps2: %f %f %f\n", idx, p2.x, p2.y, p2.z); */
-                        ForceEnergy diff;
-                        if(!scheme)	
-			    diff = pt.diffusionGrid->interpolateForceDLinearly<periodic>(p2);
-                        else
-                            diff = pt.diffusionGrid->interpolateForceD(p2);
-			diffusion = diff.e;
-			diffGrad = diff.f;
-		}
-
-		// if (idx == 0) {
-		// 	printf("force: "); force.print();
-		// }
-		
-		Vector3 tmp = step(p, kTlocal, force, diffusion, -diffGrad, timestep, sys, randoGen, 
-                                   num * numReplicas);
-		// assert( tmp.length() < 10000.0f );
-		pos[idx] = tmp;
-
-                if(get_energy)
-                {
-                    float en_local = 0.f;
-                    for(int i = 0; i < pt.numPartGridFiles; ++i)
-                    {
-			float en_tmp = 0.0f;
-                        if(!scheme)
-                            en_tmp = pt.pmf[i]->interpolatePotentialLinearly(tmp);
-                        else
-                            en_tmp = pt.pmf[i]->interpolatePotential(tmp);
-			en_tmp *= pt.pmf_scale[i];
-                    }
-                    energy[idx] += en_local;
-                }		
-	}
-}
-/*
-//This is the BBK Langevin integrator for Langevin dynamics Han-Yi Chou
-__device__ inline void step(Vector3& r0, Vector3& p0, float kTlocal, Vector3 force, float diffusion, Vector3 diffGrad,
-                            float mass, Vector3& gamma, float timestep, BaseGrid *sys, Random *randoGen, int num)
-{
-    int idx = blockIdx.x * blockDim.x + threadIdx.x;
-    Vector3 rando = randoGen->gaussian_vector(idx, num);
-    float tmp = sqrtf(diffusion * timestep);
-
-    p0.x = (1.0f - 0.50f * timestep * gamma.x) * p0.x + (0.50f * timestep * force.x * Unit1 + (tmp * sqrtf(gamma.x) * rando.x) * mass * Unit2);
-    p0.y = (1.0f - 0.50f * timestep * gamma.y) * p0.y + (0.50f * timestep * force.y * Unit1 + (tmp * sqrtf(gamma.y) * rando.y) * mass * Unit2);
-    p0.z = (1.0f - 0.50f * timestep * gamma.z) * p0.z + (0.50f * timestep * force.z * Unit1 + (tmp * sqrtf(gamma.z) * rando.z) * mass * Unit2);
-
-    r0 = r0 + (timestep * p0) / mass * 1e4;
-    Vector3 r = r0;
-    r0 = sys->wrap(r);
-}
-
-//This is the BBK integrator for updating momentum in Langevin dynamics Han-Yi Chou
-__device__ inline void step(Vector3& p0, float kTlocal, Vector3 force, float diffusion, Vector3 diffGrad,
-                            float mass, Vector3& gamma, float timestep, BaseGrid *sys, Random *randoGen, int num)
-{
-    int idx = blockIdx.x * blockDim.x + threadIdx.x;
-    Vector3 rando = randoGen->gaussian_vector(idx, num);
-    float tmp = sqrtf(diffusion * timestep);
-    
-    p0.x = (p0.x + (0.50 * timestep * force.x * Unit1 + (tmp * sqrtf(gamma.x) * rando.x) * mass * Unit2)) / (1.0f+0.5f*timestep*gamma.x);
-    p0.y = (p0.y + (0.50 * timestep * force.y * Unit1 + (tmp * sqrtf(gamma.y) * rando.y) * mass * Unit2)) / (1.0f+0.5f*timestep*gamma.y);
-    p0.z = (p0.z + (0.50 * timestep * force.z * Unit1 + (tmp * sqrtf(gamma.z) * rando.z) * mass * Unit2)) / (1.0f+0.5f*timestep*gamma.z);
-}
-*/
-//For Brownian dynamics
-__device__
-inline Vector3 step(Vector3& r0, float kTlocal, Vector3 force, float diffusion,
-						Vector3 diffGrad, float timestep, BaseGrid *sys,
-						Random *randoGen, int num) {
-	const int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x);
-	// TODO: improve performance by storing state locally, then sending it back to GPU
-	Vector3 rando = (*randoGen).gaussian_vector(idx, num);
-
-	diffusion *= timestep;
-	Vector3 r = r0 + (diffusion / kTlocal) * force
-							+ timestep * diffGrad
-							+ sqrtf(2.0f * diffusion) * rando;
-	// Wrap about periodic boundaries
-	return sys->wrap(r);
-}
-
-__global__
-void updateGroupSites(Vector3 pos[], int* groupSiteData, int num, int numGroupSites, int numReplicas) {
-    int i = blockIdx.x * blockDim.x + threadIdx.x;
-
-    // TODO: improve naive implementation so that each thread loads memory single pos elemment
-
-    // For all threads representing a valid pair of particles
-    if (i < numGroupSites*numReplicas) {
-	pos[num*numReplicas + i] = Vector3(0.0f); 
-    }
-
-    // For all threads representing a valid pair of particles
-    if (i < numGroupSites*numReplicas) {
-	const int imod = i % numGroupSites;
-	const int rep = i/numGroupSites;
-	const int start  = groupSiteData[imod];
-	const int finish = groupSiteData[imod+1];
-	float weight = 1.0 / (finish-start);
-
-	Vector3 tmp = Vector3(0.0f);
-
-	for (int j = start; j < finish; j++) {
-	    const int aj = groupSiteData[j] + num*rep;
-	    tmp += weight * pos[aj];
-	}
-	// printf("GroupSite %d (mod %d) COM (start,finish, x,y,z): (%d,%d, %f,%f,%f)\n",i, imod, start, finish, tmp.x, tmp.y, tmp.z);
-	pos[num*numReplicas + i] = tmp;
-    }
-}
-
-template<bool print>
-__global__
-void distributeGroupSiteForces(Vector3 force[], int* groupSiteData, int num, int numGroupSites, int numReplicas) {
-    // TODO, handle groupsite energies
-    int i = blockIdx.x * blockDim.x + threadIdx.x;
-
-    // For all threads representing a valid pair of particles
-    if (i < numGroupSites*numReplicas) {
-	const int imod = i % numGroupSites;
-	const int rep = i/numGroupSites;
-	const int start  = groupSiteData[imod];
-	const int finish = groupSiteData[imod+1];
-	float weight = 1.0 / (finish-start);
-
-	const Vector3 tmp = weight*force[num*numReplicas+i];
-	// if (print) {
-	//     printf("GroupSite %d Force rep %d: %f %f %f\n",i, rep, tmp.x, tmp.y, tmp.z);
-	// }
-
-	for (int j = start; j < finish; j++) {
-	    const int aj = groupSiteData[j] + num*rep;
-	    atomicAdd( force+aj, tmp );
-	}
-    }
-}
-
-__global__ void devicePrint(RigidBodyType* rb[]) {
-	// printf("Device printing\n");
-	int i = 0;
-	printf("RigidBodyType %d: numGrids = %d\n", i, rb[i]->numPotGrids);
-	// printf("  RigidBodyType %d: potGrid: %p\n", i, rb[i]->rawPotentialGrids);
-	// int j = 0;
-	// printf("  RigidBodyType %d: potGrid[%d]: %p\n", i, j, &(rb[i]->rawPotentialGrids[j]));
-	// printf("  RigidBodyType %d: potGrid[%d] size: %d\n", i, j, rb[i]->rawPotentialGrids[j].getSize());
-	// BaseGrid g = rb[i]->rawPotentialGrids[j];
-	// for (int k = 0; k < rb[i]->rawPotentialGrids[j].size(); k++)
-	// for (int k = 0; k < rb[i]->rawPotentialGrids[j].getSize(); k++)
-	// 	printf("    rbType_d[%d]->potGrid[%d].val[%d]: %g\n",
-	// 				 i, j, k, rb[i]->rawPotentialGrids[j].val[k]);
-	// i, j, k, rb[i]->rawPotentialGrids[j]).val[k];
-	
-}
-
-// __global__ void devicePrint(RigidBodyType* rb[]) {
-// 	// printf("Device printing\n");
-// 	int i = 0;
-// 	printf("RigidBodyType %d: numGrids = %d\n", i, rb[i]->numPotGrids);
-// 	printf("RigidBodyType %d: potGrid: %p\n", i, rb[i]->rawPotentialGrids);
-// 	int j = 0;
-// 	printf("RigidBodyType %d: potGrid[%d]: %p\n", i, &(rb[i]->rawPotentialGrids[j]));
-// 	BaseGrid g = rb[i]->rawPotentialGrids[j];
-// 	// for (int k = 0; k < rb[i]->rawPotentialGrids[j].size(); k++)
-// 	for (int k = 0; k < g->getSize(); k++)
-// 		printf("rbType_d[%d]->potGrid[%d].val[%d]: %g\n",
-// 					 i, j, k, g.val[k]);
-// 	// i, j, k, rb[i]->rawPotentialGrids[j]).val[k];
-	
-// }
-
-
-// __device__ Vector3* totalForce;
-// Vector3* totalForce_h;
-// void initTotalForce() {
-//     cudaMalloc( &totalForce_h, sizeof(Vector3) );
-//     cudaMemcpyToSymbol(totalForce, &totalForce_h, sizeof(totalForce_h));
-// }
-
-__global__ void compute_position_dependent_force_for_rb_attached_particles(
-    const Vector3* __restrict__ pos,
-    Vector3* __restrict__ forceInternal, float* __restrict__ energy,
-    const int* __restrict__ type, BrownianParticleType** __restrict__ part,
-    const float electricField, const int num, const int num_rb_attached_particles,
-    const int numReplicas, const int scheme)
-{
-    int idx = blockIdx.x * blockDim.x + threadIdx.x;
-    if (idx < num_rb_attached_particles * numReplicas)
-    {
-	idx = num + (idx % num_rb_attached_particles) + (idx/num_rb_attached_particles) * (num+num_rb_attached_particles);
-	ForceEnergy fe = compute_position_dependent_force(
-	    pos, forceInternal, type, part, electricField, scheme, idx );
-	atomicAdd( &forceInternal[idx], fe.f );
-	atomicAdd( &energy[idx], fe.e );
-    }
-}
-__global__ void compute_position_dependent_force_for_rb_attached_particles(
-    const Vector3* __restrict__ pos, Vector3* __restrict__ forceInternal,
-    const int* __restrict__ type, BrownianParticleType** __restrict__ part,
-    const float electricField, const int num, const int num_rb_attached_particles,
-    const int numReplicas, const int scheme)
-{
-    int idx = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if (idx < num_rb_attached_particles * numReplicas)
-    {
-	idx = num + (idx % num_rb_attached_particles) + (idx/num_rb_attached_particles) * (num+num_rb_attached_particles);
-	ForceEnergy fe = compute_position_dependent_force(
-	    pos, forceInternal, type, part, electricField, scheme, idx );
-	atomicAdd( &forceInternal[idx], fe.f );
-    }
-}
diff --git a/src/GrandBrownTown.h b/src/GrandBrownTown.h
deleted file mode 100644
index ce7f19c29534db17e8b2ef8656f866b701455c67..0000000000000000000000000000000000000000
--- a/src/GrandBrownTown.h
+++ /dev/null
@@ -1,309 +0,0 @@
-// Author: Jeff Comer <jcomer2@illinois.edu>
-#ifndef GRANDBROWNTOWN_H
-#define GRANDBROWNTOWN_H
-
-#ifdef __CUDACC__
-    #define HOST __host__
-    #define DEVICE __device__
-#else
-    #define HOST
-    #define DEVICE
-#endif
-
-#include <ctime>
-#include <cstdio>
-#include <cstdlib>
-#include <iostream>
-#include <locale.h> // setlocale
-#include <sstream> // std::stringstream
-#include <string> // std::string
-#include <vector> // std::vector
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <curand_kernel.h>
-
-#include "GPUManager.h"
-#include "useful.h"
-#include "BaseGrid.h"
-#include "OverlordGrid.h"
-#include "Reader.h"
-#include "RandomCUDA.h"
-#include "ComputeForce.h"
-#include "BrownianParticleType.h"
-#include "TrajectoryWriter.h"
-#include "JamesBond.h"
-#include "Exclude.h"
-#include "Angle.h"
-#include "Configuration.h"
-#include "Dihedral.h"
-/* #include "RigidBody.h" */
-/* #include "RigidBodyType.h" */
-/* #include "RigidBodyGrid.h" */
-#include "RigidBodyController.h"
-#include "WKFUtils.h"
-
-// IMD
-#include "vmdsock.h"
-#include "imd.h"
-
-//#include "analyticForce.h"
-
-// using namespace std;
-
-// #define FORCEGRIDOFF
-
-class GrandBrownTown {
-public:
-	GrandBrownTown(const char* configFile, const char* outArg,
-			bool debug, bool imd_on, unsigned int imd_port, int numReplicas = 0);
-	GrandBrownTown(const Configuration& c, const char* outArg,
-			bool debug, bool imd_on, unsigned int imd_port, int numReplicas = 0);
-	~GrandBrownTown();
-
-	void run();
-	static bool DEBUG;
-
-private:  
-
-	// Given the numbers of each particle, populate the type list.
-	void populate();
-
-	// Count the number of atoms in the restart file.
-	int countRestart(const char* fileName);
-
-	void writeRestart(int repID) const;
-        void writeMomentumRestart(int repID) const;
-
-	void initialCondCen();
-	void initialCond();
-
-	// A couple old routines for getting particle positions.
-	Vector3 findPos(int typ);
-	Vector3 findPos(int typ, float minZ);
-	
-	bool readTableFile(const String& value, int currTab);
-	bool readBondFile(const String& value, int currBond);
-	bool readAngleFile(const String& value, int currAngle);
-
-	void newCurrent(int repID) const;
-	void writeCurrent(int repID, float t) const;
-	void writeCurrentSegment(int repID, float t, float segZ) const;
-	void getDebugForce();
-	
-	void copyRandToCUDA();
-	void copyToCUDA();
-
-        //Compute the kinetic energy in general. Han-Yi Chou
-        float KineticEnergy();
-        //float RotKineticEnergy();
-
-        //Initialize the Nose-Hoover auxilliary variables
-        void InitNoseHooverBath(int N);
-        //curandState_t *randoDevice;
-
-	void init_cuda_group_sites();
-
-public:
-	// Compute the current in nanoamperes.
-	float current(float t) const;
-
-	// Compute the current in nanoamperes for a restricted segment (-segZ < z < segZ).
-	float currentSegment(float t, float segZ, int carrier) const;
-
-	int getReservoirCount(int partInd, int resInd) const;
-
-	IndexList getReservoirList(int partInd, int resInd) const;
-		
-	// Find an open position to place a particle.
-	Vector3 freePosition(Vector3 r0, Vector3 r1, float minDist);
-
-private:
-	static GPUManager gpuman;
-	const Configuration& conf;
-	int numReplicas;
-	
-	// IMD variables
-	bool imd_on;
-	unsigned int imd_port;
-	Vector3* imdForces;
-	
-	// Output variables
-	std::vector<std::string> outCurrFiles;
-	std::vector<std::string> restartFiles;
-	std::vector<std::string> outFilePrefixes;
-
-        //Hna-Yi Chou Langevin Dynamics
-        std::vector<std::string> restartMomentumFiles;
-        std::vector<std::string> outMomentumFilePrefixes;//, outForceFilePrefixes;
-
-	std::vector<TrajectoryWriter*> writers;
-
-        //For momentum, i.e. Langevin dynamic Han-Yi Chou
-        std::vector<TrajectoryWriter*> momentum_writers;
-        //std::vector<TrajectoryWriter*> force_writers;
-
-	Vector3 sysDim;
-
-	// Integrator variables
-	BaseGrid* sys;
-	Random *randoGen;
-	ComputeForce* internal;
-	Vector3* forceInternal;
-
-	// Particle variables
-	String* partsFromFile;
-	int* indices;
-	int numPartsFromFile;
-	Bond* bonds;
-	int numCap; 		// max number of particles
-	int num; 			// number of particles
-	Vector3* pos; 		// particle positions
-        Vector3* momentum;      // particle momentum Han-Yi Chou
-        float *random;
-        //Vector3* force;
-	int* type; 			// particle types: 0, 1, ... -> num * numReplicas
-	String* name; 		// particle types: POT, CLA, ... -> num * numReplicas
-	int* serial; 		// particle serial numbers
-	int currSerial; 	// the serial number of the next new particle
-	Vector3* posLast; 	// previous positions of particles
-        Vector3* momLast;
-	float timeLast; 	// used with posLast
-	float minimumSep; 	// minimum separation allowed with placing new particles
-
-	std::vector<RigidBodyController*> RBC;
-	Vector3* rbPos; 		// rigid body positions
-	
-	// CUDA device variables
-	//Vector3 *pos_d, *forceInternal_d, *force_d;
-	//int *type_d;
-	BrownianParticleType **part_d;
-	BaseGrid *sys_d, *kTGrid_d;
-	Random* randoGen_d;
-	//Bond* bonds_d;
-	//int2* bondMap_d;
-	//Exclude* excludes_d;
-	//int2* excludeMap_d;
-	//Angle* angles_d;
-	//Dihedral* dihedrals_d;
-
-	// System parameters
-	String outputName;
-	float timestep;
-	long int steps;
-	unsigned long int seed;
-	String temperatureGridFile;
-	String inputCoordinates;
-	String restartCoordinates;
-	int numberFluct;
-	int interparticleForce;
-	int tabulatedPotential;
-	int fullLongRange;
-	float kT;
-	float temperature;
-	float coulombConst;
-	float electricField;
-	float cutoff;
-	float switchLen;
-	int outputPeriod;
-	int outputEnergyPeriod;
-	int outputFormat;
-	float currentSegmentZ;
-	int numberFluctPeriod;
-	int decompPeriod;
-	int numCapFactor;
-	BaseGrid* kTGrid;
-	BaseGrid* tGrid;
-	BaseGrid* sigmaT;
-
-	// Other parameters.
-	float switchStart;
-	float maxInitialPot;
-	float initialZ;
-
-	// Particle parameters.
-	BrownianParticleType* part;
-	int numParts;
-	int numBonds;
-	int numExcludes;
-	int numAngles;
-	int numDihedrals;
-
-    int num_rb_attached_particles;
-
-	int numGroupSites;
-	int* groupSiteData_d;
-
-	String partFile;
-	String bondFile;
-	String excludeFile;
-	String angleFile;
-	String dihedralFile;
-	bool readPartsFromFile;
-	bool readBondsFromFile;
-	bool readExcludesFromFile;
-	bool readAnglesFromFile;
-	bool readDihedralsFromFile;
-	String* partGridFile;
-	String* partDiffusionGridFile;
-	String* partForceXGridFile;
-	String* partForceYGridFile;
-	String* partForceZGridFile;
-	String* partTableFile;
-	String* partReservoirFile;
-	int* partTableIndex0;
-	int* partTableIndex1;
-
-	String* bondTableFile;
-	int numTabBondFiles;
-	int2* bondMap;
-	int3 *bondList;
-
-	Exclude* excludes;
-	int2* excludeMap;
-	String excludeRule;
-	int excludeCapacity;
-
-	Angle* angles;
-	String* angleTableFile;
-	int numTabAngleFiles;
-	int4 *angleList;
-
-	Dihedral* dihedrals;
-	String* dihedralTableFile;
-	int numTabDihedralFiles;
-	int4 *dihedralList;
-	int  *dihedralPotList;
-
-	int numBondAngles;
-	BondAngle* bondAngles;
-	int4 *bondAngleList;
-
-        //Han-Yi Chou
-        String particle_dynamic;
-        String rigidbody_dynamic;
-        String particle_langevin_integrator;
-        int ParticleInterpolationType;
-        int RigidBodyInterpolationType;
-	void updateNameList();
-
-	void remember(float t);
-
-	void deleteParticles(IndexList& p);
-
-	void addParticles(int n, int typ);
-
-	// Add particles randomly within the region defined by r0 and r1.
-	void addParticles(int n, int typ, Vector3 r0, Vector3 r1);
-
-	// Add particles randomly within the region defined by r0 and r1.
-	// Maintains a minimum distance of minDist between particles.
-	void addParticles(int n, int typ, Vector3 r0, Vector3 r1, float minDist);
-
-	// Add or delete particles in the reservoirs.
-	// Reservoirs are not wrapped.
-	void updateReservoirs();
-
-};
-
-#endif
diff --git a/src/Integrator.cpp b/src/Integrator.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5c285cb37d9071c9f6fa7b52c7433940d3eeec0f
--- /dev/null
+++ b/src/Integrator.cpp
@@ -0,0 +1,90 @@
+#include "Integrator.h"
+
+// Strict weak ordering so that Integrator::Conf can be used as a std::map key;
+// compares the packed integer encoding produced by Conf::operator int().
+bool operator<(const Integrator::Conf x, const Integrator::Conf y) { return (int(x) < int(y)); }
+
+// Cache of integrators created so far, keyed by configuration.
+std::map<Integrator::Conf, Integrator*> Integrator::_integrators;
+
+// Lazily-initialized factory: returns the cached integrator matching `conf`,
+// creating and caching it on first request.
+//
+// `conf` is taken by non-const reference because a Conf::Default backend is
+// resolved in place to Conf::CUDA (when built with USE_CUDA) or Conf::CPU.
+//
+// Returns nullptr when no integrator exists for the requested configuration
+// and assertions are compiled out (NDEBUG); with assertions enabled the
+// unsupported cases abort via assert(false), as before.
+Integrator* Integrator::GetIntegrator(Conf& conf) {
+	if (conf.backend == Conf::Default) {
+#ifdef USE_CUDA
+	    conf.backend = Conf::CUDA;
+#else
+	    conf.backend = Conf::CPU;
+#endif
+	}
+
+	// Reserve a slot for this configuration unless one already exists
+	auto emplace_result = Integrator::_integrators.emplace(conf, nullptr);
+	auto& it = emplace_result.first;
+	const bool inserted = emplace_result.second;
+	if (inserted) {
+	    // Conf not found, so create a new integrator.  Start from nullptr so
+	    // that unsupported branches never leave `tmp` indeterminate.
+	    Integrator* tmp = nullptr;
+
+	    switch (conf.object_type) {
+	    case Conf::Particle:
+		switch (conf.algorithm) {
+		case Conf::BD:
+		    switch (conf.backend) {
+		    case Conf::CUDA:
+#ifdef USE_CUDA
+			tmp = new BDIntegrateCUDA();
+#else
+			std::cerr << "WARNING: Integrator::GetIntegrator: "
+				  << "CUDA disabled, creating CPU integrator instead" << std::endl;
+			tmp = new BDIntegrate();
+#endif
+			break;
+		    case Conf::CPU:
+			tmp = new BDIntegrate();
+			break;
+		    default:
+			std::cerr << "Error: Integrator::GetIntegrator: "
+				  << "Unrecognized backend; exiting" << std::endl;
+			assert(false);
+		    }
+		    break;
+		case Conf::MD:
+		    std::cerr << "Error: Integrator::GetIntegrator: "
+			      << "MD integrator not implemented" << std::endl;
+		    assert(false);
+		    break;
+		default:
+		    std::cerr << "Error: Integrator::GetIntegrator: "
+			      << "Unrecognized algorithm type; exiting" << std::endl;
+		    assert(false);
+		}
+		break;
+	    case Conf::RigidBody:
+		std::cerr << "Error: Integrator::GetIntegrator: "
+			  << "RigidBody integrator not implemented" << std::endl;
+		assert(false);
+		break;
+	    default:
+		std::cerr << "Error: Integrator::GetIntegrator: "
+			  << "Unrecognized object type; exiting" << std::endl;
+		assert(false);
+	    }
+
+	    if (tmp == nullptr) {
+		// Creation failed: drop the placeholder so a null pointer is never cached
+		Integrator::_integrators.erase(it);
+		return nullptr;
+	    }
+	    it->second = tmp;
+	}
+	return it->second;
+}
diff --git a/src/Integrator.h b/src/Integrator.h
new file mode 100644
index 0000000000000000000000000000000000000000..8e71eb99b1ac001617a71325c85bea71a4799978
--- /dev/null
+++ b/src/Integrator.h
@@ -0,0 +1,57 @@
+#pragma once
+
+#include <cassert>
+#include <cstdio>
+#include <iostream>
+#include <map>
+#include "PatchOps.h"
+
+#ifdef __CUDACC__
+    #define HOST __host__
+    #define DEVICE __device__
+#else
+    #define HOST
+    #define DEVICE
+#endif
+
+// Kernels shared by the host and device integrator back ends.
+namespace IntegratorKernels {
+    // Placeholder Brownian-dynamics step; currently only prints a trace line.
+    HOST DEVICE inline void BDIntegrate() {
+	// std::cout << "Computes::BDIntegrate_inline" << std::endl;
+	printf("Integrator::BDIntegrate\n");
+    }
+}
+
+// Abstract integrator applied to a Patch; concrete instances are obtained
+// through the GetIntegrator() factory.
+class Integrator : public BaseCompute {
+public:
+    virtual void compute(Patch* patch) = 0;
+    int num_patches() const { return 1; }
+
+    // Following relates to lazy initialized factory method
+    struct Conf {
+	enum Object   { Particle, RigidBody };
+	enum Algorithm { BD, MD };
+	enum Backend   { Default, CUDA, CPU };
+
+	Object object_type;
+	Algorithm algorithm;
+	Backend backend;
+
+	// Packs the three fields into one integer (used to order Conf values)
+	explicit operator int() const {return object_type*16 + algorithm*4 + backend;}
+    };
+
+    // Returns the integrator for `conf`; may modify conf.backend
+    static Integrator* GetIntegrator(Conf& conf);
+
+protected:
+    // Integrators created so far, keyed by configuration
+    static std::map<Conf, Integrator*> _integrators;
+
+};
+
+#include "Integrator/CUDA.h"
+#include "Integrator/CPU.h"
diff --git a/src/Integrator/CPU.cpp b/src/Integrator/CPU.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..02c4510e82a949c3ffe9537b54493e97f83f1a7f
--- /dev/null
+++ b/src/Integrator/CPU.cpp
@@ -0,0 +1,8 @@
+#include "CPU.h"
+
+// Host Brownian-dynamics integrator: logs the call and runs the shared
+// IntegratorKernels::BDIntegrate() kernel.  The patch is not used yet.
+void BDIntegrate::compute(Patch* /*patch*/) {
+    std::cout << "BDIntegrate::compute()" << std::endl;
+    IntegratorKernels::BDIntegrate();
+}
diff --git a/src/Integrator/CPU.h b/src/Integrator/CPU.h
new file mode 100644
index 0000000000000000000000000000000000000000..876062f4c62db50e89775506412d2b5ca6429471
--- /dev/null
+++ b/src/Integrator/CPU.h
@@ -0,0 +1,11 @@
+#pragma once
+#include "../Integrator.h"
+
+// Brownian-dynamics integrator that runs on the host CPU.
+class BDIntegrate : public Integrator {
+public:
+    // Implements Integrator::compute; `override` makes the compiler verify
+    // that the signature keeps matching the pure virtual in the base class.
+    void compute(Patch* patch) override;
+    int num_patches() const { return 1; }
+};
diff --git a/src/Integrator/CUDA.cu b/src/Integrator/CUDA.cu
new file mode 100644
index 0000000000000000000000000000000000000000..d3a3933c1878b09321a2a4cb379e14987cfb6fa2
--- /dev/null
+++ b/src/Integrator/CUDA.cu
@@ -0,0 +1,27 @@
+#include <cstdio>
+#include "CUDA.h"
+
+#ifdef USE_CUDA
+// Placeholder kernel: the first thread of each block prints a trace line and
+// runs the shared IntegratorKernels::BDIntegrate() kernel.
+__global__ void BDIntegrate_kernel() {
+    if (threadIdx.x == 0) {
+	printf("BDIntegrate_kernel()\n");
+	IntegratorKernels::BDIntegrate();
+    }
+}
+
+// CUDA Brownian-dynamics integrator: launches BDIntegrate_kernel with one
+// block of 32 threads.  The patch is not used yet.
+void BDIntegrateCUDA::compute(Patch* /*patch*/) {
+    printf("BDIntegrateCUDA::compute()\n");
+    BDIntegrate_kernel<<<1,32>>>();
+    // Kernel launches do not return a status; query it so that a failed
+    // launch is reported instead of being silently ignored.
+    const cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess) {
+	fprintf(stderr, "BDIntegrateCUDA::compute(): kernel launch failed: %s\n",
+		cudaGetErrorString(err));
+    }
+}
+#endif
diff --git a/src/Integrator/CUDA.h b/src/Integrator/CUDA.h
new file mode 100644
index 0000000000000000000000000000000000000000..90e17724fbdc89a4be0fe6d6ad87f928ed856038
--- /dev/null
+++ b/src/Integrator/CUDA.h
@@ -0,0 +1,14 @@
+#pragma once
+#include "../Integrator.h"
+
+#ifdef USE_CUDA
+// Brownian-dynamics integrator that runs on the GPU; only declared when the
+// build defines USE_CUDA.
+class BDIntegrateCUDA : public Integrator {
+public:
+    // Implements Integrator::compute; `override` makes the compiler verify
+    // that the signature keeps matching the pure virtual in the base class.
+    void compute(Patch* patch) override;
+    int num_patches() const { return 1; }
+};
+#endif
diff --git a/src/Interactions.h b/src/Interactions.h
new file mode 100644
index 0000000000000000000000000000000000000000..d58ae497e9eefdb54463b50689a0f28b12042859
--- /dev/null
+++ b/src/Interactions.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "useful.h"
+
+class Interactions {
+    // Object to store all kinds of info about the simulation system, but no particle data
+
+public:
+    size_t num_interactions;
+
+};
+
+// Interactions specialization for bonds.
+class BondInteractions : public Interactions {
+
+private:
+    // constexpr is required here: a plain `static const char*` member cannot
+    // be given an in-class initializer.
+    static constexpr const char* type = "Bond";
+    size_t bondlist;
+};
diff --git a/src/JamesBond.cu b/src/JamesBond.cu
deleted file mode 100644
index ee496cdb5064ea2e6799c8aca05e036b418a0692..0000000000000000000000000000000000000000
--- a/src/JamesBond.cu
+++ /dev/null
@@ -1,28 +0,0 @@
-/* Bond, James Bond.cu Copyright Justin Dufresne and Terrance Howard, 2013.
- * We prefer our code shaken, not stirred.
- */
-
-#include "JamesBond.h"
-
-Bond::Bond(String strflag, int ind1, int ind2, String fileName) :
-		ind1(ind1), ind2(ind2), fileName(fileName) {
-	if (strflag == "REPLACE") {
-		flag = REPLACE;
-	} else if (strflag == "ADD") {
-		flag = ADD;
-	} else {
-		printf("WARNING: Invalid operation flag found:"
-					 "\"BOND %s %d %d\"\n", strflag.val(), ind1, ind2);
-		printf("         Using default flag\n");
-		flag = DEFAULT;
-	}
-	tabFileIndex = -1;
-}
-
-void Bond::print() {
-	printf("BOND %s %d %d %s\n", flags[flag].val(), ind1, ind2, fileName.val());
-}
-
-String Bond::toString() {
-	return "BOND " + flags[flag] + " " + ind1 + " " + ind2 + " " + fileName;
-}
diff --git a/src/JamesBond.h b/src/JamesBond.h
deleted file mode 100644
index 899f6314e66eff88d5eb5800391cc16a3e108807..0000000000000000000000000000000000000000
--- a/src/JamesBond.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/* Bond, James Bond.h Copyright Justin Dufresne and Terrance Howard, 2013.
- * We prefer our code shaken, not stirred.
- */
-
-#ifndef BOND_H
-#define BOND_H
-
-#ifdef __CUDACC__
-    #define HOST __host__
-    #define DEVICE __device__
-#else
-    #define HOST
-    #define DEVICE
-#endif
-
-#include "useful.h"
-#include "TabulatedPotential.h"
-#include <cuda.h>
-
-const String flags[] = { "DEFAULT", "REPLACE", "ADD" };
-
-class Bond {
-public:
-	enum {
-		DEFAULT = 1,
-		REPLACE = 1,
-		ADD = 2
-	};
-
-	Bond() : flag(DEFAULT), ind1(-1), ind2(-1) { }
-
-	Bond(int flag, int ind1, int ind2, String fileName) :
-			flag(flag),
-			ind1(ind1), ind2(ind2),
-			fileName(fileName) { }
-
-	Bond(String strflag, int ind1, int ind2, String fileName);
-
-	void print();
-
-	String toString();
-
-public:
-	int flag;
-	int ind1;
-	int ind2;
-	int tabFileIndex;
-	String fileName;
-};
-
-#endif
diff --git a/src/OverlordGrid.h b/src/OverlordGrid.h
deleted file mode 100644
index af2a415c0defb508451c41efeb6d1bff7785a4c2..0000000000000000000000000000000000000000
--- a/src/OverlordGrid.h
+++ /dev/null
@@ -1,345 +0,0 @@
-
-///////////////////////////////////////////////////////////////////////
-// Cell decomposition of points.
-// Author: Jeff Comer <jcomer2@illinois.edu>
-#ifndef OVERLORDGRID_H
-#define OVERLORDGRID_H
-
-#include "BaseGrid.h"
-#include "useful.h"
-
-#ifdef __CUDACC__
-    #define HOST __host__
-    #define DEVICE __device__
-#else
-    #define HOST 
-    #define DEVICE 
-#endif
-
-class OverlordGrid : public BaseGrid {
-public:
-  OverlordGrid(const BaseGrid& grid) : BaseGrid(grid) {
-    initSubgrids();
-    initUniqueGrids();
-  }
-  OverlordGrid() {};  
-  /*OverlordGrid(const char* systemDefFile) : BaseGrid(readDefFirst(systemDefFile)) {
-    printf("size: %d\n", size);
-    
-    // Initialize stuff.
-    initSubgrids();
-    initUniqueGrids();
-
-    // Load the rest of the system definition file.
-    readDef(systemDefFile);
-    }*/
-  
-
-  // Read a grid from a file.
-  OverlordGrid(const char* rootGrid) : BaseGrid(rootGrid) {
-    // Initialize stuff.
-    initSubgrids();
-    initUniqueGrids();
-  }
-
-private:
-  void initSubgrids() {
-    subgrid = new const BaseGrid*[size];
-    subtrans = new Matrix3[size];
-    for (int i = 0; i < size; i++) {
-      subtrans[i] = Matrix3(1.0f);
-      subgrid[i] = NULL;
-    }
-  }
-
-  void initUniqueGrids() {
-    uniqueGridNum = 0;
-    uniqueGrid = new BaseGrid*[size];
-    uniqueGridName = new String[size];
-    for (int i = 0; i < size; i++) uniqueGrid[i] = NULL;
-  }
-
-public:
-  int readDef(const char* systemDefFile) {
-    // Open the file.
-    FILE* inp = fopen(systemDefFile, "r");
-    if (inp == NULL) {
-      printf("OverlordGrid:readDef Couldn't open file `%s'.\n", systemDefFile);
-      exit(-1);
-    }
-    
-    int ind;
-    char gridFile[STRLEN];
-    char transform[STRLEN];
-    char line[STRLEN];
-    int nRead;
-    int count = 0;
-    while (fgets(line, STRLEN, inp) != NULL) {
-      // Ignore comments.
-      int len = strlen(line);
-      if (line[0] == '#') continue;
-      if (len < 2) continue;
-      
-      // Read definition lines.
-      nRead = sscanf(line, "%d %s %s", &ind, gridFile, transform);
-      if (nRead < 3) {
-	printf("OverlordGrid:readDef Improperly formatted line `%s'\n", line);
-	fclose(inp);
-	exit(-1);
-      }
-
-      // Skip the root grid.
-      if (ind < 0) continue;
-      
-      // Die for an improper index.
-      if (ind >= size) {
-	printf("OverlordGrid:readDef Index %d does not exist for %d nodes.\n", ind, size);
-	fclose(inp);
-	exit(-1);
-      }
-
-      // Find the grid to link to.
-      String gridName(gridFile);
-      int gridInd = -1;
-      for (int i = 0; i < uniqueGridNum; i++) {
-	if (gridName == uniqueGridName[i]) {
-	  gridInd = i;
-	  break;
-	}
-      }
-
-      // This is new grid.
-      // Load it.
-      if (gridInd < 0) {
-	if (uniqueGridNum >= size) {
-	  printf("OverlordGrid:readDef Too many unique grids.\n");
-	  fclose(inp);
-	  exit(-1);
-	}
-
-	uniqueGrid[uniqueGridNum] = new BaseGrid(gridFile);
-	uniqueGridName[uniqueGridNum] = gridFile;
-	gridInd = uniqueGridNum;
-	printf("New grid: %s\n", gridFile);
-	uniqueGridNum++;
-      }
-
-      // Link the subgrid.
-      link(ind, uniqueGrid[gridInd], parseTransform(transform));
-      count++;
-    }
-
-    return count;
-  }
-
-  static String readDefFirst(const char* systemDefFile) {
-    // Open the file.
-    FILE* inp = fopen(systemDefFile, "r");
-    if (inp == NULL) {
-      printf("OverlordGrid:readDefFirst Couldn't open file `%s'.\n", systemDefFile);
-      exit(-1);
-    }
-    
-    int ind;
-    char gridFile[STRLEN];
-    char transform[STRLEN];
-    char line[STRLEN];
-    int nRead;
-    while (fgets(line, STRLEN, inp) != NULL) {
-      // Ignore comments.
-      int len = strlen(line);
-      if (line[0] == '#') continue;
-      if (len < 2) continue;
-      
-      // Read definition lines.
-      nRead = sscanf(line, "%d %s %s", &ind, gridFile, transform);
-      if (nRead < 3 || ind != -1) {
-	printf("OverlordGrid:readDefFirst Improperly formatted line `%s'\n", line);
-	fclose(inp);
-	exit(-1);
-      }
-      
-      // Just get the root grid and return.
-      return String(gridFile);
-    }
-    
-    return String();
-  }
-
-  virtual ~OverlordGrid() {
-    delete[] subgrid;
-
-    for (int i = 0; i < uniqueGridNum; i++) delete uniqueGrid[i];
-    delete[] uniqueGrid;
-    delete[] uniqueGridName;
-  }
-
-  static Matrix3 parseTransform(const char* trans) {    
-    if (strlen(trans) < 2) return Matrix3(1.0f);
-
-    char sgn = trans[0];
-    char axis = trans[1];
-
-    Matrix3 ret(1.0f);
-    switch(axis) {
-    case 'x':
-    case 'X':
-      if (sgn == '-') ret = Matrix3(0.0f, 0.0f, -1.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f);
-      else ret = Matrix3(0.0f, 0.0f, 1.0f, 0.0f, -1.0f, 0.0f, 1.0f, 0.0f, 0.0f);
-      break;
-
-    case 'y':
-    case 'Y':
-      if (sgn == '-') ret = Matrix3(0.0f, -1.0f, 0.0f, 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f);
-      else ret = Matrix3(0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
-      break;
-
-    case 'z':
-    case 'Z':
-      if (sgn == '-') ret = Matrix3(-1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, -1.0f);
-      else ret = Matrix3(1.0f);
-      break;
-
-    }
-
-    return ret.transpose();
-  }
-
-  // Link a grid node to a subgrid.
-  bool link(int j, const BaseGrid* p, Matrix3 trans) {
-    if (j < 0 || j >= size) return false;
-    subgrid[j] = p;
-    subtrans[j] = trans;
-
-    return true;
-  }
-  bool link(int j, const BaseGrid& g, Matrix3 trans) {
-    return link(j, &g, trans);
-  }
-
-  virtual float getPotential(Vector3 pos) const {
-    // Find the nearest node.
-    int j = nearestIndex(pos);
-    
-    // Return nothing for a null subgrid.
-    if (subgrid[j] == NULL) return 0.0f;
-
-    // Shift the point to get into node j's space.
-    Vector3 r = subtrans[j].transform(pos - getPosition(j));
-    // Do a getPotential on the subgrid.
-    return subgrid[j]->getPotential(r);
-  }
-
-  DEVICE virtual float interpolatePotential(Vector3 pos) const {
-    // Find the nearest node.
-    int j = nearestIndex(pos);
-    
-    // Return nothing for a null subgrid.
-    if (subgrid[j] == NULL) return 0.0f;
-
-    // Shift the point to get into node j's space.
-    Vector3 r = subtrans[j].transform(pos - getPosition(j));
-    // Do a getPotential on the subgrid.
-    return subgrid[j]->interpolatePotential(r);
-  }
-
-  DEVICE virtual float interpolatePotentialLinearly(Vector3 pos) const {
-    // Find the nearest node.
-    int j = nearestIndex(pos);
-    
-    // Return nothing for a null subgrid.
-    if (subgrid[j] == NULL) return 0.0f;
-
-    // Shift the point to get into node j's space.
-    Vector3 r = subtrans[j].transform(pos - getPosition(j));
-    // Do a getPotential on the subgrid.
-    return subgrid[j]->interpolatePotentialLinearly(r);
-  }
-  
-  Vector3 interpolateForce(Vector3 pos) const {
-    // Find the nearest node.
-    int j = nearestIndex(pos);
-    
-    // Return nothing for a null subgrid.
-    if (subgrid[j] == NULL) return Vector3(0.0f);
-    // Shift the point to get into node j's space.
-    Vector3 r = subtrans[j].transform(pos - getPosition(j));
-
-    Vector3 f;
- 	Vector3 l = subgrid[j]->getInverseBasis().transform(r - subgrid[j]->getOrigin());
-    	int homeX = int(floor(l.x));
-    	int homeY = int(floor(l.y));
-    	int homeZ = int(floor(l.z));
-       	 // Get the array jumps with shifted indices.
-   	 int jump[3];
-    	jump[0] = subgrid[j]->getNz()*subgrid[j]->getNy();
-    	jump[1] = subgrid[j]->getNz();
-    	jump[2] = 1;
-   	// Shift the indices in the home array.
-   	int home[3];
-    	home[0] = homeX;
-   	home[1] = homeY;
-    	home[2] = homeZ;
-
-    	// Shift the indices in the grid dimensions.
-    	int g[3];
-	g[0] = subgrid[j]->getNx();
-	g[1] = subgrid[j]->getNy();
-	g[2] = subgrid[j]->getNz();
-
-	// Get the interpolation coordinates.
-	   float w[3];
-	w[0] = l.x - homeX;
-	w[1] = l.y - homeY;
-	w[2] = l.z - homeZ;
-	// Find the values at the neighbors.
-	float g1[4][4][4];
-	for (int ix = 0; ix < 4; ix++) {
-	      	for (int iy = 0; iy < 4; iy++) {
-			for (int iz = 0; iz < 4; iz++) {
-	  		// Wrap around the periodic boundaries. 
-				int jx = ix-1 + home[0];
-		 		 jx = subgrid[j]->wrap(jx, g[0]);
-		  		int jy = iy-1 + home[1];
-		 		 jy = subgrid[j]->wrap(jy, g[1]);
-		 		 int jz = iz-1 + home[2];
-		  		jz = subgrid[j]->wrap(jz, g[2]);
-		  
-				 int ind = jz*jump[2] + jy*jump[1] + jx*jump[0];
-				  g1[ix][iy][iz] = subgrid[j]->val[ind];
-			}
-	      	}
-	}  
-
-    f.x = subgrid[j]->interpolateDiffX(r, w, g1);
-    f.y = subgrid[j]->interpolateDiffY(r, w, g1);
-    f.z = subgrid[j]->interpolateDiffZ(r, w, g1);
-    Matrix3 m = subgrid[j]->getInverseBasis();
-    Vector3 f1 = m.transpose().transform(f);
-    Vector3 f2 = subtrans[j].transpose().transform(f1);
-    return f2;
-  }
- 
-  int getUniqueGridNum() const { return uniqueGridNum; }
-
-  bool writeSubgrids(const char* fileName) const {
-    FILE* out = fopen(fileName, "w");
-    if (out == NULL) return false;
-
-    for (int i = 0; i < size; i++) {
-      if (subgrid[i] != NULL)
-	fprintf(out, "%d %g %s\n", i, subgrid[i]->mean(), subtrans[i].toString1().val());
-    }
-    fclose(out);
-
-    return true;
-  }
-
-private:  
-  const BaseGrid** subgrid;
-  Matrix3* subtrans;
-  BaseGrid** uniqueGrid;
-  String* uniqueGridName;
-  int uniqueGridNum;
-};
-#endif
diff --git a/src/ParticlePatch.cpp b/src/ParticlePatch.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1977e1fbcfdffebddc54a55989e69e0f399261df
--- /dev/null
+++ b/src/ParticlePatch.cpp
@@ -0,0 +1,13 @@
+#include "ParticlePatch.h"
+
+// BasePatch::BasePatch(size_t num, short thread_id, short gpu_id) { ;};
+// BasePatch::BasePatch() {};
+// BasePatch::~BasePatch() {};
+
+// Patch::Patch(size_t num, short thread_id, short gpu_id) {};
+
+void Patch::compute() { // run each registered local compute on this patch, in insertion order
+    for (const auto& op : local_computes) {
+        op->compute(this);
+    }
+}
diff --git a/src/ParticlePatch.cu b/src/ParticlePatch.cu
new file mode 100644
index 0000000000000000000000000000000000000000..3628b208e26278975b34866e6cf9272c943524eb
--- /dev/null
+++ b/src/ParticlePatch.cu
@@ -0,0 +1,14 @@
+#include "ParticlePatch.h"
+
+// BasePatch::BasePatch(size_t num, short thread_id, short gpu_id) { ;};
+// BasePatch::BasePatch() {};
+// BasePatch::~BasePatch() {};
+
+Patch::Patch(size_t num, short thread_id, short gpu_id) {};
+
+void Patch::compute() { // NOTE(review): same definition as in ParticlePatch.cpp; link only one of the two files
+    for (const auto& op : local_computes) {
+        op->compute(this); // each compute receives this patch
+    }
+}
+
diff --git a/src/ParticlePatch.h b/src/ParticlePatch.h
new file mode 100644
index 0000000000000000000000000000000000000000..3f337868a2f28cdff0344a4b92e4dbd76ade774e
--- /dev/null
+++ b/src/ParticlePatch.h
@@ -0,0 +1,107 @@
+#pragma once
+
+#ifdef __CUDACC__
+    #define HOST __host__
+    #define DEVICE __device__
+#else
+    #define HOST
+    #define DEVICE
+#endif
+
+#include <vector> // std::vector
+#include <memory> // std::make_unique
+
+#ifdef USE_CUDA
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <curand_kernel.h>
+#endif
+
+#include "SimSystem.h"
+#include "useful.h"
+
+#include "PatchOps.h"
+//class BaseCompute;
+
+class BasePatch { // Common base of Patch and PatchProxy: bookkeeping fields only, no behaviour yet
+public:
+    // Constructors/destructor are not declared yet; the candidates below are kept for reference.
+    // BasePatch(size_t num, short thread_id, short gpu_id, SimSystem& sys);
+    // BasePatch(size_t num, short thread_id, short gpu_id);
+    // BasePatch(); ~BasePatch();
+
+private: // NOTE(review): private with no accessors or friends, so derived classes cannot read or set these
+    size_t capacity; // NOTE(review): presumably allocated particle slots -- confirm; never initialized here
+    size_t num; // NOTE(review): presumably particles currently held -- confirm; never initialized here
+    short thread_id;		// MPI
+    short gpu_id;		// -1 if GPU unavailable
+
+    int patch_idx;		// Unique ID across ranks (Patch declares a static member of the same name)
+
+    Vector3 lower_bound; // NOTE(review): presumably the spatial extent of the patch,
+    Vector3 upper_bound; // together with lower_bound -- confirm units and inclusivity
+};
+
+class PatchProxy : public BasePatch { // Placeholder subclass of BasePatch; declares no members yet
+public:
+    // TODO: public interface not defined yet
+private:
+    // TODO: no state yet
+};
+
+class Patch : public BasePatch { // Patch that owns particle arrays and the computes run on them
+public:
+    // NOTE(review): the arguments are currently ignored; BasePatch keeps the matching fields
+    // private and declares no constructor, so they cannot be stored from here yet.
+    Patch(size_t num, short thread_id, short gpu_id) {}
+    // void deleteParticles(IndexList& p);
+    // void addParticles(int n, int typ);
+
+    // Take ownership of `p` and append it to the operations that compute() runs.
+    // Rvalue-only: pass std::move(ptr) or a temporary.
+    void add_compute(std::unique_ptr<BaseCompute>&& p) {
+        local_computes.emplace_back(std::move(p));
+    }
+
+    void compute(); // calls compute(this) on each entry of local_computes (see ParticlePatch.cpp)
+
+private:
+    // std::vector<PatchProxy> neighbors;
+    std::vector<std::unique_ptr<BaseCompute>> local_computes; // Operations run on this patch by compute()
+    std::vector<std::unique_ptr<BaseCompute>> nonlocal_computes; // Declared but not yet used by compute()
+
+    static int patch_idx; // NOTE(review): hides BasePatch::patch_idx; no out-of-class definition in the visible sources
+
+    // CPU particle arrays (null until allocated)
+    Vector3* pos = nullptr;
+    Vector3* momentum = nullptr;
+
+    Vector3* rb_pos = nullptr;
+    Matrix3* rb_orient = nullptr;
+    Vector3* rb_mom = nullptr;
+    Vector3* rb_amom = nullptr;
+
+    int* type = nullptr; // particle types: 0, 1, ... -> num * numReplicas
+
+    int num_rb_attached_particles = 0;
+    int num_group_sites = 0;
+    int* groupSiteData_d = nullptr;
+
+    // Device arrays (null until allocated)
+    Vector3* pos_d = nullptr;
+    Vector3* momentum_d = nullptr;
+    Vector3* rb_pos_d = nullptr;
+    Matrix3* rb_orient_d = nullptr;
+    Vector3* rb_mom_d = nullptr;
+    Vector3* rb_amom_d = nullptr;
+    int* type_d = nullptr;
+};
+
+// // Patch::Patch(size_t num, short thread_id, short gpu_id) {};
+// #ifndef USE_CUDA
+// void Patch::compute() {
+//     for (auto& c_p: local_computes) {
+// 	c_p->compute(this);
+//     }
+// };
+// #endif
diff --git a/src/PatchOps.h b/src/PatchOps.h
new file mode 100644
index 0000000000000000000000000000000000000000..13115b01602576f6ad286f11ac8d3b99d67ead1b
--- /dev/null
+++ b/src/PatchOps.h
@@ -0,0 +1,14 @@
+#pragma once
+
+class Patch;
+
+class BaseCompute { // Low level class that operates on Patch data; held as std::unique_ptr<BaseCompute> by Patch
+public:
+    virtual ~BaseCompute() = default; // required: derived computes are destroyed through a BaseCompute pointer
+    virtual void compute(Patch* patch) = 0; // perform this operation on `patch`
+    virtual int num_patches() const = 0; // NOTE(review): presumably how many patches the operation involves -- confirm
+private:
+    void* compute_data = nullptr; // opaque payload; nothing in this header assigns it
+};
+
+#include "Integrator.h"
diff --git a/src/ProductPotential.h b/src/ProductPotential.h
deleted file mode 100644
index ff918e609c1b59827b71bfa0af68401ba287079a..0000000000000000000000000000000000000000
--- a/src/ProductPotential.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#pragma once
-
-#ifdef __CUDACC__
-    #define HOST __host__
-    #define DEVICE __device__
-#else
-    #define HOST 
-    #define DEVICE 
-#endif
-
-#include "useful.h"
-#include <cuda.h>
-
-
-class ProductPotentialConf {
-public:
-    ProductPotentialConf() {}
-    ProductPotentialConf( std::vector< std::vector<int> > indices, std::vector<String> potential_names ) :
-	indices(indices), potential_names(potential_names) { }
-
-    std::vector< std::vector<int> > indices; /* indices of particles */
-    std::vector<String> potential_names;
-
-    inline ProductPotentialConf(const ProductPotentialConf& a) : indices(a.indices), potential_names(a.potential_names) { }
-
-    
-	/* String toString(); */
-	/* void print(); */
-};
-
diff --git a/src/Random.h b/src/Random.h
deleted file mode 100644
index 7abdaecfb99d94826d2254d44075fcfe86b50e80..0000000000000000000000000000000000000000
--- a/src/Random.h
+++ /dev/null
@@ -1,194 +0,0 @@
-/**
-***  Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by
-***  The Board of Trustees of the University of Illinois.
-***  All rights reserved.
-**/
-
-/*
- * Copyright (c) 1993 Martin Birgmeier
- * All rights reserved.
- *
- * You may redistribute unmodified or modified versions of this source
- * code provided that the above copyright notice and this and the
- * following conditions are retained.
- *
- * This software is provided ``as is'', and comes with no warranties
- * of any kind. I shall in no event be liable for anything that happens
- * to anyone/anything when using this software.
- */
-
-#ifndef RANDOM_H
-#define RANDOM_H
-
-#ifdef __CUDACC__
-    #define HOST __host__
-    #define DEVICE __device__
-#else
-    #define HOST 
-    #define DEVICE 
-#endif
-
-#include "common.h"
-#include "useful.h"
-
-#ifdef _MSC_VER
-#define INT64_LITERAL(X) X ## i64
-#else
-#define INT64_LITERAL(X) X ## LL
-#endif
-
-#define	RAND48_SEED   INT64_LITERAL(0x00001234abcd330e)
-#define	RAND48_MULT   INT64_LITERAL(0x00000005deece66d)
-#define	RAND48_ADD    INT64_LITERAL(0x000000000000000b)
-#define RAND48_MASK   INT64_LITERAL(0x0000ffffffffffff)
-
-class Random {
-
-private:
-
- float second_gaussian;
-  int64 second_gaussian_waiting;
-  int64 rand48_seed;
-  int64 rand48_mult;
-  int64 rand48_add;
-
-public:
-
-  // default constructor
-  Random(void) {
-    init(0);
-    rand48_seed = RAND48_SEED;
-  }
-
-  // constructor with seed
-  Random(unsigned long seed) {
-    init(seed);
-  }
-
-  // reinitialize with seed
-  HOST DEVICE inline void init(unsigned long seed) {
-    second_gaussian = 0;
-    second_gaussian_waiting = 0;
-    rand48_seed = seed & INT64_LITERAL(0x00000000ffffffff);
-    rand48_seed = rand48_seed << 16;
-    rand48_seed |= RAND48_SEED & INT64_LITERAL(0x0000ffff);
-    rand48_mult = RAND48_MULT;
-    rand48_add = RAND48_ADD;
-    // print("INIT\n");
-  }
-
-  HOST DEVICE inline void print(char* string)
-  {
-	printf(string);
-	printf("RAND48_SEED = %d, RAND48_MULT = %d, RAND48_ADD = %d, RAND48_MASK = %d\n", RAND48_SEED, RAND48_MULT, RAND48_ADD, RAND48_MASK);
-	printf("rand48_seed = %d, rand48_mult = %d, rand48_add = %d\n", rand48_seed, rand48_mult, rand48_add);
-  }
-
-  // advance generator by one (seed = seed * mult + add, to 48 bits)
-  HOST DEVICE inline void skip(void) {
-    rand48_seed = ( rand48_seed * rand48_mult + rand48_add ) & RAND48_MASK;
-  }
-
-  // split into numStreams different steams and take stream iStream
-  void split(int iStream, int numStreams) {
-    int i;
-
-    // make sure that numStreams is odd to ensure maximum period
-    numStreams |= 1;
-
-    // iterate to get to the correct stream
-    for ( i = 0; i < iStream; ++i ) skip();
-
-    // save seed and add so we can use skip() for our calculations
-    int64 save_seed = rand48_seed;
-
-    // calculate c *= ( 1 + a + ... + a^(numStreams-1) )
-    rand48_seed = rand48_add;
-    for ( i = 1; i < numStreams; ++i ) skip();
-    int64 new_add = rand48_seed;
-
-    // calculate a = a^numStreams
-    rand48_seed = rand48_mult;
-    rand48_add  = 0;
-    for ( i = 1; i < numStreams; ++i ) skip();
-    rand48_mult = rand48_seed;
-
-    rand48_add  = new_add;
-    rand48_seed = save_seed;
-
-    second_gaussian = 0;
-    second_gaussian_waiting = 0;
-    print("END SPLIT\n");
-  }
-
-  // return a number uniformly distributed between 0 and 1
-  HOST DEVICE inline BigReal uniform(void) {
-    skip();
-    const float exp48 = ( 1.0 / (float)(INT64_LITERAL(1) << 48) );
-    return ( (float) rand48_seed * exp48 );
-  }
-
-  long poisson(BigReal lambda) {
-    const BigReal l = exp(-lambda);
-    long k = 0;
-    BigReal p = uniform();
-    
-    while (p >= l) {
-      p *= uniform();
-      k = k + 1;
-    }
-
-    return k;
-  }
-
-  // return a number from a standard gaussian distribution
-  HOST DEVICE inline BigReal gaussian(void) {
-    BigReal fac, r, v1, v2;
-
-    if (second_gaussian_waiting) {
-      second_gaussian_waiting = 0;
-      return second_gaussian;
-    } else {
-      r = 2.;                 // r >= 1.523e-8 ensures abs result < 6
-      while (r >=1. || r < 1.523e-8) { // make sure we are within unit circle
-        v1 = 2.0 * uniform() - 1.0;
-        v2 = 2.0 * uniform() - 1.0;
-        r = v1*v1 + v2*v2;
-				// printf("r %f", r);
-      }
-      fac = sqrt(-2.0 * log(r)/r);
-      // now make the Box-Muller transformation to get two normally
-      // distributed random numbers. Save one and return the other.
-      second_gaussian_waiting = 1;
-      second_gaussian = v1 * fac;
-      return v2 * fac;
-    }
-
-  }
-
-  // return a vector of gaussian random numbers
-  HOST DEVICE inline Vector3 gaussian_vector(void) {
-    return Vector3( gaussian(), gaussian(), gaussian() );
-  }
-
-  // return a random long
-  HOST DEVICE inline long integer(void) {
-    skip();
-    return ( ( rand48_seed >> 17 ) & INT64_LITERAL(0x000000007fffffff) );
-  }
-
-  // randomly order an array of whatever
-  template <class Elem> void reorder(Elem *a, int n) {
-    for ( int i = 0; i < (n-1); ++i ) {
-      int ie = i + ( integer() % (n-i) );
-      if ( ie == i ) continue;
-      const Elem e = a[ie];
-      a[ie] = a[i];
-      a[i] = e;
-    }
-  }
-
-};
-
-#endif  // RANDOM_H
-
diff --git a/src/RandomCPU.h b/src/RandomCPU.h
deleted file mode 100644
index 5b56cd5e7898ed94ef6a39c5a89a50c26f09bddd..0000000000000000000000000000000000000000
--- a/src/RandomCPU.h
+++ /dev/null
@@ -1,199 +0,0 @@
-/**
-***  Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by
-***  The Board of Trustees of the University of Illinois.
-***  All rights reserved.
-**/
-
-/*
- * Copyright (c) 1993 Martin Birgmeier
- * All rights reserved.
- *
- * You may redistribute unmodified or modified versions of this source
- * code provided that the above copyright notice and this and the
- * following conditions are retained.
- *
- * This software is provided ``as is'', and comes with no warranties
- * of any kind. I shall in no event be liable for anything that happens
- * to anyone/anything when using this software.
- */
-
-/*â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“.
-| Redundant with Random.h but with RandomCPU class since Random class is also |
-| declared in RandomCUDA.h, and I want to use both for rigidBody code         |
-`â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“*/
-// RBTODO: make Random.h include RandomCPU.h and "typedef" this class
-
-#ifndef RANDOM_CPU_H
-#define RANDOM_CPU_H
-
-#ifdef __CUDACC__
-#define HOST __host__
-#define DEVICE __device__
-#else
-#define HOST 
-#define DEVICE 
-#endif
-
-#include "namd_common.h"
-#include "useful.h"
-
-#ifdef _MSC_VER
-#define INT64_LITERAL(X) X ## i64
-#else
-#define INT64_LITERAL(X) X ## LL
-#endif
-
-#define	RAND48_SEED   INT64_LITERAL(0x00001234abcd330e)
-#define	RAND48_MULT   INT64_LITERAL(0x00000005deece66d)
-#define	RAND48_ADD    INT64_LITERAL(0x000000000000000b)
-#define RAND48_MASK   INT64_LITERAL(0x0000ffffffffffff)
-
-class RandomCPU {
-
-private:
-
-	float second_gaussian;
-  bool second_gaussian_waiting;
-  int64 rand48_seed;
-  int64 rand48_mult;
-  int64 rand48_add;
-
-public:
-
-  // default constructor
-  RandomCPU(void) {
-    init(0);
-    rand48_seed = RAND48_SEED;
-  }
-
-  // constructor with seed
-  RandomCPU(unsigned long seed) {
-    init(seed);
-  }
-
-  // reinitialize with seed
-  HOST DEVICE inline void init(unsigned long seed) {
-    second_gaussian = 0;
-    second_gaussian_waiting = false;
-    rand48_seed = seed & INT64_LITERAL(0x00000000ffffffff);
-    rand48_seed = rand48_seed << 16;
-    rand48_seed |= RAND48_SEED & INT64_LITERAL(0x0000ffff);
-    rand48_mult = RAND48_MULT;
-    rand48_add = RAND48_ADD;
-  }
-
-  HOST DEVICE inline void print(const char* string)
-		{
-			printf(string);
-			printf("RAND48_SEED = %lld, RAND48_MULT = %lld, RAND48_ADD = %lld, RAND48_MASK = %lld\n", RAND48_SEED, RAND48_MULT, RAND48_ADD, RAND48_MASK);
-			printf("rand48_seed = %ld, rand48_mult = %ld, rand48_add = %ld\n", rand48_seed, rand48_mult, rand48_add);
-		}
-
-  // advance generator by one (seed = seed * mult + add, to 48 bits)
-  HOST DEVICE inline void skip(void) {
-    rand48_seed = ( rand48_seed * rand48_mult + rand48_add ) & RAND48_MASK;
-  }
-
-  // split into numStreams different steams and take stream iStream
-  void split(int iStream, int numStreams) {
-    int i;
-
-    // make sure that numStreams is odd to ensure maximum period
-    numStreams |= 1;
-
-    // iterate to get to the correct stream
-    for ( i = 0; i < iStream; ++i ) skip();
-
-    // save seed and add so we can use skip() for our calculations
-    int64 save_seed = rand48_seed;
-
-    // calculate c *= ( 1 + a + ... + a^(numStreams-1) )
-    rand48_seed = rand48_add;
-    for ( i = 1; i < numStreams; ++i ) skip();
-    int64 new_add = rand48_seed;
-
-    // calculate a = a^numStreams
-    rand48_seed = rand48_mult;
-    rand48_add  = 0;
-    for ( i = 1; i < numStreams; ++i ) skip();
-    rand48_mult = rand48_seed;
-
-    rand48_add  = new_add;
-    rand48_seed = save_seed;
-
-    second_gaussian = 0;
-    second_gaussian_waiting = false;
-    print("END SPLIT\n");
-  }
-
-  // return a number uniformly distributed between 0 and 1
-  HOST DEVICE inline BigReal uniform(void) {
-    skip();
-    const float exp48 = ( 1.0 / (float)(INT64_LITERAL(1) << 48) );
-    return ( (float) rand48_seed * exp48 );
-  }
-
-  long poisson(BigReal lambda) {
-    const BigReal l = exp(-lambda);
-    long k = 0;
-    BigReal p = uniform();
-    
-    while (p >= l) {
-      p *= uniform();
-      k = k + 1;
-    }
-
-    return k;
-  }
-
-  // return a number from a standard gaussian distribution
-  HOST DEVICE inline BigReal gaussian(void) {
-    BigReal fac, r, v1, v2;
-
-    if (second_gaussian_waiting) {
-      second_gaussian_waiting = false;
-      return second_gaussian;
-    } else {
-      r = 2.;                 // r >= 1.523e-8 ensures abs result < 6
-      while (r >=1. || r < 1.523e-8) { // make sure we are within unit circle
-        v1 = 2.0 * uniform() - 1.0;
-        v2 = 2.0 * uniform() - 1.0;
-        r = v1*v1 + v2*v2;
-				// printf("r %f\n", r);
-      }
-      fac = sqrt(-2.0 * log(r)/r);
-      // now make the Box-Muller transformation to get two normally
-      // distributed random numbers. Save one and return the other.
-      second_gaussian_waiting = true;
-      second_gaussian = v1 * fac;
-      return v2 * fac;
-    }
-
-  }
-
-  // return a vector of gaussian random numbers
-  HOST DEVICE inline Vector3 gaussian_vector(void) {
-    return Vector3( gaussian(), gaussian(), gaussian() );
-  }
-
-  // return a random long
-  HOST DEVICE inline long integer(void) {
-    skip();
-    return ( ( rand48_seed >> 17 ) & INT64_LITERAL(0x000000007fffffff) );
-  }
-
-  // randomly order an array of whatever
-  template <class Elem> void reorder(Elem *a, int n) {
-    for ( int i = 0; i < (n-1); ++i ) {
-      int ie = i + ( integer() % (n-i) );
-      if ( ie == i ) continue;
-      const Elem e = a[ie];
-      a[ie] = a[i];
-      a[i] = e;
-    }
-  }
-
-};
-
-#endif  // RANDOM_CPU_H
-
diff --git a/src/RandomCUDA.cu b/src/RandomCUDA.cu
deleted file mode 100644
index fe90992114e41c938d22ceee5630de3d56d89668..0000000000000000000000000000000000000000
--- a/src/RandomCUDA.cu
+++ /dev/null
@@ -1,113 +0,0 @@
-#include "RandomCUDA.h"
-
-__global__
-void initKernel(unsigned long seed, curandState_t *state, int num);
-
-void Random::init(int num, unsigned long seed) {
-	if (states != NULL)
-		gpuErrchk(cudaFree(states));
-	gpuErrchk(cudaMalloc(&states, sizeof(curandState) * num));
-	int nBlocks = num / NUM_THREADS + 1;
-	initKernel<<< nBlocks, NUM_THREADS >>>(seed, states, num);
-	gpuErrchk(cudaDeviceSynchronize());
-
-	// Create RNG and set seed
-	curandCreateGenerator(&generator, CURAND_RNG_PSEUDO_XORWOW);
-	curandSetPseudoRandomGeneratorSeed(generator, seed);
-	
-	if (uniform_d != NULL) 
-        {
-            gpuErrchk(cudaFree(uniform_d));
-            uniform_d = NULL;
-        }
-        if(integer_d!=NULL)
-        {
-            gpuErrchk(cudaFree(integer_d));
-            integer_d = NULL;
-        }
-        if(gaussian_d!=NULL)
-        {
-            gpuErrchk(cudaFree(gaussian_d));
-            gaussian_d = NULL;
-        }
-        if(integer_h!=NULL)
-        {
-	    delete[] integer_h;
-            integer_h = NULL;
-        }
-        if(uniform_h!=NULL)
-        {
-	    delete[] uniform_h;
-            uniform_h = NULL;
-	}
-        if(gaussian_h!=NULL)
-        {
-	    delete[] gaussian_h;
-            gaussian_h = NULL;
-        }
-	gpuErrchk(cudaMalloc((void**)&uniform_d, sizeof(float) * RAND_N));
-	gpuErrchk(cudaMalloc((void**)&integer_d, sizeof(unsigned int) * RAND_N));
-	gpuErrchk(cudaMalloc((void**)&gaussian_d, sizeof(float) * RAND_N));
-	integer_h = new unsigned int[RAND_N];
-	uniform_h = new float[RAND_N];
-	gaussian_h = new float[RAND_N];
-	uniform_n = 0;
-	integer_n = 0;
-	gaussian_n = 0;
-}
-
-float Random::uniform() {
-	if (uniform_n < 1) {
-		cuRandchk(curandGenerateUniform(generator, uniform_d, RAND_N));
-		gpuErrchk(cudaMemcpy(uniform_h, uniform_d, sizeof(float) * RAND_N, cudaMemcpyDeviceToHost));
-		uniform_n = RAND_N;
-	}
-	return uniform_h[--uniform_n];
-}
-
-unsigned int Random::poisson(float lambda) {
-	const float l = exp(-lambda);
-	unsigned int k = 0;
-	float p = uniform();
-	while (p >= l) {
-		p *= uniform();
-		k = k + 1;
-	}
-	return k;
-}
-
-unsigned int Random::integer() {
-	if (integer_n < 1) {
-		curandGenerate(generator, integer_d, RAND_N);
-		gpuErrchk(cudaMemcpy(integer_h, integer_d, sizeof(unsigned int) * RAND_N, cudaMemcpyDeviceToHost));
-		integer_n = RAND_N;
-	}
-	return integer_h[--integer_n];
-}
-
-void Random::reorder(int a[], int n) {
-	for (int i = 0; i < (n-1); ++i) {
-		unsigned int j = i + (integer() % (n-i));
-		if ( j == i )
-			continue;
-		std::swap<int>(a[i], a[j]);
-		const int tmp = a[j];
-		a[j] = a[i];
-		a[i] = tmp;
-	}
-}
-
-__global__ 
-void initKernel(unsigned long seed, curandState_t *state, int num) {
-       int idx = blockIdx.x * blockDim.x + threadIdx.x;
-       int step = blockDim.x * gridDim.x;
-       for(int i = idx; i < num; i=i+step)
-       {
-           curandState_t local;
-           // curand_init(clock64()+seed,i,0,&local);
-           //curand_init(clock64(),i,0,&state[i]);
-	   curand_init(seed,i,0,&local);
-           state[(size_t)i] = local;
-       }
-
-}
diff --git a/src/RandomCUDA.h b/src/RandomCUDA.h
deleted file mode 100644
index 88e70b041f4f60a5cd9dfb7daebaf9cac07f504d..0000000000000000000000000000000000000000
--- a/src/RandomCUDA.h
+++ /dev/null
@@ -1,102 +0,0 @@
-#ifndef RANDOM_CUDA_H
-#define RANDOM_CUDA_H
-
-// #include "/usr/include/linux/cuda.h"
-// #include "/usr/local/encap/cuda-4.0/include/cuda_runtime.h"
-// #include "/usr/local/encap/cuda-4.0/include/curand_kernel.h"
-// #include "/usr/local/encap/cuda-4.0/include/curand.h"
-#include <cuda.h>
-#include <curand_kernel.h>
-#include <curand.h>
-
-#include "useful.h"
-#include "ComputeForce.h"
-
-#ifdef __CUDACC__
-    #define HOST __host__
-    #define DEVICE __device__
-#else
-    #define HOST 
-    #define DEVICE 
-#endif
-
-#ifndef gpuErrchk
-#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
-inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
-	if (code != cudaSuccess) {
-		fprintf(stderr,"CUDA Error: %s (%s:%d)\n", cudaGetErrorString(code), file, line);
-		if (abort) exit(code);
-	}
-}
-#endif
-
-#define cuRandchk(ans) { cuRandAssert((ans), __FILE__, __LINE__); }
-inline void cuRandAssert(curandStatus code, const char *file, int line, bool abort=true) {
-	if (code != CURAND_STATUS_SUCCESS) {
-		fprintf(stderr, "CURAND Error: %d (%s:%d)\n", code, file, line);
-		if (abort) exit(code);
-	}
-}
-
-class Random {
-public:
-	static const size_t RAND_N = 1024*4; // max random numbers stored
-
-	curandState_t *states;
-	curandGenerator_t generator;
-	unsigned int *integer_h, *integer_d;
-	float *uniform_h, *uniform_d;
-	float *gaussian_h, *gaussian_d;
-	size_t integer_n, uniform_n, gaussian_n;
-
-public:
-
-	Random() : states(NULL), integer_h(NULL), integer_d(NULL), uniform_h(NULL), uniform_d(NULL), gaussian_h(NULL), gaussian_d(NULL) { }
-	Random(int num, unsigned long seed=0) : states(NULL), integer_h(NULL), integer_d(NULL), uniform_h(NULL), uniform_d(NULL), gaussian_h(NULL), gaussian_d(NULL) {		
-		init(num, seed);
-	}
-
-	void init(int num, unsigned long seed);
-
-	DEVICE inline float gaussian(int idx, int num) {
-		// TODO do stuff
-		if (idx < num)
-			return curand_normal(&states[idx]);
-		return 0.0f;
-	}
-	DEVICE inline float gaussian(curandState* state) {
-		return curand_normal(state);
-	}
-
-	DEVICE inline Vector3 gaussian_vector(int idx, int num) {
-		// TODO do stuff
-		if (idx < num) {
-			curandState localState = states[idx];
-			Vector3 v = gaussian_vector(&localState);
-			states[idx] = localState;
-			return v;
-		} else return Vector3(0.0f);			
-	}
-	DEVICE inline Vector3 gaussian_vector(curandState* state) {
-		float x = gaussian(state);
-		float y = gaussian(state);
-		float z = gaussian(state);
-		return Vector3(x, y, z);
-	}
-
-	unsigned int integer();
-	unsigned int poisson(float lambda);
-	float uniform();
-
-	HOST inline float gaussian() {
-	    if (gaussian_n < 1) {
-		cuRandchk(curandGenerateNormal(generator, gaussian_d, RAND_N, 0, 1));
-		gpuErrchk(cudaMemcpy(gaussian_h, gaussian_d, sizeof(float) * RAND_N, cudaMemcpyDeviceToHost));
-	    }
-	    return gaussian_h[--gaussian_n];
-	}
-
-	void reorder(int *a, int n);
-};
-
-#endif
diff --git a/src/Reader.h b/src/Reader.h
deleted file mode 100644
index cf41363bcba7068e1d50be82047fe60cc58420f4..0000000000000000000000000000000000000000
--- a/src/Reader.h
+++ /dev/null
@@ -1,129 +0,0 @@
-///////////////////////////////////////////////////////////////////////
-// Configuration file reader
-// Author: Jeff Comer <jcomer2@illinois.edu>
-#ifndef READER_H
-#define READER_H
-
-#include "useful.h"
-
-class Reader {
-public:
-  Reader(const char* fileName) {
-		FILE* inp = fopen(fileName, "r");
-		char line[256];
-
-		if (inp == NULL) {
-			printf("Error! Reader::Reader could not open `%s'.\n", fileName);
-			exit(-1);
-		}
-
-		const int numLines = countParameterLines(fileName);
-		param = new String[numLines];
-		value = new String[numLines];
-
-		int count = 0;
-		while (fgets(line, 256, inp) != NULL) {
-			// Ignore comments.
-			int len = strlen(line);
-			if (line[0] == '#') continue;
-			if (len < 2) continue;
-			
-			String s(line);
-			int numTokens = s.tokenCount();
-			
-			// The config files were originally supposed to have only two tokens (words separated by spaces) per line
-			// I took this restriction out because it allows for more intuitive config file construction
-			/*
-			if (numTokens != 2) {
-				printf("Warning: Invalid config file line: %s\n", line);
-				continue;
-			}
-			*/
-			
-			String* tokenList = new String[numTokens];
-			s.tokenize(tokenList);
-			if (tokenList == NULL) {
-				printf("Warning: Invalid config file line: %s\n", line);
-				continue;
-			}
-			param[count] = tokenList[0];
-			for (int i = 1; i < numTokens; i++) {
-				value[count].add(tokenList[i]);
-				if (i != numTokens - 1)
-					value[count].add(" ");
-			}
-			//printf("%s %s\n", tokenList[0].val(), tokenList[1].val());
-			// printf("READER: %d %s %s\n", count, param[count].val(), value[count].val());
-			count++;
-
-			delete[] tokenList;
-		}
-		num = count;
-
-		fclose(inp);
-	}
-
-	~Reader() {
-		delete[] param;
-		delete[] value;
-	}
-	
-	static int countParameterLines(const char* fileName) {
-		FILE* inp = fopen(fileName, "r");
-		char line[256];
-		int count = 0;
-
-		while (fgets(line, 256, inp) != NULL) {
-			// Ignore comments.
-			int len = strlen(line);
-			if (line[0] == '#') continue;
-			if (len < 2) continue;
-			
-			count++;
-		}
-		fclose(inp);
-
-		return count;
-	}
-
-	int length() const { return num; }
-
-	String getParameter(int i) const {
-		i %= num;
-		while (i < 0) i += num;
-		return param[i];
-	}
-
-	String getValue(int i) const {
-		i %= num;
-		while (i < 0) i += num;
-		// printf("Reader::getValue(%d) %s\n",i,value[i].val());
-		return value[i];
-	}
-
-	String toString() const {
-		String ret;
-		for (int i = 0; i < num; i++) {
-			ret.add(param[i]);
-			ret.add(' ');
-			ret.add(value[i]);
-			ret.add('\n');
-		}
-		return ret;
-	}
-
-	int countParameter(const String& p) const {
-		int count = 0;
-		for (int i = 0; i < num; i++)
-			if (param[i] == p)
-				count++;
-		return count;
-	}
-
-private:
-  int num;
-  String* param;
-  String* value;
-};
-
-#endif
diff --git a/src/Reservoir.cu b/src/Reservoir.cu
deleted file mode 100644
index 50f0fe4f2e5fe2e53c5ccbb339d6f669cca17d05..0000000000000000000000000000000000000000
--- a/src/Reservoir.cu
+++ /dev/null
@@ -1,130 +0,0 @@
-// Configuration file reader
-// Author: Jeff Comer <jcomer2@illinois.edu>
-
-#include "Reservoir.h"
-#include <cuda.h>
-
-
-Reservoir::Reservoir(const char* reservoirFile) {
-	reservoirs = countReservoirs(reservoirFile);
-	r0 = new Vector3[reservoirs];
-	r1 = new Vector3[reservoirs];
-	num = new float[reservoirs];
-
-	readReservoirs(reservoirFile);
-	validateRegions();
-}
-
-Reservoir::~Reservoir() {
-	delete[] r0;
-	delete[] r1;
-	delete[] num;
-}
-
-int Reservoir::countReservoirs(const char* reservoirFile) {
-	// Open the file.
-	FILE* inp = fopen(reservoirFile, "r");
-	if (inp == NULL) {
-		printf("Reservoir:Reservoir Couldn't open file `%s'.\n", reservoirFile);
-		exit(-1);
-	}
-
-	int count = 0;
-	float x0, y0, z0, x1, y1, z1;
-	float n;
-	char line[STRLEN];
-	int nRead;
-	while (fgets(line, STRLEN, inp) != NULL) {
-		// Ignore comments.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-      
-		// Read definition lines.
-		nRead = sscanf(line, "%f %f %f %f %f %f %f", &x0, &y0, &z0, &x1, &y1, &z1, &n);
-		if (nRead < 7) {
-			printf("Reservoir:Reservoir Improperly formatted line `%s'\n", line);
-			fclose(inp);
-			exit(-1);
-		}
-		count++;
-	}
-	return count;
-}
-
-void Reservoir::readReservoirs(const char* reservoirFile) {
-	// Open the file.
-	FILE* inp = fopen(reservoirFile, "r");
-	if (inp == NULL) {
-		printf("Reservoir:Reservoir Couldn't open file `%s'.\n", reservoirFile);
-		exit(-1);
-	}
-
-	int count = 0;
-	float x0, y0, z0, x1, y1, z1;
-	float n;
-	char line[STRLEN];
-	int nRead;
-	while (fgets(line, STRLEN, inp) != NULL) {
-		// Ignore comments.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-      
-		// Read definition lines.
-		nRead = sscanf(line, "%f %f %f %f %f %f %f", &x0, &y0, &z0, &x1, &y1, &z1, &n);
-		if (nRead < 7) {
-			printf("Reservoir:Reservoir Improperly formatted line `%s'\n", line);
-			fclose(inp);
-			exit(-1);
-		}
-
-		r0[count] = Vector3(x0, y0, z0);
-		r1[count] = Vector3(x1, y1, z1);
-		num[count] = n;
-
-		count++;
-	}
-}
-
-void Reservoir::validateRegions() {
-	for (int i = 0; i < reservoirs; i++) {
-		Vector3 a = r0[i];
-		Vector3 b = r1[i];
-
-		if (a.x > b.x) {r0[i].x = b.x; r1[i].x = a.x;}
-		if (a.y > b.y) {r0[i].y = b.y; r1[i].y = a.y;}
-		if (a.z > b.z) {r0[i].z = b.z; r1[i].z = a.z;}
-	}
-}
-
-Vector3 Reservoir::getOrigin(int i) const {
-	if (i < 0 || i >= reservoirs) return Vector3(0.0f);
-	return r0[i];
-}
-Vector3 Reservoir::getDestination(int i) const {
-	if (i < 0 || i >= reservoirs) return Vector3(0.0f);
-	return r1[i];
-}
-Vector3 Reservoir::getDifference(int i) const {
-	if (i < 0 || i >= reservoirs) return Vector3(0.0f);
-	return r1[i] - r0[i];
-}
-
-//TODO: check getMeanNumber function
-float Reservoir::getMeanNumber(int i) const {
-	if (i < 0 || i >= reservoirs) return 0.0f;
-	return num[i];
-}
-int Reservoir::length() const {
-	return reservoirs;
-}
-
-bool Reservoir::inside(int i, Vector3 r) const {
-	if (i < 0 || i >= reservoirs) return false;
-	if (r.x < r0[i].x || r.x >= r1[i].x) return false;
-	if (r.y < r0[i].y || r.y >= r1[i].y) return false;
-	if (r.z < r0[i].z || r.z >= r1[i].z) return false;
-	return true;
-}
-
diff --git a/src/Reservoir.h b/src/Reservoir.h
deleted file mode 100644
index 3ed27700343fcf5b3cf5f0357ba27088aefae328..0000000000000000000000000000000000000000
--- a/src/Reservoir.h
+++ /dev/null
@@ -1,35 +0,0 @@
-// Configuration file reader
-// Author: Jeff Comer <jcomer2@illinois.edu>
-#ifndef RESERVOIR_H
-#define RESERVOIR_H
-
-#define STRLEN 512 
-
-#include "useful.h"
-
-class Reservoir {
-public:
-  Reservoir(const char* reservoirFile);
-  ~Reservoir();
-	
-  static int countReservoirs(const char* reservoirFile);
-	
-  Vector3 getOrigin(int i) const;
-  Vector3 getDestination(int i) const;
-  Vector3 getDifference(int i) const;
-
-  float getMeanNumber(int i) const;
-  int length() const;
-
-  bool inside(int i, Vector3 r) const;
-
-private:
-  int reservoirs;
-  Vector3* r0;
-  Vector3* r1;
-  float* num;
-	
-  void readReservoirs(const char* reservoirFile);
-  void validateRegions();
-};
-#endif
diff --git a/src/Restraint.h b/src/Restraint.h
deleted file mode 100644
index 219f141a0d5f7088b8de9e4334df94da75503353..0000000000000000000000000000000000000000
--- a/src/Restraint.h
+++ /dev/null
@@ -1,14 +0,0 @@
-// Exclude.h
-// Copyright Justin Dufresne and Terrance Howard, 2013
-
-#pragma once
-#include "useful.h"
-
-struct Restraint {
-public:
-    Restraint() : id(-1) {}
-    Restraint(int id, Vector3 r0, float k) : id(id), r0(r0), k(k) {}
-    int id;
-    Vector3 r0;
-    float k;
-};
diff --git a/src/RigidBody.cu b/src/RigidBody.cu
deleted file mode 100644
index 8aeb7aa65711c72fa02149e73cf969a611f4078f..0000000000000000000000000000000000000000
--- a/src/RigidBody.cu
+++ /dev/null
@@ -1,562 +0,0 @@
-#include <iostream>
-#include <typeinfo>
-#include "RigidBody.h"
-#include "RigidBodyType.h"
-#include "RigidBodyController.h"
-#include "Configuration.h"
-#include "ComputeGridGrid.cuh"
-
-#include "Debug.h"
-
-#ifndef gpuErrchk
-#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
-inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
-   if (code != cudaSuccess) {
-      fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), __FILE__, line);
-      if (abort) exit(code);
-   }
-}
-#endif
-
-RigidBody::RigidBody(String name, const Configuration& cref, const RigidBodyType& tref, RigidBodyController* RBCref, int attached_particle_start, int attached_particle_end) 
-    : name(name), c(&cref), t(&tref), RBC(RBCref), attached_particle_start(attached_particle_start), attached_particle_end(attached_particle_end), impulse_to_momentum(4.1867999435271e4) /*impulse_to_momentum(4.184e8f)*/ { init(); }
-RigidBody::RigidBody(const RigidBody& rb)
-    : name(rb.name), c(rb.c), t(rb.t), RBC(rb.RBC), attached_particle_start(rb.attached_particle_start), attached_particle_end(rb.attached_particle_end), impulse_to_momentum(4.1867999435271e4)/*impulse_to_momentum(4.184e8f)*/ { init(); }
-void RigidBody::init() {
-	// units "(kcal_mol/AA) * ns" "dalton AA/ns" * 4.184e+08	
-	timestep = c->timestep;
-	Temp = c->temperature * 0.0019872065f;
-	// RBTODO: use temperature grids
-	// tempgrid = c->temperatureGrid;
-	position = t->initPos;
-
-	// Orientation matrix that brings vector from the RB frame to the lab frame
-	orientation = t->initRot;
-
-        momentum = t->initMomentum;
-        angularMomentum = t->initAngularMomentum;
-
-	// Memory allocation for forces between particles and grids 
-	const int& numGrids = t->numPotGrids;
-	if (numGrids > 0) {
-	    numParticles = new int[numGrids];
-	    particles_d = new int*[numGrids];
-	    particleForceStreams = new const cudaStream_t*[numGrids];
-
-	    for (int i = 0; i < numGrids; ++i) {
-		numParticles[i] = -1;
-		const int& n = t->numParticles[i];
-		if (n > 0) {
-		    // gpuErrchk(cudaMalloc( &particles_d[i], 0.5*sizeof(int)*n )); // not sure why 0.5 was here; prolly bug
-		    gpuErrchk(cudaMalloc( &particles_d[i], sizeof(int)*n )); // TODO: dynamically allocate memory as needed
-		}
-	    }
-	} else {
-	    numParticles = NULL;
-	}
-}
-
-GPUManager RigidBody::gpuman = GPUManager();
-
-//Boltzmann distribution
-void RigidBody::Boltzmann(unsigned long int seed)
-{
-
-    double sigma[4] = { sqrt(t->mass*Temp) * 2.046167135,sqrt(t->inertia.x*Temp) * 2.046167135, sqrt(t->inertia.y*Temp) * 2.046167135, sqrt(t->inertia.z*Temp) * 2.046167135 };
-
-    momentum = sigma[0]*getRandomGaussVector();
-    angularMomentum = getRandomGaussVector();
-    angularMomentum.x *= sigma[1];
-    angularMomentum.y *= sigma[2];
-    angularMomentum.z *= sigma[3];
-    // printf("%f\n", Temp);
-    // printf("%f\n", Temperature());
-}
-
-RigidBody::~RigidBody() {
-	const int& numGrids = t->numPotGrids;
-	for (int i = 0; i < numGrids; ++i) {
-		const int& n = t->numParticles[i];
-		if (n > 0) {
-			gpuErrchk(cudaFree( particles_d[i] ));
-		}
-	}
-	if (numParticles != NULL) {
-		delete[] numParticles;
-		delete[] particles_d;
-		delete[] particleForceStreams;
-	}
-}
-
-int RigidBody::appendNumParticleBlocks( std::vector<int>* blocks ) {
-    int ret = 0;
-    const int& numGrids = t->numPotGrids;
-    for (int i = 0; i < numGrids; ++i) {
-	numParticles[i] = -1;
-	const int& n = t->numParticles[i];
-	const int nb = (n/NUMTHREADS)+1; // max number of blocks
-	if (n > 0) {
-	    blocks->push_back(nb);
-	    ret += nb;
-	}
-    }
-    return ret;
-}
-
-__global__
-void update_particle_positions_kernel(Vector3* __restrict__ pos, const int start, const int num,
-			       const Vector3* __restrict__ pos_rb,
-			       const Vector3 center, const Matrix3 orientation) {
-	const int i = blockIdx.x * blockDim.x + threadIdx.x;
-	if (i < num) {
-	    const int aid = i+start;
-	    pos[aid] = orientation*pos_rb[i] + center;
-	}
-}		
-void RigidBody::update_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d) {
-    int num_attached = attached_particle_end - attached_particle_start;
-    int nb = floor(num_attached/NUMTHREADS) + 1;
-    update_particle_positions_kernel<<<nb,NUMTHREADS>>>(pos_d, attached_particle_start, num_attached,
-						 t->attached_particle_positions_d, position, orientation);
-}
-
-void RigidBody::addForce(Force f) { 
-	force += f; 
-} 
-void RigidBody::addTorque(Force torq) {
-	torque += torq; 
-}
-void RigidBody::addEnergy(float e)
-{
-    energy += e;
-}
-
-// TODO move code snippet to more general location
-template<class C, class T>
-auto contains(const C& v, const T& x)
--> decltype(end(v), true)
-{
-    return end(v) != std::find(begin(v), end(v), x);
-}
-/*
-void RigidBody::initialize_possible_particles()
-{
-    std::vector<int> atomic_ids;
-
-    // Loop over potential grids
-    for (int i = 0; i < t->numPotGrids; ++i) {
-	atomic_ids.clear();
-
-	String& gridName = t->potentialGridKeys[i];
-	
-	// Loop over particle types to count the number of particles
-	for (int j = 0; j < conf->numParts; ++j) {
-	    if (contains(conf->partRigidBodyGrid[j], gridName)) {
-		// gridNames contained gridName, so add the particles to atomic_ids
-		for (int aid = 0; aid < conf->num + conf->num_rb_attached_particles; ++aid) {
-		    if (conf->type[aid] == j && (aid < exclude_start || aid > exclude_end)) {
-			atomic_ids.push_back(aid);
-		    }
-		}
-	    }
-	}
-
-	// Initialize device data
-	size_t sz = sizeof(int) * atomic_ids.size();
-	gpuErrchk(cudaMalloc( &(possible_particles_d[i]), sz ));
-	gpuErrchk(cudaMemcpyAsync( possible_particles_d[i], &atomic_ids[0], sz, cudaMemcpyHostToDevice))
-
-	// // Add particles attached to OTHER RBs
-	// int rb_particle_offset = conf->num;
-	// for (const auto& rbs: RBC->rigidBodyByType)
-	// {
-	//     int rb_type
-	//     const RigidBodyType& rb_type = rbs[0].t;
-	//     if (rbs.t != 
-	//     for (int& ptype: rb_type->attached_particle_types) {
-	// 	// This could be made much more efficient
-	// 	if (contains(conf->partRigidBodyGrid[ptype], gridName)) {
-	// 	// Add particles
-	// 	rb
-	// 	rbs[0].sum
-	// 	    }
-	//     rb_particle_offset += rb_type.attached_particle_types
-	    
-	//     for (int k = 0; k < t->gridNames.size(); ++k) {
-	// 	    if (t->gridNames[k] == gridName) {
-	// 		t->numParticles[i] += conf->numPartsOfType[j];			
-	// 	    }
-
-	    
-	// 		// Loop over rigid body grid names associated with particle type
-	// 		const std::vector<String>& gridNames = conf->partRigidBodyGrid[j];
-	// 		for (int k = 0; k < gridNames.size(); ++k) {
-	// 		    if (gridNames[k] == gridName) {
-	// 			// Copy type j particles to particles[i]
-	// 			memcpy( &(particles[i][pid]), tmp, sizeof(int)*currId );
-	// 			assert(currId == conf->numPartsOfType[j]);
-	// 			pid += conf->numPartsOfType[j];
-	// 		    }
-	// 		}
-	// 	    }
-
-	// numParticles[i] = 0;
-
-	    // Count the particles interacting with potential grid i
-	    // Loop over particle types
-	    for (int j = 0; j < conf->numParts; ++j) {
-
-		// Loop over rigid body grid names associated with particle type
-		const std::vector<String>& gridNames = conf->partRigidBodyGrid[j];
-		for (int k = 0; k < t->gridNames.size(); ++k) {
-		    if (t->gridNames[k] == gridName) {
-			numParticles[i] += conf->numPartsOfType[j];			
-		    }
-		}
-	    }
-
-	    // Add RB particles
-	    for (const auto& rbv: RBC->rigidBodyByType)
-	    {
-		int ptype = rbv[0].t->attached_particle_types;
-		const std::vector<String>& gridNames = conf->partRigidBodyGrid[ptype];
-		for (int k = 0; k < t->gridNames.size(); ++k) {
-		    if (t->gridNames[k] == gridName) {
-			t->numParticles[i] += conf->numPartsOfType[j];			
-		    }
-		
-	    attached_particle_
-	    
-	    if (numParticles[i] > 0) {
-
-		    // allocate array of particle ids for the potential grid 
-		    particles[i] = new int[numParticles[i]];
-		    int pid = 0;
-		
-		    // Loop over particle types to count the number of particles
-		    for (int j = 0; j < conf->numParts; ++j) {
-
-			// Build temporary id array of type j particles
-			int tmp[conf->numPartsOfType[j]];
-			int currId = 0;
-			for (int aid = 0; aid < conf->num; ++aid) {
-			    if (conf->type[aid] == j)
-				tmp[currId++] = aid;
-			}
-			if (currId == 0) continue;
-
-			// Loop over rigid body grid names associated with particle type
-			const std::vector<String>& gridNames = conf->partRigidBodyGrid[j];
-			for (int k = 0; k < gridNames.size(); ++k) {
-			    if (gridNames[k] == gridName) {
-				// Copy type j particles to particles[i]
-				memcpy( &(particles[i][pid]), tmp, sizeof(int)*currId );
-				assert(currId == conf->numPartsOfType[j]);
-				pid += conf->numPartsOfType[j];
-			    }
-			}
-		    }
-
-		    // Initialize device data
-		    size_t sz = sizeof(int) * numParticles[i];
-		    gpuErrchk(cudaMalloc( &(particles_d[i]), sz ));
-		    gpuErrchk(cudaMemcpyAsync( particles_d[i], particles[i], sz, cudaMemcpyHostToDevice));
-		}
-	}
-}
-*/
-void RigidBody::updateParticleList(Vector3* pos_d, BaseGrid* sys_d) {
-	for (int i = 0; i < t->numPotGrids; ++i) {
-		numParticles[i] = 0;
-		int& tnp = t->numParticles[i];
-		if (tnp > 0) {
-		    int idx = t->potential_grid_idx[i];
-			Vector3 gridCenter = t->RBC->grids[idx].getCenter();
-			float cutoff = gridCenter.length();
-			cutoff += t->RBC->grids[idx].getRadius();
-			cutoff += c->pairlistDistance; 
-		   
-			int* tmp_d;
-			gpuErrchk(cudaMalloc( &tmp_d, sizeof(int) ));
-			gpuErrchk(cudaMemcpy( tmp_d, &numParticles[i], sizeof(int), cudaMemcpyHostToDevice ));
-
-			int nb = floor(tnp/NUMTHREADS) + 1;
-#if __CUDA_ARCH__ >= 300
-			createPartlist<<<nb,NUMTHREADS>>>(pos_d, tnp, t->particles_d[i],
-							  attached_particle_start, attached_particle_end,
-							tmp_d, particles_d[i],
-							gridCenter + position, cutoff*cutoff, sys_d);
-#else
-			createPartlist<<<nb,NUMTHREADS,NUMTHREADS/WARPSIZE>>>(pos_d, tnp, t->particles_d[i],
-							  attached_particle_start, attached_particle_end,
-							tmp_d, particles_d[i],
-							gridCenter + position, cutoff*cutoff, sys_d);
-#endif			
-			gpuErrchk(cudaMemcpy(&numParticles[i], tmp_d, sizeof(int), cudaMemcpyDeviceToHost ));
-			gpuErrchk(cudaFree( tmp_d ));
-		}
-	}
-}
-
-void RigidBody::callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d, ForceEnergy* forcestorques_d, const std::vector<int>& forcestorques_offset, int& fto_idx) {
-	// Apply the force and torque on the rigid body, and forces on particles
-	
-	// RBTODO: performance: consolidate CUDA stream management
-	// loop over potential grids 
-	for (int i = 0; i < t->numPotGrids; ++i) {
-		if (numParticles[i] <= 0) continue;
-		// const int nb = 500;
-		/*
-		  r: postion of particle in real space
-		  B: grid Basis
-		  o: grid origin
-		  R: rigid body orientation
-		  c: rigid body center
-
-		  B': R.B 
-		  c': R.o + c
-		*/
-
-		const cudaStream_t& stream = gpuman.get_next_stream();
-		particleForceStreams[i] = &stream;
-
-		size_t idx = t->potential_grid_idx[i];
-
-		Vector3 c =  getOrientation()*t->RBC->grids[idx].getOrigin() + getPosition();
-		Matrix3 B = (getOrientation()*t->RBC->grids[idx].getBasis()).inverse();
-		
-		const int nb = (numParticles[i]/NUMTHREADS)+1;		
-		computePartGridForce<<< nb, NUMTHREADS, NUMTHREADS*2*sizeof(ForceEnergy), stream >>>(
-			pos_d, force_d, numParticles[i], particles_d[i],
-			t->RBC->grids_d+idx,
-			B, getPosition(), c, forcestorques_d+forcestorques_offset[fto_idx++], energy, get_energy, scheme, sys_d);
-	}
-}
-
-void RigidBody::apply_attached_particle_forces(const Vector3* force) {
-    const auto &rb_pos = t->attached_particle_positions;
-    int num = rb_pos.size();
-    Vector3 total_force = Vector3(0.0f);
-    Vector3 torque = Vector3(0.0f);
-    for (int i = 0; i < num; ++i) {
-	const int j = i + attached_particle_start;
-	torque = torque + (orientation*rb_pos[i]).cross(force[j]);
-	total_force = total_force + force[j];
-    }
-    addForce(total_force);
-    addTorque(torque);
-}
-
-void RigidBody::applyGridParticleForces(BaseGrid* sys, ForceEnergy* forcestorques, const std::vector<int>& forcestorques_offset, int& fto_idx) {
-	// loop over potential grids 
-	for (int i = 0; i < t->numPotGrids; ++i) {
-		if (numParticles[i] <= 0) continue;
-		const int nb = (numParticles[i]/NUMTHREADS)+1;
-		int idx = t->potential_grid_idx[i];
-		Vector3 c =  getOrientation()*t->RBC->grids[idx].getOrigin() + getPosition();
-
-		// Sum and apply forces and torques
-		//Vector3 f = Vector3(0.0f);
-		ForceEnergy f = ForceEnergy(0.f,0.f);
-		Vector3 torq = Vector3(0.0f);
-		for (int k = 0; k < nb; ++k) {
-		    int j = forcestorques_offset[fto_idx]+2*k;
-		    f = f + forcestorques[j];
-		    torq = torq + forcestorques[j+1].f;
-		}
-		++fto_idx;
-	        //why the force points are at the origin of the potential?	
-		torq = -torq + (sys->wrapDiff(getPosition()-c)).cross( f.f ); 
-		addForce( -f.f );
-		addTorque( torq );
-                addEnergy( f.e );
-	}
-}
-
-	/*===========================================================================\
-	| Following "Algorithm for rigid-body Brownian dynamics" Dan Gordon, Matthew |
-	|   Hoyles, and Shin-Ho Chung                                                |
-	|   http://langevin.anu.edu.au/publications/PhysRevE_80_066703.pdf           |
-	|                                                                            |
-	|                                                                            |
-	| BUT: assume diagonal friction tensor and no Wiener process / stochastic    |
-	|   calculus then this is just the same as for translation                   |
-	|                                                                            |
-	|   < T_i(t) T_i(t) > = 2 kT friction inertia                                |
-	|                                                                            |
-	|   friction / kt = Diff                                                     |
-	\===========================================================================*/
-void RigidBody::addLangevin(Vector3 w1, Vector3 w2) 
-{
-    Vector3 transForceCoeff = Vector3::element_sqrt( 2. * Temp * t->mass*t->transDamping / timestep );
-    Vector3  rotTorqueCoeff = Vector3::element_sqrt( 2. * Temp * Vector3::element_mult( t->inertia,t->rotDamping) / timestep );
-
-    Force f = Vector3::element_mult(transForceCoeff,w1) -
-              Vector3::element_mult(t->transDamping, orientation.transpose()*momentum) * 10000;
-    
-    Force torq = Vector3::element_mult(rotTorqueCoeff,w2) -
-                 Vector3::element_mult(t->rotDamping, angularMomentum) * 10000;
-
-    f = orientation * f;
-    torq = orientation * torq;
-
-    addForce(f);
-    addTorque(torq);
-}
-
-  /*==========================================================================\
-	| from: Dullweber, Leimkuhler, Maclachlan. Symplectic splitting methods for |
-	| rigid body molecular dynamics. JCP 107. (1997)                            |
-	| http://jcp.aip.org/resource/1/jcpsa6/v107/i15/p5840_s1                    |
-	\==========================================================================*/
-void RigidBody::integrateDLM(BaseGrid* sys, int startFinishAll) 
-{
-    Vector3 trans; // = *p_trans;
-    //Matrix3 rot = Matrix3(1); // = *p_rot;
-
-    if ( isnan(force.x) || isnan(torque.x) ) 
-    {   
-        // NaN check
-        printf("Rigid Body force or torque was NaN!\n");
-        exit(-1);
-    }
-
-    if (startFinishAll == 0 || startFinishAll == 2) 
-    {
-        // propogate momenta by half step
-        momentum += 0.5f * timestep * force * impulse_to_momentum;
-        angularMomentum += 0.5f * timestep * (orientation.transpose()*torque) * impulse_to_momentum;
-    } 
-    else if (startFinishAll == 1)
-    {
-        position += timestep * momentum / t->mass * 1e4; // update CoM a full timestep
-	position = sys->wrap( position );
-
-        // update orientations a full timestep
-        Matrix3 R; // represents a rotation about a principle axis
-        R = Rx(0.5*timestep * angularMomentum.x / t->inertia.x * 1e4); // R1
-        applyRotation(R);
-
-        R = Ry(0.5*timestep * angularMomentum.y / t->inertia.y * 1e4); // R2
-        applyRotation(R);
-                        
-        R = Rz(    timestep * angularMomentum.z / t->inertia.z * 1e4); // R3
-        applyRotation(R);
-                        
-        R = Ry(0.5*timestep * angularMomentum.y / t->inertia.y * 1e4); // R4
-        applyRotation(R);
-
-        R = Rx(0.5*timestep * angularMomentum.x / t->inertia.x * 1e4); // R5
-        applyRotation(R);               
-        // TODO make this periodic
-        // printf("det: %.12f\n", orientation.det());
-        orientation = orientation.normalized();
-        // orientation = orientation/orientation.det();
-        // printf("det2: %.12f\n", orientation.det());
-        // orientation = orientation/orientation.det(); // TODO: see if this can be somehow eliminated (wasn't in original DLM algorithm...)
-    }
-}
-/* Following:
-Brownian Dynamics Simulation of Rigid Particles of Arbitrary Shape in External Fields
-Miguel X. Fernandes, JosÃ© GarcÃa de la Torre
-*/
-
-//Chris original implementation for Brownian motion
-void RigidBody::integrate(BaseGrid* sys, int startFinishAll)
-{
-    // UNITS
-    // Temp: kcal_mol
-    // t->transDamping: (kcal_mol/AA) / (amu AA/ns)
-    // t->mass: amu
-    // diffusion: AA**2/ns
-
-    //if (startFinishAll == 1) return;
-
-    //Matrix3 rot = Matrix3(1); // = *p_rot;
-
-    if ( isnan(force.x) || isnan(torque.x) ) 
-    {
-        printf("Rigid Body force or torque was NaN!\n");
-        exit(-1);
-    }
-    //float Temp = 1;
-    Vector3 diffusion    = Temp / (t->transDamping*t->mass); // TODO: assign diffusion in config file, or elsewhere
-    //Vector3 diffusion    = Temp / (t->transDamping*t->mass);
-    Vector3 rotDiffusion = Temp / (Vector3::element_mult(t->rotDamping,t->inertia));
-
-    Vector3 rando  = getRandomGaussVector();
-    Vector3 offset = Vector3::element_mult( (diffusion / Temp), orientation.transpose() * force ) * timestep +
-                     Vector3::element_mult( Vector3::element_sqrt( 2.0f * diffusion * timestep), rando) ;
-
-    position += orientation*offset;
-    position = sys->wrap( position );
-
-    rando = getRandomGaussVector();
-    Vector3 rotationOffset = Vector3::element_mult( (rotDiffusion / Temp) , orientation.transpose() * torque * timestep) +
-                             Vector3::element_mult( Vector3::element_sqrt( 2.0f * rotDiffusion * timestep), rando );
-
-    // Consider whether a DLM-like decomposition of rotations is needed for time-reversibility
-    orientation = orientation * (Rz(rotationOffset.z * 0.5) * Ry(rotationOffset.y * 0.5) * Rx(rotationOffset.x)
-                              *  Ry(rotationOffset.y * 0.5) * Rz(rotationOffset.z * 0.5));
-    //orientation = orientation * Rz(rotationOffset.z) * Ry(rotationOffset.y) * Rx(rotationOffset.x);
-    orientation = orientation.normalized();
-}
- 
-float RigidBody::Temperature()
-{
-    return (momentum.length2() / t->mass + 
-            angularMomentum.x * angularMomentum.x / t->inertia.x + 
-            angularMomentum.y * angularMomentum.y / t->inertia.y + 
-            angularMomentum.z * angularMomentum.z / t->inertia.z) * 0.50 / Temp * (2.388458509e-1);
-}
-
-void RigidBody::applyRotation(const Matrix3& R) {
-	angularMomentum = R * angularMomentum;
-	// According to DLM, but rotations work the wrong way; I think DLM update is wrong
-	// orientation = orientation * R.transpose(); 
-
-	// This makes sense: apply a rotation in the body frame followed by a transformation from body to lab frame
-	// Also works in statistical test
-	// Consistent with www.archer.ac.uk/documentation/white-papers/lammps-elba/lammps-ecse.pdf
-	orientation = orientation * R; 
-        orientation.normalized();	
-}
-
-// Rotations about axes
-// for very small angles 10^-8, cos^2+sin^2 != 1 
-// concerned about the accumulation of errors in non-unitary transformations!
-Matrix3 RigidBody::Rx(BigReal t) {
-	BigReal qt = 0.25*t*t;  // for approximate calculations of sin(t) and cos(t)
-	BigReal cos = (1-qt)/(1+qt);
-	BigReal sin = t/(1+qt);
-
-	return Matrix3(
-		1.0f, 0.0f, 0.0f,
-		0.0f,  cos, -sin,
-		0.0f,  sin,  cos);
-}
-Matrix3 RigidBody::Ry(BigReal t) {
-	BigReal qt = 0.25*t*t;  // for approximate calculations of sin(t) and cos(t)
-	BigReal cos = (1-qt)/(1+qt);
-	BigReal sin = t/(1+qt);
-
-	return Matrix3(
-		cos,  0.0f,  sin,
-		0.0f, 1.0f, 0.0f,
-		-sin, 0.0f,  cos);
-}
-Matrix3 RigidBody::Rz(BigReal t) {
-	BigReal qt = 0.25*t*t;  // for approximate calculations of sin(t) and cos(t)
-	BigReal cos = (1-qt)/(1+qt);
-	BigReal sin = t/(1+qt);
-
-	return Matrix3(
-		cos,  -sin, 0.0f,
-		sin,   cos, 0.0f,
-		0.0f, 0.0f, 1.0f);
-}
-Matrix3 RigidBody::eulerToMatrix(const Vector3 e) {
-	// convert euler angle input to rotation matrix
-	// http://en.wikipedia.org/wiki/Rotation_formalisms_in_three_dimensions#Conversion_formulae_between_formalisms
-	return Rz(e.z) * Ry(e.y) * Rx(e.x);
-}
diff --git a/src/RigidBody.h b/src/RigidBody.h
deleted file mode 100644
index e3e81737ae1d7aee45f16e915d44f6aa413e1749..0000000000000000000000000000000000000000
--- a/src/RigidBody.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*===========================\
-| RigidBody Class for device |
-\===========================*/
-#pragma once
-
-#include "useful.h"
-#include "RandomCPU.h"		/* for BD integration; RBTODO: fix this */
-#include "GPUManager.h"
-
-#ifdef __CUDACC__
-    #define HOST __host__
-    #define DEVICE __device__
-#else
-    #define HOST 
-    #define DEVICE 
-#endif
-
-#include "RigidBodyType.h"
-#include "RigidBodyController.h"
-
-class Configuration;
-class BaseGrid;
-typedef float BigReal;					/* strip this out later */
-typedef Vector3 Force;
-
-
-class RigidBody { // host side representation of rigid bodies
-	friend class RigidBodyController;
-	/*â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“.
-	| See Appendix A of: Dullweber, Leimkuhler and McLaclan. "Symplectic        |
-	| splitting methods for rigid body molecular dynamics". J Chem Phys. (1997) |
-	`â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“*/
-		public:
-    RigidBody(String name, const Configuration& c, const RigidBodyType& t, RigidBodyController* RBC,
-	      int attached_particle_start, int attached_particle_end);
-
-    RigidBody(const RigidBody& rb);
-    // RigidBody(const RigidBody& rb) : RigidBody(rb.name, *rb.c, *rb.t) {};
-	void init();
-	/* HOST DEVICE RigidBody(RigidBodyType t); */
-	~RigidBody();
-
-	int appendNumParticleBlocks( std::vector<int>* blocks );
-
-    void update_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d);
-
-	HOST DEVICE void addForce(Force f); 
-	HOST DEVICE void addTorque(Force t);
-        HOST DEVICE void addEnergy(float e);
-	HOST DEVICE void addLangevin(Vector3 w1, Vector3 w2);
-        HOST inline void setKinetic(float e) { kinetic = e; };	
-	HOST DEVICE inline void clearForce() { force = Force(0.0f); energy = 0.f;}
-	//HOST DEVICE inline void clearForce() { force = ForceEnergy(0.f, 0.f); }
-	HOST DEVICE inline void clearTorque() { torque = Force(0.0f); }
-
-	// HOST DEVICE void integrate(Vector3& old_trans, Matrix3& old_rot, int startFinishAll);
-	// HOST DEVICE void integrate(Vector3& old_trans, Matrix3& old_rot, int startFinishAll);
-	void integrateDLM(BaseGrid* sys, int startFinishAll);
-	void integrate(BaseGrid* sys, int startFinishAll);	
-
-	// HOST DEVICE inline String getKey() const { return key; }
-	// HOST DEVICE inline String getKey() const { return t->name; }
-	HOST DEVICE inline String getKey() const { return name; }
-	
-	HOST DEVICE inline Vector3 transformBodyToLab(Vector3 v) const { return orientation*v + position; }
-	HOST DEVICE inline Vector3 getPosition() const { return position; }
-	HOST DEVICE inline Matrix3 getOrientation() const { return orientation; }
-	// HOST DEVICE inline Matrix3 getBasis() const { return orientation; }
-	HOST DEVICE inline BigReal getMass() const { return t->mass; }
-	//HOST DEVICE inline Vector3 getVelocity() const { return momentum/t->mass; }
-	HOST DEVICE inline Vector3 getVelocity() const { return momentum; }
-        HOST float getEnergy() { return energy; }
-        HOST float getKinetic(){ return kinetic; }
-	//HOST DEVICE inline Vector3 getAngularVelocity() const { 
-	//	return Vector3( angularMomentum.x / t->inertia.x,
-	//								 angularMomentum.y / t->inertia.y,
-									 //angularMomentum.z / t->inertia.z );
-	//}
-	HOST DEVICE inline Vector3 getAngularVelocity() const { 
-              return Vector3( angularMomentum.x, angularMomentum.y, angularMomentum.z);
-        }
-
-        void initializeParticleLists();
-	void updateParticleList(Vector3* pos_d, BaseGrid* sys_d);
-	void callGridParticleForceKernel(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d, ForceEnergy* forcestorques_d, const std::vector<int>& forcestorques_offset, int& fto_idx);
-	void apply_attached_particle_forces(const Vector3* force);
-	void applyGridParticleForces(BaseGrid* sys, ForceEnergy* forcestorques, const std::vector<int>& forcestorques_offset, int& fto_idx);
-	
-	bool langevin;
-	Vector3 torque; // lab frame (except in integrate())
-        
-private:
-	static GPUManager gpuman;
-
-	RigidBodyController* RBC;
-	inline Vector3 getRandomGaussVector() { 
-	    return RBC->getRandomGaussVector();
-	}
-
-	// String key;
-	String name;
-	/* static const SimParameters * simParams; */
-	Vector3 position;		  /* position of center of mass */
-	// Q = orientation.transpose(); in Dullweber et al
-	Matrix3 orientation;					/* rotation that brings RB coordinates into the lab frame */
-
-	Vector3 momentum;		 /* in lab frame */
-	Vector3 angularMomentum; // angular momentum along corresponding principal axes
-        Vector3 W1,W2;
- 
-	// Langevin
-	Vector3 langevinTransFriction; /* RBTODO: make this work with a grid */
-	Vector3 langevinRotFriction;
-	BigReal Temp;
-
-	/* Vector3 transDampingCoeff; */
-	/* Vector3 transForceCoeff; */
-	/* Vector3 rotDampingCoeff; */
-	/* Vector3 rotTorqueCoeff;     */
-
-	// integration
-	const Configuration* c;
-	const RigidBodyType* t;
-	float timestep;					
-	Vector3 force;  // lab frame
-        float energy; //potential energy
-        float kinetic; 
-	bool isFirstStep; 
-	
-	int* numParticles;		  /* particles affected by potential grids */
-	int** possible_particles_d;		 	
-	int** particles_d;		 	
-	const cudaStream_t** particleForceStreams;
-
-    int attached_particle_start, attached_particle_end;
-    
-	/*â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“.
-	| units "kcal_mol/AA * ns" "(AA/ns) * amu" |
-	`â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“*/
-	BigReal impulse_to_momentum; /* should be const, but copy constructor failed */
-
-	HOST DEVICE inline void applyRotation(const Matrix3& R);
-	HOST DEVICE inline Matrix3 Rx(BigReal t);
-	HOST DEVICE inline Matrix3 Ry(BigReal t);
-	HOST DEVICE inline Matrix3 Rz(BigReal t);
-	HOST DEVICE inline Matrix3 eulerToMatrix(const Vector3 e);
-        float Temperature();
-        void  Boltzmann(unsigned long int);
-};
-
diff --git a/src/RigidBodyController.cu b/src/RigidBodyController.cu
deleted file mode 100644
index b7d11654130e3c8a9fc085de7de1a28ea31a975e..0000000000000000000000000000000000000000
--- a/src/RigidBodyController.cu
+++ /dev/null
@@ -1,1168 +0,0 @@
-/* #include "RigidBody.h" */
-#include <iomanip>
-#include "RigidBodyController.h"
-#include "Configuration.h"
-#include "RigidBodyType.h"
-#include "RigidBodyGrid.h"
-#include "ComputeGridGrid.cuh"
-
-// #include "GPUManager.h"
-// GPUManager RigidBodyController::gpuman;
-
-#include <cuda_profiler_api.h>
-
-// #include <vector>
-#include "Debug.h"
-
-#include "RandomCPU.h"							/* RBTODO: fix this? */
-
-#ifndef gpuErrchk
-#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
-inline void gpuAssert(cudaError_t code, String file, int line, bool abort=true) {
-   if (code != cudaSuccess) {
-       fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), __FILE__, line);
-      if (abort) exit(code);
-   }
-}
-#endif
-
-// allocate and initialize an array of stream handles
-cudaStream_t *RigidBodyForcePair::stream = (cudaStream_t *) malloc(NUMSTREAMS * sizeof(cudaStream_t));
-int RigidBodyForcePair::nextStreamID = 0;        /* used during stream init */
-int RigidBodyForcePair::lastRbGridID = -1; /* used to schedule kernel interaction */
-RigidBodyForcePair* RigidBodyForcePair::lastRbForcePair = NULL;
-/* #include <cuda.h> */
-/* #include <cuda_runtime.h> */
-/* #include <curand_kernel.h> */
-
-RigidBodyController::RigidBodyController(const Configuration& c, const char* prefix, unsigned long int seed, int repID) : conf(c)
-{
-        char str[8];
-        sprintf(str, "%d", repID);
-        strcpy(outArg, prefix);
-        strcat(outArg, ".");
-        strcat(outArg, str);
-
-	gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */
-	construct_grids();
-	for (int i = 0; i < conf.numRigidTypes; i++)
-	    conf.rigidBody[i].initializeParticleLists();
-
-	int numRB = 0;
-	// grow list of rbs
-
-	int attached_particle_offset = 0;
-	for (int i = 0; i < conf.numRigidTypes; i++) {			
-		numRB += conf.rigidBody[i].num;
-		int attached_particle_in_type = conf.rigidBody[i].num_attached_particles();
-		std::vector<RigidBody> tmp;
-		// RBTODO: change conf.rigidBody to conf.rigidBodyType
-		const int jmax = conf.rigidBody[i].num;
-		for (int j = 0; j < jmax; j++) {
-			String name = conf.rigidBody[i].name;
-			if (jmax > 1) {
-			    char stmp[128];
-			    snprintf(stmp, 128, "#%d", j);
-			    name.add( stmp );
-			}
-			RigidBody r(name, conf, conf.rigidBody[i], this,
-				    attached_particle_offset, attached_particle_offset+attached_particle_in_type);
-			attached_particle_offset += attached_particle_in_type;
-
-			int nb = r.appendNumParticleBlocks( &particleForceNumBlocks );
-			tmp.push_back( r );
-		}
-		rigidBodyByType.push_back(tmp);
-	}
-	attached_particle_forces = new Vector3[attached_particle_offset];
-
-	totalParticleForceNumBlocks = 0;
-	for (int i=0; i < particleForceNumBlocks.size(); ++i) {
-	    particleForce_offset.push_back(2*totalParticleForceNumBlocks);
-	    totalParticleForceNumBlocks += particleForceNumBlocks[i];
-	}
-
-	gpuErrchk(cudaMallocHost(&(particleForces), sizeof(ForceEnergy) * 2*totalParticleForceNumBlocks))
-	gpuErrchk(cudaMalloc(&(particleForces_d), sizeof(ForceEnergy) * 2*totalParticleForceNumBlocks))
-
-	if (conf.restartRBCoordinates.length() > 0)
-	    load_restart_coordinates(conf.restartRBCoordinates.val());
-	else if (conf.inputRBCoordinates.length() > 0)
-	    loadRBCoordinates(conf.inputRBCoordinates.val());
-	
-	random = new RandomCPU(conf.seed + repID + 1); /* +1 to avoid using same seed as RandomCUDA */
-	
-	
-	initializeForcePairs();	// Must run after construct_grids()
-	gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */
-}
-
-RigidBodyController::~RigidBodyController() {
-	for (int i = 0; i < rigidBodyByType.size(); i++)
-		rigidBodyByType[i].clear();
-	rigidBodyByType.clear();
-
-	delete [] attached_particle_forces;
-	delete random;
-}
-
-struct GridKey {
-    String name;
-    float scale;
-    GridKey(const String& name, const float& scale) :
-	name(name), scale(scale) { }
-
-    bool operator==(const GridKey& o) const { return name == o.name && scale == o.scale; }
-};
-
-void RigidBodyController::construct_grids() {
-    // typedef std::tuple<String, float> GridKey;
-    
-    // Build dictionary to reuse grids across all types, first finding scale factors
-    std::vector<GridKey> all_files;
-    std::vector<GridKey>::iterator itr;
-
-    for (int t_idx = 0; t_idx < conf.numRigidTypes; ++t_idx)
-    {
-	// TODO: don't duplicate the code below three times
-	RigidBodyType& t = conf.rigidBody[t_idx];
-	t.RBC = this;
-
-	t.numPotGrids = t.potentialGridFiles.size();
-	t.numDenGrids = t.densityGridFiles.size();
-	t.numPmfs = t.pmfFiles.size();
-
-	t.potential_grid_idx = new size_t[t.numPotGrids]; // TODO; don't allocate here
-	t.density_grid_idx = new size_t[t.numDenGrids]; // TODO; don't allocate here
-	t.pmf_grid_idx = new size_t[t.numPmfs]; // TODO; don't allocate here
-	for (size_t i = 0; i < t.potentialGridFiles.size(); ++i)
-	{
-
-	    String& filename = t.potentialGridFiles[i];
-	    String& name = t.potentialGridKeys[i];
-	    float scale = 1.0f;
-	    for (size_t j = 0; j < t.potentialGridScaleKeys.size(); ++j)
-	    {
-		if (name == t.potentialGridScaleKeys[j])
-		    scale = t.potentialGridScale[j];
-	    }
-
-	    GridKey key = GridKey(filename, scale);
-	    size_t key_idx;
-	    // Find key if it exists
-	    itr = std::find(all_files.begin(), all_files.end(), key);
-	    if (itr == all_files.end())
-	    {
-		key_idx = all_files.size();
-		all_files.push_back( key );
-	    }
-	    else 
-	    {
-		key_idx = std::distance(all_files.begin(), itr);
-	    }
-
-	    // Assign index into all_files to RigidBodyType
-	    t.potential_grid_idx[i] = key_idx;
-
-	}
-
-	// Density
-	for (size_t i = 0; i < t.densityGridFiles.size(); ++i)
-	{
-
-	    String& filename = t.densityGridFiles[i];
-	    String& name = t.densityGridKeys[i];
-	    float scale = 1.0f;
-	    for (size_t j = 0; j < t.densityGridScaleKeys.size(); ++j)
-	    {
-		if (name == t.densityGridScaleKeys[j])
-		    scale = t.densityGridScale[j];
-	    }
-
-	    GridKey key = GridKey(filename, scale);
-	    size_t key_idx;
-	    // Find key if it exists
-	    itr = std::find(all_files.begin(), all_files.end(), key);
-	    if (itr == all_files.end())
-	    {
-		key_idx = all_files.size();
-		all_files.push_back( key );
-	    }
-	    else 
-	    {
-		key_idx = std::distance(all_files.begin(), itr);
-	    }
-
-	    // Assign index into all_files to RigidBodyType
-	    t.density_grid_idx[i] = key_idx;
-	}
-
-	//PMF	
-	for (size_t i = 0; i < t.pmfFiles.size(); ++i)
-	{
-
-	    String& filename = t.pmfFiles[i];
-	    String& name = t.pmfKeys[i];
-	    float scale = 1.0f;
-	    for (size_t j = 0; j < t.pmfScaleKeys.size(); ++j)
-	    {
-		if (name == t.pmfScaleKeys[j])
-		    scale = t.pmfScale[j];
-	    }
-
-	    GridKey key = GridKey(filename, scale);
-	    size_t key_idx;
-	    // Find key if it exists
-	    itr = std::find(all_files.begin(), all_files.end(), key);
-	    if (itr == all_files.end())
-	    {
-		key_idx = all_files.size();
-		all_files.push_back( key );
-	    }
-	    else 
-	    {
-		key_idx = std::distance(all_files.begin(), itr);
-	    }
-
-	    // Assign index into all_files to RigidBodyType
-	    t.pmf_grid_idx[i] = key_idx;
-	}
-	
-	// TODO: have RBType manage this allocation
-	gpuErrchk(cudaMalloc(&t.potential_grid_idx_d, sizeof(size_t)*t.numPotGrids ));
-	gpuErrchk(cudaMalloc(&t.density_grid_idx_d, sizeof(size_t)*t.numDenGrids ));
-	gpuErrchk(cudaMalloc(&t.pmf_grid_idx_d, sizeof(size_t)*t.numPmfs ));
-
-	gpuErrchk(cudaMemcpy(t.potential_grid_idx_d, t.potential_grid_idx, sizeof(size_t)*t.numPotGrids, cudaMemcpyHostToDevice ));
-	gpuErrchk(cudaMemcpy(t.density_grid_idx_d, t.density_grid_idx, sizeof(size_t)*t.numDenGrids, cudaMemcpyHostToDevice ));
-	gpuErrchk(cudaMemcpy(t.pmf_grid_idx_d, t.pmf_grid_idx, sizeof(size_t)*t.numPmfs, cudaMemcpyHostToDevice ));
-
-    }
-    
-    // Store grids 
-    grids = new BaseGrid[all_files.size()];
-    gpuErrchk(cudaMalloc( &grids_d, sizeof(RigidBodyGrid)*all_files.size() ));
-    
-    // Read and scale grids, then copy to GPU
-    for (size_t i = 0; i < all_files.size(); ++i)
-    {
-	GridKey& key = all_files[i];
-	BaseGrid& g0 = grids[i];
-	g0 = BaseGrid(key.name);
-	g0.scale(key.scale);
-
-	RigidBodyGrid g = RigidBodyGrid();
-	g.nx = g0.nx;
-	g.ny = g0.ny;
-	g.nz = g0.nz;
-	g.size = g0.size;
-	g.val = g0.val;
-	   
-	// Copy to GPU, starting with grid data
-	float* tmp;
-	size_t sz = sizeof(float) * g.getSize();
-	gpuErrchk(cudaMalloc( &tmp, sz)); 
-	gpuErrchk(cudaMemcpy( tmp, g.val, sz, cudaMemcpyHostToDevice));
-
-	// Set grid pointer to device 
-	g.val = tmp;
-
-	// Copy grid
-	sz = sizeof(RigidBodyGrid);
-	// gpuErrchk(cudaMalloc(&ptr_d, sz));
-	gpuErrchk(cudaMemcpy(&grids_d[i], &g, sz, cudaMemcpyHostToDevice));
-
-	// Restore pointer
-	g.val = NULL;
-	tmp = NULL;
-    }
-}	
-
-void RigidBodyController::destruct_grids() {
-    // TODO
-
-}
-
-bool RigidBodyController::loadRBCoordinates(const char* fileName) {
-	char line[STRLEN];
-	FILE* inp = fopen(fileName, "r");
-
-	if (inp == NULL) {
-		printf("GrandBrownTown: load RB coordinates: File '%s' does not exist\n", fileName);
-		exit(-1);	   
-	}
-
-	int imax = rigidBodyByType.size();
-	int i = 0;
-	int jmax = rigidBodyByType[i].size();
-	int j = 0;
-
-	while (fgets(line, STRLEN, inp) != NULL) {
-		// Ignore comments.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-
-		String s(line);
-		int numTokens = s.tokenCount();
-		if (numTokens < 3+9) {
-			printf("GrandBrownTown: load RB coordinates: Invalid coordinate file line: %s\n", line);
-			fclose(inp);	
-			exit(-1);
-		}
-                if(conf.RigidBodyDynamicType == String("Langevin") && numTokens < 18)
-                {
-                    std::cout << "Warning the initial momentum set by random number" << std::endl;
-                }
-
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-		if (tokenList == NULL) {
-			printf("GrandBrownTown: load RB coordinates: Invalid coordinate file line: %s\n", line);
-			fclose(inp);
-			exit(-1);
-		}
-
-		RigidBody& rb = rigidBodyByType[i][j];
-		rb.position = Vector3(
-			(float) strtod(tokenList[0],NULL), (float) strtod(tokenList[1],NULL), (float) strtod(tokenList[2],NULL));
-		rb.orientation = Matrix3(
-			(float) strtod(tokenList[3],NULL), (float) strtod(tokenList[4],NULL), (float) strtod(tokenList[5],NULL),
-			(float) strtod(tokenList[6],NULL), (float) strtod(tokenList[7],NULL), (float) strtod(tokenList[8],NULL),
-			(float) strtod(tokenList[9],NULL), (float) strtod(tokenList[10],NULL), (float) strtod(tokenList[11],NULL));
-
-	        if(conf.RigidBodyDynamicType == String("Langevin") && numTokens >= 18)
-                {
-                    rb.momentum = Vector3((float)strtod(tokenList[12],NULL), (float) strtod(tokenList[13],NULL), (float) strtod(tokenList[14],NULL));
-                    rb.angularMomentum = Vector3((float)strtod(tokenList[15],NULL), (float) strtod(tokenList[16],NULL), (float) strtod(tokenList[17],NULL));
-                }
-
-	        if(conf.RigidBodyDynamicType == String("Langevin") && numTokens < 18)
-		    rb.Boltzmann(conf.seed);
-               
-		delete[] tokenList;
-
-		j++;
-		if (j == jmax) {
-			i++;
-			if (i == imax)
-				break;
-			j=0;
-			jmax = rigidBodyByType[i].size();
-		}
-	}
-	fclose(inp);
-	return true;
-}
-bool RigidBodyController::load_restart_coordinates(const char* filename) {
-	char line[STRLEN];
-	FILE* inp = fopen(filename, "r");
-
-	if (inp == NULL) {
-		printf("ARBD: load RB coordinates: File '%s' does not exist\n", filename);
-		exit(-1);	   
-	}
-
-	int imax = rigidBodyByType.size();
-	int i = 0;
-	int jmax = rigidBodyByType[i].size();
-	int j = 0;
-
-	while (fgets(line, STRLEN, inp) != NULL) {
-		// Ignore comments.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-
-		String s(line);
-		int numTokens = s.tokenCount();
-		if (numTokens < 2+3+9) {
-			printf("ARBD: invalid RB restart coordinate line: %s\n", line);
-			fclose(inp);	
-			exit(-1);
-		}
-                if(conf.RigidBodyDynamicType == String("Langevin") && numTokens < 20)
-                {
-                    std::cout << "Warning the initial momentum set by random number" << std::endl;
-                }
-
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-		if (tokenList == NULL) {
-			printf("ARBD: invalid RB restart coordinate line: %s\n", line);
-			fclose(inp);
-			exit(-1);
-		}
-
-		RigidBody& rb = rigidBodyByType[i][j];
-		printf("Assinging positions %d %d %f\n",i,j, (float) strtod(tokenList[2],NULL));
-		rb.position = Vector3(
-			(float) strtod(tokenList[2],NULL), (float) strtod(tokenList[3],NULL), (float) strtod(tokenList[4],NULL));
-		rb.orientation = Matrix3(
-			(float) strtod(tokenList[5],NULL), (float) strtod(tokenList[6],NULL), (float) strtod(tokenList[7],NULL),
-			(float) strtod(tokenList[8],NULL), (float) strtod(tokenList[9],NULL), (float) strtod(tokenList[10],NULL),
-			(float) strtod(tokenList[11],NULL), (float) strtod(tokenList[12],NULL), (float) strtod(tokenList[13],NULL));
-
-	        if(conf.RigidBodyDynamicType == String("Langevin") && numTokens >= 20)
-                {
-                    rb.momentum = Vector3((float)strtod(tokenList[14],NULL), (float) strtod(tokenList[15],NULL), (float) strtod(tokenList[16],NULL));
-                    rb.angularMomentum = Vector3((float)strtod(tokenList[17],NULL), (float) strtod(tokenList[18],NULL), (float) strtod(tokenList[19],NULL));
-                }
-
-	        if(conf.RigidBodyDynamicType == String("Langevin") && numTokens < 20)
-		    rb.Boltzmann(conf.seed);
-
-		delete[] tokenList;
-
-		j++;
-		if (j == jmax) {
-			i++;
-			if (i == imax)
-				break;
-			j=0;
-			jmax = rigidBodyByType[i].size();
-		}
-	}
-	fclose(inp);
-	if (i < imax || j < jmax) {
-	    printf("ARBD: RB restart file did not contain the correct number of lines: %s\n", filename);
-	    exit(-1);
-	}
-	return true;
-}
-
-		
-
-void RigidBodyController::initializeForcePairs() {
-	// Loop over all pairs of rigid body types
-	//   the references here make the code more readable, but they may incur a performance loss
-	RigidBodyForcePair::createStreams();
-	// printf("Initializing force pairs\n");
-	for (int ti = 0; ti < conf.numRigidTypes; ti++) {
-		RigidBodyType& t1 = conf.rigidBody[ti];
-		for (int tj = ti; tj < conf.numRigidTypes; tj++) {
-			RigidBodyType& t2 = conf.rigidBody[tj];
-
-
-			const std::vector<String>& keys1 = t1.densityGridKeys; 
-			const std::vector<String>& keys2 = t2.potentialGridKeys;
-
-			// printf("  Working on type pair ");
-			// t1.name.printInline(); printf(":"); t2.name.print();
-			
-			// Loop over all pairs of grid keys (e.g. "Elec")
-			std::vector<int> gridKeyId1;
-			std::vector<int> gridKeyId2;
-			
-			// printf("  Grid keys %d:%d\n",keys1.size(),keys2.size());
-
-			bool paired = false;
-			for(int k1 = 0; k1 < keys1.size(); k1++) {
-				for(int k2 = 0; k2 < keys2.size(); k2++) {
-				    // printf("    checking grid keys ");
-				    //	keys1[k1].printInline(); printf(":"); keys2[k2].print();
-					
-					if ( keys1[k1] == keys2[k2] ) {
-						gridKeyId1.push_back( t1.density_grid_idx[k1] );
-						gridKeyId2.push_back( t2.potential_grid_idx[k2] );
-						paired = true;
-					}
-				}
-			}
-			
-			if (paired) {
-				// found matching keys => calculate force between all grid pairs
-				std::vector<RigidBody>& rbs1 = rigidBodyByType[ti];
-				std::vector<RigidBody>& rbs2 = rigidBodyByType[tj];
-
-				// Loop over rigid bodies of these types
-				for (int i = 0; i < rbs1.size(); i++) {
-					for (int j = (ti==tj ? i+1 : 0); j < rbs2.size(); j++) {
-						RigidBody* rb1 = &(rbs1[i]);
-						RigidBody* rb2 = &(rbs2[j]);
-
-						// printf("    pushing RB force pair for %d:%d\n",i,j);
-						RigidBodyForcePair fp = RigidBodyForcePair(&(t1),&(t2),rb1,rb2,gridKeyId1,gridKeyId2, false, conf.rigidBodyGridGridPeriod );
-						gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */
-						forcePairs.push_back( fp ); 
-						// printf("    done pushing RB force pair for %d:%d\n",i,j);
-					}
-				}
-			}
-		}
-	}
-
-	// add Pmfs (not a true pairwise RB interaction; hacky implementation)
-	for (int ti = 0; ti < conf.numRigidTypes; ti++) {
-		RigidBodyType& t1 = conf.rigidBody[ti];
-
-		const std::vector<String>& keys1 = t1.densityGridKeys; 
-		const std::vector<String>& keys2 = t1.pmfKeys;
-		std::vector<int> gridKeyId1;
-		std::vector<int> gridKeyId2;
-		
-		// Loop over all pairs of grid keys (e.g. "Elec")
-		bool paired = false;
-		for(int k1 = 0; k1 < keys1.size(); k1++) {
-			for(int k2 = 0; k2 < keys2.size(); k2++) {
-				if ( keys1[k1] == keys2[k2] ) {
-				    gridKeyId1.push_back( t1.density_grid_idx[k1] );
-				    gridKeyId2.push_back( t1.pmf_grid_idx[k2] );
-				    paired = true;
-				}
-			}
-		}	
-		if (paired) {
-			// found matching keys => calculate force between all grid pairs
-			std::vector<RigidBody>& rbs1 = rigidBodyByType[ti];
-			
-			// Loop over rigid bodies of these types
-			for (int i = 0; i < rbs1.size(); i++) {
-					RigidBody* rb1 = &(rbs1[i]);
-					RigidBodyForcePair fp = RigidBodyForcePair(&(t1),&(t1),rb1,rb1,gridKeyId1,gridKeyId2, true, conf.rigidBodyGridGridPeriod);
-					gpuErrchk(cudaDeviceSynchronize()); /* RBTODO: this should be extraneous */
-					forcePairs.push_back( fp ); 
-			}
-		}
-	}
-
-	// Initialize device data for RB force pairs after std::vector is done growing
-	for (int i = 0; i < forcePairs.size(); i++)
-		forcePairs[i].initialize();
-			
-}
-
-void RigidBodyController::update_attached_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d, BaseGrid* sys_d, int num, int num_rb_attached_particles, int numReplicas) {
-    for (int i = 0; i < rigidBodyByType.size(); i++) {
-	for (int j = 0; j < rigidBodyByType[i].size(); j++) {
-	    rigidBodyByType[i][j].update_particle_positions(pos_d, force_d, energy_d);
-	}
-    }
-    gpuErrchk(cudaMemset((void*) force_d, 0, num_rb_attached_particles*sizeof(Vector3)));
-    gpuErrchk(cudaMemset((void*) energy_d, 0, num_rb_attached_particles*sizeof(float)));
-}
-
-
-void RigidBodyController::updateParticleLists(Vector3* pos_d, BaseGrid* sys_d) {
-	for (int i = 0; i < rigidBodyByType.size(); i++) {
-		for (int j = 0; j < rigidBodyByType[i].size(); j++) {
-			rigidBodyByType[i][j].updateParticleList(pos_d, sys_d);
-		}
-	}
-}
-
-void RigidBodyController::clearForceAndTorque()
-{
-    // clear old forces
-    for (int i = 0; i < rigidBodyByType.size(); i++) 
-    {
-        for (int j = 0; j < rigidBodyByType[i].size(); j++) 
-        {
-            RigidBody& rb = rigidBodyByType[i][j];
-            rb.clearForce();
-            rb.clearTorque();
-        }
-    }
-}
-
-void RigidBodyController::updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d, int num, int num_rb_attached_particles)
-{
-	//if (s <= 1)
-		//gpuErrchk( cudaProfilerStart() );
-	
-	// Gridâ€“particle forces	
-	int pfo_idx = 0;
-	for (int i = 0; i < rigidBodyByType.size(); i++) {
-		for (int j = 0; j < rigidBodyByType[i].size(); j++) {
-			RigidBody& rb = rigidBodyByType[i][j];
-			rb.callGridParticleForceKernel( pos_d, force_d, s, energy, get_energy, scheme, sys, sys_d, particleForces_d, particleForce_offset, pfo_idx );
-		}
-	}
-
-	// RBTODO: launch kernels ahead of time and sync using event and memcpyAsync 
-	gpuErrchk( cudaDeviceSynchronize() );
-	cudaMemcpy(particleForces, particleForces_d, sizeof(ForceEnergy)*2*totalParticleForceNumBlocks, cudaMemcpyDeviceToHost);
-	cudaMemcpyAsync(attached_particle_forces, &force_d[num], sizeof(Vector3)*(num_rb_attached_particles), cudaMemcpyDeviceToHost);
-
-	pfo_idx=0;
-	for (int i = 0; i < rigidBodyByType.size(); i++) {
-		for (int j = 0; j < rigidBodyByType[i].size(); j++) {
-			RigidBody& rb = rigidBodyByType[i][j];
-			rb.applyGridParticleForces(sys, particleForces, particleForce_offset, pfo_idx);
-;
-		}
-	}
-
-	{
-	    gpuErrchk( cudaDeviceSynchronize() );
-	    for (auto &rbv: rigidBodyByType) {
-		for (auto &rb: rbv) {
-		    rb.apply_attached_particle_forces( attached_particle_forces );
-		}
-	    }
-	}
-
-
-	// Gridâ€“Grid forces
-	if ( ((s % conf.rigidBodyGridGridPeriod) == 0 || s == 1 ) && forcePairs.size() > 0) {
-		for (int i=0; i < forcePairs.size(); i++) {
-			// TODO: performance: make this check occur less frequently
-		    if (forcePairs[i].isOverlapping(sys)) {
-				forcePairs[i].callGridForceKernel(i, s, scheme, sys_d);
-		    }
-		}
-		
-		// each kernel call is followed by async memcpy for previous; now get last
-		RigidBodyForcePair* fp = RigidBodyForcePair::lastRbForcePair;
-                if(RigidBodyForcePair::lastRbGridID >= 0)
-                {
-		    fp->retrieveForcesForGrid( fp->lastRbGridID );
-		    fp->lastRbGridID = -1;
-                }
-		// stream sync was slower than device sync
-		/* for (int i = 0; i < NUMSTREAMS; i++) { */
-		/* 	const cudaStream_t &s = RigidBodyForcePair::stream[i]; */
-		/* 	gpuErrchk(cudaStreamSynchronize( s ));  */
-		/* } */
-		gpuErrchk(cudaDeviceSynchronize());
-		for (int i=0; i < forcePairs.size(); i++)
-			if (forcePairs[i].isOverlapping(sys))
-				forcePairs[i].processGPUForces(sys);
-	}
-}
-
-void RigidBodyController::SetRandomTorques()
-{
-    for (int i = 0; i < rigidBodyByType.size(); i++)
-    {
-        for (int j = 0; j < rigidBodyByType[i].size(); j++)
-        {
-            RigidBody& rb = rigidBodyByType[i][j];
-            rb.W1 = random->gaussian_vector();
-            rb.W2 = random->gaussian_vector();
-        }
-    }           
-}
-
-void RigidBodyController::AddLangevin()
-{
-    for (int i = 0; i < rigidBodyByType.size(); i++)
-    {
-        for (int j = 0; j < rigidBodyByType[i].size(); j++)
-        {
-            RigidBody& rb = rigidBodyByType[i][j];
-
-            //printf("%f %f %f\n",rb.W1.x,rb.W1.y,rb.W1.z);
-            //printf("%f %f %f\n",rb.W2.x,rb.W2.y,rb.W2.z);
-
-            rb.addLangevin(rb.W1,rb.W2);
-        }
-    }
-}
-
-void RigidBodyController::integrateDLM(BaseGrid* sys, int step) 
-{
-    // tell RBs to integrate
-    for (int i = 0; i < rigidBodyByType.size(); i++) 
-    {
-        for (int j = 0; j < rigidBodyByType[i].size(); j++) 
-        {
-            RigidBody& rb = rigidBodyByType[i][j];
-            rb.integrateDLM(sys, step);
-        }
-    }
-}
-
-
-//Chris original part for Brownian motion
-void RigidBodyController::integrate(BaseGrid* sys, int step) 
-{
- 	// tell RBs to integrate
-	if ( step % conf.outputPeriod == 0 ) 
-        {       /* PRINT & INTEGRATE */
-		if (step == 0) 
-                {	// first step so only start this cycle
-			print(step);
-			for (int i = 0; i < rigidBodyByType.size(); i++)
-                        {
-				for (int j = 0; j < rigidBodyByType[i].size(); j++)
-                                {
-					RigidBody& rb = rigidBodyByType[i][j];
-					rb.integrate(sys, 0);	
-				}
-			}
-		} 
-                else 
-                {       // finish last cycle
-			for (int i = 0; i < rigidBodyByType.size(); i++)
-                        {
-				for (int j = 0; j < rigidBodyByType[i].size(); j++)
-                                {
-					RigidBody& rb = rigidBodyByType[i][j];
-					rb.integrate(sys, 1);	
-				}
-			}
-			//print(step);
-
-			// start this cycle
-			/*for (int i = 0; i < rigidBodyByType.size(); i++) {
-				for (int j = 0; j < rigidBodyByType[i].size(); j++) {
-					RigidBody& rb = rigidBodyByType[i][j];
-					rb.integrate(sys, 0);	
-				}
-			}*/
-		}
-	} 
-        else 
-        {	/* INTEGRATE ONLY */
-		if (step == 0) 
-                {		// first step so only start this cycle
-			print(step);
-			for (int i = 0; i < rigidBodyByType.size(); i++)
-                        {
-				for (int j = 0; j < rigidBodyByType[i].size(); j++)
-                                {
-					RigidBody& rb = rigidBodyByType[i][j];
-					rb.integrate(sys, 0);	
-				}
-			}
-		} 
-                else 
-                {       // integrate end of last step and start of this one
-			for (int i = 0; i < rigidBodyByType.size(); i++) 
-                        {
-				for (int j = 0; j < rigidBodyByType[i].size(); j++)
-                                {
-					RigidBody& rb = rigidBodyByType[i][j];
-					rb.integrate(sys, 2);	
-				}
-			}
-		}
-	}
-}
-
-void RigidBodyController::KineticEnergy()
-{
-    //float e = 0.;
-    //int num = 0;
-    for (int i = 0; i < rigidBodyByType.size(); i++) 
-    {
-        for (int j = 0; j < rigidBodyByType[i].size(); j++) 
-        {
-            RigidBody& rb = rigidBodyByType[i][j];
-            rb.setKinetic(rb.Temperature());
-            //rb.kinetic=tmp;
-            //e += tmp;
-            //num += 1;
-        }
-    }
-    //return e;
-    /*if(num > 0)
-        return e / num;
-    else
-        return 0.;*/
-}
-
-void RigidBodyForcePair::createStreams() {
-	for (int i = 0; i < NUMSTREAMS; i++)
-		gpuErrchk( cudaStreamCreate( &(stream[i]) ) );
-		// gpuErrchk( cudaStreamCreateWithFlags( &(stream[i]) , cudaStreamNonBlocking ) );
-}
-bool RigidBodyForcePair::isOverlapping(BaseGrid* sys) const {
-	if (isPmf) return true;
-	// float pairlistDist = 2.0f; /* TODO: get from conf */
-	float rbDist = sys->wrapDiff((rb1->getPosition() - rb2->getPosition())).length();
-	for (int i = 0; i < gridKeyId1.size(); ++i) {
-		const int k1 = gridKeyId1[i];
-		const int k2 = gridKeyId2[i];
-		float d1 = type1->RBC->grids[k1].getRadius() + type1->RBC->grids[k1].getCenter().length();
-		float d2 = type2->RBC->grids[k2].getRadius() + type2->RBC->grids[k2].getCenter().length();
-		if (rbDist < d1+d2)
-			return true;
-	}
-	return false;
-}
-Vector3 RigidBodyForcePair::getOrigin1(const int i) {
-	const int k1 = gridKeyId1[i];
-	return rb1->transformBodyToLab( type1->RBC->grids[k1].getOrigin() );
-}
-Vector3 RigidBodyForcePair::getOrigin2(const int i) {
-	const int k2 = gridKeyId2[i];
-	Vector3 o = type2->RBC->grids[k2].getOrigin();
-	if (!isPmf)
-	    return rb2->transformBodyToLab( o );
-	else
-	    return o;
-}		
-Vector3 RigidBodyForcePair::getCenter2(const int i) {
-    Vector3 c;
-    if (!isPmf)
-	c = rb2->getPosition();
-    else {
-	const int k2 = gridKeyId2[i];
-	Vector3 o = type2->RBC->grids[k2].getCenter();
-    }
-    return c;
-}
-Matrix3 RigidBodyForcePair::getBasis1(const int i) {
-	const int k1 = gridKeyId1[i];
-	return rb1->getOrientation()*type1->RBC->grids[k1].getBasis();
-}
-Matrix3 RigidBodyForcePair::getBasis2(const int i) {
-	const int k2 = gridKeyId2[i];
-	Matrix3 b = type2->RBC->grids[k2].getBasis();
-	if (!isPmf)
-	    return rb2->getOrientation()*b;
-	else
-	    return b;
-}
-
-// RBTODO: bundle several rigidbodypair evaluations in single kernel call
-void RigidBodyForcePair::callGridForceKernel(int pairId, int s, int scheme, BaseGrid* sys_d) 
-{
-	// get the force/torque between a pair of rigid bodies
-	/* printf("  Updating rbPair forces\n"); */
-	const int numGrids = gridKeyId1.size();
-
-	/* if (s%10 != 0) */
-	/* 	pairId = -1000; */
-
-	// RBTODO: precompute certain common transformations and pass in kernel call
-	for (int i = 0; i < numGrids; i++) {
-		const int nb = numBlocks[i];
-		const int k1 = gridKeyId1[i];
-		const int k2 = gridKeyId2[i];
-		const cudaStream_t &s = gpuman.gpus[0].get_stream(streamID[i]);
-
-		/*
-			ijk: index of grid value
-			r: postion of point ijk in real space
-			B: grid Basis
-			o: grid origin
-			R: rigid body orientation
-			c: rigid body center
-
-			B': R.B 
-			c': R.o + c
-
-  		/.â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“.
-	  	| r = R.(B.ijk+o)+c |
-	  	| r = B'.ijk + c'   |
-	  	`â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“./
-		*/
-		Matrix3 B1 = getBasis1(i);
-		// Vector3 c = getOrigin1(i) - getOrigin2(i);
-		Vector3 center_u = getCenter2(i);
-		Matrix3 B2 = getBasis2(i).inverse();
-                
-		// RBTODO: get energy
-		if (!isPmf) {								/* pair of RBs */
-			computeGridGridForce<<< nb, NUMTHREADS, 2*sizeof(ForceEnergy)*NUMTHREADS, s>>>
-				(&type1->RBC->grids_d[k1], &type2->RBC->grids_d[k2],
-				 B1, B2, getOrigin1(i) - center_u, center_u - getOrigin2(i),
-				 forces_d[i], torques_d[i], scheme, sys_d);
-		} else {										/* RB with a PMF */
-			computeGridGridForce<<< nb, NUMTHREADS, 2*sizeof(ForceEnergy)*NUMTHREADS, s>>>
-				(&type1->RBC->grids_d[k1], &type2->RBC->grids_d[k2],
-				 B1, B2, getOrigin1(i) - center_u, center_u-getOrigin2(i),
-				 forces_d[i], torques_d[i], scheme, sys_d);
-		}
-		// retrieveForcesForGrid(i); // this is slower than approach below, unsure why
-		
-		if (lastRbGridID >= 0)
-			lastRbForcePair->retrieveForcesForGrid(lastRbGridID);
-		lastRbForcePair = this;
-		lastRbGridID = i;
-	}
-}
-
-void RigidBodyForcePair::retrieveForcesForGrid(const int i) {
-	// i: grid ID (less than numGrids)
-        const cudaStream_t &s = gpuman.gpus[0].get_stream(streamID[i]);
-	// const int nb = numBlocks[i];
-	const int nb = 1;
-
-	gpuErrchk(cudaMemcpyAsync(forces[i], forces_d[i], sizeof(ForceEnergy)*nb, cudaMemcpyDeviceToHost, s));
-	gpuErrchk(cudaMemcpyAsync(torques[i], torques_d[i], sizeof(Vector3)*nb, cudaMemcpyDeviceToHost, s));
-}
-void RigidBodyForcePair::processGPUForces(BaseGrid* sys) {
-	
-	const int numGrids = gridKeyId1.size();
-	Vector3 f = Vector3(0.f);
-	Vector3 t = Vector3(0.f);
-        float energy = 0.f;
-	for (int i = 0; i < numGrids; i++) {
-	    // const int nb = numBlocks[i];
-	    const int nb = 1;
-
-		//Vector3 tmpF = Vector3(0.0f);
-		ForceEnergy tmpF = ForceEnergy(0.f, 0.f);
-		Vector3 tmpT = Vector3(0.f);
-			
-		for (int j = 0; j < nb; j++) {
-			tmpF = tmpF + forces[i][j];
-			tmpT = tmpT + torques[i][j];
-		}
-		
-		// tmpT is the torque calculated about the origin of density grid
-		//   so here we transform torque to be about rb1
-		Vector3 o1 = getOrigin1(i);
-		tmpT = tmpT - (rb1->getPosition() - o1).cross( tmpF.f );
-
-		// clear forces on GPU
-		gpuErrchk(cudaMemset((void*)(forces_d[i]),0,nb*sizeof(ForceEnergy)));
-		gpuErrchk(cudaMemset((void*)(torques_d[i]),0,nb*sizeof(Vector3)));
-
-		// sum energies,forces and torques
-                energy += tmpF.e;
-		f = f + tmpF.f;
-		t = t + tmpT;
-	}
-
-	f *= updatePeriod;
-	t *= updatePeriod;
-	
-	rb1->addForce( f );
-	rb1->addTorque( t );
-        if(isPmf)
-            rb1->addEnergy( energy );
-	//if (!isPmf) {
-	else 
-        {
-		const Vector3 t2 = -t + sys->wrapDiff(rb2->getPosition()-rb1->getPosition()).cross( f );
-		rb2->addForce( -f );
-		rb2->addTorque( t2 );
-                rb1->addEnergy(energy*.5);
-                rb2->addEnergy(energy*.5);
-	}
-        
-	// printf("force: %s\n", f.toString().val());
-	// printf("torque: %s\n", t.toString().val());
-}
-
-void RigidBodyController::print(int step) {
-	// modeled after outputExtendedData() in Controller.C
-    if (conf.numRigidTypes <= 0) return;
-	if ( step >= 0 ) {
-		// Write RIGID BODY trajectory file
-		if ( step % conf.outputPeriod == 0 ) {
-			if ( ! trajFile.rdbuf()->is_open() ) {
-	      // open file
-			    // printf("OPENING RIGID BODY TRAJECTORY FILE\n");
-				// RBTODO: backup_file(simParams->rigidBodyTrajectoryFile);
-
-				char fname[140];
-				strcpy(fname,outArg);
-				strcat(fname, ".rb-traj");
-	      trajFile.open(fname);
-				
-	      while (!trajFile) {
-					/* if ( errno == EINTR ) {
-						printf("Warning: Interrupted system call opening RIGIDBODY trajectory file, retrying.\n");
-						trajFile.clear();
-						trajFile.open(simParams->rigidBodyTrajectoryFile);
-						continue;
-					}
-					*/ 
-					//char err_msg[257];
-					printf("Error opening RigidBody trajectory file %s",fname);
-					exit(1);
-	      }
-	      trajFile << "# RigidBody trajectory file" << std::endl;
-	      printLegend(trajFile);
-			}
-			// printf("WRITING RIGID BODY COORDINATES AT STEP %d\n",step);
-			printData(step,trajFile);
-			trajFile.flush();    
-		}
-                if(step % conf.outputEnergyPeriod == 0)
-                {
-                
-                }
-    
-		// Write restart File
-		/* if ( simParams->restartFrequency && */
-		/* 		 ((step % simParams->restartFrequency) == 0) && */
-		/* 		 (step != simParams->firstTimestep) )	{ */
-		if ( step % conf.outputPeriod == 0 && step != 0 ){
-		    // printf("RIGID BODY: WRITING RESTART FILE AT STEP %d\n", step);
-			char fname[140];
-			strcpy(fname,outArg);
-			strcat(fname, ".rigid");
-			// RBTODO: NAMD_backup_file(fname,".old"); /*  */
-			std::ofstream restartFile(fname);
-			while (!restartFile) {
-				/* RBTODO 
-	      if ( errno == EINTR ) {
-					printf("Warning: Interrupted system call opening rigid body restart file, retrying.\n");
-					restartFile.clear();
-					restartFile.open(fname);
-					continue;
-	      }
-				*/
-	      printf("Error opening rigid body restart file %s",fname);
-	      exit(1); // NAMD_err(err_msg);
-			}
-			restartFile << "# RigidBody restart file" << std::endl;
-			printLegend(restartFile);
-			printData(step,restartFile);
-			if (!restartFile) {
-	      printf("Error writing rigid body restart file %s",fname);
-	      exit(-1); // NAMD_err(err_msg);
-			} 
-		}
-	}
-}
-
-void RigidBodyController::printLegend(std::ofstream &file) {
-        file << "#$LABELS step RigidBodyKey"
-		 << " posX  posY  posZ"
-		 << " rotXX rotXY rotXZ"
-		 << " rotYX rotYY rotYZ"
-		 << " rotZX rotZY rotZZ"
-		 << " velX  velY  velZ"
-		 << " angVelX angVelY angVelZ" << std::endl;
-}
-void RigidBodyController::printData(int step,std::ofstream &file) {
-	// tell RBs to integrate
-	for (int i = 0; i < rigidBodyByType.size(); i++) {
-		for (int j = 0; j < rigidBodyByType[i].size(); j++) {
-			const RigidBody& rb = rigidBodyByType[i][j];
-			
-			Vector3 v =  rb.getPosition();
-			Matrix3 t =  rb.getOrientation();
-			file << step <<" "<< rb.getKey()
-					 <<" "<< v.x <<" "<< v.y <<" "<< v.z;
-			file << std::setprecision(10) <<" "<< t.exx <<" "<< t.exy <<" "<< t.exz
-					 <<" "<< t.eyx <<" "<< t.eyy <<" "<< t.eyz
-					 <<" "<< t.ezx <<" "<< t.ezy <<" "<< t.ezz;
-			v = rb.getVelocity();
-			file << std::setprecision(10) <<" "<< v.x <<" "<< v.y <<" "<< v.z;
-			v = rb.getAngularVelocity();
-			file << std::setprecision(10) <<" "<< v.x <<" "<< v.y <<" "<< v.z
-					 << std::endl;
-		}
-	}
-}
-
-float RigidBodyController::getEnergy(float (RigidBody::*Get)())
-{
-    float e = 0.f;
-    for (int i = 0; i < rigidBodyByType.size(); i++)
-    {
-        for(int j = 0; j < rigidBodyByType[i].size(); j++) 
-        { 
-            RigidBody& rb = rigidBodyByType[i][j];
-            //e += rb.getKinetic();
-            e += (rb.*Get)();
-        }
-    }
-    return e;
-}
-
-#if 0
-void RigidBodyController::printEnergyData(std::fstream &file)
-{
-    if(file.is_open())
-    {
-
-        for (int i = 0; i < rigidBodyByType.size(); i++) 
-        {
-            for(int j = 0; j < rigidBodyByType[i].size(); j++)
-            {
-                const RigidBody& rb = rigidBodyByType[i][j];
-                file << "Kinetic Energy " << rb.getKey() << ": " << rb.getKinetic() << " (kT)" << std::endl;
-                file << " Potential Energy " << rb.getKey() << ": " << rb.getEnergy() << " (kcal/mol)" << std::endl;
-            }
-       }
-    }
-    else
-    {
-        std::cout << " Error in opening files\n"; 
-    }      
-}
-#endif
-int RigidBodyForcePair::initialize() {
-    // printf("    Initializing (streams for) RB force pair...\n");
-
-	const int numGrids = gridKeyId1.size();
-	// RBTODO assert gridKeysIds are same size 
-
-	// allocate memory for forces/torques
-	for (int i = 0; i < numGrids; i++) {
-		const int k1 = gridKeyId1[i];
-		const int sz = type1->RBC->grids[k1].getSize();
-		int nb = sz / NUMTHREADS + ((sz % NUMTHREADS == 0) ? 0:1 );
-		streamID.push_back( nextStreamID % NUMSTREAMS );
-		nextStreamID++;
-
-		numBlocks.push_back(nb);
-
-		nb = 1;
-		//forces.push_back( new Vector3[nb] );
-		forces.push_back( new ForceEnergy[nb]);
-		torques.push_back( new Vector3[nb] );
-
-		//forces_d.push_back( new Vector3[nb] ); // RBTODO: correct?
-		forces_d.push_back( new ForceEnergy[nb]);
-		torques_d.push_back( new Vector3[nb] );
-
-		// allocate device memory for numBlocks of torque, etc.
-    // printf("      Allocating device memory for forces/torques\n");
-		gpuErrchk(cudaMalloc(&(forces_d[i]), sizeof(ForceEnergy) * nb));
-		gpuErrchk(cudaMalloc(&(torques_d[i]), sizeof(Vector3) * nb));
-	}
-	gpuErrchk(cudaDeviceSynchronize());
-	// printf("    Done initializing RB force pair\n");
-	return nextStreamID;
-}
-
-void RigidBodyForcePair::swap(RigidBodyForcePair& a, RigidBodyForcePair& b) {
-	using std::swap;
-	swap(a.type1, b.type1);
-	swap(a.type2, b.type2);
-	swap(a.rb1, b.rb1);
-	swap(a.rb2, b.rb2);
-
-	swap(a.gridKeyId1, b.gridKeyId1);
-	swap(a.gridKeyId2, b.gridKeyId2);
-
-	swap(a.numBlocks, b.numBlocks);
-
-	swap(a.forces,    b.forces);
-	swap(a.forces_d,  b.forces_d);
-	swap(a.torques,   b.torques);
-	swap(a.torques_d, b.torques_d);
-}
-
-
-RigidBodyForcePair::~RigidBodyForcePair() {
-    //printf("    Destructing RB force pair\n");
-	const int numGrids = gridKeyId1.size();
-
-	// printf("      numGrids = %d\n",numGrids);
-
-	// RBTODO assert gridKeysIds are same size 
-
-	// allocate memory for forces/torques
-	if (streamID.size() > 0) {
-		for (int i = 0; i < numGrids; i++) {
-			const int k1 = gridKeyId1[i];
-			const int nb = numBlocks[i];
-
-			// free device memory for numBlocks of torque, etc.
-			// printf("      Freeing device memory for forces/torques\n");
-			gpuErrchk(cudaFree( forces_d[i] ));	
-			gpuErrchk(cudaFree( torques_d[i] ));
-		}
-	}
-	streamID.clear();
-	numBlocks.clear();
-	forces.clear();
-	forces_d.clear();
-	torques.clear();
-	torques_d.clear();
-}
-
-
-
diff --git a/src/RigidBodyController.h b/src/RigidBodyController.h
deleted file mode 100644
index fa2fa4e5d1efd6b8bf43a83e9e34b456d5f67245..0000000000000000000000000000000000000000
--- a/src/RigidBodyController.h
+++ /dev/null
@@ -1,162 +0,0 @@
-#pragma once
-
-#include <vector>
-#include <fstream>
-/* #include "RandomCUDA.h" */
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include "useful.h"
-#include "BaseGrid.h"
-#include "RigidBodyGrid.h"
-#include "GPUManager.h"
-
-#define NUMSTREAMS 8
-
-// #include "RigidBody.h"
-
-class RigidBodyType;
-class RigidBody;
-class Configuration;
-class ForceEnergy;
-// class RandomCPU;
-#include "RandomCPU.h"
-
-// TODO: performance: create RigidBodyGridPair so pairlistdist check is done per grid pair, not per RB pair
-class RigidBodyForcePair  {
-	friend class RigidBodyController;
-
-public:
-	RigidBodyForcePair(RigidBodyType* t1, RigidBodyType* t2,
-										 RigidBody* rb1, RigidBody* rb2,
-					std::vector<int> gridKeyId1, std::vector<int> gridKeyId2, bool isPmf, int updatePeriod) :
-	updatePeriod(updatePeriod), type1(t1), type2(t2), rb1(rb1), rb2(rb2),
-		gridKeyId1(gridKeyId1), gridKeyId2(gridKeyId2), isPmf(isPmf)
-		{
-			printf("    Constructing RB force pair...\n");
-			/* initialize(); */
-			// printf("    done constructing RB force pair\n");
-		}
-	RigidBodyForcePair(const RigidBodyForcePair& o) :
-		updatePeriod(o.updatePeriod), type1(o.type1), type2(o.type2), rb1(o.rb1), rb2(o.rb2),
-		gridKeyId1(o.gridKeyId1), gridKeyId2(o.gridKeyId2), isPmf(o.isPmf) {
-		printf("    Copying RB force pair...\n");
-		/* initialize(); */
-	}
-	RigidBodyForcePair& operator=(RigidBodyForcePair& o) {
-		printf("    Copying assigning RB force pair...\n");
-		swap(*this,o);
-		return *this;
-	}	
-	~RigidBodyForcePair();
-
-	bool isOverlapping(BaseGrid* sys) const;
-
-private:
-	int initialize();
-	void swap(RigidBodyForcePair& a, RigidBodyForcePair& b);
-
-	int updatePeriod;
-	
-	RigidBodyType* type1;
-	RigidBodyType* type2;
-	RigidBody* rb1;
-	RigidBody* rb2;
-	
-	std::vector<int> gridKeyId1;
-	std::vector<int> gridKeyId2;
-	std::vector<int> numBlocks;
-
-	bool isPmf;
-	
-	//std::vector<Vector3*> forces;
-	//std::vector<Vector3*> forces_d;
-	std::vector<ForceEnergy*> forces;
-        std::vector<ForceEnergy*> forces_d;
-	std::vector<Vector3*> torques;
-	std::vector<Vector3*> torques_d;
-
-	static int nextStreamID; 
-	std::vector<int> streamID;
-	static cudaStream_t* stream;
-	static void createStreams();
-
-	static int lastStreamID;
-	static RigidBodyForcePair* lastRbForcePair;
-	static int lastRbGridID;
-	
-	void callGridForceKernel(int pairId, int s,int scheme, BaseGrid* sys_d);
-	void retrieveForcesForGrid(const int i);
-	void processGPUForces(BaseGrid*);
-	Matrix3 getBasis1(const int i);
-	Matrix3 getBasis2(const int i);
-	Vector3 getOrigin1(const int i);
-	Vector3 getOrigin2(const int i);
-	Vector3 getCenter2(const int i);
-
-
-	static GPUManager gpuman;
-};
-
-class RigidBodyController {
-public:
-	/* DEVICE RigidBodyController(const NamdState *s, int reductionTag, SimParameters *sp); */
-	RigidBodyController();
-        ~RigidBodyController();
-	RigidBodyController(const Configuration& c, const char* outArg, unsigned long int seed, int repID);
-
-        void AddLangevin();
-        void SetRandomTorques();
-	void integrate(BaseGrid* sys, int step);
-        void integrateDLM(BaseGrid* sys, int step);
-	void updateForces(Vector3* pos_d, Vector3* force_d, int s, float* energy, bool get_energy, int scheme, BaseGrid* sys, BaseGrid* sys_d, int num, int num_rb_attached_particles);
-	void updateParticleLists(Vector3* pos_d, BaseGrid* sys_d);
-    void update_attached_particle_positions(Vector3* pos_d, Vector3* force_d, float* energy_d, BaseGrid* sys_d, int num, int num_rb_attached_particles, int numReplicas);
-        void clearForceAndTorque(); 
-        void KineticEnergy();
-        void print(int step);
-        //void printEnergyData(std::fstream &file);
-        float getEnergy(float (RigidBody::*get)());
-private:
-	bool loadRBCoordinates(const char* fileName);
-    bool load_restart_coordinates(const char* filename);
-	void initializeForcePairs();
-
-	//void print(int step);
-	void printLegend(std::ofstream &file);
-	void printData(int step, std::ofstream &file);
-public:
-	RigidBodyType** rbType_d;
-
-	inline Vector3 getRandomGaussVector() {
-	    return random->gaussian_vector();
-	}
-	/* RequireReduction *gridReduction; */
-
-	BaseGrid* grids;
-	RigidBodyGrid* grids_d;
-	
-private:
-	std::ofstream trajFile;
-	
-	const Configuration& conf;
-	char outArg[128];
-	
-	RandomCPU* random;
-	/* RequireReduction *gridReduction; */
-	
-	Vector3* trans; // would have made these static, but
-	Matrix3* rot;  	// there are errors on rigidBody->integrate
-	std::vector< std::vector<RigidBody> > rigidBodyByType;
-	std::vector< RigidBodyForcePair > forcePairs;
-
-	void construct_grids();
-	void destruct_grids();
-
-        //float* rb_energy;
-	Vector3* attached_particle_forces;
-	ForceEnergy* particleForces;
-	ForceEnergy* particleForces_d;
-	std::vector<int> particleForceNumBlocks;
-	std::vector<int> particleForce_offset;
-	int totalParticleForceNumBlocks;
-};
diff --git a/src/RigidBodyGrid.cu b/src/RigidBodyGrid.cu
deleted file mode 100644
index 06d68bae97fdfcec6f1c66872c884824cd0ac3d5..0000000000000000000000000000000000000000
--- a/src/RigidBodyGrid.cu
+++ /dev/null
@@ -1,600 +0,0 @@
-//////////////////////////////////////////////////////////////////////
-// Grid base class that does just the basics.
-// Author: Jeff Comer <jcomer2@illinois.edu>
-
-#include "RigidBodyGrid.h"
-#include <cuda.h>
-
-#define STRLEN 512
-
-	/*                               \
-	| CONSTRUCTORS, DESTRUCTORS, I/O |
-	\===============================*/
-
-RigidBodyGrid::RigidBodyGrid() {
-	RigidBodyGrid tmp(1,1,1);
-	val = new float[1];
-	*this = tmp;									// TODO: verify that this is OK
-}
-
-// The most obvious of constructors.
-RigidBodyGrid::RigidBodyGrid(int nx0, int ny0, int nz0) {
-	nx = abs(nx0);
-	ny = abs(ny0);
-	nz = abs(nz0);
-	
-	val = new float[nx*ny*nz];
-	zero();
-}
-
-RigidBodyGrid::RigidBodyGrid(const BaseGrid& g) {
-	nx = g.nx;
-	ny = g.ny;
-	nz = g.nz;
-	
-	val = new float[nx*ny*nz];
-	for (int i = 0; i < nx*ny*nz; i++) val[i] = g.val[i];
-}
-
-// Make an exact copy of a grid.
-RigidBodyGrid::RigidBodyGrid(const RigidBodyGrid& g) {
-	nx = g.nx;
-	ny = g.ny;
-	nz = g.nz;
-	
-	val = new float[nx*ny*nz];
-	for (int i = 0; i < nx*ny*nz; i++) val[i] = g.val[i];
-}
-
-RigidBodyGrid RigidBodyGrid::mult(const RigidBodyGrid& g) {
-	for (int i = 0; i < nx*ny*nz; i++) val[i] *= g.val[i];
-	return *this;
-}
-
-RigidBodyGrid& RigidBodyGrid::operator=(const RigidBodyGrid& g) {
-	if(val!=NULL) 
-            delete[] val;
-	val = NULL;
-	nx = g.nx;
-	ny = g.ny;
-	nz = g.nz;
-	
-	val = new float[nx*ny*nz];
-	for (int i = 0; i < nx*ny*nz; i++) val[i] = g.val[i];
-
-	return *this;
-}
-
-RigidBodyGrid::~RigidBodyGrid() {
-	if (val != NULL)
-        {
-		delete[] val;
-                val = NULL;
-        }
-}
-
-void RigidBodyGrid::zero() {
-	for (int i = 0; i < nx*ny*nz; i++) val[i] = 0.0f;
-}
-
-bool RigidBodyGrid::setValue(int j, float v) {
-	if (j < 0 || j >= nx*ny*nz) return false;
-	val[j] = v;
-	return true;
-}
-
-bool RigidBodyGrid::setValue(int ix, int iy, int iz, float v) {
-	if (ix < 0 || ix >= nx) return false;
-	if (iy < 0 || iy >= ny) return false;
-	if (iz < 0 || iz >= nz) return false;
-	int j = iz + iy*nz + ix*ny*nz;
-
-	val[j] = v;
-	return true;
-}
-
-float RigidBodyGrid::getValue(int j) const {
-
-	if (j < 0 || j >= nx*ny*nz) return 0.0f;
-	return val[j];
-/*
-    Vector3 idx = getPosition(j)
-    return getValue(idx.x,idx.y,idx.z);
-*/
-}
-
-HOST DEVICE float RigidBodyGrid::getValue(int ix, int iy, int iz) const {
-/*
-           if(ix < 0) ix = 0;
-           else if(ix >= nx) ix = nx -1;
-
-           if(iy < 0) iy = 0;
-           else if(iy >= ny) iy = ny-1;
-
-           if(iz < 0) iz = 0;
-           else if(iz >= nz) iz = nz-1;
-
-           int j = iz + nz * (iy + ny * ix);
-           return val[j];
-*/
-
-	if (ix < 0 || ix >= nx) return 0.0f;
-	if (iy < 0 || iy >= ny) return 0.0f;
-	if (iz < 0 || iz >= nz) return 0.0f;
-	
-	int j = iz + iy*nz + ix*ny*nz;
-	return val[j];
-
-}
-
-Vector3 RigidBodyGrid::getPosition(const int j) const {
-	/* const int iz = j%nz; */
-	/* const int iy = (j/nz)%ny; */
-	/* const int ix = j/(nz*ny); */
-	const int jy = j/nz;
-	const int jx = jy/ny;
-
-	const int iz = j - jy*nz;
-	const int iy = jy - jx*ny;
-	// const int ix = jx;
-
-	return Vector3(jx,iy,iz);
-}
-
-Vector3 RigidBodyGrid::getPosition(int j, Matrix3 basis, Vector3 origin) const {
-	int iz = j%nz;
-	int iy = (j/nz)%ny;
-	int ix = j/(nz*ny);
-
-	return basis.transform(Vector3(ix, iy, iz)) + origin;
-}
-
-IndexList RigidBodyGrid::index(int j) const {
-	int iz = j%nz;
-	int iy = (j/nz)%ny;
-	int ix = j/(nz*ny);
-	IndexList ret;
-	ret.add(ix);
-	ret.add(iy);
-	ret.add(iz);
-	return ret;
-}
-int RigidBodyGrid::indexX(int j) const { return j/(nz*ny); }
-int RigidBodyGrid::indexY(int j) const { return (j/nz)%ny; }
-int RigidBodyGrid::indexZ(int j) const { return j%nz; }
-int RigidBodyGrid::index(int ix, int iy, int iz) const { return iz + iy*nz + ix*ny*nz; }
-
-// Add a fixed value to the grid.
-void RigidBodyGrid::shift(float s) {
-	for (int i = 0; i < nx*ny*nz; i++) val[i] += s;
-}
-
-// Multiply the grid by a fixed value.
-void RigidBodyGrid::scale(float s) {
-	for (int i = 0; i < nx*ny*nz; i++) val[i] *= s;
-}
-
-/** interpolateForce() to be used on CUDA Device **/
-DEVICE ForceEnergy RigidBodyGrid::interpolateForceD(const Vector3 l) const {
-	Vector3 f;
-	// Vector3 l = basisInv.transform(pos - origin);
-	const int homeX = int(floor(l.x));
-	const int homeY = int(floor(l.y));
-	const int homeZ = int(floor(l.z));
-	const float wx = l.x - homeX;
-	const float wy = l.y - homeY;
-	const float wz = l.z - homeZ;
-	const float wx2 = wx*wx;
-
-	/* f.x */
-	float g3[3][4];
-	for (int iz = 0; iz < 4; iz++) {
-		float g2[2][4];
-		const int jz = (iz + homeZ - 1);
-		for (int iy = 0; iy < 4; iy++) {
-			float v[4];
-			const int jy = (iy + homeY - 1);
-			for (int ix = 0; ix < 4; ix++) {
-				const int jx = (ix + homeX - 1);
-				const int ind = jz + jy*nz + jx*nz*ny;
-				v[ix] = jz < 0 || jz >= nz || jy < 0 || jy >= ny || jx < 0 || jx >= nx ?
-					0 : val[ind];
-			}
-			const float a3 = 0.5f*(-v[0] + 3.0f*v[1] - 3.0f*v[2] + v[3])*wx2;
-			const float a2 = 0.5f*(2.0f*v[0] - 5.0f*v[1] + 4.0f*v[2] - v[3])*wx;
-			const float a1 = 0.5f*(-v[0] + v[2]);
-			g2[0][iy] = 3.0f*a3 + 2.0f*a2 + a1;				/* f.x (derivative) */
-			g2[1][iy] = a3*wx + a2*wx + a1*wx + v[1]; /* f.y & f.z */
-		}
-
-		// Mix along y.
-		{
-			g3[0][iz] = 0.5f*(-g2[0][0] + 3.0f*g2[0][1] - 3.0f*g2[0][2] + g2[0][3])*wy*wy*wy +
-				0.5f*(2.0f*g2[0][0] - 5.0f*g2[0][1] + 4.0f*g2[0][2] - g2[0][3])      *wy*wy +
-				0.5f*(-g2[0][0] + g2[0][2])                                          *wy +
-				g2[0][1];
-		}
-
-		{
-			const float a3 = 0.5f*(-g2[1][0] + 3.0f*g2[1][1] - 3.0f*g2[1][2] + g2[1][3])*wy*wy;
-			const float a2 = 0.5f*(2.0f*g2[1][0] - 5.0f*g2[1][1] + 4.0f*g2[1][2] - g2[1][3])*wy;
-			const float a1 = 0.5f*(-g2[1][0] + g2[1][2]);
-			g3[1][iz] = 3.0f*a3 + 2.0f*a2 + a1;						/* f.y */
-			g3[2][iz] = a3*wy + a2*wy + a1*wy + g2[1][1]; /* f.z */
-		}
-	}
-
-	// Mix along z.
-	f.x = -0.5f*(-g3[0][0] + 3.0f*g3[0][1] - 3.0f*g3[0][2] + g3[0][3])*wz*wz*wz +
-		-0.5f*(2.0f*g3[0][0] - 5.0f*g3[0][1] + 4.0f*g3[0][2] - g3[0][3])*wz*wz +
-		-0.5f*(-g3[0][0] + g3[0][2])                                    *wz -
-		g3[0][1];
-	f.y = -0.5f*(-g3[1][0] + 3.0f*g3[1][1] - 3.0f*g3[1][2] + g3[1][3])*wz*wz*wz +
-		-0.5f*(2.0f*g3[1][0] - 5.0f*g3[1][1] + 4.0f*g3[1][2] - g3[1][3])*wz*wz +
-		-0.5f*(-g3[1][0] + g3[1][2])                                    *wz -
-		g3[1][1];
-	f.z = -1.5f*(-g3[2][0] + 3.0f*g3[2][1] - 3.0f*g3[2][2] + g3[2][3])*wz*wz -
-		(2.0f*g3[2][0] - 5.0f*g3[2][1] + 4.0f*g3[2][2] - g3[2][3])      *wz -
-		0.5f*(-g3[2][0] + g3[2][2]);
-	float e = 0.5f*(-g3[2][0] + 3.0f*g3[2][1] - 3.0f*g3[2][2] + g3[2][3])*wz*wz*wz +
-		0.5f*(2.0f*g3[2][0] - 5.0f*g3[2][1] + 4.0f*g3[2][2] - g3[2][3])    *wz*wz +
-		0.5f*(-g3[2][0] + g3[2][2])                                        *wz +
-		g3[2][1];
-	
-	return ForceEnergy(f,e);
-}
-//#define cubic
-DEVICE ForceEnergy RigidBodyGrid::interpolateForceDLinearly(const Vector3& l) const {
-//#ifdef cubic
-//return interpolateForceD(l);
-//#elif defined(cubic_namd)
-//return interpolateForceDnamd(l);
-//#else
-	// Find the home node.
-	const int homeX = int(floor(l.x));
-	const int homeY = int(floor(l.y));
-	const int homeZ = int(floor(l.z));
-
-	Vector3 f;
-
-	const float wx = l.x - homeX;
-	const float wy = l.y - homeY;	
-	const float wz = l.z - homeZ;
-
-	float v[2][2][2];
-	for (int iz = 0; iz < 2; iz++) {
-		int jz = (iz + homeZ);
-		for (int iy = 0; iy < 2; iy++) {
-			int jy = (iy + homeY);
-			for (int ix = 0; ix < 2; ix++) {
-				int jx = (ix + homeX);
-				int ind = jz + jy*nz + jx*nz*ny;
-				v[ix][iy][iz] = jz < 0 || jz >= nz || jy < 0 || jy >= ny || jx < 0 || jx >= nx ?
-					0 : val[ind];
-			}
-		}
-	}
-
-	float g3[3][2];
-	for (int iz = 0; iz < 2; iz++) {
-		float g2[2][2];
-		for (int iy = 0; iy < 2; iy++) {
-			g2[0][iy] = (v[1][iy][iz] - v[0][iy][iz]); /* f.x */
-			g2[1][iy] = wx * (v[1][iy][iz] - v[0][iy][iz]) + v[0][iy][iz]; /* f.y & f.z */
-		}
-		// Mix along y.
-		g3[0][iz] = wy * (g2[0][1] - g2[0][0]) + g2[0][0];
-		g3[1][iz] = (g2[1][1] - g2[1][0]);
-		g3[2][iz] = wy * (g2[1][1] - g2[1][0]) + g2[1][0];
-	}
-	// Mix along z.
-	f.x = -(wz * (g3[0][1] - g3[0][0]) + g3[0][0]);
-	f.y = -(wz * (g3[1][1] - g3[1][0]) + g3[1][0]);
-	f.z = -      (g3[2][1] - g3[2][0]);
-	float e = wz * (g3[2][1] - g3[2][0]) + g3[2][0];
-	return ForceEnergy(f,e);
-//#endif
-}
-DEVICE ForceEnergy RigidBodyGrid::interpolateForceDnamd(const Vector3& l) const
-{
-                Vector3 f;
-                //const Vector3 l = basisInv.transform(pos - origin);
-
-                const int homeX = int(floor(l.x));
-                const int homeY = int(floor(l.y));
-                const int homeZ = int(floor(l.z));
-                const float wx = l.x - homeX;
-                const float wy = l.y - homeY;
-                const float wz = l.z - homeZ;
-
-                Vector3 dg = Vector3(wx,wy,wz);
-
-                int inds[3];
-                inds[0] = homeX;
-                inds[1] = homeY;
-                inds[2] = homeZ;
-
-                // TODO: handle edges
-
-                // Compute b
-                                   float b[64];    // Matrix of values at 8 box corners
-                compute_b(b, inds);
-
-                // Compute a
-                                   float a[64];
-                compute_a(a, b);
-
-                // Calculate powers of x, y, z for later use
-                                   // e.g. x[2] = x^2
-                                                      float x[4], y[4], z[4];
-                x[0] = 1; y[0] = 1; z[0] = 1;
-                for (int j = 1; j < 4; j++) {
-                    x[j] = x[j-1] * dg.x;
-                    y[j] = y[j-1] * dg.y;
-                    z[j] = z[j-1] * dg.z;
-                }
-
-                float e = compute_V(a, x, y, z);
-                f = compute_dV(a, x, y, z);
-
-                //f = basisInv.transpose().transform(f);
-                return ForceEnergy(f,e);
-        }
-
-DEVICE float RigidBodyGrid::compute_V(float *a, float *x, float *y, float *z) const
-        {
-            float V = 0.0;
-            long int ind = 0;
-            for (int l = 0; l < 4; l++) {
-                for (int k = 0; k < 4; k++) {
-                    for (int j = 0; j < 4; j++) {
-                        V += a[ind] * x[j] * y[k] * z[l];
-                        ind++;
-                    }
-                }
-            }
-            return V;
-        }
-DEVICE Vector3 RigidBodyGrid::compute_dV(float *a, float *x, float *y, float *z) const
-        {
-            Vector3 dV = Vector3(0.0f);
-            long int ind = 0;
-            for (int l = 0; l < 4; l++) {
-                for (int k = 0; k < 4; k++) {
-                    for (int j = 0; j < 4; j++) {
-                        if (j > 0) dV.x += a[ind] * j * x[j-1] * y[k]   * z[l];         // dV/dx
-                        if (k > 0) dV.y += a[ind] * k * x[j]   * y[k-1] * z[l];         // dV/dy
-                        if (l > 0) dV.z += a[ind] * l * x[j]   * y[k]   * z[l-1];       // dV/dz
-                        ind++;
-                    }
-                }
-            }
-            return dV*(-1.f);
-        }
-DEVICE void RigidBodyGrid::compute_a(float *a, float *b) const
-        {
-            // Static sparse 64x64 matrix times vector ... nicer looking way than this?
-            a[0] = b[0];
-            a[1] = b[8];
-            a[2] = -3*b[0] + 3*b[1] - 2*b[8] - b[9];
-            a[3] = 2*b[0] - 2*b[1] + b[8] + b[9];
-            a[4] = b[16];
-            a[5] = b[32];
-            a[6] = -3*b[16] + 3*b[17] - 2*b[32] - b[33];
-            a[7] = 2*b[16] - 2*b[17] + b[32] + b[33];
-            a[8] = -3*b[0] + 3*b[2] - 2*b[16] - b[18];
-            a[9] = -3*b[8] + 3*b[10] - 2*b[32] - b[34];
-            a[10] = 9*b[0] - 9*b[1] - 9*b[2] + 9*b[3] + 6*b[8] + 3*b[9] - 6*b[10] - 3*b[11]
-                + 6*b[16] - 6*b[17] + 3*b[18] - 3*b[19] + 4*b[32] + 2*b[33] + 2*b[34] + b[35];
-            a[11] = -6*b[0] + 6*b[1] + 6*b[2] - 6*b[3] - 3*b[8] - 3*b[9] + 3*b[10] + 3*b[11]
-                - 4*b[16] + 4*b[17] - 2*b[18] + 2*b[19] - 2*b[32] - 2*b[33] - b[34] - b[35];
-            a[12] = 2*b[0] - 2*b[2] + b[16] + b[18];
-            a[13] = 2*b[8] - 2*b[10] + b[32] + b[34];
-            a[14] = -6*b[0] + 6*b[1] + 6*b[2] - 6*b[3] - 4*b[8] - 2*b[9] + 4*b[10] + 2*b[11]
-                - 3*b[16] + 3*b[17] - 3*b[18] + 3*b[19] - 2*b[32] - b[33] - 2*b[34] - b[35];
-            a[15] = 4*b[0] - 4*b[1] - 4*b[2] + 4*b[3] + 2*b[8] + 2*b[9] - 2*b[10] - 2*b[11]
-                + 2*b[16] - 2*b[17] + 2*b[18] - 2*b[19] + b[32] + b[33] + b[34] + b[35];
-            a[16] = b[24];
-            a[17] = b[40];
-            a[18] = -3*b[24] + 3*b[25] - 2*b[40] - b[41];
-            a[19] = 2*b[24] - 2*b[25] + b[40] + b[41];
-            a[20] = b[48];
-            a[21] = b[56];
-            a[22] = -3*b[48] + 3*b[49] - 2*b[56] - b[57];
-            a[23] = 2*b[48] - 2*b[49] + b[56] + b[57];
-            a[24] = -3*b[24] + 3*b[26] - 2*b[48] - b[50];
-            a[25] = -3*b[40] + 3*b[42] - 2*b[56] - b[58];
-            a[26] = 9*b[24] - 9*b[25] - 9*b[26] + 9*b[27] + 6*b[40] + 3*b[41] - 6*b[42] - 3*b[43]
-                + 6*b[48] - 6*b[49] + 3*b[50] - 3*b[51] + 4*b[56] + 2*b[57] + 2*b[58] + b[59];
-            a[27] = -6*b[24] + 6*b[25] + 6*b[26] - 6*b[27] - 3*b[40] - 3*b[41] + 3*b[42] + 3*b[43]
-                - 4*b[48] + 4*b[49] - 2*b[50] + 2*b[51] - 2*b[56] - 2*b[57] - b[58] - b[59];
-            a[28] = 2*b[24] - 2*b[26] + b[48] + b[50];
-            a[29] = 2*b[40] - 2*b[42] + b[56] + b[58];
-            a[30] = -6*b[24] + 6*b[25] + 6*b[26] - 6*b[27] - 4*b[40] - 2*b[41] + 4*b[42] + 2*b[43]
-                - 3*b[48] + 3*b[49] - 3*b[50] + 3*b[51] - 2*b[56] - b[57] - 2*b[58] - b[59];
-            a[31] = 4*b[24] - 4*b[25] - 4*b[26] + 4*b[27] + 2*b[40] + 2*b[41] - 2*b[42] - 2*b[43]
-                + 2*b[48] - 2*b[49] + 2*b[50] - 2*b[51] + b[56] + b[57] + b[58] + b[59];
-            a[32] = -3*b[0] + 3*b[4] - 2*b[24] - b[28];
-            a[33] = -3*b[8] + 3*b[12] - 2*b[40] - b[44];
-            a[34] = 9*b[0] - 9*b[1] - 9*b[4] + 9*b[5] + 6*b[8] + 3*b[9] - 6*b[12] - 3*b[13]
-                + 6*b[24] - 6*b[25] + 3*b[28] - 3*b[29] + 4*b[40] + 2*b[41] + 2*b[44] + b[45];
-            a[35] = -6*b[0] + 6*b[1] + 6*b[4] - 6*b[5] - 3*b[8] - 3*b[9] + 3*b[12] + 3*b[13]
-                - 4*b[24] + 4*b[25] - 2*b[28] + 2*b[29] - 2*b[40] - 2*b[41] - b[44] - b[45];
-            a[36] = -3*b[16] + 3*b[20] - 2*b[48] - b[52];
-            a[37] = -3*b[32] + 3*b[36] - 2*b[56] - b[60];
-            a[38] = 9*b[16] - 9*b[17] - 9*b[20] + 9*b[21] + 6*b[32] + 3*b[33] - 6*b[36] - 3*b[37]
-                + 6*b[48] - 6*b[49] + 3*b[52] - 3*b[53] + 4*b[56] + 2*b[57] + 2*b[60] + b[61];
-            a[39] = -6*b[16] + 6*b[17] + 6*b[20] - 6*b[21] - 3*b[32] - 3*b[33] + 3*b[36] + 3*b[37]
-                - 4*b[48] + 4*b[49] - 2*b[52] + 2*b[53] - 2*b[56] - 2*b[57] - b[60] - b[61];
-            a[40] = 9*b[0] - 9*b[2] - 9*b[4] + 9*b[6] + 6*b[16] + 3*b[18] - 6*b[20] - 3*b[22]
-                + 6*b[24] - 6*b[26] + 3*b[28] - 3*b[30] + 4*b[48] + 2*b[50] + 2*b[52] + b[54];
-            a[41] = 9*b[8] - 9*b[10] - 9*b[12] + 9*b[14] + 6*b[32] + 3*b[34] - 6*b[36] - 3*b[38]
-                + 6*b[40] - 6*b[42] + 3*b[44] - 3*b[46] + 4*b[56] + 2*b[58] + 2*b[60] + b[62];
-            a[42] = -27*b[0] + 27*b[1] + 27*b[2] - 27*b[3] + 27*b[4] - 27*b[5] - 27*b[6] + 27*b[7]
-                - 18*b[8] - 9*b[9] + 18*b[10] + 9*b[11] + 18*b[12] + 9*b[13] - 18*b[14] - 9*b[15]
-                - 18*b[16] + 18*b[17] - 9*b[18] + 9*b[19] + 18*b[20] - 18*b[21] + 9*b[22] - 9*b[23]
-                - 18*b[24] + 18*b[25] + 18*b[26] - 18*b[27] - 9*b[28] + 9*b[29] + 9*b[30] - 9*b[31]
-                - 12*b[32] - 6*b[33] - 6*b[34] - 3*b[35] + 12*b[36] + 6*b[37] + 6*b[38] + 3*b[39]
-                - 12*b[40] - 6*b[41] + 12*b[42] + 6*b[43] - 6*b[44] - 3*b[45] + 6*b[46] + 3*b[47]
-                - 12*b[48] + 12*b[49] - 6*b[50] + 6*b[51] - 6*b[52] + 6*b[53] - 3*b[54] + 3*b[55]
-                - 8*b[56] - 4*b[57] - 4*b[58] - 2*b[59] - 4*b[60] - 2*b[61] - 2*b[62] - b[63];
-            a[43] = 18*b[0] - 18*b[1] - 18*b[2] + 18*b[3] - 18*b[4] + 18*b[5] + 18*b[6] - 18*b[7]
-                + 9*b[8] + 9*b[9] - 9*b[10] - 9*b[11] - 9*b[12] - 9*b[13] + 9*b[14] + 9*b[15]
-                + 12*b[16] - 12*b[17] + 6*b[18] - 6*b[19] - 12*b[20] + 12*b[21] - 6*b[22] + 6*b[23]
-                + 12*b[24] - 12*b[25] - 12*b[26] + 12*b[27] + 6*b[28] - 6*b[29] - 6*b[30] + 6*b[31]
-                + 6*b[32] + 6*b[33] + 3*b[34] + 3*b[35] - 6*b[36] - 6*b[37] - 3*b[38] - 3*b[39]
-                + 6*b[40] + 6*b[41] - 6*b[42] - 6*b[43] + 3*b[44] + 3*b[45] - 3*b[46] - 3*b[47]
-                + 8*b[48] - 8*b[49] + 4*b[50] - 4*b[51] + 4*b[52] - 4*b[53] + 2*b[54] - 2*b[55]
-                + 4*b[56] + 4*b[57] + 2*b[58] + 2*b[59] + 2*b[60] + 2*b[61] + b[62] + b[63];
-            a[44] = -6*b[0] + 6*b[2] + 6*b[4] - 6*b[6] - 3*b[16] - 3*b[18] + 3*b[20] + 3*b[22]
-                - 4*b[24] + 4*b[26] - 2*b[28] + 2*b[30] - 2*b[48] - 2*b[50] - b[52] - b[54];
-            a[45] = -6*b[8] + 6*b[10] + 6*b[12] - 6*b[14] - 3*b[32] - 3*b[34] + 3*b[36] + 3*b[38]
-                - 4*b[40] + 4*b[42] - 2*b[44] + 2*b[46] - 2*b[56] - 2*b[58] - b[60] - b[62];
-            a[46] = 18*b[0] - 18*b[1] - 18*b[2] + 18*b[3] - 18*b[4] + 18*b[5] + 18*b[6] - 18*b[7]
-                + 12*b[8] + 6*b[9] - 12*b[10] - 6*b[11] - 12*b[12] - 6*b[13] + 12*b[14] + 6*b[15]
-                + 9*b[16] - 9*b[17] + 9*b[18] - 9*b[19] - 9*b[20] + 9*b[21] - 9*b[22] + 9*b[23]
-                + 12*b[24] - 12*b[25] - 12*b[26] + 12*b[27] + 6*b[28] - 6*b[29] - 6*b[30] + 6*b[31]
-                + 6*b[32] + 3*b[33] + 6*b[34] + 3*b[35] - 6*b[36] - 3*b[37] - 6*b[38] - 3*b[39]
-                + 8*b[40] + 4*b[41] - 8*b[42] - 4*b[43] + 4*b[44] + 2*b[45] - 4*b[46] - 2*b[47]
-                + 6*b[48] - 6*b[49] + 6*b[50] - 6*b[51] + 3*b[52] - 3*b[53] + 3*b[54] - 3*b[55]
-                + 4*b[56] + 2*b[57] + 4*b[58] + 2*b[59] + 2*b[60] + b[61] + 2*b[62] + b[63];
-            a[47] = -12*b[0] + 12*b[1] + 12*b[2] - 12*b[3] + 12*b[4] - 12*b[5] - 12*b[6] + 12*b[7]
-                - 6*b[8] - 6*b[9] + 6*b[10] + 6*b[11] + 6*b[12] + 6*b[13] - 6*b[14] - 6*b[15]
-                - 6*b[16] + 6*b[17] - 6*b[18] + 6*b[19] + 6*b[20] - 6*b[21] + 6*b[22] - 6*b[23]
-                - 8*b[24] + 8*b[25] + 8*b[26] - 8*b[27] - 4*b[28] + 4*b[29] + 4*b[30] - 4*b[31]
-                - 3*b[32] - 3*b[33] - 3*b[34] - 3*b[35] + 3*b[36] + 3*b[37] + 3*b[38] + 3*b[39]
-                - 4*b[40] - 4*b[41] + 4*b[42] + 4*b[43] - 2*b[44] - 2*b[45] + 2*b[46] + 2*b[47]
-                - 4*b[48] + 4*b[49] - 4*b[50] + 4*b[51] - 2*b[52] + 2*b[53] - 2*b[54] + 2*b[55]
-                - 2*b[56] - 2*b[57] - 2*b[58] - 2*b[59] - b[60] - b[61] - b[62] - b[63];
-            a[48] = 2*b[0] - 2*b[4] + b[24] + b[28];
-            a[49] = 2*b[8] - 2*b[12] + b[40] + b[44];
-            a[50] = -6*b[0] + 6*b[1] + 6*b[4] - 6*b[5] - 4*b[8] - 2*b[9] + 4*b[12] + 2*b[13]
-                - 3*b[24] + 3*b[25] - 3*b[28] + 3*b[29] - 2*b[40] - b[41] - 2*b[44] - b[45];
-            a[51] = 4*b[0] - 4*b[1] - 4*b[4] + 4*b[5] + 2*b[8] + 2*b[9] - 2*b[12] - 2*b[13]
-                + 2*b[24] - 2*b[25] + 2*b[28] - 2*b[29] + b[40] + b[41] + b[44] + b[45];
-            a[52] = 2*b[16] - 2*b[20] + b[48] + b[52];
-            a[53] = 2*b[32] - 2*b[36] + b[56] + b[60];
-            a[54] = -6*b[16] + 6*b[17] + 6*b[20] - 6*b[21] - 4*b[32] - 2*b[33] + 4*b[36] + 2*b[37]
-                - 3*b[48] + 3*b[49] - 3*b[52] + 3*b[53] - 2*b[56] - b[57] - 2*b[60] - b[61];
-            a[55] = 4*b[16] - 4*b[17] - 4*b[20] + 4*b[21] + 2*b[32] + 2*b[33] - 2*b[36] - 2*b[37]
-                + 2*b[48] - 2*b[49] + 2*b[52] - 2*b[53] + b[56] + b[57] + b[60] + b[61];
-            a[56] = -6*b[0] + 6*b[2] + 6*b[4] - 6*b[6] - 4*b[16] - 2*b[18] + 4*b[20] + 2*b[22]
-                - 3*b[24] + 3*b[26] - 3*b[28] + 3*b[30] - 2*b[48] - b[50] - 2*b[52] - b[54];
-            a[57] = -6*b[8] + 6*b[10] + 6*b[12] - 6*b[14] - 4*b[32] - 2*b[34] + 4*b[36] + 2*b[38]
-                - 3*b[40] + 3*b[42] - 3*b[44] + 3*b[46] - 2*b[56] - b[58] - 2*b[60] - b[62];
-           a[58] = 18*b[0] - 18*b[1] - 18*b[2] + 18*b[3] - 18*b[4] + 18*b[5] + 18*b[6] - 18*b[7]
-                + 12*b[8] + 6*b[9] - 12*b[10] - 6*b[11] - 12*b[12] - 6*b[13] + 12*b[14] + 6*b[15]
-                + 12*b[16] - 12*b[17] + 6*b[18] - 6*b[19] - 12*b[20] + 12*b[21] - 6*b[22] + 6*b[23]
-                + 9*b[24] - 9*b[25] - 9*b[26] + 9*b[27] + 9*b[28] - 9*b[29] - 9*b[30] + 9*b[31]
-                + 8*b[32] + 4*b[33] + 4*b[34] + 2*b[35] - 8*b[36] - 4*b[37] - 4*b[38] - 2*b[39]
-                + 6*b[40] + 3*b[41] - 6*b[42] - 3*b[43] + 6*b[44] + 3*b[45] - 6*b[46] - 3*b[47]
-                + 6*b[48] - 6*b[49] + 3*b[50] - 3*b[51] + 6*b[52] - 6*b[53] + 3*b[54] - 3*b[55]
-                + 4*b[56] + 2*b[57] + 2*b[58] + b[59] + 4*b[60] + 2*b[61] + 2*b[62] + b[63];
-            a[59] = -12*b[0] + 12*b[1] + 12*b[2] - 12*b[3] + 12*b[4] - 12*b[5] - 12*b[6] + 12*b[7]
-                - 6*b[8] - 6*b[9] + 6*b[10] + 6*b[11] + 6*b[12] + 6*b[13] - 6*b[14] - 6*b[15]
-                - 8*b[16] + 8*b[17] - 4*b[18] + 4*b[19] + 8*b[20] - 8*b[21] + 4*b[22] - 4*b[23]
-                - 6*b[24] + 6*b[25] + 6*b[26] - 6*b[27] - 6*b[28] + 6*b[29] + 6*b[30] - 6*b[31]
-                - 4*b[32] - 4*b[33] - 2*b[34] - 2*b[35] + 4*b[36] + 4*b[37] + 2*b[38] + 2*b[39]
-                - 3*b[40] - 3*b[41] + 3*b[42] + 3*b[43] - 3*b[44] - 3*b[45] + 3*b[46] + 3*b[47]
-                - 4*b[48] + 4*b[49] - 2*b[50] + 2*b[51] - 4*b[52] + 4*b[53] - 2*b[54] + 2*b[55]
-                - 2*b[56] - 2*b[57] - b[58] - b[59] - 2*b[60] - 2*b[61] - b[62] - b[63];
-            a[60] = 4*b[0] - 4*b[2] - 4*b[4] + 4*b[6] + 2*b[16] + 2*b[18] - 2*b[20] - 2*b[22]
-                + 2*b[24] - 2*b[26] + 2*b[28] - 2*b[30] + b[48] + b[50] + b[52] + b[54];
-            a[61] = 4*b[8] - 4*b[10] - 4*b[12] + 4*b[14] + 2*b[32] + 2*b[34] - 2*b[36] - 2*b[38]
-                + 2*b[40] - 2*b[42] + 2*b[44] - 2*b[46] + b[56] + b[58] + b[60] + b[62];
-            a[62] = -12*b[0] + 12*b[1] + 12*b[2] - 12*b[3] + 12*b[4] - 12*b[5] - 12*b[6] + 12*b[7]
-                - 8*b[8] - 4*b[9] + 8*b[10] + 4*b[11] + 8*b[12] + 4*b[13] - 8*b[14] - 4*b[15]
-                - 6*b[16] + 6*b[17] - 6*b[18] + 6*b[19] + 6*b[20] - 6*b[21] + 6*b[22] - 6*b[23]
-                - 6*b[24] + 6*b[25] + 6*b[26] - 6*b[27] - 6*b[28] + 6*b[29] + 6*b[30] - 6*b[31]
-                - 4*b[32] - 2*b[33] - 4*b[34] - 2*b[35] + 4*b[36] + 2*b[37] + 4*b[38] + 2*b[39]
-                - 4*b[40] - 2*b[41] + 4*b[42] + 2*b[43] - 4*b[44] - 2*b[45] + 4*b[46] + 2*b[47]
-                - 3*b[48] + 3*b[49] - 3*b[50] + 3*b[51] - 3*b[52] + 3*b[53] - 3*b[54] + 3*b[55]
-                - 2*b[56] - b[57] - 2*b[58] - b[59] - 2*b[60] - b[61] - 2*b[62] - b[63];
-            a[63] = 8*b[0] - 8*b[1] - 8*b[2] + 8*b[3] - 8*b[4] + 8*b[5] + 8*b[6] - 8*b[7]
-                + 4*b[8] + 4*b[9] - 4*b[10] - 4*b[11] - 4*b[12] - 4*b[13] + 4*b[14] + 4*b[15]
-                + 4*b[16] - 4*b[17] + 4*b[18] - 4*b[19] - 4*b[20] + 4*b[21] - 4*b[22] + 4*b[23]
-                + 4*b[24] - 4*b[25] - 4*b[26] + 4*b[27] + 4*b[28] - 4*b[29] - 4*b[30] + 4*b[31]
-                + 2*b[32] + 2*b[33] + 2*b[34] + 2*b[35] - 2*b[36] - 2*b[37] - 2*b[38] - 2*b[39]
-                + 2*b[40] + 2*b[41] - 2*b[42] - 2*b[43] + 2*b[44] + 2*b[45] - 2*b[46] - 2*b[47]
-                + 2*b[48] - 2*b[49] + 2*b[50] - 2*b[51] + 2*b[52] - 2*b[53] + 2*b[54] - 2*b[55]
-                + b[56] + b[57] + b[58] + b[59] + b[60] + b[61] + b[62] + b[63];
-        }
-DEVICE void RigidBodyGrid::compute_b(float * __restrict__ b, int * __restrict__ inds) const
-        {
-            int k[3];
-            k[0] = nx;
-            k[1] = ny;
-            k[2] = nz;
-
-            int inds2[3] = {0,0,0};
-
-            for (int i0 = 0; i0 < 8; i0++) {
-                inds2[0] = 0;
-                inds2[1] = 0;
-                inds2[2] = 0;
-
-                /* printf("%d\n", inds2[0]); */
-                /* printf("%d\n", inds2[1]); */
-                /* printf("%d\n", inds2[2]); */
-
-                bool zero_derivs = false;
-
-                int bit = 1;    // bit = 2^i1 in the below loop
-                for (int i1 = 0; i1 < 3; i1++) {
-                    inds2[i1] = (inds[i1] + ((i0 & bit) ? 1 : 0)) % k[i1];
-                    bit <<= 1;  // i.e. multiply by 2
-                }
-                //int d_hi[3] = {1, 1, 1};
-                int d_lo[3] = {1, 1, 1};
-                float voffs[3] = {0.0f, 0.0f, 0.0f};
-                float dscales[3] = {0.5, 0.5, 0.5};
-
-                for (int i1 = 0; i1 < 3; i1++) {
-                    if (inds2[i1] == 0) {
-                        zero_derivs = true;
-                    }
-                    else if (inds2[i1] == k[i1]-1) {
-                        zero_derivs = true;
-                    }
-                    else {
-                        voffs[i1] = 0.0;
-                    }
-                }
-
-                // V
-                b[i0] = getValue(inds2[0],inds2[1],inds2[2]);
-
-                if (zero_derivs) {
-                    b[8+i0] = 0.0;
-                    b[16+i0] = 0.0;
-                    b[24+i0] = 0.0;
-                    b[32+i0] = 0.0;
-                    b[40+i0] = 0.0;
-                    b[48+i0] = 0.0;
-                    b[56+i0] = 0.0;
-                } else {
-                    b[8+i0]  = dscales[0] * (getValue(inds2[0]+1,inds2[1],inds2[2]) - getValue(inds2[0]-d_lo[0],inds2[1],inds2[2]) + voffs[0]); //  dV/dx
-                    b[16+i0] = dscales[1] * (getValue(inds2[0],inds2[1]+1,inds2[2]) - getValue(inds2[0],inds2[1]-d_lo[1],inds2[2]) + voffs[1]); //  dV/dy
-                    b[24+i0] = dscales[2] * (getValue(inds2[0],inds2[1],inds2[2]+1) - getValue(inds2[0],inds2[1],inds2[2]-d_lo[2]) + voffs[2]); //  dV/dz
-                    b[32+i0] = dscales[0] * dscales[1] *
-                        (getValue(inds2[0]+1,inds2[1]+1,inds2[2]) - getValue(inds2[0]-d_lo[0],inds2[1]+1,inds2[2])
-                       - getValue(inds2[0]+1,inds2[1]-d_lo[1],inds2[2]) + getValue(inds2[0]-d_lo[0],inds2[1]-d_lo[1],inds2[2]));      //  d2V/dxdy
-
-                    b[40+i0] = dscales[0] * dscales[2] *
-                              (getValue(inds2[0]+1,inds2[1],inds2[2]+1) - getValue(inds2[0]-d_lo[0],inds2[1],inds2[2]+1)
-                             - getValue(inds2[0]+1,inds2[1],inds2[2]-d_lo[2]) + getValue(inds2[0]-d_lo[0],inds2[1],inds2[2]-d_lo[2]));      //  d2V/dxdz
-
-                    b[48+i0] = dscales[1] * dscales[2] *
-                               (getValue(inds2[0],inds2[1]+1,inds2[2]+1) - getValue(inds2[0],inds2[1]-d_lo[1],inds2[2]+1)
-                              - getValue(inds2[0],inds2[1]+1,inds2[2]-d_lo[2]) + getValue(inds2[0],inds2[1]-d_lo[1],inds2[2]-d_lo[2]));      //  d2V/dydz
-
-                    b[56+i0] = dscales[0] * dscales[1] * dscales[2] *                                    // d3V/dxdydz
-                       (getValue(inds2[0]+1,inds2[1]+1,inds2[2]+1) - getValue(inds2[0]+1,inds2[1]+1,inds2[2]-d_lo[2]) -
-                        getValue(inds2[0]+1,inds2[1]-d_lo[1],inds2[2]+1) - getValue(inds2[0]-d_lo[0],inds2[1]+1,inds2[2]+1) +
-                        getValue(inds2[0]+1,inds2[1]-d_lo[1],inds2[2]-d_lo[2]) + getValue(inds2[0]-d_lo[0],inds2[1]+1,inds2[2]-d_lo[2]) +
-                        getValue(inds2[0]-d_lo[0],inds2[1]-d_lo[1],inds2[2]+1) - getValue(inds2[0]-d_lo[0],inds2[1]-d_lo[1],inds2[2]-d_lo[2]));
-
-                        }
-                    }
-                }
-
diff --git a/src/RigidBodyGrid.h b/src/RigidBodyGrid.h
deleted file mode 100644
index f586a411227d3cc7a2a08e0b0388dcde74bd50ec..0000000000000000000000000000000000000000
--- a/src/RigidBodyGrid.h
+++ /dev/null
@@ -1,130 +0,0 @@
-//////////////////////////////////////////////////////////////////////
-// Copy of BaseGrid with some modificaitons
-// 
-#ifndef RBBASEGRID_H
-#define RBBASEGRID_H
-// #pragma once
-
-#ifdef __CUDACC__
-    #define HOST __host__
-    #define DEVICE __device__
-#else
-    #define HOST 
-    #define DEVICE 
-#endif
-
-#include "BaseGrid.h"
-#include "useful.h"
-#include <cmath>
-#include <cstring>
-#include <cstdio>
-#include <cstdlib>
-#include <ctime>
-#include <cuda.h>
-
-// using namespace std;
-
-#define STRLEN 512
-
-/* class ForceEnergy { */
-/* public: */
-/* 	DEVICE ForceEnergy(Vector3 &f, float &e) : */
-/* 		f(f), e(e) {}; */
-/* 	Vector3 f; */
-/* 	float e; */
-/* }; */
-
-class RigidBodyGrid { 
-	friend class SparseGrid;
-	
-public:
-	/*                               \
-	| CONSTRUCTORS, DESTRUCTORS, I/O |
-	\===============================*/
-	
-	// RBTODO Fix?
-	RigidBodyGrid(); // cmaffeo2 (2015) moved this out of protected, cause I wanted RigidBodyGrid in a struct
-  // The most obvious of constructors.
-	RigidBodyGrid(int nx0, int ny0, int nz0);
-
-  // Make a copy of a BaseGrid grid.
-  RigidBodyGrid(const BaseGrid& g);
-
-  // Make an exact copy of a grid.
-  RigidBodyGrid(const RigidBodyGrid& g);
-
-  RigidBodyGrid mult(const RigidBodyGrid& g);
-
-  RigidBodyGrid& operator=(const RigidBodyGrid& g);
-  
-	virtual ~RigidBodyGrid();
-
-	/*             \
-	| DATA METHODS |
-	\=============*/
-		
-	void zero();
-  
-  bool setValue(int j, float v);
-
-  bool setValue(int ix, int iy, int iz, float v);
-
-  virtual float getValue(int j) const;
-
-  HOST DEVICE float getValue(int ix, int iy, int iz) const;
-
-  HOST DEVICE Vector3 getPosition(int j) const;
-	HOST DEVICE Vector3 getPosition(int j, Matrix3 basis, Vector3 origin) const;
-		
-  IndexList index(int j) const;
-  int indexX(int j) const;
-  int indexY(int j) const;
-  int indexZ(int j) const;
-  int index(int ix, int iy, int iz) const;
-  
-  /* int index(Vector3 r) const; */
-  /* int nearestIndex(Vector3 r) const; */
-
-  HOST DEVICE inline int length() const { return nx*ny*nz; }
-
-  HOST DEVICE inline int getNx() const {return nx;}
-  HOST DEVICE inline int getNy() const {return ny;}
-  HOST DEVICE inline int getNz() const {return nz;}
-  HOST DEVICE inline int getSize() const {return nx*ny*nz;}
-
-  HOST DEVICE inline int getRadius(Matrix3 basis) const {
-	  // return radius of smallest sphere circumscribing grid
-	  float radius = basis.transform(Vector3(nx,ny,nz)).length2();
-
-	  float tmp = basis.transform(Vector3(-nx,ny,nz)).length2();
-	  radius = tmp > radius ? tmp : radius;
-
-	  tmp = basis.transform(Vector3(nx,-ny,nz)).length2();
-	  radius = tmp > radius ? tmp : radius;
-
-	  tmp = basis.transform(Vector3(nx,ny,-nz)).length2();
-	  radius = tmp > radius ? tmp : radius;
-
-	  return 0.5 * sqrt(radius);
-  }
-  DEVICE ForceEnergy interpolateForceDnamd(const Vector3& l) const;
-  DEVICE float compute_V(float *a, float *x, float *y, float *z) const;
-  DEVICE Vector3 compute_dV(float *a, float *x, float *y, float *z) const;
-  DEVICE void compute_a(float *a, float *b) const;
-  DEVICE void compute_b(float * __restrict__ b, int * __restrict__ inds) const; 
-  // Add a fixed value to the grid.
-  void shift(float s);
-
-  // Multiply the grid by a fixed value.
-  void scale(float s);
-	
-	DEVICE ForceEnergy interpolateForceDLinearly(const Vector3& l) const;
-	DEVICE ForceEnergy interpolateForceD(Vector3 l) const;
-  
-public:
-  int nx, ny, nz;
-  int size;
-  float* val;
-};
-
-#endif
diff --git a/src/RigidBodyType.cu b/src/RigidBodyType.cu
deleted file mode 100644
index 45d91877ee4104b4f6be31e69d68a71c49e79d25..0000000000000000000000000000000000000000
--- a/src/RigidBodyType.cu
+++ /dev/null
@@ -1,244 +0,0 @@
-#include <cassert>
-#include "Configuration.h"
-#include "RigidBodyType.h"
-#include "Reservoir.h"
-#include "BaseGrid.h"
-#include "RigidBodyGrid.h"
-
-#ifndef gpuErrchk
-#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
-inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
-   if (code != cudaSuccess) {
-      fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), __FILE__, line);
-      if (abort) exit(code);
-   }
-}
-#endif
-
-void RigidBodyType::clear() {
-	num = 0;											// RBTODO: not 100% sure about this
-	if (reservoir != NULL) delete reservoir;
-	reservoir = NULL;
-	// pmf = NULL;
-	mass = 1.0;
-
-	// TODO: make sure that this actually removes grid data
-	potentialGridKeys.clear();
-	densityGridKeys.clear();
-	pmfKeys.clear();
-
-	
-	if (numParticles != NULL) {
-		for (int i=0; i < numPotGrids; ++i) {
-			printf("CLEARING\n");
-			if (numParticles[i] > 0) {
-				delete[] particles[i];
-				gpuErrchk(cudaFree( particles_d[i] ));
-			}
-		}
-		delete[] numParticles;
-		delete[] particles;
-		delete[] particles_d;
-		numParticles = NULL;
-	}
-}
-
-
-// void RigidBodyType::setDampingCoeffs(float timestep, float tmp_mass, Vector3 tmp_inertia, float tmp_transDamping, float tmp_rotDamping) {
-void RigidBodyType::setDampingCoeffs(float timestep) { /* MUST ONLY BE CALLED ONCE!!! */
-	/*–––––––––––––––––––––––––––––––––––––––––––––––––––––––.
-	| DiffCoeff = kT / dampingCoeff mass                     |
-	|                                                        |
-	| type->DampingCoeff has units of (1/ps)                 |
-	|                                                        |
-	| f[kcal/mol AA] = - dampingCoeff * momentum[amu AA/ns]  |
-	|                                                        |
-	| units "(1/ns) * (amu AA/ns)" "kcal_mol/AA" * 2.390e-09 |
-	`–––––––––––––––––––––––––––––––––––––––––––––––––––––––*/
-
-	/*––––––––––––––––––––––––––––––––––––––––––––––––––––.
-	| < f(t) f(t') > = 2 kT dampingCoeff mass delta(t-t') |
-	|                                                     |
-	|  units "sqrt( k K (1/ns) amu / ns )" "kcal_mol/AA"  |
-	|    * 2.1793421e-06                                  |
-	`––––––––––––––––––––––––––––––––––––––––––––––––––––*/
-	// RBTODO: make units consistent with rest of RB code 
-	float Temp = 295; /* RBTODO: temperature should be read from grid? Or set in uniformly in config file */
-	transForceCoeff = 2.1793421e-06 * Vector3::element_sqrt( 2*Temp*mass*transDamping/timestep );
-
-	// setup for langevin
-	// langevin = rbParams->langevin;
-	// if (langevin) {
-	// T = - dampingCoeff * angularMomentum
-
-	// < f(t) f(t') > = 2 kT dampingCoeff inertia delta(t-t')
-	rotTorqueCoeff = 2.1793421e-06 *
-		Vector3::element_sqrt( 2*Temp* Vector3::element_mult(inertia,rotDamping) / timestep );
-
-
-	transDamping = 2.3900574e-9 * transDamping;
-	rotDamping = 2.3900574e-9 * rotDamping;
-
-}
-	
-void RigidBodyType::attach_particles() {
-    for (const auto& filename: attached_particle_files) {
-	const size_t line_char = 256;
-	FILE* inp = fopen(filename.val(), "r");
-	char line[line_char];
-
-	// If the particle file cannot be found, exit the program
-	if (inp == NULL) {
-	    printf("ERROR: Could not open `%s'.\n", filename.val());
-	    fclose(inp);
-	    exit(1);
-	}
-
-	// Get and process all lines of input
-	while (fgets(line, line_char, inp) != NULL) {
-		// Lines in the particle file that begin with # are comments
-		if (line[0] == '#') continue;
-
-		String s(line);
-		int numTokens = s.tokenCount();
-
-		// Break the line down into pieces (tokens) so we can process them individually
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-
-		// Legitimate GROUP input lines have at least 3 tokens:
-		// Particle_type | x | y | z
-		// A line without exactly six tokens should be discarded.
-		if (numTokens != 4) {
-		    printf("Error: Invalid attached particle file line: %s\n", tokenList[0].val());
-		    fclose(inp);
-		    exit(-1);
-		}
-
-		// Make sure the index of this particle is unique.
-		// NOTE: The particle list is sorted by index.
-		int type_idx = conf->find_particle_type( tokenList[0].val() );
-		if (type_idx < 0) {
-		    printf("Error: Unrecognized particle type: %s\n", line);
-		    fclose(inp);
-		    exit(-1);
-		}
-		attached_particle_types.push_back( type_idx );
-		attached_particle_positions.push_back( Vector3(atof(tokenList[1].val()), atof(tokenList[2].val()), atof(tokenList[3].val())) );
-	}
-	fclose(inp);
-    }
-    size_t sz = sizeof(Vector3)*attached_particle_positions.size();
-    gpuErrchk(cudaMalloc( &(attached_particle_positions_d), sz ));
-    gpuErrchk(cudaMemcpyAsync( attached_particle_positions_d, &attached_particle_positions[0], sz, cudaMemcpyHostToDevice));
-}
-
-void RigidBodyType::addGrid(String s, std::vector<String> &keys, std::vector<String> &files) {
-	// tokenize and return
-	int numTokens = s.tokenCount();
-	if (numTokens != 2) {
-		printf("ERROR: could not add Grid.\n"); // TODO improve this message
-		exit(1);
-	}
-	String* token = new String[numTokens];
-	s.tokenize(token);
-	keys.push_back( String(token[0]) );
-	files.push_back( String(token[1]) );
-	delete[] token;
-}
-void RigidBodyType::addPotentialGrid(String s) {
-    addGrid(s, potentialGridKeys, potentialGridFiles);
-}
-void RigidBodyType::addDensityGrid(String s) {
-    addGrid(s, densityGridKeys, densityGridFiles);
-}
-void RigidBodyType::addPMF(String s) {
-    addGrid(s, pmfKeys, pmfFiles);
-}
-
-void RigidBodyType::addScaleFactor(String s, std::vector<String> &keys, std::vector<float> &vals) {
-	// tokenize and return
-	int numTokens = s.tokenCount();
-	if (numTokens != 2) {
-		printf("ERROR: could not add Grid.\n"); // TODO improve this message
-		exit(1);
-	}
-	String* token = new String[numTokens];
-	s.tokenize(token);
-	String key = token[0];
-	float v = (float) strtod(token[1], NULL);
-	keys.push_back( key );
-	vals.push_back( v );
-	delete[] token;
-}
-void RigidBodyType::scalePotentialGrid(String s) {
-    addScaleFactor(s, potentialGridScaleKeys, potentialGridScale);
-}
-void RigidBodyType::scaleDensityGrid(String s) {
-    addScaleFactor(s, densityGridScaleKeys, densityGridScale);
-}
-void RigidBodyType::scalePMF(String s) {
-    addScaleFactor(s, pmfScaleKeys, pmfScale);
-}
-
-void RigidBodyType::initializeParticleLists() {
-	if (numPotGrids < 1) return;
-
-	numParticles = new int[numPotGrids];
-	particles = new int*[numPotGrids];
-	particles_d = new int*[numPotGrids];
-
-	// Loop over potential grids
-	for (int i = 0; i < numPotGrids; ++i) {
-		String& gridName = potentialGridKeys[i];
-		numParticles[i] = 0;
-
-		// Count the particles interacting with potential grid i
-		// Loop over particle types
-		for (int j = 0; j < conf->numParts; ++j) {
-			// Loop over rigid body grid names associated with particle type
-			const std::vector<String>& gridNames = conf->partRigidBodyGrid[j];
-			for (int k = 0; k < gridNames.size(); ++k) {
-				if (gridNames[k] == gridName) {
-					numParticles[i] += conf->numPartsOfType[j];
-				}
-			}
-		}
-
-		if (numParticles[i] > 0) {
-
-		    // allocate array of particle ids for the potential grid 
-		    particles[i] = new int[numParticles[i]];
-		    int pid = 0;
-		
-		    // Loop over particle types to count the number of particles
-		    for (int j = 0; j < conf->numParts; ++j) {
-
-			// Build temporary id array of type j particles
-			int tmp[conf->numPartsOfType[j]];
-			int currId = 0;
-			for (int aid = 0; aid < conf->num + conf->num_rb_attached_particles ; ++aid) {
-			    if (conf->type[aid] == j)
-				tmp[currId++] = aid;
-			}
-			if (currId == 0) continue;
-
-			// Loop over rigid body grid names associated with particle type
-			const std::vector<String>& gridNames = conf->partRigidBodyGrid[j];
-			for (int k = 0; k < gridNames.size(); ++k) {
-			    if (gridNames[k] == gridName) {
-				// Copy type j particles to particles[i]
-				memcpy( &(particles[i][pid]), tmp, sizeof(int)*currId );
-				assert(currId == conf->numPartsOfType[j]);
-				pid += conf->numPartsOfType[j];
-			    }
-			}
-		    }
-
-		    // Initialize device data
-		    size_t sz = sizeof(int) * numParticles[i];
-		    gpuErrchk(cudaMalloc( &(particles_d[i]), sz ));
-		    gpuErrchk(cudaMemcpyAsync( particles_d[i], particles[i], sz, cudaMemcpyHostToDevice));
-		}
-	}
-}
diff --git a/src/RigidBodyType.h b/src/RigidBodyType.h
deleted file mode 100644
index 5cc1442052a102da7198e1e6eeda806c4cdaa8b7..0000000000000000000000000000000000000000
--- a/src/RigidBodyType.h
+++ /dev/null
@@ -1,137 +0,0 @@
-// RigidBodyType.h (2015)
-// Author: Chris Maffeo <cmaffeo2@illinois.edu>
-
-#pragma once
-#include <vector>
-/* #include <thrust/host_vector.h> */
-/* #include <thrust/device_vector.h> */
-/* #include "Reservoir.h" */
-/* #include "BaseGrid.h" */
-/* #include "RigidBodyGrid.h" */
-#include "useful.h"
-
-#include <cstdio>
-
-class Reservoir;
-class BaseGrid;
-class RigidBodyGrid;
-class Configuration;
-class RigidBodyController;
-class RigidBody;
-
-class RigidBodyType {
-    friend class RigidBody;
-public:
-RigidBodyType(const String& name = "", const Configuration* conf = NULL ) :
-	name(name), conf(conf), num(0),
-		reservoir(NULL), mass(1.0f), inertia(), transDamping(),
-		rotDamping(), initPos(), initRot(Matrix3(1.0f)), initMomentum(Vector3(0.f)), initAngularMomentum(Vector3(0.f)),
-		numPotGrids(0), numDenGrids(0), numPmfs(0), numParticles(NULL) { }
-	~RigidBodyType() { clear(); }
-private:
-	// Deletes all members
-	void clear();
-	// void copy(const RigidBodyType& src);
-
-	void addGrid(String s, std::vector<String> &keys, std::vector<String> &files);
-	void addScaleFactor(String s, std::vector<String> &keys, std::vector<float> &vals);
-	
-public:
-	/* RigidBodyType& operator=(const RigidBodyType& src); */
-	void copyGridsToDevice();
-	
-    void append_attached_particle_file(String s) { attached_particle_files.push_back(s); }
-    void attach_particles();
-    size_t num_attached_particles() const { return attached_particle_types.size() ;}
-    const std::vector<int>& get_attached_particle_types() const { return attached_particle_types; }
-
-	void addPotentialGrid(String s);
-	void addDensityGrid(String s);
-	void addPMF(String s);
-	void scalePotentialGrid(String s);
-	void scaleDensityGrid(String s);
-	void scalePMF(String s);
-
-	void setDampingCoeffs(float timestep);
-
-	void initializeParticleLists();
-	// TODO: privatize
-public:
-	String name;
-private:
-	const Configuration* conf;
-	std::vector<String> attached_particle_files;
-	std::vector<int>attached_particle_types;
-private:
-    std::vector<Vector3>attached_particle_positions;
-    Vector3* attached_particle_positions_d;
-
-public:
-	int num; // number of particles of this type
-
-	Reservoir* reservoir;
-
-	float mass;
-	Vector3 inertia;
-	Vector3 transDamping;
-	Vector3 rotDamping;
-	Vector3 transForceCoeff;
-	Vector3 rotTorqueCoeff;
-
-	Vector3 initPos;	
-	Matrix3 initRot;
-        Vector3 initMomentum;
-        Vector3 initAngularMomentum;
-	
-	std::vector<String> potentialGridKeys;
-	std::vector<String> densityGridKeys;
-	std::vector<String> pmfKeys;
-
-	std::vector<String> potentialGridFiles;
-	std::vector<String> densityGridFiles;
-	std::vector<String> pmfFiles;
-
-	std::vector<String> potentialGridScaleKeys;
-	std::vector<String> densityGridScaleKeys;
-	std::vector<String> pmfScaleKeys;
-
-	std::vector<float> potentialGridScale;
-	std::vector<float> densityGridScale;
-	std::vector<float> pmfScale;
-	
-	// RBTODO: clear std::vectors after initialization, (but keep offsets)
-	// duplicates of std::vector grids for device
-public:
-	int numPotGrids;
-	int numDenGrids;
-	int numPmfs;
-
-	int* numParticles;		  /* particles affected by potential grids */
-	int** particles;		 	
-	int** particles_d;		 	
-
-
-	/* RigidBodyGrid* rawPotentialGrids; */
-	/* RigidBodyGrid* rawDensityGrids; */
-	/* BaseGrid* rawPmfs; */
-	/* Matrix3* rawPotentialBases; */
-	/* Matrix3* rawDensityBases; */
-	/* Vector3* rawPotentialOrigins; */
-	/* Vector3* rawDensityOrigins;		 */
-
-	
-	// device pointers
-	/* RigidBodyGrid** rawPotentialGrids_d; */
-	/* RigidBodyGrid** rawDensityGrids_d; */
-	/* RigidBodyGrid** rawPmfs_d; */
-
-	size_t* potential_grid_idx;
-	size_t* density_grid_idx;
-	size_t* pmf_grid_idx;
-
-	size_t* potential_grid_idx_d;
-	size_t* density_grid_idx_d;
-	size_t* pmf_grid_idx_d;
-	
-	RigidBodyController* RBC;
-};
diff --git a/src/Scatter.cpp b/src/Scatter.cpp
deleted file mode 100644
index 2657875b2be3d658311ce96b7d9950683d301bf1..0000000000000000000000000000000000000000
--- a/src/Scatter.cpp
+++ /dev/null
@@ -1,258 +0,0 @@
-///////////////////////////////////////////////////////////////////////
-// An array of positions.
-// Author: Jeff Comer <jcomer2@illinois.edu>
-
-#include "Scatter.h"
-
-Scatter::Scatter(const char* coordFile) {
-	// Count the number of points.
-	n = countCoordinates(coordFile);
-    
-	// Load the coordinates.
-	r = new Vector3[n];
-	readCoordinates(coordFile, n, r);
-}
-
-Scatter::Scatter(const char* coordFile, float cutTime) {
-	// Count the number of points.
-	n = countTrajectory(coordFile, cutTime);
-    
-	// Load the coordinates.
-	r = new Vector3[n];
-	readTrajectory(coordFile, n, r, cutTime);
-}
-  
-Scatter::Scatter(const char* coordFile, float cutTime0, float cutTime1) {
-	// Count the number of points.
-	n = countTrajectory(coordFile, cutTime0, cutTime1);
-    
-	// Load the coordinates.
-	r = new Vector3[n];
-	readTrajectory(coordFile, n, r, cutTime0, cutTime1);
-}
-
-Scatter::~Scatter() {
-	delete[] r;
-}
-
-Matrix3 Scatter::topMatrix() const {
-	if (n < 3) return Matrix3(1.0f);
-	return Matrix3(r[0], r[1], r[2]);
-}
-
-Vector3 Scatter::get(int i) const {
-#ifdef DEBUG 
-	if (i < 0 || i >= n) {
-		printf("Warning! Scatter::get out of bounds.\n");
-		return Vector3(0.0f);
-	}
-#endif
-	return r[i];
-}
-int Scatter::length() const {
-	return n;
-}
-
-Vector3 Scatter::minBound() const {
-	Vector3 ret = r[0];
-	for (int i = 1; i < n; i++) {
-		if (r[i].x < ret.x) ret.x = r[i].x;
-		if (r[i].y < ret.y) ret.y = r[i].y;
-		if (r[i].z < ret.z) ret.z = r[i].z;
-	}
-	return ret;
-}
-
-Vector3 Scatter::maxBound() const {
-	Vector3 ret = r[0];
-	for (int i = 1; i < n; i++) {
-		if (r[i].x > ret.x) ret.x = r[i].x;
-		if (r[i].y > ret.y) ret.y = r[i].y;
-		if (r[i].z > ret.z) ret.z = r[i].z;
-	}
-	return ret;
-}
-
-int Scatter::countCoordinates(const char* fileName) {
-	int nRead;
-	int n = 0;
-	float x, y, z;
-	char line[256];
-
-	// Open the file.
-	FILE* inp = fopen(fileName,"r");
-	if (inp == NULL) {
-		printf("Scatter:countCoordinates Couldn't open file %s\n.",fileName);
-		exit(-1);
-	}
-
-	while (fgets(line, 256, inp) != NULL) {
-		// Ignore comments.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-    
-		// Read values.
-		nRead = sscanf(line, "%f %f %f", &x, &y, &z);
-		if (nRead >= 3) n++;
-	}
-    
-	fclose(inp);
-	return n;
-}
-
-int Scatter::countTrajectory(const char* fileName, float cutTime) {
-	int nRead;
-	int n = 0;
-	float t, x, y, z;
-	char line[256];
-
-	// Open the file.
-	FILE* inp = fopen(fileName,"r");
-	if (inp == NULL) {
-		printf("Scatter:countCoordinates Couldn't open file %s\n.",fileName);
-		exit(-1);
-	}
-
-	while (fgets(line, 256, inp) != NULL) {
-		// Ignore comments.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-    
-		// Read values.
-		nRead = sscanf(line, "%f %f %f %f", &t, &x, &y, &z);
-		if (nRead >= 4 && t >= cutTime) n++;
-	}
-    
-	fclose(inp);
-	return n;
-}
-
-int Scatter::countTrajectory(const char* fileName, float cutTime0, float cutTime1) {
-	int nRead;
-	int n = 0;
-	float t, x, y, z;
-	char line[256];
-
-	// Open the file.
-	FILE* inp = fopen(fileName,"r");
-	if (inp == NULL) {
-		printf("Scatter:countCoordinates Couldn't open file %s\n.",fileName);
-		exit(-1);
-	}
-
-	while (fgets(line, 256, inp) != NULL) {
-		// Ignore comments.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-    
-		// Read values.
-		nRead = sscanf(line, "%f %f %f %f", &t, &x, &y, &z);
-		if (nRead >= 4 && t >= cutTime0 && t< cutTime1) n++;
-	}
-    
-	fclose(inp);
-	return n;
-}
-
-void Scatter::readCoordinates(const char* fileName, int num, Vector3* r) {
-	int nRead;
-	int n = 0;
-	float x, y, z;
-	char line[256];
-
-	// Open the file.
-	FILE* inp = fopen(fileName,"r");
-	if (inp == NULL) {
-		printf("Scatter:countCoordinates Couldn't open file %s\n.",fileName);
-		exit(-1);
-	}
-
-	while (fgets(line, 256, inp) != NULL) {
-		// Ignore comments.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-    
-		// Read values.
-		nRead = sscanf(line, "%f %f %f", &x, &y, &z);
-		if (nRead >= 3) {
-			r[n].x = x;
-			r[n].y = y;
-			r[n].z = z;
-			n++;
-			if (n >= num) break;
-		}
-	}
-    
-	fclose(inp);
-}
-
-void Scatter::readTrajectory(const char* fileName, int num, Vector3* r, float cutTime) {
-	int nRead;
-	int n = 0;
-	float t, x, y, z;
-	char line[256];
-
-	// Open the file.
-	FILE* inp = fopen(fileName,"r");
-	if (inp == NULL) {
-		printf("Scatter:countCoordinates Couldn't open file %s\n.",fileName);
-		exit(-1);
-	}
-
-	while (fgets(line, 256, inp) != NULL) {
-		// Ignore comments.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-    
-		// Read values.
-		nRead = sscanf(line, "%f %f %f %f", &t, &x, &y, &z);
-		if (nRead >= 4 && t >= cutTime) {
-			r[n].x = x;
-			r[n].y = y;
-			r[n].z = z;
-			n++;
-			if (n >= num) break;
-		}
-	}
-    
-	fclose(inp);
-}
-
-void Scatter::readTrajectory(const char* fileName, int num, Vector3* r, float cutTime0, float cutTime1) {
-	int nRead;
-	int n = 0;
-	float t, x, y, z;
-	char line[256];
-
-	// Open the file.
-	FILE* inp = fopen(fileName,"r");
-	if (inp == NULL) {
-		printf("Scatter:countCoordinates Couldn't open file %s\n.",fileName);
-		exit(-1);
-	}
-
-	while (fgets(line, 256, inp) != NULL) {
-		// Ignore comments.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-    
-		// Read values.
-		nRead = sscanf(line, "%f %f %f %f", &t, &x, &y, &z);
-		if (nRead >= 4 && t >= cutTime0 && t < cutTime1) {
-			r[n].x = x;
-			r[n].y = y;
-			r[n].z = z;
-			n++;
-			if (n >= num) break;
-		}
-	}
-    
-	fclose(inp);
-}
-
diff --git a/src/Scatter.h b/src/Scatter.h
deleted file mode 100644
index 3347fcc6fbe3d99e60a369946ec8b96406e953e8..0000000000000000000000000000000000000000
--- a/src/Scatter.h
+++ /dev/null
@@ -1,44 +0,0 @@
-///////////////////////////////////////////////////////////////////////
-// An array of positions.
-// Author: Jeff Comer <jcomer2@illinois.edu>
-#ifndef SCATTER_H
-#define SCATTER_H
-
-#include <cstring>
-#include <cstdlib>
-#include <cstdio>
-#include "useful.h"
-// using namespace std;
-
-class Scatter {
-public:
-	Scatter(const char* coordFile);
-	Scatter(const char* coordFile, float cutTime);
-	Scatter(const char* coordFile, float cutTime0, float cutTime1);
-
-	~Scatter();
-
-	Matrix3 topMatrix() const;
-	
-	Vector3 get(int i) const;
-	
-	int length() const;
-
-	Vector3 minBound() const;
-	Vector3 maxBound() const;
-
-	static int countCoordinates(const char* fileName);
-	static int countTrajectory(const char* fileName, float cutTime);
-	static int countTrajectory(const char* fileName, float cutTime0, float cutTime1);
-private:
-	int n;
-	Vector3* r;
-
-	Scatter(const Scatter&){}
-
-	// Read coordinates into a Vector array.
-	void readCoordinates(const char* fileName, int num, Vector3* r);
-	void readTrajectory(const char* fileName, int num, Vector3* r, float cutTime);
-	void readTrajectory(const char* fileName, int num, Vector3* r, float cutTime0, float cutTime1);
-};
-#endif
diff --git a/src/SignalManager.cpp b/src/SignalManager.cpp
index 47b28668eaf91271383b5f6ebb05fbbca5a39b68..08a87ac7f202d2f7674527bc32a21a26cc95e03e 100644
--- a/src/SignalManager.cpp
+++ b/src/SignalManager.cpp
@@ -2,7 +2,7 @@
 #include <cstdio>
 #include <cstdlib>
 #ifdef SIGNAL
-
+#include "common.h"
 //#include "ARBDException.h"
 
 void SignalManager::segfault_handler(int sig, siginfo_t *info, void *secret) 
diff --git a/src/SignalManager.h b/src/SignalManager.h
index 2276cae3e33c69c63bcf68ade806dbbcca18d297..c58ad2d9dface2c2d1e45a2b66dbc50a2aa676eb 100644
--- a/src/SignalManager.h
+++ b/src/SignalManager.h
@@ -24,8 +24,6 @@
 #endif
 #endif
 
-
-
 namespace SignalManager 
 {
 
diff --git a/src/SimManager.cpp b/src/SimManager.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8fee0dc43450a373c057abf40cd39113954d9e6e
--- /dev/null
+++ b/src/SimManager.cpp
@@ -0,0 +1,41 @@
+#include "SimManager.h"
+#include <memory>
+
+// class LocalPairForce;
+// class NeighborPairForce;
+// class BDIntegrate;
+// #include "Computes.h"
+
+// Creates one Patch, attaches a compute to it (BDIntegrateCUDA when USE_CUDA
+// is defined, BDIntegrate otherwise) and calls compute() on the patch for 10
+// steps. In the CUDA build the device is synchronized after every step; a
+// failed synchronization is reported on stderr and ends the run early.
+void SimManager::run() {
+    std::cout << "running" << std::endl;
+    // SimSystem sys = SimSystem();
+    // Patch p(10,0,0,sys);
+    Patch p(10,0,0);
+    //ProxyPatch p2(10,0,0);
+
+    // p.add_compute( std::make_unique<LocalPairForce>() );
+    // p.add_compute( std::make_unique<NeighborPairForce>() );
+
+#ifdef USE_CUDA
+    p.add_compute( std::make_unique<BDIntegrateCUDA>() );
+#else
+    p.add_compute( std::make_unique<BDIntegrate>() );
+#endif
+
+    for (size_t step = 0; step < 10; ++step) {
+        p.compute();
+#ifdef USE_CUDA
+        // Asynchronous device errors are returned by the synchronize call.
+        const cudaError_t status = cudaDeviceSynchronize();
+        if (status != cudaSuccess) {
+            std::cerr << "CUDA error at step " << step << ": "
+                      << cudaGetErrorString(status) << std::endl;
+            return;
+        }
+#endif
+    }
+}
diff --git a/src/SimManager.h b/src/SimManager.h
new file mode 100644
index 0000000000000000000000000000000000000000..f48f00176af5dddd04ee475bd6a9e91ec14a17ad
--- /dev/null
+++ b/src/SimManager.h
@@ -0,0 +1,11 @@
+#pragma once
+#include <iostream>
+#include "ParticlePatch.h"
+#include "PatchOps.h"
+
+// Simulation driver; run() is defined in SimManager.cpp.
+class SimManager {
+public:
+    // Executes the simulation loop.
+    void run();
+};
diff --git a/src/SimSystem.h b/src/SimSystem.h
new file mode 100644
index 0000000000000000000000000000000000000000..efe5ecd6cb6f5fc2016ce5ed890701cd9399f469
--- /dev/null
+++ b/src/SimSystem.h
@@ -0,0 +1,4 @@
+#pragma once
+// Empty placeholder class; no members are defined yet.
+class SimSystem {
+};
diff --git a/src/TAGS.sh b/src/TAGS.sh
deleted file mode 100644
index df6d4fafbf1240dae1a52ef7dc406ad808db2714..0000000000000000000000000000000000000000
--- a/src/TAGS.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#! /bin/bash
-# etags *.h* *.c*
-etags -l c++ *.[cChH]*
diff --git a/src/TabulatedAngle.cu b/src/TabulatedAngle.cu
deleted file mode 100644
index a1a858fca144bb6cfe76d23e92853ce97474386e..0000000000000000000000000000000000000000
--- a/src/TabulatedAngle.cu
+++ /dev/null
@@ -1,79 +0,0 @@
-// tabulatedAngle.cu
-// Authors: Justin Dufresne and Terrance Howard, 2013
-
-#include "TabulatedAngle.h"
-
-TabulatedAnglePotential::TabulatedAnglePotential()
-{
-	pot = NULL;
-	size = 0;
-	fileName = "";
-}
-
-TabulatedAnglePotential::TabulatedAnglePotential(String fileName) : fileName(fileName)
-{
-	FILE* inp = fopen(fileName.val(), "r");
-	if (inp == NULL) {
-		printf("TabulatedAnglePotential: could not open file '%s'\n", fileName.val());
-		exit(-1);
-	}
-	char line[256];
-	int capacity = 256;
-	float* angle = new float[capacity];
-	pot = new float[capacity];
-	size = 0;
-	while(fgets(line, 256, inp)) {
-		String s(line);
-		int numTokens = s.tokenCount();
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-		
-		// Legitimate TABULATED ANGLE inputs have 2 tokens
-		// ANGLE | VALUE
-		// Any angle input line without exactly 2 tokens should be discarded
-		if (numTokens != 2) {
-			printf("Invalid angle input line: %s\n", line);
-			continue;
-		}		
-		
-		// Discard any empty line
-		if (tokenList == NULL) {	
-			printf("Empty angle input line: %s\n", line);
-			continue;
-		}
-		
-		if (size >= capacity) {
-			float* temp = angle;
-			float* temp2 = pot;
-			capacity *= 2;
-			angle = new float[capacity];
-			pot = new float[capacity];
-			for (int i = 0; i < size; i++) {
-				angle[i] = temp[i];
-				pot[i] = temp2[i];
-			}
-			delete[] temp;
-			delete[] temp2;
-		}	
-		angle[size] = atof(tokenList[0].val());
-		pot[size++] = atof(tokenList[1].val());
-	}
-	// units "1/deg" "1/radian"  *57.29578
-	angle_step_inv = 57.29578f * (size-1) / (angle[size-1]-angle[0]); 
-	delete[] angle;
-	fclose(inp);
-}
-
-TabulatedAnglePotential::TabulatedAnglePotential(const TabulatedAnglePotential &tab)
-{
-	size = tab.size;
-	fileName = tab.fileName;
-	pot = new float[size];
-	for (int i = 0; i < size; i++)
-		pot[i] = tab.pot[i];
-	angle_step_inv = tab.angle_step_inv;
-}
-
-TabulatedAnglePotential::~TabulatedAnglePotential() {
-	delete[] pot;
-}
diff --git a/src/TabulatedAngle.h b/src/TabulatedAngle.h
deleted file mode 100644
index d08a178cebdf4532066894928d43382486019d55..0000000000000000000000000000000000000000
--- a/src/TabulatedAngle.h
+++ /dev/null
@@ -1,97 +0,0 @@
-// tabulatedAngle.h
-// Authors: Justin Dufresne and Terrance Howard, 2013
-#ifndef TABULATEDANGLE_H
-#define TABULATEDANGLE_H
-
-#include "useful.h"
-#include "Angle.h"
-#include "TabulatedPotential.h"
-#include "BaseGrid.h"
-
-__device__ void atomicAdd( Vector3* address, Vector3 val);
-
-
-class TabulatedAnglePotential
-{
-public:
-	TabulatedAnglePotential();
-	TabulatedAnglePotential(String fileName);
-	TabulatedAnglePotential(const TabulatedAnglePotential &tab);
-	~TabulatedAnglePotential();
-	float* pot;			// actual potential values
-	float angle_step_inv;	// '1/step' angle in potential file. potential file might not go 1, 2, 3,...,360, it could be in steps of .5 or something smaller 
-	int size;			// The number of data points in the file
-	String fileName;
-
-	HOST DEVICE inline EnergyForce computeOLD(Angle* a, Vector3* pos, BaseGrid* sys, int index) {
-		// First, we must find the actual angle we're working with. 
-		// Grab the positions of each particle in the angle
-		const Vector3 posa = pos[a->ind1];
-		const Vector3 posb = pos[a->ind2];
-		const Vector3 posc = pos[a->ind3];
-
-		// The vectors between each pair of particles
-		const Vector3 ab = sys->wrapDiff(posa - posb);
-		const Vector3 bc = sys->wrapDiff(posb - posc);
-		const Vector3 ac = sys->wrapDiff(posc - posa);
-  
-		// Find the distance between each pair of particles
-		const float distab = ab.length();
-		const float distbc = bc.length();
-		const float distac = ac.length();
-  
-		// Find the cosine of the angle we want - <ABC	
-		float cos = (distbc * distbc + distab * distab - distac * distac) / (2.0f * distbc * distab);
-  
-		// If the cosine is illegitimate, set it to 1 or -1 so that acos won't fail
-		if (cos < -1.0f) cos = -1.0f;
-		if (cos > 1.0f) cos = 1.0f;
-
-		// Find the sine while we're at it.
-		float sin = sqrtf(1.0f - cos*cos);
-
-		// Now we can use the cosine to find the actual angle (in radians)		
-		float angle = acos(cos);
-
-		// tableAngle is divided into units of angle_step length
-		// 'convertedAngle' is the angle, represented in these units
-		float convertedAngle = angle * angle_step_inv;
-
-		// tableAngle[0] stores the potential at angle_step
-		// tableAngle[1] stores the potential at angle_step * 2, etc.
-		// 'home' is the index after which 'convertedAngle' would appear if it were stored in the table	
-
-		int home = int(floor(convertedAngle));
-
-		// diffHome is the distance between the convertedAngle and the home index
-		float diffHome = convertedAngle - home;
-
-		// Linear interpolation for the potential
-		float pot0 = pot[home];
-		float delta_pot = pot[(home+1) % size] - pot0;
-		float energy = (delta_pot * angle_step_inv) * diffHome + pot0;
-		float diff = -delta_pot * angle_step_inv;
-		diff /= sin;
-
-		// Don't know what these are for, so I didn't bother giving them better names. 
-		// Sorry, future person.
-		float c1 = diff / distab;
-		float c2 = diff / distbc;
-
-		// Calculate the forces
-		Vector3 force1 = c1 * (ab * (cos / distab) - bc / distbc); // force on particle 1
-		Vector3 force3 = c2 * (bc * (cos / distbc) - ab / distab); // force on particle 3
-		Vector3 force2 = -(force1 + force3); // the force on particle 2 (the central particle)
-
-		EnergyForce ret;
-		if (index == 1)
-			ret = EnergyForce(energy, force1);
-		if (index == 2)
-			ret = EnergyForce(energy, force2);
-		if (index == 3)
-			ret = EnergyForce(energy, force3);
-		return ret;
-	}
-};
-
-#endif
diff --git a/src/TabulatedDihedral.cu b/src/TabulatedDihedral.cu
deleted file mode 100644
index c70008d6eec9102fbe377e17237682ee23f6a43a..0000000000000000000000000000000000000000
--- a/src/TabulatedDihedral.cu
+++ /dev/null
@@ -1,89 +0,0 @@
-// TabulatedDihedral.cu 
-// Authors: Justin Dufresne and Terrance Howard, 2013
-
-#include "TabulatedDihedral.h"
-#include <cassert>
-
-TabulatedDihedralPotential::TabulatedDihedralPotential() :
-		pot(NULL), size(0), fileName("") {}
-
-TabulatedDihedralPotential::TabulatedDihedralPotential(String fileName) : fileName(fileName), size(0) {
-	FILE* inp = fopen(fileName.val(), "r");
-	if (inp == NULL) {
-		printf("TabulatedDihedralPotential: could not open file '%s'\n", fileName.val());
-		exit(-1);
-	}
-	char line[256];
-	int capacity = 256;
-	float* angle = new float[capacity];
-	pot = new float[capacity];
-	while(fgets(line, 256, inp)) {
-		String s(line);
-		int numTokens = s.tokenCount();
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-		
-		// Legitimate TABULATED Dihedral inputs have 2 tokens
-		// ANGLE | VALUE
-		// Any angle input line without exactly 2 tokens should be discarded
-		if (numTokens != 2) {
-			printf("Invalid Dihedral input line: %s\n", line);
-			continue;
-		}		
-		
-		// Discard any empty line
-		if (tokenList == NULL) {
-			printf("Empty Dihedral input line: %s\n", line);
-			continue;
-		}
-		
-		if (size >= capacity) {
-			float* temp = angle;
-			float* temp2 = pot;
-			capacity *= 2;
-			angle = new float[capacity];
-			pot = new float[capacity];
-			for (int i = 0; i < size; i++) {
-				angle[i] = temp[i];
-				pot[i] = temp2[i];
-			}
-			delete[] temp;
-			delete[] temp2;
-		}	
-		angle[size] = atof(tokenList[0].val());
-		pot[size++] = atof(tokenList[1].val());
-	}
-	// units "1/deg" "1/radian"  *57.29578
-	float deltaAngle = (angle[size-1]-angle[0])/(size-1); 
-	assert( deltaAngle > 0 );
-	assert( size*deltaAngle >= 360 );
-
-	float tmp[size];
-	for (int j = 0; j < size; ++j) {
-	    // reorganize data so that the angle goes from [-Pi,Pi+delta)
-	    float a = -180.0f - angle[0] + j*deltaAngle;
-	    while (a < 0) a += 360.0f;
-	    while (a >= size*deltaAngle) a -= 360.0f;
-	    int i = round( a / deltaAngle );
-	    assert(i >= 0);
-	    assert(i < size);
-	    tmp[j] = pot[i];
-	}
-	for (int i = 0; i < size; ++i) pot[i] = tmp[i];
-
-	angle_step_inv = 57.29578f / deltaAngle;
-		 
-	delete[] angle;
-	fclose(inp);
-}
-
-TabulatedDihedralPotential::TabulatedDihedralPotential(const TabulatedDihedralPotential &src) :
-		size(src.size), fileName(src.fileName), angle_step_inv(src.angle_step_inv) {
-	pot = new float[size];
-	for (int i = 0; i < size; i++)
-		pot[i] = src.pot[i];
-}
-
-TabulatedDihedralPotential::~TabulatedDihedralPotential() {
-	delete[] pot;
-}
diff --git a/src/TabulatedDihedral.h b/src/TabulatedDihedral.h
deleted file mode 100644
index 00c816128813d36686b46bd67142c92efc3d1883..0000000000000000000000000000000000000000
--- a/src/TabulatedDihedral.h
+++ /dev/null
@@ -1,98 +0,0 @@
-// TabulatedDihedral.h
-// Authors: Justin Dufresne and Terrance Howard, 2013
-
-#ifndef TABULATEDDIHEDRAL_H
-#define TABULATEDDIHEDRAL_H
-
-#include "useful.h"
-
-#include "Dihedral.h"
-#include "TabulatedPotential.h"
-#include "BaseGrid.h"
-
-// #include <math.h>
-// #define _USING_MATH_DEFINES
-
-class TabulatedDihedralPotential {
-public:
-	TabulatedDihedralPotential();
-	TabulatedDihedralPotential(String fileName);
-	TabulatedDihedralPotential(const TabulatedDihedralPotential &src);
-	~TabulatedDihedralPotential();
-
-	float* pot;				// actual potential values
-	float angle_step_inv;	// angular increments of potential file
-	int size;					// number of data points in the file
-	String fileName;
-
-	// RBTODO: deprecate
-	HOST DEVICE inline EnergyForce computeOLD(Dihedral* d, Vector3* pos, BaseGrid* sys, int index) { 
-		const Vector3 posa = d->ind1;
-		const Vector3 posb = d->ind2;
-		const Vector3 posc = d->ind3;
-		const Vector3 posd = d->ind4;
-		
-		const Vector3 ab = sys->wrapDiff(posa - posb);
-		const Vector3 bc = sys->wrapDiff(posb - posc);
-		const Vector3 cd = sys->wrapDiff(posc - posd);
-		
-		//const float distab = ab.length();
-		const float distbc = bc.length();
-		//const float distcd = cd.length();
-	
-		Vector3 crossABC = ab.cross(bc);
-		Vector3 crossBCD = bc.cross(cd);
-		Vector3 crossX = bc.cross(crossABC);
-
-		const float cos_phi = crossABC.dot(crossBCD) / (crossABC.length() * crossBCD.length());
-		const float sin_phi = crossX.dot(crossBCD) / (crossX.length() * crossBCD.length());
-		
-		const float angle = -atan2(sin_phi, cos_phi);
-
-		float energy = 0.0f;
-		float force = 0.0f;
-	
-		Vector3 f1, f2, f3; // forces
-		f1 = -distbc * crossABC.rLength2() * crossABC;
-		f3 = -distbc * crossBCD.rLength2() * crossBCD;
-		f2 = -(ab.dot(bc) * bc.rLength2()) * f1 - (bc.dot(cd) * bc.rLength2()) * f3;
-	
-		// Shift "angle" by "PI" since    -PI < dihedral < PI
-		// And our tabulated potential data: 0 < angle < 2 PI
-		float t = (angle + BD_PI) * angle_step_inv;
-		int home = (int) floorf(t);
-		t = t - home;
-
-		home = home % size;
-		int home1 = (home + 1) % size;
-
-		//================================================
-		// Linear interpolation
-		float U0 = pot[home];       // Potential
-		float dU = pot[home1] - U0; // Change in potential
-		
-		energy = dU * t + U0;
-		force = -dU * angle_step_inv;
-		//================================================
-		// TODO: add an option for cubic interpolation
-
-		if (crossABC.rLength() > 1.0f || crossBCD.rLength() > 1.0f)
-			// avoid singularity when one angle is straight 
-			force = 0.0f;
-
-		f1 *= force;
-		f2 *= force;
-		f3 *= force;
-
-		switch (index) {
-			// Return energy and forces to appropriate particles 
-			case 1: return EnergyForce(energy, f1);       // a
-			case 2: return EnergyForce(energy, f2 - f1);  // b
-			case 3: return EnergyForce(energy, f3 - f2);  // c
-			case 4: return EnergyForce(energy, -f3);      // d
-			default: return EnergyForce(0.0f, Vector3(0.0f));
-		}
-	}
-};
-
-#endif
diff --git a/src/TabulatedMethods.cuh b/src/TabulatedMethods.cuh
deleted file mode 100644
index c775a6b00acd336a95615a496244b45eee9e3a36..0000000000000000000000000000000000000000
--- a/src/TabulatedMethods.cuh
+++ /dev/null
@@ -1,294 +0,0 @@
-#pragma once
-
-// Defined elsewhere: constexpr float BD_PI = 3.1415927f;
-
-struct AngleForce {
-    __host__ __device__
-    AngleForce(Vector3 f1, Vector3 f3, float e) : f1(f1), f3(f3), e(e) { }
-    Vector3 f1;
-    Vector3 f3;
-    float e;
-};
-
-__device__ inline void computeAngle(const TabulatedAnglePotential* __restrict__ a, const BaseGrid* __restrict__ sys, Vector3* force, const Vector3* __restrict__ pos,
-				const int& i, const int& j, const int& k, float* energy, bool get_energy) {
-	    
-	    
-	// Particle's type and position
-	Vector3 posa = pos[i];
-	Vector3 posb = pos[j];
-	Vector3 posc = pos[k];
-		
-	// The vectors between each pair of particles
-	const Vector3 ab = sys->wrapDiff(posa - posb);
-	const Vector3 bc = sys->wrapDiff(posb - posc);
-	const Vector3 ac = sys->wrapDiff(posc - posa);
-  
-	// Find the distance between each pair of particles
-	float distab = ab.length2();
-	float distbc = bc.length2();
-	const float distac2 = ac.length2();
-  
-	// Find the cosine of the angle we want - <ABC	
-	float cos = (distab + distbc - distac2);
-
-	distab = 1.0f/sqrt(distab); //TODO: test other functiosn
-	distbc = 1.0f/sqrt(distbc);
-	cos *= 0.5f * distbc * distab;
-  
-	// If the cosine is illegitimate, set it to 1 or -1 so that acos won't fail
-	if (cos < -1.0f) cos = -1.0f;
-	if (cos > 1.0f) cos = 1.0f;
-
-	// Find the sine while we're at it.
-
-	// Now we can use the cosine to find the actual angle (in radians)		
-	float angle = acos(cos);
-
-	// transform angle to units of tabulated array index
-	angle *= a->angle_step_inv;
-
-	// tableAngle[0] stores the potential at angle_step
-	// tableAngle[1] stores the potential at angle_step * 2, etc.
-	// 'home' is the index after which 'convertedAngle' would appear if it were stored in the table	
-	int home = int(floorf(angle));
-        home =  (home >= a->size) ? (a->size)-1 : home; 
-	//assert(home >= 0);
-	//assert(home+1 < a->size);
-
-	// // Make angle the distance from [0,1) from the first index in the potential array index
-	// angle -= home;
-		
-	// Linearly interpolate the potential	
-	float U0 = a->pot[home];
-	float dUdx = (a->pot[(((home+1)==(a->size)) ? (a->size)-1 : home+1)] - U0) * a->angle_step_inv;
-        if(get_energy)
-        {
-	    float e = ((dUdx * (angle-home)) + U0)*0.3333333333;
-            atomicAdd( &energy[i], e);
-            atomicAdd( &energy[j], e);
-            atomicAdd( &energy[k], e);
-            
-        }
-	float sin = sqrtf(1.0f - cos*cos);
-	dUdx /= abs(sin) > 1e-3 ? sin : 1e-3; // avoid singularity 
-
-	// Calculate the forces
-	Vector3 force1 = -(dUdx*distab) * (ab * (cos*distab) + bc * distbc); // force on particle 1
-	Vector3 force3 = (dUdx*distbc) * (bc * (cos*distbc) + ab * distab); // force on particle 3
-
-	// assert( force1.length() < 10000.0f );
-	// assert( force3.length() < 10000.0f );
-	
-	atomicAdd( &force[i], force1 );
-	atomicAdd( &force[j], -(force1 + force3) );
-	atomicAdd( &force[k], force3 );
-}
-
-__device__ inline AngleForce calcAngle(const TabulatedAnglePotential* __restrict__ a, const Vector3 ab, const Vector3 bc, const Vector3 ac) {
-	// // The vectors between each pair of particles
-	// const Vector3 ab = sys->wrapDiff(posa - posb);
-	// const Vector3 bc = sys->wrapDiff(posb - posc);
-	// const Vector3 ac = sys->wrapDiff(posc - posa);
- 
-	// Find the distance between each pair of particles
-	float distab = ab.length2();
-	float distbc = bc.length2();
-	const float distac2 = ac.length2();
-  
-	// Find the cosine of the angle we want - <ABC	
-	float cos = (distab + distbc - distac2);
-
-	distab = 1.0f/sqrt(distab); //TODO: test other functiosn
-	distbc = 1.0f/sqrt(distbc);
-	cos *= 0.5f * distbc * distab;
-  
-	// If the cosine is illegitimate, set it to 1 or -1 so that acos won't fail
-	if (cos < -1.0f) cos = -1.0f;
-	if (cos > 1.0f) cos = 1.0f;
-
-	// Find the sine while we're at it.
-
-	// Now we can use the cosine to find the actual angle (in radians)		
-	float angle = acos(cos);
-
-	// transform angle to units of tabulated array index
-	angle *= a->angle_step_inv;
-
-	// tableAngle[0] stores the potential at angle_step
-	// tableAngle[1] stores the potential at angle_step * 2, etc.
-	// 'home' is the index after which 'convertedAngle' would appear if it were stored in the table	
-	int home = int(floorf(angle));
-        home =  (home >= a->size) ? (a->size)-1 : home; 
-	//assert(home >= 0);
-	//assert(home+1 < a->size);
-
-	// // Make angle the distance from [0,1) from the first index in the potential array index
-	// angle -= home;
-		
-	// Linearly interpolate the potential	
-	float U0 = a->pot[home];
-	float dUdx = (a->pot[(((home+1)==(a->size)) ? (a->size)-1 : home+1)] - U0) * a->angle_step_inv;
-	float e = ((dUdx * (angle-home)) + U0);
-
-	float sin = sqrtf(1.0f - cos*cos);
-	dUdx /= abs(sin) > 1e-3 ? sin : 1e-3; // avoid singularity 
-
-	// Calculate the forces
-	Vector3 force1 = -(dUdx*distab) * (ab * (cos*distab) + bc * distbc); // force on particle 1
-	Vector3 force3 = (dUdx*distbc) * (bc * (cos*distbc) + ab * distab); // force on particle 3
-
-	return AngleForce(force1,force3,e);
-}
-
-__device__ inline void computeBondAngle(const TabulatedAnglePotential* __restrict__ a1,
-					const TabulatedPotential* __restrict__ b, const TabulatedAnglePotential* __restrict__ a2,
-					const BaseGrid* __restrict__ sys, Vector3* force, const Vector3* __restrict__ pos,
-					const int& i, const int& j, const int& k, const int& l, float* energy, bool get_energy) {
-
-	// Particle's type and position
-	Vector3 posa = pos[i];
-	Vector3 posb = pos[j];
-	Vector3 posc = pos[k];
-	Vector3 posd = pos[l];
-
-	// The vectors between each pair of particles
-	const Vector3 ab = sys->wrapDiff(posb - posa);
-	const Vector3 bc = sys->wrapDiff(posc - posb);
-	const Vector3 ca = sys->wrapDiff(posc - posa);
-	AngleForce fe_a1 = calcAngle(a1, -ab,-bc,ca);
-
-	float distbc = bc.length2();
-	EnergyForce fe_b = b->compute(bc,distbc);
-
-	const Vector3 cd = sys->wrapDiff(posd - posc);
-	const Vector3 db = sys->wrapDiff(posd - posb);
-	AngleForce fe_a2 = calcAngle(a2, -bc,-cd,db);
-
-        if(get_energy)
-        {
-	    float e =  fe_a1.e * fe_b.e * fe_a2.e * 0.25f;
-            atomicAdd( &energy[i], e);
-            atomicAdd( &energy[j], e);
-            atomicAdd( &energy[k], e);
-            atomicAdd( &energy[l], e);
-        }
-	atomicAdd( &force[i], fe_a1.f1 * fe_b.e * fe_a2.e );
-	atomicAdd( &force[j], 
-		   -(fe_a1.f1 + fe_a1.f3) * fe_b.e * fe_a2.e
-		   + fe_b.f * fe_a1.e * fe_a2.e
-		   + fe_a2.f1 * fe_b.e * fe_a1.e 
-	    );
-	atomicAdd( &force[k], 
-		   fe_a1.f3 * fe_b.e * fe_a2.e
-		   - fe_b.f * fe_a1.e * fe_a2.e 
-		   - (fe_a2.f1 + fe_a2.f3) * fe_b.e * fe_a1.e
-	    );
-	atomicAdd( &force[l], fe_a2.f3 * fe_b.e * fe_a1.e );
-}
-
-
-__device__ inline void computeDihedral(const TabulatedDihedralPotential* __restrict__ d,
-				const BaseGrid* __restrict__ sys, Vector3* forces, const Vector3* __restrict__ pos,
-				const int& i, const int& j, const int& k, const int& l, float* energy, bool get_energy) {
-	const Vector3 posa = pos[i];
-	const Vector3 posb = pos[j];
-	const Vector3 posc = pos[k];
-	const Vector3 posd = pos[l];
-	// if (i >= 8738)  printf("Dihedral posa: (%f,%f,%f)\nDihedral posb: (%f,%f,%f)\nDihedral posc: (%f,%f,%f)\nDihedral posd: (%f,%f,%f)\n",
-	// 		       posa.x,posa.y,posa.z,
-	// 		       posb.x,posb.y,posb.z,
-	// 		       posc.x,posc.y,posc.z,
-	// 		       posd.x,posd.y,posd.z);
-
-		
-	const Vector3 ab = sys->wrapDiff(posa - posb);
-	const Vector3 bc = sys->wrapDiff(posb - posc);
-	const Vector3 cd = sys->wrapDiff(posc - posd);
-		
-	//const float distab = ab.length();
-	const float distbc = bc.length();
-	//const float distcd = cd.length();
-	
-	Vector3 crossABC = ab.cross(bc);
-	Vector3 crossBCD = bc.cross(cd);
-	Vector3 crossX = bc.cross(crossABC);
-	// assert( crossABC.rLength2() <= 1.0f );
-	// assert( crossBCD.rLength2() <= 1.0f );
-
-	
-	const float cos_phi = crossABC.dot(crossBCD) / (crossABC.length() * crossBCD.length());
-	const float sin_phi = crossX.dot(crossBCD) / (crossX.length() * crossBCD.length());
-		
-	const float angle = -atan2(sin_phi, cos_phi);
-
-	// float energy = 0.0f;
-	float force = 0.0f;
-	
-	Vector3 f1, f2, f3; // forces
-	f1 = -distbc * crossABC.rLength2() * crossABC;
-	f3 = -distbc * crossBCD.rLength2() * crossBCD;
-	f2 = -(ab.dot(bc) * bc.rLength2()) * f1 - (bc.dot(cd) * bc.rLength2()) * f3;
-	
-	// Shift "angle" by "PI" since    -PI < dihedral < PI
-	// And our tabulated potential data: 0 < angle < 2 PI
-	float t = (angle + BD_PI) * d->angle_step_inv;
-	int home = (int) floorf(t);
-	t = t - home;
-        //home = home % (d->size);
-        home = (home < d->size) ? home : d->size-1;
-        int home1 = (home + 1) >= d->size ? (home+1-d->size) : home+1;
-
-	//assert(home >= 0);
-	//assert(home < d->size);
-	// home = home % size;
-	//int home1 = (home + 1) >= d->size ? (home+1-d->size) : home+1;
-
-	//assert(home1 >= 0);
-	//assert(home1 < d->size);
-
-	//================================================
-	// Linear interpolation
-	float U0 = d->pot[home];       // Potential
-	float dU = d->pot[home1] - U0; // Change in potential
-	if(get_energy)
-        {	
-	    float e_local = (dU * t + U0)*0.25f;
-            atomicAdd( &energy[i], e_local );
-            atomicAdd( &energy[j], e_local );
-            atomicAdd( &energy[k], e_local );
-            atomicAdd( &energy[l], e_local );
-        }
-	force = -dU * d->angle_step_inv;
-	// if (i >= 8738)  printf("Dihedral (angle,U0,dUdT): (%f,%f,%f)\n", angle*180.0f/BD_PI, U0, force);
-
-	// avoid singularity when one angle is straight 
-	// force = (distbc*distbc*crossABC.rLength2() > 1000.0f || distbc*distbc*crossBCD.rLength2() > 1000.0f) ? 0.0f : force;
-	force = (ab.length2()*bc.length2()*crossABC.rLength2() > 100.0f || bc.length2()*cd.length2()*crossBCD.rLength2() > 100.0f) ? 0.0f : force;
-
-	// if ( force > 1000.0f )
-	//     printf("%f %d %d (%.4f %.4f) %.2f %f\n",force,home,home1, d->pot[home], d->pot[home1], dU, d->angle_step_inv);	    
-	//assert( force < 10000.0f )
-
-	//if( force != force ) 
-            //force = 0.f;
-        assert(force == force);
-	if ( force > 1000.0f ) 
-	    force = 1000.0f;
-	if ( force < -1000.0f ) 
-	    force = -1000.0f;
-	//assert( force < 10000.0f );
-
-	f1 *= force;
-	f2 *= force;
-	f3 *= force;
-
-	// assert( f1.length() < 10000.0f );
-	// assert( f2.length() < 10000.0f );
-	// assert( f3.length() < 10000.0f );
-
-	atomicAdd( &forces[i], f1 );
-	atomicAdd( &forces[j], f2-f1 );
-	atomicAdd( &forces[k], f3-f2 );
-	atomicAdd( &forces[l], -f3 );
-}
diff --git a/src/TabulatedPotential.cu b/src/TabulatedPotential.cu
deleted file mode 100644
index 75f6fe74a6220ed7b5e1a92c4bfd4d1f223abbfa..0000000000000000000000000000000000000000
--- a/src/TabulatedPotential.cu
+++ /dev/null
@@ -1,261 +0,0 @@
-///////////////////////////////////////////////////////////////////////
-// Author: Jeff Comer <jcomer2@illinois.edu>
-
-#include "useful.h"
-#include "TabulatedPotential.h"
-#include <cuda.h>
-
-TabulatedPotential::TabulatedPotential() {
-    n = 2;
-    drInv = 1.0f;
-    r0 = 0.0f;
-    v0 = new float[n];
-    for (int i = 0; i < n; i++) v0[i] = 0.0f;
-}
-
-TabulatedPotential::TabulatedPotential(const float* dist, const float* pot, int n0) {
-	n = abs(n0);
-	drInv = 1.0f/(dist[1]-dist[0]);
-	r0 = dist[0];
-
-	v0 = new float[n];
-	for (int i = 0; i < n; i++) v0[i] = pot[i];
-}
-TabulatedPotential::TabulatedPotential(const TabulatedPotential& tab) {
-    n = tab.n;
-    drInv = tab.drInv;
-    r0 = tab.r0;
-
-    v0 = new float[n];
-    for (int i = 0; i < n; i++) v0[i] = tab.v0[i];
-}
-
-TabulatedPotential::~TabulatedPotential() {
-    if (v0 != NULL) delete [] v0;
-}
-
-void TabulatedPotential::truncate(float cutoff) {
-	int home = int(floor((cutoff - r0) * drInv));
-	if (home > n) return;
-
-	float v = v0[home];
-	for (int i = home; i < n; i++) v0[i] = v;
-	// interpolate();
-}
-
-bool TabulatedPotential::truncate(float switchDist, float cutoff, float value) {
-	int indOff = int(floor((cutoff - r0) * drInv));
-	int indSwitch = int(floor((switchDist - r0) * drInv));
-
-	if (indSwitch > n) return false;
-
-	// Set everything after the cutoff to "value".
-	for (int i = indOff; i < n; i++) v0[i] = value;
-    
-	// Apply a linear switch.
-	float v = v0[indSwitch];
-	float m = (value - v)/(indOff - indSwitch);
-	for (int i = indSwitch; i < indOff; i++) v0[i] = m*(i - indSwitch) + v;
-
-	// interpolate();
-	return true;
-}
-
-// Vector3 TabulatedPotential::computeForce(Vector3 r) {
-// 	float d = r.length();
-// 	Vector3 rUnit = -r/d;
-// 	int home = int(floor((d - r0)*drInv));
-
-// 	if (home < 0) return Vector3(0.0f);
-// 	if (home >= n) return Vector3(0.0f);
-        
-// 	float homeR = home*dr + r0;
-// 	float w = (d - homeR)/dr;
-   
-// 	// Interpolate.
-// 	Vector3 force = -(3.0f*v3[home]*w*w + 2.0f*v2[home]*w + v1[home])*rUnit/dr;
-// 	return force;
-// }
- 
-  
-// void TabulatedPotential::interpolate() { for cubic interpolation
-// 	v1 = new float[n];
-// 	v2 = new float[n];
-// 	v3 = new float[n];
-
-// 	for (int i = 0; i < n; i++) {
-// 		int i0 = i - 1;
-// 		int i1 = i;
-// 		int i2 = i + 1;
-// 		int i3 = i + 2;
-
-// 		if (i0 < 0) i0 = 0;
-// 		if (i2 >= n) i2 = n-1;
-// 		if (i3 >= n) i3 = n-1;
-
-// 		v3[i] = 0.5f*(-v0[i0] + 3.0f*v0[i1] - 3.0f*v0[i2] + v0[i3]);
-// 		v2[i] = 0.5f*(2.0f*v0[i0] - 5.0f*v0[i1] + 4.0f*v0[i2] - v0[i3]);
-// 		v1[i] = 0.5f*(-v0[i0] + v0[i2]);
-// 	}
-// 	e0 = v3[n-1] + v2[n-1] + v1[n-1] + v0[n-1];
-// }
-
-// void TabulatedPotential::init(const float* dist, const float* pot, int n0) {
-// 	n = abs(n0);
-// 	dr = dist[1]-dist[0];
-// 	r0 = dist[0];
-// 	r1 = r0 + n*dr;
-
-// 	v0 = new float[n];
-// 	for (int i = 0; i < n; i++) v0[i] = pot[i];
-// }
-
-
-FullTabulatedPotential::FullTabulatedPotential(const char* fileName) : fileName(fileName) {
-	// printf("File: %s\n", fileName);
-	FILE* inp = fopen(fileName, "r");
-	if (inp == NULL) {
-		printf("TabulatedPotential:TabulatedPotential Could not open file '%s'\n", fileName);
-		exit(-1);
-	}
-	
-	char line[256];
-	
-	numLines = countValueLines(fileName);
-	float* r = new float[numLines];
-	float* v = new float[numLines];
-	
-	int count = 0;
-	while (fgets(line, 256, inp) != NULL) {
-		// Ignore comments.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-		
-		String s(line);
-		int numTokens = s.tokenCount();
-		if (numTokens != 2) {
-			printf("TabulatedPotential:TabulatedPotential Invalid tabulated potential file line: %s\n", line);
-			exit(-1);
-		}
-		
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-		if (tokenList == NULL) {
-			printf("TabulatedPotential:TabulatedPotential Invalid tabulated potential file line: %s\n", line);
-			exit(-1);
-		}
-		r[count] = (float) strtod(tokenList[0], NULL);
-		v[count] = (float) strtod(tokenList[1], NULL);
-		count++;
-		
-		delete[] tokenList;
-	}
-	fclose(inp);
-	pot = new TabulatedPotential(r,v,count);
-	// init(r, v, count);
-	// interpolate();
-	delete[] r;
-	delete[] v;
-}
-
-FullTabulatedPotential::FullTabulatedPotential(const FullTabulatedPotential& tab) {
-    pot = new TabulatedPotential(*tab.pot);
-    numLines = tab.numLines;
-    fileName = String(tab.fileName);
-}
-
-FullTabulatedPotential::~FullTabulatedPotential() {
-	delete pot;
-}
-
-int FullTabulatedPotential::countValueLines(const char* fileName) {
-	FILE* inp = fopen(fileName, "r");
-	if (inp == NULL) {
-		printf("TabulatedPotential::countValueLines Could not open file '%s'\n", fileName);
-		exit(-1);
-	}
-	char line[256];
-	int count = 0;
-
-	while (fgets(line, 256, inp) != NULL) {
-		// Ignore comments.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-      
-		count++;
-	}
-	fclose(inp);
-
-	return count;
-}
-
-int countValueLines(const char* fileName) {
-	FILE* inp = fopen(fileName, "r");
-	if (inp == NULL) {
-		printf("SimplePotential::countValueLines Could not open file '%s'\n", fileName);
-		exit(-1);
-	}
-	char line[256];
-	int count = 0;
-
-	while (fgets(line, 256, inp) != NULL) {
-		// Ignore comments.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-		count++;
-	}
-	fclose(inp);
-	return count;
-}
- 
-SimplePotential::SimplePotential(const char* filename, SimplePotentialType type) : type(type) {
-	FILE* inp = fopen(filename, "r");
-	if (inp == NULL) {
-		printf("SimplePotential::SimplePotential Could not open file '%s'\n", filename);
-		exit(-1);
-	}
-	
-	char line[256];
-	
-	size = (unsigned int) countValueLines(filename);
-	float* r = new float[size];
-	pot = new float[size];
-	
-	int count = 0;
-	while (fgets(line, 256, inp) != NULL) {
-		// Ignore comments.
-		int len = strlen(line);
-		if (line[0] == '#') continue;
-		if (len < 2) continue;
-		
-		String s(line);
-		int numTokens = s.tokenCount();
-		if (numTokens != 2) {
-			printf("SimplePotential::SimplePotential Invalid tabulated potential file line: %s\n", line);
-			exit(-1);
-		}
-		
-		String* tokenList = new String[numTokens];
-		s.tokenize(tokenList);
-		if (tokenList == NULL) {
-			printf("SimplePotential::SimplePotential Invalid tabulated potential file line: %s\n", line);
-			exit(-1);
-		}
-		r[count] = (float) strtod(tokenList[0], NULL);
-		pot[count] = (float) strtod(tokenList[1], NULL);
-		count++;
-		
-		delete[] tokenList;
-	}
-	fclose(inp);
-
-	if (type == BOND) {
-	    step_inv = (size-1.0f) / (r[size-1]-r[0]);
-	} else {
-	    step_inv = 57.29578f * (size-1.0f) / (r[size-1]-r[0]);
-	}
-	delete[] r;
-}
diff --git a/src/TabulatedPotential.h b/src/TabulatedPotential.h
deleted file mode 100644
index 0df2bcf6cc2439cee863a025eacc3029364edb51..0000000000000000000000000000000000000000
--- a/src/TabulatedPotential.h
+++ /dev/null
@@ -1,527 +0,0 @@
-///////////////////////////////////////////////////////////////////////
-// Author: Jeff Comer <jcomer2@illinois.edu>
-#ifndef TABULATEDPOTENTIAL_H
-#define TABULATEDPOTENTIAL_H
-
-#ifdef __CUDACC__
-    #define HOST __host__
-    #define DEVICE __device__
-#else
-    #define HOST
-    #define DEVICE
-#endif
-
-#include "useful.h"
-#include "BaseGrid.h"
-
-#ifdef __CUDA_ARCH__
-#include "CudaUtil.cuh"
-#endif
-
-#include <cuda.h>
-
-#ifndef gpuErrchk
-#define delgpuErrchk
-#define gpuErrchk(code) { if ((code) != cudaSuccess) {			                            \
-	    fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), __FILE__, __LINE__); \
-	}}
-#endif
-constexpr float BD_PI = 3.1415927f;
-
-class EnergyForce {
-public:
-  HOST DEVICE
-	inline EnergyForce(float energy=0.0f, const Vector3& force=Vector3(0.0f)) :
-			e(energy), f(force) { }
-  HOST DEVICE
-	inline EnergyForce operator+=(const EnergyForce& ef) {
-		e += ef.e;
-		f += ef.f;
-		return *this;
-	}
-  float e;
-  Vector3 f;
-};
-
-class TabulatedPotential;
-
-class FullTabulatedPotential {
-public:
-  FullTabulatedPotential();
-  FullTabulatedPotential(const char* fileName);
-  FullTabulatedPotential(const FullTabulatedPotential& tab);
-  ~FullTabulatedPotential();
-
-  static int countValueLines(const char* fileName);
-
-  /* HOST DEVICE inline EnergyForce computeOLD(Vector3 r) { */
-  /* 		float d = r.length(); */
-  /* 		Vector3 rUnit = -r/d; */
-  /* 		int home = int(floorf((d - r0)/dr)); */
-  /* 		if (home < 0) return EnergyForce(v0[0], Vector3(0.0f)); */
-  /* 		if (home >= n) return EnergyForce(e0, Vector3(0.0f)); */
-  /* 		float homeR = home*dr + r0; */
-  /* 		float w = (d - homeR)/dr; */
-		
-  /* 		// Interpolate. */
-  /* 		float energy = v3[home]*w*w*w + v2[home]*w*w + v1[home]*w + v0[home]; */
-  /* 		Vector3 force = -(3.0f*v3[home] * w * w */
-  /* 										+ 2.0f*v2[home] * w */
-  /* 										+ v1[home]) * rUnit/dr; */
-  /* 		return EnergyForce(energy,force); */
-  /* 	} */
-
-  TabulatedPotential* pot;
-
-private:
-  int numLines;
-  String fileName;
-};
-
-class TabulatedPotential {
-public:
-  TabulatedPotential();
-  TabulatedPotential(const TabulatedPotential& tab);
-  TabulatedPotential(const float* dist, const float* pot, int n0);
-    TabulatedPotential(const FullTabulatedPotential& tab) : TabulatedPotential(*tab.pot) {}
-    TabulatedPotential(const char* filename) : TabulatedPotential(FullTabulatedPotential(filename)) {}
-  ~TabulatedPotential();
-
-  void truncate(float cutoff);
-  bool truncate(float switchDist, float cutoff, float value);
-
-  Vector3 computeForce(Vector3 r);
-
-  int size() const { return n; }
-
-    TabulatedPotential* copy_to_cuda() const {
-	// Allocate data for array 
-	TabulatedPotential* dev_ptr;
-	TabulatedPotential tmp(*this); // TODO consider avoiding allocating v0
-
-	float *v;
-	{
-	    size_t sz = sizeof(float) * n;
-	    gpuErrchk(cudaMalloc(&v, sz));
-	    gpuErrchk(cudaMemcpy(v, v0, sz, cudaMemcpyHostToDevice));
-	}
-	delete [] tmp.v0;
-	tmp.v0 = v;
-
-	size_t sz = sizeof(TabulatedPotential);
-	gpuErrchk(cudaMalloc(&dev_ptr, sz));
-	gpuErrchk(cudaMemcpy(dev_ptr, &tmp, sz, cudaMemcpyHostToDevice));
-	tmp.v0 = NULL;
-	return dev_ptr;
-    }
-    void free_from_cuda(TabulatedPotential* dev_ptr) const {
-	TabulatedPotential tmp = TabulatedPotential();
-	delete [] tmp.v0;
-	gpuErrchk(cudaMemcpy(&tmp, dev_ptr, sizeof(TabulatedPotential), cudaMemcpyDeviceToHost));
-	gpuErrchk(cudaFree(dev_ptr));
-	gpuErrchk(cudaFree(tmp.v0));
-	tmp.v0 = NULL;
-    }
-
-
-  HOST DEVICE inline EnergyForce compute(Vector3 r) {
-		float d = r.length();
-		float w = (d - r0) * drInv;
-		int home = int( floorf(w) );
-		w = w - home;
-		// if (home < 0) return EnergyForce(v0[0], Vector3(0.0f));
-		home = home < 0 ? 0 : home;
-		if (home >= n) return EnergyForce(v0[n-1], Vector3(0.0f));
-		
-		float u0 = v0[home];
-		float du = home+1 < n ? v0[home+1]-u0 : 0;
-				
-		// Interpolate.
-		float energy = du*w+u0;
-		Vector3 force = (du*drInv/d)*r;
-		return EnergyForce(energy,force);
-	}
-
-  HOST DEVICE inline EnergyForce compute(const Vector3 r, float d) const {
-		d = sqrt(d);
-		// float d = r.length();
-		float w = (d - r0) * drInv;
-		int home = int( floorf(w) );
-		w = w - home;
-		// if (home < 0) return EnergyForce(v0[0], Vector3(0.0f));
-		home = home < 0 ? 0 : home;
-		if (home >= n) return EnergyForce(v0[n-1], Vector3(0.0f));
-		
-		float u0 = v0[home];
-		float du = home+1 < n ? v0[home+1]-u0 : 0;
-				
-		// Interpolate.
-		float energy = du*w+u0;
-		Vector3 force = (du*drInv/d)*r;
-		return EnergyForce(energy,force);
-	}
-  HOST DEVICE inline Vector3 computef(const Vector3 r, float d) const {
-		d = sqrt(d);
-		// float d = r.length();
-		// RBTODO: precompute so that initial blocks are zero; reduce computation here
-		float w = (d - r0)*drInv;
-		int home = int( floorf(w) );
-		w = w - home;
-		// if (home < 0) return EnergyForce(v0[0], Vector3(0.0f));
-		home = home < 0 ? 0 : home;
-		if (home >= n) return Vector3(0.0f);
-		
-		if (home+1 < n) 
-		    return ((v0[home+1]-v0[home])*drInv/d)*r;
-		else
-		    return Vector3(0.0f);
-	}
-
-// private:
-private:
-  float* v0;
-  int n;
-  float drInv; //TODO replace with drInv
-  float r0;
-};
-
-/* // New unified/simplified classes for working with potentials */
-/* <template int num_indices, int max_integer> */
-/* class BitMaskInts { */
-/*     BitMaskInts(); */
-
-/* private: */
-/*     static_assert( ceil(log2(max_integer)) <= CHAR_BIT ); */
-
-/*     char data[ ceil(num_indices * ceil(log2(max_integer)) / CHAR_BIT) ]; */
-
-/*     HOST DEVICE inline unsigned short int get_int(i) const { */
-/* 	unsigned int first_bit = i * ceil(log2(max_integer)); */
-/* 	unsigned int last_bit  = (i+1) * ceil(log2(max_integer))-1; */
-/* 	char c0 = data[floor(first_bit/CHAR_BIT)]; */
-/* 	char c1 = data[floor(last_bit/CHAR_BIT)]; */
-
-/* 	unsigned short int ret = c0 << (first_bit % CHAR_BIT) /\* shift left *\/ */
-/*     };     */
-
-/* } */
-
-
-enum SimplePotentialType { UNSET, BOND, ANGLE, DIHEDRAL, VECANGLE };
-// enum PotentialTypeAtoms { bond=2, angle=3, dihedral=4 };
-
-
-class SimplePotential {
-public:
-    SimplePotential() { }
-    SimplePotential(const char* filename, SimplePotentialType type);
-    SimplePotential(float* pot, float step_inv, unsigned int size, SimplePotentialType type) :
-	pot(pot), step_inv(step_inv), size(size), type(type) { }
-    
-
-    float* pot;	     // actual potential values
-    float  step_inv; // angular increments of potential file
-    unsigned int size;     // number of data points in the file
-
-    SimplePotentialType type;
-
-    /* float start = 0;  */
-    /* bool is_periodic = false; */
-
-    /* HOST void copy_to_device(SimplePotential* device_addr_p, unsigned int offset=0) { */
-    /* 	/\* Assumes device_addr_p is already allocated, allocates space for pot *\/ */
-    /* 	float* val, tmp; */
-    /* 	gpuErrchk(cudaMalloc(&val, sizeof(float)*size)); // TODO equivalent cudaFree */
-    /* 	gpuErrchk(cudaMemcpyAsync(val, pot, sizeof(float)*size, cudaMemcpyHostToDevice)); */
-    /* 	tmp = pot; */
-    /* 	pot = val; */
-    /* 	gpuErrchk(cudaMemcpyAsync(device_addr_p+offset, this, sizeof(SimplePotential), cudaMemcpyHostToDevice)); */
-    /* 	pot = tmp; */
-    /* 	// return val; */
-    /* } */
-
-    HOST DEVICE inline float compute_value(const Vector3* __restrict__ pos,
-					   const BaseGrid* __restrict__ sys,
-					   const int* __restrict__ particles) const {
-	float val;
-	if (type == BOND)
-	    val = compute_bond(pos, sys, particles[0], particles[1]);
-	else if (type == ANGLE)
-	    val = compute_angle(pos, sys, particles[0], particles[1], particles[2]);
-	else if (type == DIHEDRAL)
-	    val = compute_dihedral(pos, sys, particles[0], particles[1], particles[2], particles[3]);
-	else if (type == VECANGLE)
-	    val = compute_vecangle(pos, sys, particles[0], particles[1], particles[2], particles[3]);
-	return val;
-    }
-
-    HOST DEVICE inline float2 compute_energy_and_deriv(float value) {
-	float2 ret;
-	if (type == DIHEDRAL) {
-	    ret = linearly_interpolate<true>(value, -BD_PI);
-	} else {
-	    ret = linearly_interpolate<false>(value);
-	}
-	return ret;
-    }
-
-    HOST DEVICE inline float compute_bond(const Vector3* __restrict__ pos,
-					      const BaseGrid* __restrict__ sys,
-					      int i, int j) const {
-	return sys->wrapDiff( pos[j] - pos[i] ).length();
-    }
-
-    HOST DEVICE inline float compute_angle(const Vector3* __restrict__ pos,
-					   const BaseGrid* __restrict__ sys,
-					   int i, int j, int k) const {
-	const Vector3 ab = sys->wrapDiff(pos[j] - pos[i]);
-	const Vector3 bc = sys->wrapDiff(pos[k] - pos[j]);
-	const Vector3 ac = sys->wrapDiff(pos[k] - pos[i]);
-	return compute_angle( ab.length2(), bc.length2(), ac.length2() );
-    }
-
-    HOST DEVICE inline float compute_vecangle(const Vector3* __restrict__ pos,
-					      const BaseGrid* __restrict__ sys,
-					      int i, int j, int k, int l) const {
-	const Vector3 ab = sys->wrapDiff(pos[j] - pos[i]);
-	const Vector3 bc = sys->wrapDiff(pos[l] - pos[k]);
-	const Vector3 ac = bc+ab;
-	return compute_angle( ab.length2(), bc.length2(), ac.length2() );
-    }
-
-    HOST DEVICE inline float compute_angle(float distab2, float distbc2, float distac2) const {
-	// Find the cosine of the angle we want - <ABC
-	float cos = (distab2 + distbc2 - distac2);
-
-	distab2 = 1.0f/sqrt(distab2); //TODO: test other functions
-	distbc2 = 1.0f/sqrt(distbc2);
-	cos *= 0.5f * distbc2 * distab2;
-
-	// If the cosine is illegitimate, set it to 1 or -1 so that acos won't fail
-	if (cos < -1.0f) cos = -1.0f;
-	if (cos > 1.0f) cos = 1.0f;
-
-	return acos(cos);
-    }
-
-    HOST DEVICE inline float compute_dihedral(const Vector3* __restrict__ pos,
-					      const BaseGrid* __restrict__ sys,
-					      int i, int j, int k, int l) const {
-	const Vector3 ab = -sys->wrapDiff(pos[j] - pos[i]);
-	const Vector3 bc = -sys->wrapDiff(pos[k] - pos[j]);
-	const Vector3 cd = -sys->wrapDiff(pos[l] - pos[k]);
-
-	const Vector3 crossABC = ab.cross(bc);
-	const Vector3 crossBCD = bc.cross(cd);
-	const Vector3 crossX = bc.cross(crossABC);
-
-	const float cos_phi = crossABC.dot(crossBCD) / (crossABC.length() * crossBCD.length());
-	const float sin_phi = crossX.dot(crossBCD) / (crossX.length() * crossBCD.length());
-
-	return -atan2(sin_phi,cos_phi);
-    }
-
-    template <bool is_periodic>
-	HOST DEVICE inline float2 linearly_interpolate(float x, float start=0.0f) const {
-	float w = (x - start) * step_inv;
-	int home = int( floorf(w) );
-	w = w - home;
-	// if (home < 0) return EnergyForce(v0[0], Vector3(0.0f));
-	if (home < 0) {
-	    if (is_periodic) home += size;
-	    else return make_float2(pot[0],0.0f);
-	}
-	else if (home >= size) {
-	    if (is_periodic) home -= size;
-	    else return make_float2(pot[size-1],0.0f);
-	}
-
-	float u0 = pot[home];
-	float du = home+1 < size ? pot[home+1]-u0 : is_periodic ? pot[0]-u0 : 0;
-
-	return make_float2(du*w+u0, du*step_inv);
-    }
-
-    DEVICE inline void apply_force(const Vector3* __restrict__ pos,
-				   const BaseGrid* __restrict__ sys,
-				   Vector3* __restrict__ forces,
-				   int* particles, float energy_deriv) const {
-	if (type == BOND)
-	    apply_bond_force(pos, sys, forces, particles[0], particles[1], energy_deriv);
-	else if (type == ANGLE)
-	    apply_angle_force(pos, sys, forces, particles[0], particles[1],
-			     particles[2], energy_deriv);
-	else if (type == DIHEDRAL)
-	    apply_dihedral_force(pos, sys, forces, particles[0], particles[1],
-				 particles[2], particles[3], energy_deriv);
-	else if (type == VECANGLE)
-	    apply_vecangle_force(pos, sys, forces, particles[0], particles[1],
-				 particles[2], particles[3], energy_deriv);
-    }
-
-    __device__ inline void apply_bond_force(const Vector3* __restrict__ pos,
-					const BaseGrid* __restrict__ sys,
-					Vector3* __restrict__ force,
-					int i, int j, float energy_deriv) const {
-#ifdef __CUDA_ARCH__
-	Vector3 f = sys->wrapDiff( pos[j] - pos[i] );
-	f = f * energy_deriv / f.length();
-	atomicAdd(&force[i], f);
-	atomicAdd(&force[j], -f);
-#endif
-    }
-
-    struct TwoVector3 {
-	Vector3 v1;
-	Vector3 v2;
-    };
-
-    DEVICE inline TwoVector3 get_angle_force(const Vector3& ab,
-					     const Vector3& bc,
-					     float energy_deriv) const {
-	// Find the distance between each pair of particles
-	float distab = ab.length2();
-	float distbc = bc.length2();
-	const float distac2 = (ab+bc).length2();
-
-	// Find the cosine of the angle we want - <ABC
-	float cos = (distab + distbc - distac2);
-
-	distab = 1.0f/sqrt(distab); //TODO: test other functions
-	distbc = 1.0f/sqrt(distbc);
-	cos *= 0.5f * distbc * distab;
-
-	// If the cosine is illegitimate, set it to 1 or -1 so that acos won't fail
-	if (cos < -1.0f) cos = -1.0f;
-	if (cos > 1.0f) cos = 1.0f;
-
-	float sin = sqrtf(1.0f - cos*cos);
-	energy_deriv /= abs(sin) > 1e-3 ? sin : 1e-3; // avoid singularity
-	if (abs(sin) < 1e-3) {
-	    printf("BAD ANGLE: sin, cos, energy_deriv, distab, distbc, distac2: (%f %f %f %f %f)\n",
-		   sin,cos,energy_deriv,distab,distbc);	}
-
-	// Calculate the forces
-	TwoVector3 force;
-	force.v1 = (energy_deriv*distab) * (ab * (cos*distab) + bc * distbc); // force on 1st particle
-	force.v2 = -(energy_deriv*distbc) * (bc * (cos*distbc) + ab * distab); // force on last particle
-	return force;
-    }
-
-    // DEVICE inline TwoVector3 get_angle_force(const Vector3& ab,
-    // 					     const Vector3& bc,
-    // 					     float energy_deriv) const {
-    // 	// Find the distance between each pair of particles
-    // 	float distab = ab.length2();
-    // 	float distbc = bc.length2();
-
-    // 	float pre = distab*distbc - pow(ab.dot(bc),2);
-    // 	// if (pre < 1e-6) {
-    // 	//     pre = 1e-3;
-    // 	//     printf("BAD ANGLE: pre, energy_deriv, distab, distbc, ab.dot(bc): (%f %f %f %f %f)\n",
-    // 	// 	   pre,energy_deriv,distab,distbc,ab.dot(bc));	
-    // 	// } else pre = sqrt(pre);
-    // 	// if (distab == distbc) {
-    // 	//     printf("GOOD ANGLE: pre, energy_deriv, distab, distbc, ab.dot(bc): (%f %f %f %f %f)\n",
-    // 	// 	   pre,energy_deriv,distab,distbc,ab.dot(bc));	
-    // 	// }
-	    
-    // 	pre = pre > 1e-6 ? sqrt(pre) : 1e-3;
-    // 	energy_deriv /= pre;
-
-    // 	TwoVector3 force;
-    // 	//force.v1 = energy_deriv * Vector3::element_mult( 1-Vector3::element_mult(ab,ab)/distab, bc);
-    // 	//force.v2 = energy_deriv * Vector3::element_mult( 1-Vector3::element_mult(bc,bc)/distbc, ab);
-
-    // 	Vector3 abbc = Vector3::element_mult(-ab,bc);
-    // 	force.v1 = -energy_deriv * (bc-Vector3::element_mult(abbc, -ab/distab));
-    // 	force.v2 = -energy_deriv * (-ab-Vector3::element_mult(abbc, bc/distbc));
-    // 	return force;
-    // }
-
-    DEVICE inline void apply_angle_force(const Vector3* __restrict__ pos,
-					 const BaseGrid* __restrict__ sys,
-					 Vector3* __restrict__ force,
-					 int i, int j, int k, float energy_deriv) const {
-
-#ifdef __CUDA_ARCH__
-	const Vector3 ab = sys->wrapDiff(pos[j] - pos[i]);
-	const Vector3 bc = sys->wrapDiff(pos[k] - pos[j]);
-	// const Vector3 ac = sys->wrapDiff(pos[k] - pos[i]);
-
-	TwoVector3 f = get_angle_force(ab,bc, energy_deriv);
-
-	atomicAdd( &force[i], f.v1 );
-	atomicAdd( &force[j], -(f.v1 + f.v2) );
-	atomicAdd( &force[k], f.v2 );
-#endif
-    }
-
-    DEVICE inline void apply_dihedral_force(const Vector3* __restrict__ pos,
-					    const BaseGrid* __restrict__ sys,
-					    Vector3* __restrict__ force,
-					    int i, int j, int k, int l, float energy_deriv) const {
-#ifdef __CUDA_ARCH__
-	const Vector3 ab = -sys->wrapDiff(pos[j] - pos[i]);
-	const Vector3 bc = -sys->wrapDiff(pos[k] - pos[j]);
-	const Vector3 cd = -sys->wrapDiff(pos[l] - pos[k]);
-
-	const Vector3 crossABC = ab.cross(bc);
-	const Vector3 crossBCD = bc.cross(cd);
-	const Vector3 crossX = bc.cross(crossABC);
-
-	const float cos_phi = crossABC.dot(crossBCD) / (crossABC.length() * crossBCD.length());
-	const float sin_phi = crossX.dot(crossBCD) / (crossX.length() * crossBCD.length());
-
-	// return -atan2(sin_phi,cos_phi);
-	Vector3 f1, f2, f3; // forces
-	float distbc = bc.length();
-
-	f1 = -distbc * crossABC.rLength2() * crossABC;
-	f3 = -distbc * crossBCD.rLength2() * crossBCD;
-	f2 = -(ab.dot(bc) * bc.rLength2()) * f1 - (bc.dot(cd) * bc.rLength2()) * f3;
-
-	// energy_deriv = (ab.length2()*bc.length2()*crossABC.rLength2() > 100.0f || bc.length2()*cd.length2()*crossBCD.rLength2() > 100.0f) ? 0.0f : energy_deriv;
-	/* if ( energy_deriv > 1000.0f ) */
-	/*     energy_deriv = 1000.0f; */
-	/* if ( energy_deriv < -1000.0f ) */
-	/*     energy_deriv = -1000.0f; */
-
-	f1 *= energy_deriv;
-	f2 *= energy_deriv;
-	f3 *= energy_deriv;
-
-	atomicAdd( &force[i], f1 );
-	atomicAdd( &force[j], f2-f1 );
-	atomicAdd( &force[k], f3-f2 );
-	atomicAdd( &force[l], -f3 );
-#endif
-    }
-    DEVICE inline void apply_vecangle_force(const Vector3* __restrict__ pos,
-					    const BaseGrid* __restrict__ sys,
-					    Vector3* __restrict__ force,
-					    int i, int j, int k, int l, float energy_deriv) const {
-
-#ifdef __CUDA_ARCH__
-
-	const Vector3 ab = -sys->wrapDiff(pos[j] - pos[i]);
-	const Vector3 bc = -sys->wrapDiff(pos[k] - pos[j]);
-	// const Vector3 ac = sys->wrapDiff(pos[k] - pos[i]);
-
-	TwoVector3 f = get_angle_force(ab,bc, energy_deriv);
-
-	atomicAdd( &force[i], f.v1 );
-	atomicAdd( &force[j], -f.v1 );
-	atomicAdd( &force[k], -f.v2 );
-	atomicAdd( &force[l], f.v2 );
-#endif
-    }
-
-};
-
-#ifndef delgpuErrchk
-#undef  delgpuErrchk
-#undef  gpuErrchk(code)
-#endif
-
-#endif
diff --git a/src/TrajectoryWriter.h b/src/TrajectoryWriter.h
deleted file mode 100644
index 264a5bdfe38694714b223a2b501df76fd3b7f5b3..0000000000000000000000000000000000000000
--- a/src/TrajectoryWriter.h
+++ /dev/null
@@ -1,293 +0,0 @@
-///////////////////////////////////////////////////////////////////////  
-// Author: Jeff Comer <jcomer2@illinois.edu>
-#ifndef TRAJECTORYWRITER_H
-#define TRAJECTORYWRITER_H
-
-#define PDB_TEMPLATE_LINE "ATOM      1  CA  MOL S   1      -6.210  -9.711   3.288  0.00  0.00      ION"
-
-#include <cstdio>
-#include "useful.h"
-#include "DcdWriter.h"
-
-class TrajectoryWriter {
-public:
-  const static int formatDcd = 0;
-  const static int formatPdb = 1;
-  const static int formatTraj = 2;
-
-  TrajectoryWriter(const char* filePrefix, const char* formatName, Matrix3 box0, int num0, float timestep0, int outputPeriod0) 
-    : box(box0), num(num0), timestep(timestep0), outputPeriod(outputPeriod0)  {
-    pdbTemplate = PDB_TEMPLATE_LINE;
-    format = getFormatCode(String(formatName));
-    makeUnitCell();
-
-    fileName = filePrefix;
-    fileName.add(".");
-    fileName.add(getFormatName(format));
-    
-    if (format == formatDcd) {
-      dcd = new DcdWriter(fileName);
-      dcd->writeHeader(fileName.val(), num, 1, 0, outputPeriod, 0, timestep, 1);
-    }
-  }
-
-  ~TrajectoryWriter() {
-    if (format == formatDcd) delete dcd;
-  }
-
-private:
-  Matrix3 box;
-  String fileName;
-  int format;
-  String pdbTemplate;
-  double unitCell[6];		/* use double for dcd format */
-  int num;
-  float timestep;
-  int outputPeriod;
-  DcdWriter* dcd;
-
-  void makeUnitCell() {
-    float pi = 4.0f*atan(1.0f);
-
-    unitCell[0] = box.ex().length();
-    unitCell[2] = box.ey().length();
-    unitCell[5] = box.ez().length();
-    
-    float bc = box.ey().dot(box.ez());
-    float ac = box.ex().dot(box.ez());
-    float ab = box.ex().dot(box.ey());
-
-    unitCell[1] = bc/unitCell[0]/unitCell[2]/pi*180.0f;
-    unitCell[3] = ac/unitCell[0]/unitCell[5]/pi*180.0f;
-    unitCell[4] = ab/unitCell[0]/unitCell[1]/pi*180.0f;
-  }
-
-public:
-  static int getFormatCode(String format) {
-    format.lower();
-    if (format == String("dcd")) return formatDcd;
-    if (format == String("pdb")) return formatPdb;
-    if (format == String("traj")) return formatTraj;
-    return formatDcd;
-  }
-
-  static String getFormatName(int formatCode) {
-    switch(formatCode) {
-    case formatPdb:
-      return String("pdb");
-    case formatTraj:
-      return String("traj");
-    case formatDcd:
-      return String("dcd");
-    default:
-      return String("dcd");
-    }
-  }
-
-  void newFile(const Vector3* pos, const String* name, float t, int n) const {
-    switch(format) {
-    case formatPdb:
-      newPdb(fileName, pos, name);
-      break;
-    case formatTraj:
-      newTraj(pos, name, t, n);
-      break;
-    case formatDcd:
-    default:
-      newDcd(pos, name);
-      break;
-    }
-  }
-
-  void newFile(const Vector3* pos, const String* name, const int* id, float t, int n) const {
-    switch(format) {
-    case formatPdb:
-      newPdb(fileName, pos, name);
-      break;
-    case formatTraj:
-      newTraj(pos, name, id, t, n);
-      break;
-    case formatDcd:
-    default:
-      newDcd(pos, name);
-      break;
-    }
-  }
-
-  void append(const Vector3* pos, const String* name, float t, int n) const {
-    switch(format) {
-    case formatPdb:
-      appendPdb(pos, name);
-      break;
-    case formatTraj:
-      appendTraj(pos, name, t, n);
-      break;
-    case formatDcd:
-    default:
-      appendDcd(pos);
-      break;
-    }
-  }
-  
-  void append(const Vector3* pos, const String* name, const int* id, float t, int n) const {
-    switch(format) {
-    case formatPdb:
-      appendPdb(pos, name);
-      break;
-    case formatTraj:
-      appendTraj(pos, name, id, t, n);
-      break;
-    case formatDcd:
-    default:
-      appendDcd(pos);
-      break;
-    }
-  }
-
-  void newPdb(const char* outFile, const Vector3* pos, const String* name) const {
-    char s[128];
-
-    sprintf(s, "CRYST1   %.3f   %.3f   %.3f  90.00  90.00  90.00 P 1           1\n", box.exx, box.eyy, box.ezz);
-    String sysLine(s);
-
-    sprintf(s, "REMARK   frameTime %.10g ns\n", outputPeriod*timestep);
-    String remarkLine(s);
-    
-    String line;
-
-    FILE* out = fopen(outFile, "w");
-    fprintf(out, "%s", sysLine.val());
-    fprintf(out, "%s", remarkLine.val());
-
-    for (int i = 0; i < num; i++) {
-      line = makePdbLine(pdbTemplate, i, name[i], i, name[i], pos[i], 0.0);
-      fprintf(out, "%s",  line.val());
-      fprintf(out, "\n");
-    }
-    fprintf(out, "END\n");
-    fclose(out);
-  }
-
-  void appendPdb(const Vector3* pos, const String* name) const {
-    String line;
-
-    FILE* out = fopen(fileName, "a");
-    for (int i = 0; i < num; i++) {
-      line = makePdbLine(pdbTemplate, i, name[i], i, name[i], pos[i], 0.0);
-      fprintf(out, "%s", line.val());
-      fprintf(out, "\n");
-    }
-    fprintf(out, "END\n");
-    fclose(out);
-  }
- 
-  void newTraj(const Vector3* pos, const String* name, float t, int n) const {
-    FILE* out = fopen(fileName, "w");
-    for (int i = 0; i < n; i++)
-      fprintf(out, "%s %.10g %.10g %.10g %.10g\n", name[i].val(), t, pos[i].x, pos[i].y, pos[i].z);
-    fprintf(out, "END\n");
-    fclose(out);
-  }
-
-  void newTraj(const Vector3* pos, const String* name, const int* id, float t, int n) const {
-    FILE* out = fopen(fileName, "w");
-    for (int i = 0; i < n; i++)
-      fprintf(out, "%s %.10g %.10g %.10g %.10g %d\n", name[i].val(), t, pos[i].x, pos[i].y, pos[i].z, id[i]);
-    fprintf(out, "END\n");
-    fclose(out);
-  }
-
-  void appendTraj(const Vector3* pos, const String* name, float t, int n) const {
-    FILE* out = fopen(fileName, "a");
-    for (int i = 0; i < n; i++)
-      fprintf(out, "%s %.10g %.10g %.10g %.10g\n", name[i].val(), t, pos[i].x, pos[i].y, pos[i].z);
-    fprintf(out, "END\n");
-    fclose(out);
-  }
-
-  void appendTraj(const Vector3* pos, const String* name, const int* id, float t, int n) const {
-    FILE* out = fopen(fileName, "a");
-    for (int i = 0; i < n; i++)
-      fprintf(out, "%s %.10g %.10g %.10g %.10g %d\n", name[i].val(), t, pos[i].x, pos[i].y, pos[i].z, id[i]);
-    fprintf(out, "END\n");
-    fclose(out);
-  }
-
-  void newDcd(const Vector3* pos, const String* name) const {
-    /*  
-    // Write a new pdb to store the atom names and such.
-    char pdbFile[128];
-    sprintf(pdbFile, "%s.pdb", fileName.val());
-    newPdb(pdbFile, pos, name);
-    */
-
-    // Write first frame.
-    appendDcd(pos);
-  }
-
-  void appendDcd(const Vector3* pos) const {
-    float* x = new float[num];
-    float* y = new float[num];
-    float* z = new float[num];
-
-    for (int i = 0; i < num; i++) {
-      x[i] = pos[i].x;
-      y[i] = pos[i].y;
-      z[i] = pos[i].z;
-    }
-    dcd->writeStep(num, x, y, z, unitCell);
-
-    delete[] x;
-    delete[] y;
-    delete[] z;
-  }
-
-  static String makePdbLine(const String& tempLine, int index, const String& segName, int resId, 
-			    const String& name, Vector3 r, float beta) {
-    char s[128];
-
-    String record("ATOM  ");
-    sprintf(s, "     %5i ", index);
-    String si = String(s).range(-6,-1);
-    if (name.length() == 4) sprintf(s, "%s   ", name.val());
-    else sprintf(s, " %s   ", name.val());
-    String nam = String(s).range(0,3);
-    String temp0 = tempLine.range(16,21);
-  
-    sprintf(s, "    %d", resId);
-    String res = String(s).range(-4,-1);
-    String temp1 = tempLine.range(26,29);
-  
-    sprintf(s,"       %.3f", r.x);
-    String sx = String(s).range(-8,-1);
-    sprintf(s,"       %.3f", r.y);
-    String sy = String(s).range(-8,-1);
-    sprintf(s,"       %.3f", r.z);
-    String sz = String(s).range(-8,-1);
-
-    String temp2 = tempLine.range(54,59);
-    sprintf(s,"    %.2f", beta);
-    String bet = String(s).range(-6,-1);
-    String temp3 = tempLine.range(66,71);
-
-    sprintf(s, "%s    ", segName.val());
-    String seg = String(s).range(0,3);
-
-    String ret(record);
-    ret.add(si);
-    ret.add(nam);
-    ret.add(temp0);
-    ret.add(res);
-    ret.add(temp1);
-    ret.add(sx);
-    ret.add(sy);
-    ret.add(sz);
-    ret.add(temp2);
-    ret.add(bet);
-    ret.add(temp3);
-    ret.add(seg);
-  
-    return ret;
-  }
-};
-#endif
diff --git a/src/WKFUtils.cpp b/src/WKFUtils.cpp
deleted file mode 100644
index e10dd85b2f7f5348b9a44fbc3e0214e480a9594d..0000000000000000000000000000000000000000
--- a/src/WKFUtils.cpp
+++ /dev/null
@@ -1,212 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 1995-2009 John E. Stone
- *cr
- ***************************************************************************/
-/***************************************************************************
- * RCS INFORMATION:
- *
- *      $RCSfile: WKFUtils.C,v $
- *      $Author: johns $        $Locker:  $             $State: Exp $
- *      $Revision: 1.1 $       $Date: 2009/10/26 14:59:44 $
- *
- ***************************************************************************/
-/*
- * Copyright (c) 1994-2009 John E. Stone
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- *    derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
- * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "WKFUtils.h"
-
-#include <string.h>
-#include <ctype.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#if defined(_MSC_VER)
-#include <windows.h>
-#include <conio.h>
-#else
-#include <unistd.h>
-#include <sys/time.h>
-#include <errno.h>
-
-#if defined(ARCH_AIX4)
-#include <strings.h>
-#endif
-
-#if defined(__irix)
-#include <bstring.h>
-#endif
-
-#if defined(__hpux)
-#include <time.h>
-#endif // HPUX
-#endif // _MSC_VER
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-#if defined(_MSC_VER)
-typedef struct {
-  DWORD starttime;
-  DWORD endtime;
-} wkf_timer;
-
-void wkf_timer_start(wkf_timerhandle v) {
-  wkf_timer * t = (wkf_timer *) v;
-  t->starttime = GetTickCount();
-}
-
-void wkf_timer_stop(wkf_timerhandle v) {
-  wkf_timer * t = (wkf_timer *) v;
-  t->endtime = GetTickCount();
-}
-
-double wkf_timer_time(wkf_timerhandle v) {
-  wkf_timer * t = (wkf_timer *) v;
-  double ttime;
-
-  ttime = ((double) (t->endtime - t->starttime)) / 1000.0;
-
-  return ttime;
-}
-
-double wkf_timer_start_time(wkf_timerhandle v) {
-  wkf_timer * t = (wkf_timer *) v;
-  double ttime;
-  ttime = ((double) (t->starttime)) / 1000.0;
-  return ttime;
-}
-
-double wkf_timer_stop_time(wkf_timerhandle v) {
-  wkf_timer * t = (wkf_timer *) v;
-  double ttime;
-  ttime = ((double) (t->endtime)) / 1000.0;
-  return ttime;
-}
-
-#else
-
-// Unix with gettimeofday()
-typedef struct {
-  struct timeval starttime, endtime;
-  struct timezone tz;
-} wkf_timer;
-
-void wkf_timer_start(wkf_timerhandle v) {
-  wkf_timer * t = (wkf_timer *) v;
-  gettimeofday(&t->starttime, &t->tz);
-}
-
-void wkf_timer_stop(wkf_timerhandle v) {
-  wkf_timer * t = (wkf_timer *) v;
-  gettimeofday(&t->endtime, &t->tz);
-}
-
-double wkf_timer_time(wkf_timerhandle v) {
-  wkf_timer * t = (wkf_timer *) v;
-  double ttime;
-  ttime = ((double) (t->endtime.tv_sec - t->starttime.tv_sec)) +
-          ((double) (t->endtime.tv_usec - t->starttime.tv_usec)) / 1000000.0;
-  return ttime;
-}
-
-double wkf_timer_start_time(wkf_timerhandle v) {
-  wkf_timer * t = (wkf_timer *) v;
-  double ttime;
-  ttime = ((double) t->starttime.tv_sec) +
-          ((double) t->starttime.tv_usec) / 1000000.0;
-  return ttime;
-}
-
-double wkf_timer_stop_time(wkf_timerhandle v) {
-  wkf_timer * t = (wkf_timer *) v;
-  double ttime;
-  ttime = ((double) t->endtime.tv_sec) +
-          ((double) t->endtime.tv_usec) / 1000000.0;
-  return ttime;
-}
-
-#endif
-
-// system independent routines to create and destroy timers
-wkf_timerhandle wkf_timer_create(void) {
-  wkf_timer * t;
-  t = (wkf_timer *) malloc(sizeof(wkf_timer));
-  memset(t, 0, sizeof(wkf_timer));
-  return t;
-}
-
-void wkf_timer_destroy(wkf_timerhandle v) {
-  free(v);
-}
-
-double wkf_timer_timenow(wkf_timerhandle v) {
-  wkf_timer_stop(v);
-  return wkf_timer_time(v);
-}
-
-/// initialize status message timer
-wkfmsgtimer * wkf_msg_timer_create(double updatetime) {
-  wkfmsgtimer *mt;
-  mt = (wkfmsgtimer *) malloc(sizeof(wkfmsgtimer));
-  if (mt != NULL) {
-    mt->timer = wkf_timer_create();
-    mt->updatetime = updatetime;
-    wkf_timer_start(mt->timer);
-  }
-  return mt;
-}
-
-/// return true if it's time to print a status update message
-int wkf_msg_timer_timeout(wkfmsgtimer *mt) {
-  double elapsed = wkf_timer_timenow(mt->timer);
-  if (elapsed > mt->updatetime) {
-    // reset the clock and return true that our timer expired
-    wkf_timer_start(mt->timer);
-    return 1;
-  } else if (elapsed < 0) {
-    // time went backwards, best reset our clock!
-    wkf_timer_start(mt->timer);
-  }
-  return 0;
-}
-
-/// destroy message timer
-void wkf_msg_timer_destroy(wkfmsgtimer * mt) {
-  wkf_timer_destroy(mt->timer);
-  free(mt);
-}
-
-#ifdef __cplusplus
-}
-#endif
-
diff --git a/src/WKFUtils.h b/src/WKFUtils.h
deleted file mode 100644
index d05a1beff419b875a132340627c33aa197079ac8..0000000000000000000000000000000000000000
--- a/src/WKFUtils.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 1995-2009 John E. Stone
- *cr
- ***************************************************************************/
-/***************************************************************************
- * RCS INFORMATION:
- *
- *      $RCSfile: WKFUtils.h,v $
- *      $Author: johns $        $Locker:  $             $State: Exp $
- *      $Revision: 1.1 $       $Date: 2009/10/26 14:59:45 $
- *
- ***************************************************************************/
-/*
- * Copyright (c) 1994-2009 John E. Stone
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- *    derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
- * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef WKF_UTILS_INC
-#define WKF_UTILS_INC 1
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef void * wkf_timerhandle;            ///< a timer handle
-wkf_timerhandle wkf_timer_create(void);    ///< create a timer (clears timer)
-void wkf_timer_destroy(wkf_timerhandle);   ///< create a timer (clears timer)
-void wkf_timer_start(wkf_timerhandle);     ///< start a timer  (clears timer)
-void wkf_timer_stop(wkf_timerhandle);      ///< stop a timer
-double wkf_timer_time(wkf_timerhandle);    ///< report elapsed time in seconds
-double wkf_timer_timenow(wkf_timerhandle); ///< report elapsed time in seconds
-double wkf_timer_start_time(wkf_timerhandle); ///< report wall starting time
-double wkf_timer_stop_time(wkf_timerhandle); ///< report wall stopping time
-
-typedef struct {
-  wkf_timerhandle timer;
-  double updatetime;
-} wkfmsgtimer;
-
-/// initialize periodic status message timer
-extern wkfmsgtimer * wkf_msg_timer_create(double updatetime);
-
-/// return true if it's time to print a status update message
-extern int wkf_msg_timer_timeout(wkfmsgtimer *time);
-
-/// destroy message timer
-void wkf_msg_timer_destroy(wkfmsgtimer * mt);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/arbd.cpp b/src/arbd.cpp
deleted file mode 100644
index 9c8a83c800f9ae7f129ee4236f8830747f4acf5d..0000000000000000000000000000000000000000
--- a/src/arbd.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-///////////////////////////////////////////////////////////////////////
-// Author: Jeff Comer <jcomer2@illinois.edu>
-#include <cstdio>
-#include <cuda.h>
-#include <sstream>
-
-#include "useful.h"
-#include "GrandBrownTown.h"
-#include "Configuration.h"
-#include "GPUManager.h"
-
-#include "SignalManager.h"
-
-// using namespace std;
-using std::max;
-
-const unsigned int kIMDPort = 71992;
-
-int main(int argc, char* argv[]) {
-    SignalManager::manage_segfault();
-
-	if (argc == 2 && (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0)) {
-		// --help
-		printf("Usage: %s [OPTIONS [ARGS]] CONFIGFILE OUTPUT [SEED]\n", argv[0]);
-		printf("\n");
-		printf("  -r, --replicas=    Number of replicas to run\n");
-		printf("  -g, --gpu=         Index of gpu to use (defaults to 0)\n");
-		printf("  -i, --imd=         IMD port (defaults to %d)\n", kIMDPort);
-		printf("  -d, --debug        Debug mode: allows user to choose which forces are computed\n");
-		printf("  --safe             Do not use GPUs that may timeout\n");
-		printf("  --unsafe           Use GPUs that may timeout (default)\n");
-		printf("  -h, --help         Display this help and exit\n");
-		printf("  --info             Output CPU and GPU information and exit\n");
-		printf("  --version          Output version information and exit\n");
-		return 0;
-	} else if (argc == 2 && (strcmp(argv[1], "--version") == 0)) {
-		// --version
-		// printf("%s Nov 2016 (alpha)\n", argv[0]);
-#ifdef VERSION
-	    printf("%s %s\n", argv[0], VERSION);
-#else
-	    printf("%s Nov 2016 (alpha)\n", argv[0]);
-#endif
-		return 0;
-	} else if (argc == 2 && (strcmp(argv[1], "--info") == 0)) {
-		// --info
-		GPUManager::load_info();
-		printf("Returning\n");
-		// size_t n_gpus = max(GPUManager::gpus.size(), 1lu);
-		return 0;
-	} else if (argc < 3) {
-		printf("%s: missing arguments\n", argv[0]);
-    printf("Try '%s --help' for more information.\n", argv[0]);
-    return 1;
-  }
-	// printf("Everything's great when you're...BrownTown\n");
-	printf("  â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“\n");
-	printf("  |    Atomic Resolution Brownian Dynamics    |\n");
-	printf("  â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“â€“\n");
-	GPUManager::init();
-
-	size_t n_gpus = GPUManager::allGpuSize();
-	std::vector<unsigned int> gpuIDs;
-	
-	bool debug = false, safe = false;
-	int replicas = 1;
-	unsigned int imd_port = 0;
-	bool imd_on = false;
-	int num_flags = 0;
-	for (int pos = 1; pos < argc; pos++) {
-		const char *arg = argv[pos];
-		if (strcmp(arg, "--safe") == 0) {
-			safe = true;
-			num_flags++;
-		} else if (strcmp(arg, "--unsafe") == 0) {
-			safe = false;
-			num_flags++;
-		} else if (strcmp(arg, "-d") == 0 || strcmp(arg, "--debug") == 0) {
-			debug = true;
-			num_flags++;
-
-		} else if (strcmp(arg, "-g") == 0 || strcmp(arg, "--gpu") == 0) {
-		    String argval(argv[pos+1]);
-		    int nTokens = argval.tokenCount(',');
-		    String* tokens = new String[nTokens];
-		    argval.tokenize(tokens,',');
-		    for (int i = 0; i < nTokens; ++i) {
-			unsigned int arg_val = atoi(tokens[i].val());
-			if (arg_val < 0 || arg_val > n_gpus) {
-			    printf("ERROR: Invalid argument given to %s: %s\n", arg, tokens[i].val());
-				return 1;
-			}
-			std::vector<unsigned int>::iterator it;
-			it = std::find(gpuIDs.begin(), gpuIDs.end(), arg_val);
-			if (it != gpuIDs.end()) {
-			    printf("WARNING: ignoring repeated GPU ID %d\n", arg_val);
-			} else {
-			    gpuIDs.push_back(arg_val);
-			}
-		    }
-		    delete[] tokens;
-		    safe = false;
-		    num_flags += 2;
-			
-		} else if (strcmp(arg, "-r") == 0 || strcmp(arg, "--replicas") == 0) {
-			int arg_val = atoi(argv[pos + 1]);
-			if (arg_val <= 0) {
-				printf("ERROR: Invalid argument given to %s\n", arg);
-				return 1;
-			}
-			replicas = arg_val;
-			num_flags += 2;
-		} else if (strcmp(arg, "-i") == 0 || strcmp(arg, "--imd") == 0) {
-			int arg_val = atoi(argv[pos + 1]);
-			if (arg_val <= 0) {
-				imd_port = kIMDPort;
-			} else {
-				imd_port = arg_val;
-				num_flags++;
-			}
-			imd_on = true;
-			num_flags++;
-		}
-		
-		if (argc - num_flags < 3) {
-			printf("%s: missing arguments\n", argv[0]);
-			printf("Try '%s --help' for more information.\n", argv[0]);
-			return 1;
-		}
-	}
-
-	char* configFile = NULL;
-	char* outArg = NULL;
-	if (argc - num_flags == 3) {
-		configFile = argv[argc - 2];
-		outArg = argv[argc - 1];
-	} else {
-	    printf("%s: too many arguments\n", argv[0]);
-	    printf("Try '%s --help' for more information.\n", argv[0]);
-	    return 1;
-	}
-
-	GPUManager::safe(safe);
-	if (gpuIDs.size() == 0)
-	    gpuIDs.push_back( GPUManager::getInitialGPU() );
-
-	#ifndef USE_NCCL
-	if (gpuIDs.size() > 1) {
-	    printf("ERROR: more than one GPU requires compilation with USE_NCCL flag\n");
-	    return 1;
-	}
-	#endif
-
-	GPUManager::select_gpus(gpuIDs);
-
-	Configuration config(configFile, replicas, debug);
-	config.copyToCUDA();
-	// GPUManager::set(0);
-
-	GrandBrownTown brown(config, outArg,
-			debug, imd_on, imd_port, replicas);
-
-	brown.run();
-  return 0;
-
-}
diff --git a/src/namd_common.h b/src/common.h
similarity index 100%
rename from src/namd_common.h
rename to src/common.h
diff --git a/src/cuda-test.c b/src/cuda-test.c
deleted file mode 100644
index b3456ec5a25e0eab7e0d502a7fecc8b5fe018c32..0000000000000000000000000000000000000000
--- a/src/cuda-test.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#include <cuda_runtime.h>
-int main(int argc, char **argv) {}
diff --git a/src/findcudalib.mk b/src/findcudalib.mk
deleted file mode 100644
index 7f4c10c89b10f502c17e95ebc9f5205820e76109..0000000000000000000000000000000000000000
--- a/src/findcudalib.mk
+++ /dev/null
@@ -1,109 +0,0 @@
-################################################################################
-#
-# Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
-#
-# NOTICE TO USER:   
-#
-# This source code is subject to NVIDIA ownership rights under U.S. and 
-# international Copyright laws.  
-#
-# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE 
-# CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR 
-# IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH 
-# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF 
-# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.   
-# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, 
-# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS 
-# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 
-# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE 
-# OR PERFORMANCE OF THIS SOURCE CODE.  
-#
-# U.S. Government End Users.  This source code is a "commercial item" as 
-# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting  of 
-# "commercial computer software" and "commercial computer software 
-# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) 
-# and is provided to the U.S. Government only as a commercial end item.  
-# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through 
-# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the 
-# source code with only those rights set forth herein.
-#
-################################################################################
-#
-#  findcudalib.mk is used to find the locations for CUDA libraries and other
-#                 Unix Platforms.  This is supported Mac OS X and Linux.
-#
-################################################################################
-
-## Find Location of most recent CUDA Toolkit
-ifeq (,$(CUDA_PATH))
-    CUDA_PATH := $(shell echo $(wildcard /usr/local/cuda*) | tr ' ' '\n' | tail -n1)
-    ifeq (,$(CUDA_PATH))
-        $(error Could not CUDA_PATH. Please pass as follows: $(MAKE) CUDA_PATH=/path/to/cuda)
-    endif
-    $(info Using CUDA_PATH=$(CUDA_PATH))
-endif
-
-# OS Name (Linux or Darwin)
-OSUPPER = $(shell uname -s 2>/dev/null | tr "[:lower:]" "[:upper:]")
-OSLOWER = $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
-
-# Flags to detect 32-bit or 64-bit OS platform
-OS_SIZE = $(shell uname -m | sed -e "s/i.86/32/" -e "s/x86_64/64/" -e "s/armv7l/32/")
-OS_ARCH = $(shell uname -m | sed -e "s/i386/i686/")
-
-# Determine OS platform and unix distribution
-ifeq ("$(OSLOWER)","linux")
-   # first search lsb_release
-   DISTRO  = $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
-   DISTVER = $(shell lsb_release -r -s 2>/dev/null)
-   ifeq ("$(DISTRO)",'') 
-     # second search and parse /etc/issue
-     DISTRO = $(shell more /etc/issue | awk '{print $$1}' | sed '1!d' | sed -e "/^$$/d" 2>/dev/null | tr "[:upper:]" "[:lower:]")
-     DISTVER= $(shell more /etc/issue | awk '{print $$2}' | sed '1!d' 2>/dev/null
-   endif
-   ifeq ("$(DISTRO)",'') 
-     # third, we can search in /etc/os-release or /etc/{distro}-release
-     DISTRO = $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB")
-     DISTVER= $(shell awk '/DISTRIB_RELEASE/' /etc/*-release | sed 's/DISTRIB_RELEASE=//' | grep -v "DISTRIB_RELEASE")
-   endif
-endif
-
-# search at Darwin (unix based info)
-DARWIN = $(strip $(findstring DARWIN, $(OSUPPER)))
-ifneq ($(DARWIN),)
-   SNOWLEOPARD = $(strip $(findstring 10.6, $(shell egrep "<string>10\.6" /System/Library/CoreServices/SystemVersion.plist)))
-   LION        = $(strip $(findstring 10.7, $(shell egrep "<string>10\.7" /System/Library/CoreServices/SystemVersion.plist)))
-   MOUNTAIN    = $(strip $(findstring 10.8, $(shell egrep "<string>10\.8" /System/Library/CoreServices/SystemVersion.plist)))
-   MAVERICKS   = $(strip $(findstring 10.9, $(shell egrep "<string>10\.9" /System/Library/CoreServices/SystemVersion.plist)))
-   MAVERICKS   = $(strip $(findstring 10.9, $(shell egrep "<string>10\.9" /System/Library/CoreServices/SystemVersion.plist)))
-endif 
-
-# Common binaries
-GCC   ?= g++
-CLANG ?= /usr/bin/clang++
-
-ifeq ("$(OSUPPER)","LINUX")
-	CC=$(GCC)
-else
-    # for some newer versions of XCode, CLANG is the default compiler, so we need to include this
-    ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
-        CC = $(CLANG)
-		CC_FLAGS += -stdlib=libstdc++
-		NV_FLAGS += -Xcompiler -arch -Xcompiler x86_64 -Xcompiler -stdlib=libstdc++
-    endif
-endif
-NVCC ?= $(CUDA_PATH)/bin/nvcc -ccbin $(CC)
-
-# Take command line flags that override any of these settings
-ifeq ($(i386),1)
-	OS_SIZE = 32
-	OS_ARCH = i686
-endif
-ifeq ($(x86_64),1)
-	OS_SIZE = 64
-	OS_ARCH = x86_64
-endif
-ifeq ($(ARMv7),1)
-	OS_SIZE = 32
-	OS_ARCH = armv7l
-endif
diff --git a/src/gridSampleDef.c b/src/gridSampleDef.c
deleted file mode 100644
index c89bec557e8bd8f56856ad70c3d8bdd455eb4723..0000000000000000000000000000000000000000
--- a/src/gridSampleDef.c
+++ /dev/null
@@ -1,46 +0,0 @@
-///////////////////////////////////////////////////////////////////////  
-// Author: Jeff Comer <jcomer2@illinois.edu>
-
-#include <cstdio>
-#include <cstdlib>
-
-#include "useful.h"
-#include "BaseGrid.h"
-#include "OverlordGrid.h"
-
-//using namespace std;
-
-int main(int argc, char* argv[]) {
-  if ( argc != 4 ) {
-    printf("Usage: %s systemDefinitionFile spacing outputFile\n", argv[0]);
-    printf("You entered %i arguments.\n", argc-1);
-    return 0;
-  }
-  const char* systemDefFile = argv[1];
-  const float spacing = strtod(argv[2], NULL);
-
-  String rootGrid = OverlordGrid::readDefFirst(systemDefFile);
-  OverlordGrid* over = new OverlordGrid(rootGrid.val());
-  int count = over->readDef(systemDefFile);
-  printf("Found %d unique grids.\n", over->getUniqueGridNum());
-  printf("Linked %d subgrids.\n", count);
-
-  over->writeSubgrids("subgrid.txt");
-
-  Matrix3 box(over->getBox());
-  Vector3 org(over->getOrigin());
-  BaseGrid sample(box, org, spacing);
-  const int n = sample.length();
-
-  printf("Sampling...\n");
-  for (int i = 0; i < n; i++) {
-    Vector3 r = sample.getPosition(i);
-    float v = over->interpolatePotential(r);
-    //Vector3 f = over->interpolateForce(r);
-    sample.setValue(i, v);
-  }
-  sample.write(argv[argc-1]);
-
-  delete over;
-  return 0;
-}
diff --git a/src/gridSampleFlow.c b/src/gridSampleFlow.c
deleted file mode 100644
index f5f2896fbb9fc9e8e5e81efa817bda32dd373fd6..0000000000000000000000000000000000000000
--- a/src/gridSampleFlow.c
+++ /dev/null
@@ -1,37 +0,0 @@
-///////////////////////////////////////////////////////////////////////  
-// Author: Jeff Comer <jcomer2@illinois.edu>
-
-#include <cstdio>
-#include <cstdlib>
-
-#include "useful.h"
-#include "BaseGrid.h"
-#include "FlowForce.h"
-
-// using namespace std;
-
-int main(int argc, char* argv[]) {
-  if ( argc != 4 ) {
-    printf("Usage: %s inGridFile diffusion outGridFile\n", argv[0]);
-    printf("You entered %i arguments.\n", argc-1);
-    return 0;
-  }
-
-  const char* inGrid = argv[1];
-  const float diffusion = strtod(argv[2], NULL);
-  const char* outGrid = argv[argc-1];
-
-  BaseGrid sample(inGrid);
-  FlowForce flow;
-  const int n = sample.length();
-
-  printf("Sampling...\n");
- for (int i = 0; i < n; i++) {
-    Vector3 r = sample.getPosition(i);
-    Vector3 f = flow.force(r,diffusion);
-    sample.setValue(i, f.x);
-  }
-  sample.write(outGrid);
-
-  return 0;
-}
diff --git a/src/imd.cpp b/src/imd.cpp
deleted file mode 100644
index 779ef579ab0e2d95f796d923956adcdf114ed723..0000000000000000000000000000000000000000
--- a/src/imd.cpp
+++ /dev/null
@@ -1,259 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 1995-2016 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/***************************************************************************
- * RCS INFORMATION:
- *
- *      $RCSfile: imd.C,v $
- *      $Author: johns $        $Locker:  $             $State: Exp $
- *      $Revision: 1.19 $       $Date: 2016/11/28 03:05:07 $
- *
- ***************************************************************************
- * DESCRIPTION:
- *  Lowest level interactive MD communication routines.
- *
- * LICENSE:
- *   UIUC Open Source License
- *   http://www.ks.uiuc.edu/Research/vmd/plugins/pluginlicense.html
- *
- ***************************************************************************/
-#include "imd.h"
-#include "vmdsock.h"
-#include <string.h>
-#include <errno.h>
-#include <stdlib.h>
-
-/// IMD communication protocol message header structure
-typedef struct {
-  int32 type;
-  int32 length;
-} IMDheader;
-
-#define HEADERSIZE 8
-#define IMDVERSION 2
-
-/* Only works with aligned 4-byte quantities, will cause a bus error */
-/* on some platforms if used on unaligned data.                      */
-void swap4_aligned(void *v, long ndata) {
-  int *data = (int *) v;
-  long i;
-  int *N;
-  for (i=0; i<ndata; i++) {
-    N = data + i;
-    *N=(((*N>>24)&0xff) | ((*N&0xff)<<24) |
-        ((*N>>8)&0xff00) | ((*N&0xff00)<<8));
-  }
-}
-
-static int32 imd_htonl(int32 h) {
-  int32 n;
-  ((char *)&n)[0] = (h >> 24) & 0x0FF;
-  ((char *)&n)[1] = (h >> 16) & 0x0FF;
-  ((char *)&n)[2] = (h >> 8) & 0x0FF;
-  ((char *)&n)[3] = h & 0x0FF;
-  return n;
-}
-
-/// structure used to perform byte swapping operations 
-typedef struct {
-  unsigned int highest : 8;
-  unsigned int high    : 8;
-  unsigned int low     : 8;
-  unsigned int lowest  : 8;
-} netint;
-
-static int32 imd_ntohl(int32 n) {
-  int32 h = 0;
-  netint net;
-
-  memcpy((void *)&net,(void *)&n, sizeof(n));
-  h |= net.highest << 24 | net.high << 16 | net.low << 8 | net.lowest;
-  return h;
-}
-
-static void fill_header(IMDheader *header, IMDType type, int32 length) {
-  header->type = imd_htonl((int32)type);
-  header->length = imd_htonl(length);
-}
-
-static void swap_header(IMDheader *header) {
-  header->type = imd_ntohl(header->type);
-  header->length= imd_ntohl(header->length);
-}
-
-static int32 imd_readn(void *s, char *ptr, int32 n) {
-  int32 nleft;
-  int32 nread;
- 
-  nleft = n;
-  while (nleft > 0) {
-    if ((nread = vmdsock_read(s, ptr, nleft)) < 0) {
-      if (errno == EINTR)
-        nread = 0;         /* and call read() again */
-      else
-        return -1;
-    } else if (nread == 0)
-      break;               /* EOF */
-    nleft -= nread;
-    ptr += nread;
-  }
-  return n-nleft;
-}
-
-static int32 imd_writen(void *s, const char *ptr, int32 n) {
-  int32 nleft;
-  int32 nwritten;
-
-  nleft = n;
-  while (nleft > 0) {
-    if ((nwritten = vmdsock_write(s, ptr, nleft)) <= 0) {
-      if (errno == EINTR)
-        nwritten = 0;
-      else
-        return -1;
-    }
-    nleft -= nwritten;
-    ptr += nwritten;
-  }
-  return n;
-}
- 
-
-int imd_disconnect(void *s) {
-  IMDheader header;
-  fill_header(&header, IMD_DISCONNECT, 0);
-  return (imd_writen(s, (char *)&header, HEADERSIZE) != HEADERSIZE);
-}
-
-int imd_pause(void *s) {
-  IMDheader header;
-  fill_header(&header, IMD_PAUSE, 0);
-  return (imd_writen(s, (char *)&header, HEADERSIZE) != HEADERSIZE);
-}
-
-int imd_kill(void *s) {
-  IMDheader header;
-  fill_header(&header, IMD_KILL, 0);
-  return (imd_writen(s, (char *)&header, HEADERSIZE) != HEADERSIZE);
-}
-
-static int imd_go(void *s) {
-  IMDheader header;
-  fill_header(&header, IMD_GO, 0);
-  return (imd_writen(s, (char *)&header, HEADERSIZE) != HEADERSIZE);
-}
-
-
-int imd_handshake(void *s) {
-  IMDheader header;
-  fill_header(&header, IMD_HANDSHAKE, 1);
-  header.length = IMDVERSION;   /* Not byteswapped! */
-  return (imd_writen(s, (char *)&header, HEADERSIZE) != HEADERSIZE);
-}
-
-int imd_trate(void *s, int32 rate) {
-  IMDheader header;
-  fill_header(&header, IMD_TRATE, rate);
-  return (imd_writen(s, (char *)&header, HEADERSIZE) != HEADERSIZE);
-}
-
-/* Data methods */
-int imd_send_mdcomm(void *s,int32 n,const int32 *indices,const float *forces) {
-  int rc;
-  int32 size = HEADERSIZE+16L*n;
-  char *buf = (char *) malloc(sizeof(char) * size); 
-  fill_header((IMDheader *)buf, IMD_MDCOMM, n);
-  memcpy(buf+HEADERSIZE, indices, 4L*n);
-  memcpy(buf+HEADERSIZE+4*n, forces, 12L*n);
-  rc = (imd_writen(s, buf, size) != size);
-  free(buf);
-  return rc;
-}
-
-int imd_send_energies(void *s, const IMDEnergies *energies) {
-  int rc;
-  int32 size = HEADERSIZE+sizeof(IMDEnergies);
-  char *buf = (char *) malloc(sizeof(char) * size);
-  fill_header((IMDheader *)buf, IMD_ENERGIES, 1);
-  memcpy(buf+HEADERSIZE, energies, sizeof(IMDEnergies));
-  rc = (imd_writen(s, buf, size) != size);
-  free(buf);
-  return rc;
-}
-
-int imd_send_fcoords(void *s, int32 n, const float *coords) {
-  int rc;
-  int32 size = HEADERSIZE+12L*n;
-  char *buf = (char *) malloc(sizeof(char) * size); 
-  fill_header((IMDheader *)buf, IMD_FCOORDS, n);
-  memcpy(buf+HEADERSIZE, coords, 12L*n);
-  rc = (imd_writen(s, buf, size) != size);
-  free(buf);
-  return rc;
-}
-
-/* The IMD receive functions */
-IMDType imd_recv_header_nolengthswap(void *s, int32 *length) {
-  IMDheader header;
-  if (imd_readn(s, (char *)&header, HEADERSIZE) != HEADERSIZE)
-    return IMD_IOERROR;
-  *length = header.length;
-  swap_header(&header);
-  return (IMDType) header.type; 
-}
-
-IMDType imd_recv_header(void *s, int32 *length) {
-  IMDheader header;
-  if (imd_readn(s, (char *)&header, HEADERSIZE) != HEADERSIZE)
-    return IMD_IOERROR;
-  swap_header(&header);
-  *length = header.length;
-  return (IMDType) header.type; 
-}
-
-int imd_recv_handshake(void *s) {
-  int32 buf;
-  IMDType type;
-
-  /* Wait up to 5 seconds for the handshake to come */
-  if (vmdsock_selread(s, 5) != 1) return -1;
-
-  /* Check to see that a valid handshake was received */
-  type = imd_recv_header_nolengthswap(s, &buf);
-  if (type != IMD_HANDSHAKE) return -1;
-
-  /* Check its endianness, as well as the IMD version. */
-  if (buf == IMDVERSION) {
-    if (!imd_go(s)) return 0;
-    return -1;
-  }
-
-  swap4_aligned(&buf, 1);
-  if (buf == IMDVERSION) {
-    if (!imd_go(s)) return 1;
-  }
-  
-  /* We failed to determine endianness. */
-  return -1; 
-}
-
-int imd_recv_mdcomm(void *s, int32 n, int32 *indices, float *forces) {
-  if (imd_readn(s, (char *)indices, 4L*n) != 4L*n) return 1;
-  if (imd_readn(s, (char *)forces, 12L*n) != 12L*n) return 1;
-  return 0;
-}
-
-int imd_recv_energies(void *s, IMDEnergies *energies) {
-  return (imd_readn(s, (char *)energies, sizeof(IMDEnergies))
-          != sizeof(IMDEnergies));
-}
-
-int imd_recv_fcoords(void *s, int32 n, float *coords) {
-  return (imd_readn(s, (char *)coords, 12L*n) != 12L*n);
-}
-
diff --git a/src/imd.h b/src/imd.h
deleted file mode 100644
index 3414cdf2051bb1fb669059636ebf5efe8e18bab8..0000000000000000000000000000000000000000
--- a/src/imd.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 1995-2016 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/***************************************************************************
- * RCS INFORMATION:
- *
- *      $RCSfile: imd.h,v $
- *      $Author: johns $       $Locker:  $             $State: Exp $
- *      $Revision: 1.22 $       $Date: 2016/11/28 03:05:07 $
- *
- * LICENSE:
- *   UIUC Open Source License
- *   http://www.ks.uiuc.edu/Research/vmd/plugins/pluginlicense.html
- *
- ***************************************************************************/
-
-#ifndef IMD_H__
-#define IMD_H__
-
-#include <limits.h>
-
-#if ( INT_MAX == 2147483647 )
-typedef int     int32;
-#else
-typedef short   int32;
-#endif
-
-
-typedef enum IMDType_t {
-  IMD_DISCONNECT,   /**< close IMD connection, leaving sim running */
-  IMD_ENERGIES,     /**< energy data block                         */
-  IMD_FCOORDS,      /**< atom coordinates                          */
-  IMD_GO,           /**< start the simulation                      */
-  IMD_HANDSHAKE,    /**< endianism and version check message       */
-  IMD_KILL,         /**< kill the simulation job, shutdown IMD     */
-  IMD_MDCOMM,       /**< MDComm style force data                   */
-  IMD_PAUSE,        /**< pause the running simulation              */
-  IMD_TRATE,        /**< set IMD update transmission rate          */
-  IMD_IOERROR       /**< indicate an I/O error                     */
-} IMDType;          /**< IMD command message type enumerations */
-
-
-typedef struct {
-  int32 tstep;      /**< integer timestep index                    */
-  float T;          /**< Temperature in degrees Kelvin             */
-  float Etot;       /**< Total energy, in Kcal/mol                 */
-  float Epot;       /**< Potential energy, in Kcal/mol             */
-  float Evdw;       /**< Van der Waals energy, in Kcal/mol         */
-  float Eelec;      /**< Electrostatic energy, in Kcal/mol         */
-  float Ebond;      /**< Bond energy, Kcal/mol                     */
-  float Eangle;     /**< Angle energy, Kcal/mol                    */
-  float Edihe;      /**< Dihedral energy, Kcal/mol                 */
-  float Eimpr;      /**< Improper energy, Kcal/mol                 */
-} IMDEnergies;      /**< IMD simulation energy report structure    */
-
-
-/* Send control messages - these consist of a header with no subsequent data */
-extern int imd_disconnect(void *);   /**< leave sim running but close IMD  */
-extern int imd_pause(void *);        /**< pause simulation                 */
-extern int imd_kill(void *);         /**< kill simulation, shutdown IMD    */
-extern int imd_handshake(void *);    /**< check endianness, version compat */
-extern int imd_trate(void *, int32); /**< set IMD update transmission rate */
-
-/* Send data update messages */
-
-/** Send MDComm compatible forces, units are Kcal/mol/angstrom */
-extern int imd_send_mdcomm(void *, int32, const int32 *, const float *);
-
-/** Send energies */
-extern int imd_send_energies(void *, const IMDEnergies *);
-
-/** Send atom forces and coordinates, units are Kcal/mol/angstrom */
-extern int imd_send_fcoords(void *, int32, const float *);
-
-/** 
- *  recv_handshake returns 0 if server and client have the same relative 
- *  endianism; returns 1 if they have opposite endianism, and -1 if there
- *  was an error in the handshake process.
- */
-extern int imd_recv_handshake(void *);
-
-/** Receive header and data */
-extern IMDType imd_recv_header(void *, int32 *);
-
-/** Receive MDComm-style forces, units are Kcal/mol/angstrom */
-extern int imd_recv_mdcomm(void *, int32, int32 *, float *);
-
-/** Receive energies */
-extern int imd_recv_energies(void *, IMDEnergies *);
-
-/** Receive atom coordinates and forces, units are Kcal/mol/angstrom */
-extern int imd_recv_fcoords(void *, int32, float *);
-
-#endif
-
diff --git a/src/makefile.clang b/src/makefile.clang
deleted file mode 100644
index 228fcf762e4fc1c45489941568427bfd82a3c413..0000000000000000000000000000000000000000
--- a/src/makefile.clang
+++ /dev/null
@@ -1,76 +0,0 @@
-# CC = g+
-# CC = /usr/local/Cellar/gcc44/4.4.7/bin/g++-4.4
-CC = /usr/bin/clang++
-
-PLATFORM = $(shell uname)
-FINDNVCC = $(shell which nvcc)
-
-#SYSTEM = $(shell $(CC) -dumpmachine) # might be better(?)
-
-ifeq ($(PLATFORM), Linux)
-  CUDAHOME = $(shell readlink -f $(FINDNVCC) | sed 's,/bin/nvcc,,')
-  CUDALIB = $(CUDAHOME)/lib64
-endif
-
-ifeq ($(PLATFORM), Darwin)
-  CUDAHOME = $(shell ./read_link.sh $(FINDNVCC) | sed 's,/bin/nvcc,,')
-  CUDALIB = $(CUDAHOME)/lib
-endif
-
-# if gcc ...
-#OPT = 
-
-# if clang ...
-OPT = -Xcompiler -arch -Xcompiler x86_64
-
-# Either compiler
-#OPT += -gencode arch=compute_10,code=sm_10 -gencode arch=compute_20,code=sm_20 -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=\"sm_35,compute_35\"
-
-
-# Redefine CUDAHOME and/or CUDALIB if necessary 
-#
-# ks.uiuc.edu (Beckman Institute)
-# CUDAHOME = /usr/local/encap/cuda-4.0
-# CUDALIB = $(CUDAHOME)/lib64
-# physics.illinois.edu (Loomis Laboratory)
-# CUDAHOME = /software/cuda-toolkit-5.0-x86_64/cuda
-
-CUDAINC  = $(CUDAHOME)/include
-
-NVCC = $(CUDAHOME)/bin/nvcc
-
-NVCCFLAGS = -ccbin $(CC) -m64 -O3 $(OPT)
-NVCCINC = -I -DUNIX
-
-CCFLAGS = -O3 -Wall -Wno-write-strings -m64 -I$(CUDAINC) -stdlib=libstdc++
-# stdlib is necessary for clang++
-
-LFLAGS = -L$(CUDALIB) -lcurand -lcudart -Wl,-rpath,$(CUDALIB) -I$(CUDAINC)
-
-TARGET = runBrownCUDA
-
-CC_SRC := $(wildcard *.cpp)
-CC_SRC := $(filter-out runBrownTown.cpp, $(CC_SRC))
-CU_SRC := $(wildcard *.cu)
-
-CC_OBJ := $(patsubst %.cpp, %.o, $(CC_SRC))
-CU_OBJ := $(patsubst %.cu, %.o, $(CU_SRC))
-
-# Make EXEC empty to compile
-# Make EXEC @echo to see what would be run otherwise
-EXEC =  @echo 
-
-all: $(TARGET)
-	@echo "Done ->" $(TARGET)
-
-$(TARGET): $(CU_OBJ) $(CC_OBJ) runBrownTown.cpp vmdsock.c imd.c imd.h
-	$(EXEC) $(CC) $(CCFLAGS) $(LFLAGS) runBrownTown.cpp vmdsock.c imd.c $(CU_OBJ) $(CC_OBJ) -o $(TARGET)
-
-$(CU_OBJ): %.o: %.cu %.h
-	$(EXEC) $(NVCC) $(NVCCFLAGS) -c $< -o $@
-	
-$(CC_OBJ): %.o: %.cpp %.h
-	$(EXEC) $(CC) $(CCFLAGS) -c $< -o $@
-	
-clean:
-	rm -f $(TARGET) $(CU_OBJ) $(CC_OBJ)
diff --git a/src/nvtx_defs.h b/src/nvtx_defs.h
deleted file mode 100644
index 01099f5d8d660e983d845341b9dc0e52f83df6e4..0000000000000000000000000000000000000000
--- a/src/nvtx_defs.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* Adapted from: https://developer.nvidia.com/blog/cuda-pro-tip-generate-custom-application-profile-timelines-nvtx/ */
-#ifdef USE_NVTX
-#include <nvToolsExt.h>
-
-const uint32_t nvtx_colors[] = { 0xff00ff00, 0xff0000ff, 0xffffff00, 0xffff00ff, 0xff00ffff, 0xffff0000, 0xffffffff };
-const int num_nvtx_colors = sizeof(nvtx_colors)/sizeof(uint32_t);
-
-#define PUSH_NVTX(name,cid) { \
-    int color_id = cid; \
-    color_id = color_id%num_nvtx_colors;\
-    nvtxEventAttributes_t eventAttrib = {0}; \
-    eventAttrib.version = NVTX_VERSION; \
-    eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \
-    eventAttrib.colorType = NVTX_COLOR_ARGB; \
-    eventAttrib.color = nvtx_colors[color_id]; \
-    eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \
-    eventAttrib.message.ascii = name; \
-    nvtxRangePushEx(&eventAttrib); \
-}
-#define POP_NVTX nvtxRangePop();
-#else
-#define PUSH_NVTX(name,cid)
-#define POP_NVTX
-#endif
diff --git a/src/useful.h b/src/useful.h
index 5038e21ef7a769d8149c28012fa310a947849896..25a8455d5d67be8934e4dc3dc98b7d5381a780ac 100644
--- a/src/useful.h
+++ b/src/useful.h
@@ -26,7 +26,26 @@
 #include <cstring>
 #include <cstdio>
 #include <cstdlib>
+
+#ifdef USE_CUDA
 #include <cuda_runtime.h>
+#else
+// Fallback float4 for builds without USE_CUDA, where <cuda_runtime.h> is not included.
+struct float4 {
+    float4() : x(0), y(0), z(0), w(0) {}
+    float4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {}
+    // Component-wise sum. Takes const float4& (not const float4&&) so named
+    // values bind as well as temporaries; const so it works on const operands.
+    float4 operator+(const float4& o) const {
+	return float4(x+o.x,y+o.y,z+o.z,w+o.w);
+    }
+    // Scales every component by s; s is taken by value so lvalues are accepted.
+    float4 operator*(float s) const {
+	return float4(x*s,y*s,z*s,w*s);
+    }
+    float x,y,z,w;
+};
+#endif
 
 // using namespace std;
 
@@ -37,6 +56,8 @@ bool isInt(char c);
 int firstSpace(const char* s, int max);
 
 
+
+
 /*class int2 {
 public:
 	int2(int x, int y) : x(x), y(y) {}
diff --git a/src/vmdsock.cpp b/src/vmdsock.cpp
deleted file mode 100644
index ceddf444879b380047b1cf5df99e7fb06ee98894..0000000000000000000000000000000000000000
--- a/src/vmdsock.cpp
+++ /dev/null
@@ -1,212 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 1995-2003 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/***************************************************************************
- * RCS INFORMATION:
- *
- *      $RCSfile: vmdsock.c,v $
- *      $Author: johns $        $Locker:  $             $State: Exp $
- *      $Revision: 1.1 $      $Date: 2003/09/12 18:30:46 $
- *
- ***************************************************************************
- * DESCRIPTION:
- *   Socket interface, abstracts machine dependent APIs/routines. 
- ***************************************************************************/
-
-#define VMDSOCKINTERNAL 1
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#if defined(_MSC_VER) 
-#include <winsock2.h>
-#else
-#include <arpa/inet.h>
-#include <fcntl.h>
-
-#include <sys/types.h>
-#include <unistd.h>   /* for Linux */
-#include <sys/socket.h>
-#include <netdb.h>
-#endif
-
-#include <errno.h>
-
-#include "vmdsock.h"
-
-int vmdsock_init(void) {
-#if defined(_MSC_VER)
-  int rc = 0;
-  static int initialized=0;
-
-  if (!initialized) {
-    WSADATA wsdata;
-    rc = WSAStartup(MAKEWORD(1,1), &wsdata);
-    if (rc == 0)
-      initialized = 1;
-  }
-
-  return rc;
-#else   
-  return 0;
-#endif
-}
-
-
-void * vmdsock_create(void) {
-  vmdsocket * s;
-
-  s = (vmdsocket *) malloc(sizeof(vmdsocket));
-  if (s != NULL)
-    memset(s, 0, sizeof(vmdsocket)); 
-
-  if ((s->sd = socket(PF_INET, SOCK_STREAM, 0)) == -1) {
-    printf("Failed to open socket.");
-    free(s);
-    return NULL;
-  }
-
-  return (void *) s;
-}
-
-int  vmdsock_connect(void *v, const char *host, int port) {
-  vmdsocket *s = (vmdsocket *) v;
-  char address[1030];
-  struct hostent *h;
-
-  h=gethostbyname(host);
-  if (h == NULL) 
-    return -1;
-  sprintf(address, "%d.%d.%d.%d",
-    (unsigned char) h->h_addr_list[0][0],
-    (unsigned char) h->h_addr_list[0][1],
-    (unsigned char) h->h_addr_list[0][2],
-    (unsigned char) h->h_addr_list[0][3]);
-
-  memset(&(s->addr), 0, sizeof(s->addr)); 
-  s->addr.sin_family = PF_INET;
-  s->addr.sin_addr.s_addr = inet_addr(address);
-  s->addr.sin_port = htons(port);  
-
-  return connect(s->sd, (struct sockaddr *) &s->addr, sizeof(s->addr)); 
-}
-
-int vmdsock_bind(void * v, int port) {
-  vmdsocket *s = (vmdsocket *) v;
-  memset(&(s->addr), 0, sizeof(s->addr)); 
-  s->addr.sin_family = PF_INET;
-  s->addr.sin_port = htons(port);
-
-  return bind(s->sd, (struct sockaddr *) &s->addr, sizeof(s->addr));
-}
-
-int vmdsock_listen(void * v) {
-  vmdsocket *s = (vmdsocket *) v;
-  return listen(s->sd, 5);
-}
-
-void *vmdsock_accept(void * v) {
-  int rc;
-  vmdsocket *new_s = NULL, *s = (vmdsocket *) v;
-#if defined(SOCKLEN_T)
-  SOCKLEN_T len;
-#elif defined(ARCH_LINUXALPHA)
-  socklen_t len;
-#else
-  // int len;
-#endif
-	socklen_t len;
-  len = sizeof(s->addr);
-  rc = accept(s->sd, (struct sockaddr *) &s->addr, &len);
-  if (rc >= 0) {
-    new_s = (vmdsocket *) malloc(sizeof(vmdsocket));
-    if (new_s != NULL) {
-      *new_s = *s;
-      new_s->sd = rc;
-    }
-  }
-  return (void *)new_s;
-}
-
-int  vmdsock_write(void * v, const void *buf, int len) {
-  vmdsocket *s = (vmdsocket *) v;
-#if defined(_MSC_VER)
-  return send(s->sd, (const char*) buf, len, 0);  // windows lacks the write() call
-#else
-  return write(s->sd, buf, len);
-#endif
-}
-
-int  vmdsock_read(void * v, void *buf, int len) {
-  vmdsocket *s = (vmdsocket *) v;
-#if defined(_MSC_VER)
-  return recv(s->sd, (char*) buf, len, 0); // windows lacks the read() call
-#else
-  return read(s->sd, buf, len);
-#endif
-
-}
-
-void vmdsock_shutdown(void *v) {
-  vmdsocket * s = (vmdsocket *) v;
-  if (s == NULL)
-    return;
-
-#if defined(_MSC_VER)
-  shutdown(s->sd, SD_SEND);
-#else
-  shutdown(s->sd, 1);  /* complete sends and send FIN */
-#endif
-}
-
-void vmdsock_destroy(void * v) {
-  vmdsocket * s = (vmdsocket *) v;
-  if (s == NULL)
-    return;
-
-#if defined(_MSC_VER)
-  closesocket(s->sd);
-#else
-  close(s->sd);
-#endif
-  free(s);  
-}
-
-int vmdsock_selread(void *v, int sec) {
-  vmdsocket *s = (vmdsocket *)v;
-  fd_set rfd;
-  struct timeval tv;
-  int rc;
- 
-  FD_ZERO(&rfd);
-  FD_SET(s->sd, &rfd);
-  memset((void *)&tv, 0, sizeof(struct timeval));
-  tv.tv_sec = sec;
-  do {
-    rc = select(s->sd+1, &rfd, NULL, NULL, &tv);
-  } while (rc < 0 && errno == EINTR);
-  return rc;
-
-}
-  
-int vmdsock_selwrite(void *v, int sec) {
-  vmdsocket *s = (vmdsocket *)v;
-  fd_set wfd;
-  struct timeval tv;
-  int rc;
- 
-  FD_ZERO(&wfd);
-  FD_SET(s->sd, &wfd);
-  memset((void *)&tv, 0, sizeof(struct timeval));
-  tv.tv_sec = sec;
-  do {
-    rc = select(s->sd + 1, NULL, &wfd, NULL, &tv);
-  } while (rc < 0 && errno == EINTR);
-  return rc;
-}
diff --git a/src/vmdsock.h b/src/vmdsock.h
deleted file mode 100644
index bd29a44b091af2c6a65f617d1299896b08941333..0000000000000000000000000000000000000000
--- a/src/vmdsock.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/***************************************************************************
- *cr
- *cr            (C) Copyright 1995-2003 The Board of Trustees of the
- *cr                        University of Illinois
- *cr                         All Rights Reserved
- *cr
- ***************************************************************************/
-
-/***************************************************************************
- * RCS INFORMATION:
- *
- *      $RCSfile: vmdsock.h,v $
- *      $Author: johns $        $Locker:  $             $State: Exp $
- *      $Revision: 1.1 $      $Date: 2003/09/12 18:30:46 $
- *
- ***************************************************************************
- * DESCRIPTION:
- *   socket interface layer, abstracts platform-dependent routines/APIs
- ***************************************************************************/
-
-#if defined(VMDSOCKINTERNAL)
-
-#if !defined(_MSC_VER)
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <netinet/in.h>
-#include <sys/file.h>
-#endif
-
-typedef struct {
-  struct sockaddr_in addr; /* address of socket provided by bind() */
-  int addrlen;             /* size of the addr struct */
-  int sd;                  /* socket file descriptor */
-} vmdsocket;
-
-#endif /* VMDSOCKINTERNAL */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-int   vmdsock_init(void);
-void *vmdsock_create(void);
-int   vmdsock_bind(void *, int);
-int   vmdsock_listen(void *);
-void *vmdsock_accept(void *);  /* return new socket */
-int   vmdsock_connect(void *, const char *, int);
-int   vmdsock_write(void *, const void *, int);
-int   vmdsock_read(void *, void *, int);
-int   vmdsock_selread(void *, int);
-int   vmdsock_selwrite(void *, int);
-void  vmdsock_shutdown(void *);
-void  vmdsock_destroy(void *);
-
-#ifdef __cplusplus
-}
-#endif
-