From b571bf79c2538fe23fb426ee71b3fe8ce70b8d26 Mon Sep 17 00:00:00 2001 From: Chris Maffeo <cmaffeo2@illinois.edu> Date: Wed, 5 Aug 2020 16:35:47 -0500 Subject: [PATCH] Initial implementation of crossPotential --- src/ComputeForce.cu | 97 +++++++++++++- src/ComputeForce.cuh | 64 ++++++++++ src/ComputeForce.h | 32 ++++- src/Configuration.cpp | 125 ++++++++++++++++++ src/Configuration.h | 11 ++ src/GrandBrownTown.cu | 4 +- src/TabulatedPotential.cu | 144 +++++++++++++++++++++ src/TabulatedPotential.h | 261 +++++++++++++++++++++++++++++++++++++- 8 files changed, 728 insertions(+), 10 deletions(-) diff --git a/src/ComputeForce.cu b/src/ComputeForce.cu index 60bc945..f5b42bc 100644 --- a/src/ComputeForce.cu +++ b/src/ComputeForce.cu @@ -50,6 +50,7 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) : numTabAngleFiles(c.numTabAngleFiles), numDihedrals(c.numDihedrals), numTabDihedralFiles(c.numTabDihedralFiles), numRestraints(c.numRestraints), numBondAngles(c.numBondAngles), + numCrossPotentials(c.numCrossPotentials), numGroupSites(c.numGroupSites), numReplicas(numReplicas) { @@ -235,6 +236,7 @@ ComputeForce::ComputeForce(const Configuration& c, const int numReplicas = 1) : restraintIds_d = NULL; bondAngleList_d = NULL; + cross_potential_list_d = NULL; //Calculate the number of blocks the grid should contain gridSize = (num+num_rb_attached_particles) / NUM_THREADS + 1; @@ -728,6 +730,12 @@ float ComputeForce::computeTabulated(bool get_energy) { //Mlog: the commented function doesn't use bondList, uncomment for testing. //if(bondMap_d != NULL && tableBond_d != NULL) + + if(cross_potential_list_d != NULL && cross_potentials_d != NULL) + { + computeCrossPotentials <<<nb, numThreads, 0, gpuman.get_next_stream()>>> ( forceInternal_d, pos_d, sys_d, numReplicas*numCrossPotentials, cross_potential_particles_d, cross_potentials_d, cross_potential_list_d, numCrossed_d, energies_d, get_energy); + } + if(bondAngleList_d != NULL && tableBond_d != NULL && tableAngle_d != NULL) { computeTabulatedBondAngles <<<nb, numThreads, 0, gpuman.get_next_stream()>>> ( forceInternal_d, pos_d, sys_d, numReplicas*numBondAngles, bondAngleList_d, tableAngle_d, tableBond_d, energies_d, get_energy); @@ -884,7 +892,7 @@ void ComputeForce::setForceInternalOnDevice(Vector3* f) { gpuErrchk(cudaMemcpy(forceInternal_d[0], f, sizeof(Vector3) * tot_num, cudaMemcpyHostToDevice)); } -void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals, const Restraint* const restraints, const BondAngle* const bondAngles) +void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals, const Restraint* const restraints, const BondAngle* const bondAngles, const XpotMap simple_potential_map, const std::vector<SimplePotential> simple_potentials, const CrossPotentialConf* const cross_potential_confs) { assert(simNum == numReplicas); // Not sure why we have both of these things int tot_num_with_rb = (num+num_rb_attached_particles) * simNum; @@ -961,6 +969,93 @@ void ComputeForce::copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, cudaMemcpyHostToDevice)); } + if (simple_potentials.size() > 0) { + float **val = simple_potential_pots_d = new float*[simple_potentials.size()]; + // float **tmp = new float*[simple_potentials.size()]; + for (int i=0; i < simple_potentials.size(); ++i) { + const SimplePotential sp = simple_potentials[i]; + gpuErrchk(cudaMalloc(&val[i], sizeof(float)*sp.size)); + gpuErrchk(cudaMemcpyAsync(val[i], sp.pot, sizeof(float)*sp.size, cudaMemcpyHostToDevice)); + // tmp[i] = sp.pot; + // // sp.pot = val[i]; + } + + // size_t sz = sizeof(SimplePotential) * simple_potentials.size(); + // gpuErrchk(cudaMalloc(&simple_potentials_d, sz)); + // gpuErrchk(cudaMemcpyAsync(simple_potentials_d, &simple_potentials[0], sz, + // cudaMemcpyHostToDevice)); + + // for (int i=0; i < simple_potentials.size(); ++i) { // Restore host pointers on host object + // SimplePotential &sp = simple_potentials[i]; + // sp.pot = tmp[i]; + // } + // // delete[] val; + // delete[] tmp; + + } + + if (numCrossPotentials > 0) { + // Count particles + int n_pots = 0; + int n_particles = 0; + for (int i=0; i < numCrossPotentials; ++i) { + const CrossPotentialConf& c = cross_potential_confs[i]; + n_pots += c.indices.size(); + for (int j=0; j < c.indices.size(); ++j) { + n_particles += c.indices[j].size(); + } + } + // printf("DEBUG: Found %d particles participating in %d potentials forming %d crossPotentials\n", + // n_particles, n_pots, numCrossPotentials); + + // Build crossPotentialLists on host + int *particle_list = new int[n_particles]; + SimplePotential *cross_potentials = new SimplePotential[n_pots]; + uint2 *cross_potential_list = new uint2[numCrossPotentials]; + unsigned short *numCrossed = new unsigned short[numCrossPotentials]; + + n_particles = 0; + n_pots = 0; + for (int i=0; i < numCrossPotentials; ++i) { + const CrossPotentialConf& c = cross_potential_confs[i]; + cross_potential_list[i] = make_uint2( n_pots, n_particles ); + + for (int j=0; j < c.indices.size(); ++j) { + unsigned int sp_i = simple_potential_map.at(c.potential_names[j]); + cross_potentials[n_pots] = simple_potentials[sp_i]; + cross_potentials[n_pots++].pot = simple_potential_pots_d[sp_i]; + for (int k=0; k < c.indices[j].size(); ++k) { + particle_list[n_particles++] = c.indices[j][k]; + } + } + numCrossed[i] = c.indices.size(); + } + + // Copy to device + size_t sz = sizeof(int)*n_particles; + gpuErrchk(cudaMalloc(&cross_potential_particles_d, sz)); + gpuErrchk(cudaMemcpyAsync(cross_potential_particles_d, particle_list, sz, + cudaMemcpyHostToDevice)); + sz = sizeof(SimplePotential)*n_pots; + gpuErrchk(cudaMalloc(&cross_potentials_d, sz)); + gpuErrchk(cudaMemcpyAsync(cross_potentials_d, cross_potentials, sz, + cudaMemcpyHostToDevice)); + sz = sizeof(uint2)*numCrossPotentials; + gpuErrchk(cudaMalloc(&cross_potential_list_d, sz)); + gpuErrchk(cudaMemcpyAsync(cross_potential_list_d, cross_potential_list, sz, + cudaMemcpyHostToDevice)); + sz = sizeof(unsigned short)*numCrossPotentials; + gpuErrchk(cudaMalloc(&numCrossed_d, sz)); + gpuErrchk(cudaMemcpyAsync(numCrossed_d, numCrossed, sz, + cudaMemcpyHostToDevice)); + + // Clean up + delete[] particle_list; + delete[] cross_potentials; + delete[] cross_potential_list; + delete[] numCrossed; + } + gpuErrchk(cudaDeviceSynchronize()); } diff --git a/src/ComputeForce.cuh b/src/ComputeForce.cuh index bbbce8c..eb01f2f 100644 --- a/src/ComputeForce.cuh +++ b/src/ComputeForce.cuh @@ -3,7 +3,9 @@ // Terrance Howard <heyterrance@gmail.com> #pragma once #include <cassert> + #include "CudaUtil.cuh" + #include "TabulatedMethods.cuh" // From TabulatedMethods.cuh: constexpr float BD_PI = 3.1415927f; @@ -887,6 +889,68 @@ void computeTabulatedBondAngles(Vector3* force, } } +__global__ +void computeCrossPotentials(Vector3* force, + Vector3* __restrict__ pos, + BaseGrid* __restrict__ sys, + int numCrossPotentials, + int* __restrict__ crossPotentialParticles, + SimplePotential* __restrict__ potentialList, + uint2* __restrict__ crossPotential_list, + unsigned short* __restrict__ numCrossed, + float* energy, bool get_energy) { + /* + crossPotential_list[i].x : index of first potential in potentialList for i_th crossPotential + crossPotential_list[i].y : index of first atom in crossPotentialParticles for i_th crossPotential + + for three potentials, angle, bond, angle, we would have the following atomic indices in crossPotentialParticles: + pot1 : crossPotential_list[i].y, crossPotential_list[i].y + 1 , crossPotential_list[i].y + 2 + pot2 : crossPotential_list[i].y + 3, crossPotential_list[i].y + 4 + and + pot3 : crossPotential_list[i].y + 5, crossPotential_list[i].y + 6, crossPotential_list[i].y + 7 + */ + + // CRAPPY NAIVE IMPLEMENTATION +#define MAX_XPOTS 4 + float2 energy_and_deriv[MAX_XPOTS]; + float tmp_force; + + for (int i = threadIdx.x+blockIdx.x*blockDim.x; i<numCrossPotentials; i+=blockDim.x*gridDim.x) { + unsigned short num_pots = numCrossed[i]; + + unsigned int part_idx = crossPotential_list[i].y; +#pragma unroll + for (unsigned short int j = 0; j < MAX_XPOTS; ++j) { + if (j == num_pots) break; + SimplePotential& p = potentialList[ crossPotential_list[i].x + j ]; + + // Hidden branch divergence in compute_value => sort potentials by type before running kernel + float tmp = p.compute_value(pos,sys, &crossPotentialParticles[part_idx]); + energy_and_deriv[j] = p.compute_energy_and_deriv(tmp); + part_idx += p.type==BOND? 2: p.type==ANGLE? 3: 4; + } + + part_idx = crossPotential_list[i].y; +#pragma unroll + for (unsigned short int j = 0; j < MAX_XPOTS; ++j) { + if (j == num_pots) break; + tmp_force = energy_and_deriv[j].y; +#pragma unroll + for (unsigned short int k = 0; k < MAX_XPOTS; ++k) { + if (k == num_pots) break; + if (j == k) continue; + tmp_force *= energy_and_deriv[k].x; + } + if (tmp_force == 0) continue; + + // TODO add energy + // TODO make it work with replicas + SimplePotential& p = potentialList[ crossPotential_list[i].x + j ]; + p.apply_force(pos,sys, force, &crossPotentialParticles[part_idx], tmp_force); + part_idx += p.type==BOND? 2: p.type==ANGLE? 3: 4; + } + } +} __global__ diff --git a/src/ComputeForce.h b/src/ComputeForce.h index d4b8322..1801267 100644 --- a/src/ComputeForce.h +++ b/src/ComputeForce.h @@ -1,8 +1,7 @@ /////////////////////////////////////////////////////////////////////// // Brownian dynamics base class // Author: Jeff Comer <jcomer2@illinois.edu> -#ifndef COMPUTEFORCE_H -#define COMPUTEFORCE_H +#pragma once #ifdef __CUDACC__ #define HOST __host__ @@ -25,13 +24,26 @@ #include "TabulatedPotential.h" #include "TabulatedAngle.h" #include "TabulatedDihedral.h" +#include "CrossPotential.h" #include "GPUManager.h" +// #include <map> +#include <boost/unordered_map.hpp> #include <cstdio> // #include <cuda_runtime.h> #include <thrust/transform_reduce.h> // thrust::reduce #include <thrust/functional.h> // thrust::plus +inline std::size_t hash_value(String const& s) { + if (s.length() == 0) return 0; + // return hash_value(s.val()); + return boost::hash_range(s.val(), s.val()+s.length()); +} + +typedef boost::unordered_map<String,unsigned int> XpotMap; +// typedef std::map<String,unsigned int> XpotMap; + + const unsigned int NUM_THREADS = 256; // Configuration @@ -75,7 +87,10 @@ public: //MLog: new copy function to allocate memory required by ComputeForce class. void copyToCUDA(Vector3* forceInternal, Vector3* pos); - void copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals, const Restraint* const restraints, const BondAngle* const bondAngles); + void copyToCUDA(int simNum, int *type, Bond* bonds, int2* bondMap, Exclude* excludes, int2* excludeMap, Angle* angles, Dihedral* dihedrals, const Restraint* const restraints, const BondAngle* const bondAngles, + const XpotMap simple_potential_map, + const std::vector<SimplePotential> simple_potentials, + const CrossPotentialConf* const cross_potential_confs); void copyToCUDA(Vector3* forceInternal, Vector3* pos, Vector3* mom); void copyToCUDA(Vector3* forceInternal, Vector3* pos, Vector3* mom, float* random); @@ -253,6 +268,15 @@ private: BondAngle* bondAngles_d; int4* bondAngleList_d; + int numCrossPotentials; + float** simple_potential_pots_d; + SimplePotential* simple_potentials_d; + int* cross_potential_particles_d; + SimplePotential* cross_potentials_d; + uint2* cross_potential_list_d; + // ushort4* cross_potential_particle_counts_d; + unsigned short* numCrossed_d; + int3* bondList_d; int4* angleList_d; int4* dihedralList_d; @@ -264,5 +288,3 @@ private: float* restraintSprings_d; }; - -#endif diff --git a/src/Configuration.cpp b/src/Configuration.cpp index e12f811..2ce832f 100644 --- a/src/Configuration.cpp +++ b/src/Configuration.cpp @@ -2,6 +2,7 @@ #include "Angle.h" #include "Dihedral.h" #include "Restraint.h" +#include "CrossPotential.h" #include <cmath> #include <cassert> #include <stdlib.h> /* srand, rand */ @@ -272,6 +273,8 @@ Configuration::Configuration(const char* config_file, int simNum, bool debug) : if (readDihedralsFromFile) readDihedrals(); if (readRestraintsFromFile) readRestraints(); if (readBondAnglesFromFile) readBondAngles(); + if (readCrossPotentialsFromFile) readCrossPotentials(); + if (temperatureGridFile.length() != 0) { printf("\nFound temperature grid file: %s\n", temperatureGridFile.val()); @@ -632,6 +635,7 @@ Configuration::~Configuration() { if (angles != NULL) delete[] angles; if (dihedrals != NULL) delete[] dihedrals; if (bondAngles != NULL) delete[] bondAngles; + if (crossPotentials != NULL) delete[] crossPotentials; delete[] numPartsOfType; @@ -867,6 +871,10 @@ void Configuration::setDefaults() { numBondAngles = 0; bondAngles = NULL; + readCrossPotentialsFromFile = false; + numCrossPotentials = 0; + + readRestraintsFromFile = false; numRestraints = 0; restraints = NULL; @@ -1135,6 +1143,13 @@ int Configuration::readParameters(const char * config_file) { bondAngleFile = value; readBondAnglesFromFile = true; } + } else if (param == String("inputCrossPotentials")) { + if (readBondAnglesFromFile) { + printf("WARNING: More than one cross potential file specified. Ignoring new file.\n"); + } else { + crossPotentialFile = value; + readCrossPotentialsFromFile = true; + } } else if (param == String("tabulatedAngleFile")) { if (numTabAngleFiles >= atfcap) { String* temp = angleTableFile; @@ -2001,6 +2016,98 @@ void Configuration::readBondAngles() { // angles[i].print(); } +void Configuration::readCrossPotentials() { + FILE* inp = fopen(crossPotentialFile.val(), "r"); + char line[256]; + int capacity = 256; + numCrossPotentials = 0; + crossPotentials = new CrossPotentialConf[capacity]; + + // If the angle file cannot be found, exit the program + if (inp == NULL) { + printf("WARNING: Could not open `%s'.\n", crossPotentialFile.val()); + printf("This simulation will not use cross potentials.\n"); + return; + } + printf("DEBUG: READING CROSSPOT FILE\n"); + std::vector<std::vector<int>> indices; + std::vector<int> tmp; + std::vector<String> pot_names; + + while(fgets(line, 256, inp)) { + if (line[0] == '#') continue; + String s(line); + int numTokens = s.tokenCount(); + String* tokenList = new String[numTokens]; + s.tokenize(tokenList); + + indices.clear(); + tmp.clear(); + pot_names.clear(); + + printf("\rDEBUG: reading line %d",numCrossPotentials+1); + + // Legitimate CrossPotential inputs have at least 7 tokens + // BONDANGLE | INDEX1 | INDEX2 | INDEX3 | POT_FILENAME1 | INDEX4 | INDEX5 | POT_FILENAME2 ... + if (numTokens < 7) { + printf("WARNING: Invalid cross potential input line (too few tokens %d): %s\n", numTokens, line); + continue; + } + + // Discard any empty line + if (tokenList == NULL) + continue; + + for (int i = 1; i < numTokens; ++i) { + char *end; + // printf("DEBUG: Working on token %d '%s'\n", i, tokenList[i].val()); + int index = (int) strtol(tokenList[i].val(), &end, 10); + if (tokenList[i].val() == end || *end != '\0' || errno == ERANGE) { + indices.push_back(tmp); + String& n = tokenList[i]; + pot_names.push_back( n ); + if ( simple_potential_ids.find(n) == simple_potential_ids.end() ) { + // Could not find fileName in dictionary, so read and add it + unsigned int s = tmp.size(); + if (s < 2 || s > 4) { + printf("WARNING: Invalid cross potential input line (indices of potential %d == %d): %s\n", i, s, line); + continue; + } + + simple_potential_ids[n] = simple_potentials.size(); + SimplePotentialType t = s==2? BOND: s==3? ANGLE: DIHEDRAL; + simple_potentials.push_back( SimplePotential(n.val(), t) ); + } + tmp.clear(); + } else { + if (index >= num) { + continue; + } + tmp.push_back(index); + } + } + + if (numCrossPotentials >= capacity) { + CrossPotentialConf* temp = crossPotentials; + capacity *= 2; + crossPotentials = new CrossPotentialConf[capacity]; + for (int i = 0; i < numCrossPotentials; i++) + crossPotentials[i] = temp[i]; + delete[] temp; + } + + CrossPotentialConf a(indices, pot_names); + crossPotentials[numCrossPotentials++] = a; + delete[] tokenList; + } + printf("\nDEBUG: Sorting\n"); + std::sort(crossPotentials, crossPotentials + numCrossPotentials, compare()); + + // for(int i = 0; i < numAngles; i++) + // angles[i].print(); +} + + void Configuration::readRestraints() { FILE* inp = fopen(restraintFile.val(), "r"); char line[256]; @@ -2613,3 +2720,21 @@ bool Configuration::compare::operator()(const BondAngle& lhs, const BondAngle& r return lhs.ind3 < rhs.ind3; return lhs.ind4 < rhs.ind4; } + +bool Configuration::compare::operator()(const CrossPotentialConf& lhs, const CrossPotentialConf& rhs) { + int diff = rhs.indices.size() - lhs.indices.size(); + if (diff != 0) return diff > 0; + + for (unsigned int i = 0; i < lhs.indices.size(); ++i) { + diff = rhs.indices[i].size() - lhs.indices[i].size(); + if (diff != 0) return diff > 0; + } + + for (unsigned int i = 0; i < lhs.indices.size(); ++i) { + for (unsigned int j = 0; j < lhs.indices[i].size(); ++j) { + diff = rhs.indices[i][j] - lhs.indices[i][j]; + if (diff != 0) return diff > 0; + } + } + return true; +} diff --git a/src/Configuration.h b/src/Configuration.h index 66d6539..00fddec 100644 --- a/src/Configuration.h +++ b/src/Configuration.h @@ -25,6 +25,7 @@ #include "TrajectoryWriter.h" #include "TabulatedPotential.h" #include "TabulatedAngle.h" +#include "CrossPotential.h" #include "GPUManager.h" #include "RigidBodyType.h" #include "RigidBody.h" @@ -49,6 +50,7 @@ class Configuration { bool operator()(const Angle& lhs, const Angle& rhs); bool operator()(const Dihedral& lhs, const Dihedral& rhs); bool operator()(const BondAngle& lhs, const BondAngle& rhs); + bool operator()(const CrossPotentialConf& lhs, const CrossPotentialConf& rhs); }; void setDefaults(); @@ -265,6 +267,15 @@ public: Restraint* restraints; + void readCrossPotentials(); + String crossPotentialFile; + int numCrossPotentials; + bool readCrossPotentialsFromFile; + CrossPotentialConf* crossPotentials; + // boost::unordered_map<String, unsigned int> simple_potential_ids; + XpotMap simple_potential_ids; + std::vector<SimplePotential> simple_potentials; + //Han-Yi Chou String ParticleDynamicType; String RigidBodyDynamicType; diff --git a/src/GrandBrownTown.cu b/src/GrandBrownTown.cu index 6353b8a..27e02b0 100644 --- a/src/GrandBrownTown.cu +++ b/src/GrandBrownTown.cu @@ -260,9 +260,9 @@ GrandBrownTown::GrandBrownTown(const Configuration& c, const char* outArg, // fullLongRange, numBonds, numTabBondFiles, numExcludes, numAngles, numTabAngleFiles, // numDihedrals, numTabDihedralFiles, c.pairlistDistance, numReplicas); internal = new ComputeForce(c, numReplicas); - + //MLog: I did the other halve of the copyToCUDA function from the Configuration class here, keep an eye on any mistakes that may occur due to the location. - internal -> copyToCUDA(c.simNum, c.type, c.bonds, c.bondMap, c.excludes, c.excludeMap, c.angles, c.dihedrals, c.restraints, c.bondAngles ); + internal -> copyToCUDA(c.simNum, c.type, c.bonds, c.bondMap, c.excludes, c.excludeMap, c.angles, c.dihedrals, c.restraints, c.bondAngles, c.simple_potential_ids, c.simple_potentials, c.crossPotentials ); if (numGroupSites > 0) init_cuda_group_sites(); // TODO: check for duplicate potentials diff --git a/src/TabulatedPotential.cu b/src/TabulatedPotential.cu index 9a378bb..e8ee152 100644 --- a/src/TabulatedPotential.cu +++ b/src/TabulatedPotential.cu @@ -180,3 +180,147 @@ int FullTabulatedPotential::countValueLines(const char* fileName) { return count; } + +void TabulatedPotential::truncate(float cutoff) { + int home = int(floor((cutoff - r0)/dr)); + if (home > n) return; + + float v = v0[home]; + for (int i = home; i < n; i++) v0[i] = v; + interpolate(); +} + +bool TabulatedPotential::truncate(float switchDist, float cutoff, float value) { + int indOff = int(floor((cutoff - r0)/dr)); + int indSwitch = int(floor((switchDist - r0)/dr)); + + if (indSwitch > n) return false; + + // Set everything after the cutoff to "value". + for (int i = indOff; i < n; i++) v0[i] = value; + + // Apply a linear switch. + float v = v0[indSwitch]; + float m = (value - v)/(indOff - indSwitch); + for (int i = indSwitch; i < indOff; i++) v0[i] = m*(i - indSwitch) + v; + + interpolate(); + return true; +} + +Vector3 TabulatedPotential::computeForce(Vector3 r) { + float d = r.length(); + Vector3 rUnit = -r/d; + int home = int(floor((d - r0)/dr)); + + if (home < 0) return Vector3(0.0f); + if (home >= n) return Vector3(0.0f); + + float homeR = home*dr + r0; + float w = (d - homeR)/dr; + + // Interpolate. + Vector3 force = -(3.0f*v3[home]*w*w + 2.0f*v2[home]*w + v1[home])*rUnit/dr; + return force; +} + +void TabulatedPotential::init(const float* dist, const float* pot, int n0) { + n = abs(n0); + dr = dist[1]-dist[0]; + r0 = dist[0]; + r1 = r0 + n*dr; + + v0 = new float[n]; + for (int i = 0; i < n; i++) v0[i] = pot[i]; +} + +void TabulatedPotential::interpolate() { + v1 = new float[n]; + v2 = new float[n]; + v3 = new float[n]; + + for (int i = 0; i < n; i++) { + int i0 = i - 1; + int i1 = i; + int i2 = i + 1; + int i3 = i + 2; + + if (i0 < 0) i0 = 0; + if (i2 >= n) i2 = n-1; + if (i3 >= n) i3 = n-1; + + v3[i] = 0.5f*(-v0[i0] + 3.0f*v0[i1] - 3.0f*v0[i2] + v0[i3]); + v2[i] = 0.5f*(2.0f*v0[i0] - 5.0f*v0[i1] + 4.0f*v0[i2] - v0[i3]); + v1[i] = 0.5f*(-v0[i0] + v0[i2]); + } + e0 = v3[n-1] + v2[n-1] + v1[n-1] + v0[n-1]; +} + +int countValueLines(const char* fileName) { + FILE* inp = fopen(fileName, "r"); + if (inp == NULL) { + printf("SimplePotential::countValueLines Could not open file '%s'\n", fileName); + exit(-1); + } + char line[256]; + int count = 0; + + while (fgets(line, 256, inp) != NULL) { + // Ignore comments. + int len = strlen(line); + if (line[0] == '#') continue; + if (len < 2) continue; + count++; + } + fclose(inp); + return count; +} + +SimplePotential::SimplePotential(const char* filename, SimplePotentialType type) : type(type) { + FILE* inp = fopen(filename, "r"); + if (inp == NULL) { + printf("SimplePotential::SimplePotential Could not open file '%s'\n", filename); + exit(-1); + } + + char line[256]; + + size = (unsigned int) countValueLines(filename); + float* r = new float[size]; + pot = new float[size]; + + int count = 0; + while (fgets(line, 256, inp) != NULL) { + // Ignore comments. + int len = strlen(line); + if (line[0] == '#') continue; + if (len < 2) continue; + + String s(line); + int numTokens = s.tokenCount(); + if (numTokens != 2) { + printf("SimplePotential::SimplePotential Invalid tabulated potential file line: %s\n", line); + exit(-1); + } + + String* tokenList = new String[numTokens]; + s.tokenize(tokenList); + if (tokenList == NULL) { + printf("SimplePotential::SimplePotential Invalid tabulated potential file line: %s\n", line); + exit(-1); + } + r[count] = (float) strtod(tokenList[0], NULL); + pot[count] = (float) strtod(tokenList[1], NULL); + count++; + + delete[] tokenList; + } + fclose(inp); + + if (type == BOND) { + step_inv = (size-1.0f) / (r[size-1]-r[0]); + } else { + step_inv = 57.29578f * (size-1.0f) / (r[size-1]-r[0]); + } + delete[] r; +} diff --git a/src/TabulatedPotential.h b/src/TabulatedPotential.h index 2bf3162..8c6f812 100644 --- a/src/TabulatedPotential.h +++ b/src/TabulatedPotential.h @@ -7,11 +7,17 @@ #define HOST __host__ #define DEVICE __device__ #else - #define HOST - #define DEVICE + #define HOST + #define DEVICE #endif #include "useful.h" +#include "BaseGrid.h" + +#ifdef __CUDA_ARCH__ +#include "CudaUtil.cuh" +#endif + #include <cuda.h> #ifndef gpuErrchk @@ -20,6 +26,7 @@ fprintf(stderr,"CUDA Error: %s %s %d\n", cudaGetErrorString(code), __FILE__, __LINE__); \ }} #endif +constexpr float BD_PI 3.1415927f; class EnergyForce { public: @@ -177,8 +184,258 @@ private: float drInv; //TODO replace with drInv float r0; }; + +/* // New unified/simplified classes for working with potentials */ +/* <template int num_indices, int max_integer> */ +/* class BitMaskInts { */ +/* BitMaskInts(); */ + +/* private: */ +/* static_assert( ceil(log2(max_integer)) <= CHAR_BIT ); */ + +/* char data[ ceil(num_indices * ceil(log2(max_integer)) / CHAR_BIT) ]; */ + +/* HOST DEVICE inline unsigned short int get_int(i) const { */ +/* unsigned int first_bit = i * ceil(log2(max_integer)); */ +/* unsigned int last_bit = (i+1) * ceil(log2(max_integer))-1; */ +/* char c0 = data[floor(first_bit/CHAR_BIT)]; */ +/* char c1 = data[floor(last_bit/CHAR_BIT)]; */ + +/* unsigned short int ret = c0 << (first_bit % CHAR_BIT) /\* shift left *\/ */ +/* }; */ + +/* } */ + + +enum SimplePotentialType { UNSET, BOND, ANGLE, DIHEDRAL }; +// enum PotentialTypeAtoms { bond=2, angle=3, dihedral=4 }; + + +class SimplePotential { +public: + SimplePotential() { } + SimplePotential(const char* filename, SimplePotentialType type); + SimplePotential(float* pot, float step_inv, unsigned int size, SimplePotentialType type) : + pot(pot), step_inv(step_inv), size(size), type(type) { } + + + float* pot; // actual potential values + float step_inv; // angular increments of potential file + unsigned int size; // number of data points in the file + + SimplePotentialType type; + + /* float start = 0; */ + /* bool is_periodic = false; */ + + /* HOST void copy_to_device(SimplePotential* device_addr_p, unsigned int offset=0) { */ + /* /\* Assumes device_addr_p is already allocated, allocates space for pot *\/ */ + /* float* val, tmp; */ + /* gpuErrchk(cudaMalloc(&val, sizeof(float)*size)); // TODO equivalent cudaFree */ + /* gpuErrchk(cudaMemcpyAsync(val, pot, sizeof(float)*size, cudaMemcpyHostToDevice)); */ + /* tmp = pot; */ + /* pot = val; */ + /* gpuErrchk(cudaMemcpyAsync(device_addr_p+offset, this, sizeof(SimplePotential), cudaMemcpyHostToDevice)); */ + /* pot = tmp; */ + /* // return val; */ + /* } */ + + HOST DEVICE inline float compute_value(const Vector3* __restrict__ pos, + const BaseGrid* __restrict__ sys, + const int* __restrict__ particles) const { + float val; + if (type == BOND) + val = compute_bond(pos, sys, particles[0], particles[1]); + else if (type == ANGLE) + val = compute_angle(pos, sys, particles[0], particles[1], particles[2]); + else if (type == DIHEDRAL) + val = compute_dihedral(pos, sys, particles[0], particles[1], particles[2], particles[3]); + return val; + } + + HOST DEVICE inline float2 compute_energy_and_deriv(float value) { + float2 ret; + if (type == DIHEDRAL) { + ret = linearly_interpolate<true>(value, BD_PI); + } else { + ret = linearly_interpolate<false>(value); + } + return ret; + } + + HOST DEVICE inline float compute_bond(const Vector3* __restrict__ pos, + const BaseGrid* __restrict__ sys, + int i, int j) const { + return sys->wrapDiff( pos[j] - pos[i] ).length(); + } + + HOST DEVICE inline float compute_angle(const Vector3* __restrict__ pos, + const BaseGrid* __restrict__ sys, + int i, int j, int k) const { + const Vector3 ab = sys->wrapDiff(pos[j] - pos[i]); + const Vector3 bc = sys->wrapDiff(pos[k] - pos[j]); + const Vector3 ac = sys->wrapDiff(pos[k] - pos[i]); + + // Find the distance between each pair of particles + float distab = ab.length2(); + float distbc = bc.length2(); + const float distac2 = ac.length2(); + + // Find the cosine of the angle we want - <ABC + float cos = (distab + distbc - distac2); + + distab = 1.0f/sqrt(distab); //TODO: test other functions + distbc = 1.0f/sqrt(distbc); + cos *= 0.5f * distbc * distab; + + // If the cosine is illegitimate, set it to 1 or -1 so that acos won't fail + if (cos < -1.0f) cos = -1.0f; + if (cos > 1.0f) cos = 1.0f; + + return acos(cos); + } + + HOST DEVICE inline float compute_dihedral(const Vector3* __restrict__ pos, + const BaseGrid* __restrict__ sys, + int i, int j, int k, int l) const { + const Vector3 ab = -sys->wrapDiff(pos[j] - pos[i]); + const Vector3 bc = -sys->wrapDiff(pos[k] - pos[j]); + const Vector3 cd = -sys->wrapDiff(pos[l] - pos[k]); + + const Vector3 crossABC = ab.cross(bc); + const Vector3 crossBCD = bc.cross(cd); + const Vector3 crossX = bc.cross(crossABC); + + const float cos_phi = crossABC.dot(crossBCD) / (crossABC.length() * crossBCD.length()); + const float sin_phi = crossX.dot(crossBCD) / (crossX.length() * crossBCD.length()); + + return -atan2(sin_phi,cos_phi); + } + + template <bool is_periodic> + HOST DEVICE inline float2 linearly_interpolate(float x, float start=0.0f) const { + float w = (x - start) * step_inv; + int home = int( floorf(w) ); + w = w - home; + // if (home < 0) return EnergyForce(v0[0], Vector3(0.0f)); + if (home < 0 || (home >= size && !is_periodic)) return make_float2(0.0f,0.0f); + + float u0 = pot[home]; + float du = home+1 < size ? pot[home+1]-u0 : is_periodic ? pot[0]-u0 : 0; + + return make_float2(du*w+u0, du*step_inv); + } + + DEVICE inline void apply_force(const Vector3* __restrict__ pos, + const BaseGrid* __restrict__ sys, + Vector3* __restrict__ forces, + int* particles, float energy_deriv) const { + if (type == BOND) + apply_bond_force(pos, sys, forces, particles[0], particles[1], energy_deriv); + else if (type == ANGLE) + apply_angle_force(pos, sys, forces, particles[0], particles[1], + particles[2], energy_deriv); + else if (type == DIHEDRAL) + apply_dihedral_force(pos, sys, forces, particles[0], particles[1], + particles[2], particles[3], energy_deriv); + } + + __device__ inline void apply_bond_force(const Vector3* __restrict__ pos, + const BaseGrid* __restrict__ sys, + Vector3* __restrict__ force, + int i, int j, float energy_deriv) const { +#ifdef __CUDA_ARCH__ + Vector3 f = sys->wrapDiff( pos[j] - pos[i] ); + f = f * energy_deriv / f.length(); + atomicAdd(&force[i], f); + atomicAdd(&force[j], -f); +#endif + } + + DEVICE inline void apply_angle_force(const Vector3* __restrict__ pos, + const BaseGrid* __restrict__ sys, + Vector3* __restrict__ force, + int i, int j, int k, float energy_deriv) const { + +#ifdef __CUDA_ARCH__ + const Vector3 ab = -sys->wrapDiff(pos[j] - pos[i]); + const Vector3 bc = -sys->wrapDiff(pos[k] - pos[j]); + const Vector3 ac = sys->wrapDiff(pos[k] - pos[i]); + + // Find the distance between each pair of particles + float distab = ab.length2(); + float distbc = bc.length2(); + const float distac2 = ac.length2(); + + // Find the cosine of the angle we want - <ABC + float cos = (distab + distbc - distac2); + + distab = 1.0f/sqrt(distab); //TODO: test other functions + distbc = 1.0f/sqrt(distbc); + cos *= 0.5f * distbc * distab; + + // If the cosine is illegitimate, set it to 1 or -1 so that acos won't fail + if (cos < -1.0f) cos = -1.0f; + if (cos > 1.0f) cos = 1.0f; + + float sin = sqrtf(1.0f - cos*cos); + energy_deriv /= abs(sin) > 1e-3 ? sin : 1e-3; // avoid singularity + + // Calculate the forces + Vector3 force1 = -(energy_deriv*distab) * (ab * (cos*distab) + bc * distbc); // force on particle 1 + Vector3 force3 = (energy_deriv*distbc) * (bc * (cos*distbc) + ab * distab); // force on particle 3 + + atomicAdd( &force[i], force1 ); + atomicAdd( &force[j], -(force1 + force3) ); + atomicAdd( &force[k], force3 ); +#endif + } + + DEVICE inline void apply_dihedral_force(const Vector3* __restrict__ pos, + const BaseGrid* __restrict__ sys, + Vector3* __restrict__ force, + int i, int j, int k, int l, float energy_deriv) const { +#ifdef __CUDA_ARCH__ + const Vector3 ab = -sys->wrapDiff(pos[j] - pos[i]); + const Vector3 bc = -sys->wrapDiff(pos[k] - pos[j]); + const Vector3 cd = -sys->wrapDiff(pos[l] - pos[k]); + + const Vector3 crossABC = ab.cross(bc); + const Vector3 crossBCD = bc.cross(cd); + const Vector3 crossX = bc.cross(crossABC); + + const float cos_phi = crossABC.dot(crossBCD) / (crossABC.length() * crossBCD.length()); + const float sin_phi = crossX.dot(crossBCD) / (crossX.length() * crossBCD.length()); + + // return -atan2(sin_phi,cos_phi); + Vector3 f1, f2, f3; // forces + float distbc = bc.length2(); + + f1 = -distbc * crossABC.rLength2() * crossABC; + f3 = -distbc * crossBCD.rLength2() * crossBCD; + f2 = -(ab.dot(bc) * bc.rLength2()) * f1 - (bc.dot(cd) * bc.rLength2()) * f3; + + energy_deriv = (ab.length2()*bc.length2()*crossABC.rLength2() > 100.0f || bc.length2()*cd.length2()*crossBCD.rLength2() > 100.0f) ? 0.0f : energy_deriv; + if ( energy_deriv > 1000.0f ) + energy_deriv = 1000.0f; + if ( energy_deriv < -1000.0f ) + energy_deriv = -1000.0f; + + f1 *= energy_deriv; + f2 *= energy_deriv; + f3 *= energy_deriv; + + atomicAdd( &force[i], f1 ); + atomicAdd( &force[j], f2-f1 ); + atomicAdd( &force[k], f3-f2 ); + atomicAdd( &force[l], -f3 ); +#endif + } +}; + #ifndef delgpuErrchk #undef delgpuErrchk #undef gpuErrchk(code) #endif + #endif -- GitLab