From f80eefa3a5e20b54d469fd3f3047580eed31de55 Mon Sep 17 00:00:00 2001 From: pinyili2 <pinyili2@illinois.edu> Date: Wed, 2 Oct 2024 16:36:37 -0500 Subject: [PATCH] add arbdmodel --- mrdna/RELEASE-VERSION | 2 +- mrdna/__init__.py | 5 +- mrdna/coords.py | 2 +- mrdna/model/CanonicalNucleotideAtoms.py | 8 +- mrdna/model/nbPot_arbd.py | 475 ++++++++++++++++++++++++ 5 files changed, 487 insertions(+), 5 deletions(-) create mode 100644 mrdna/model/nbPot_arbd.py diff --git a/mrdna/RELEASE-VERSION b/mrdna/RELEASE-VERSION index 1510856..7e7193f 100644 --- a/mrdna/RELEASE-VERSION +++ b/mrdna/RELEASE-VERSION @@ -1 +1 @@ -1.0a.dev165 +1.0a.dev166 diff --git a/mrdna/__init__.py b/mrdna/__init__.py index 386e405..303fff6 100644 --- a/mrdna/__init__.py +++ b/mrdna/__init__.py @@ -32,7 +32,10 @@ def get_resource_path(relative_path): return _RESOURCE_DIR / relative_path ## Import useful messsages -from arbdmodel.coords import readArbdCoords +try: + from arbdmodel.coords import readArbdCoords +except: + from marbdmodel.coords import readArbdCoords from .segmentmodel import SegmentModel, SingleStrandedSegment, DoubleStrandedSegment from .simulate import multiresolution_simulation from .model.dna_sequence import read_sequence_file diff --git a/mrdna/coords.py b/mrdna/coords.py index 363ab96..720165d 100644 --- a/mrdna/coords.py +++ b/mrdna/coords.py @@ -1,4 +1,4 @@ -from .arbdmodel.coords import * +from .marbdmodel.coords import * from sys import stderr stderr.write("WARNING: the module 'mrdna.coords' has been deprecated. Please change your scripts to use 'mrdna.arbdmodel.coords'\n") diff --git a/mrdna/model/CanonicalNucleotideAtoms.py b/mrdna/model/CanonicalNucleotideAtoms.py index 4cade46..fa28af0 100644 --- a/mrdna/model/CanonicalNucleotideAtoms.py +++ b/mrdna/model/CanonicalNucleotideAtoms.py @@ -1,8 +1,12 @@ import json import numpy as np import copy -from arbdmodel.coords import rotationAboutAxis -from arbdmodel import Group, PointParticle, ParticleType +try: + from arbdmodel.coords import rotationAboutAxis + from arbdmodel import Group, PointParticle, ParticleType +except: + from ..marbdmodel.coords import rotationAboutAxis + from ..marbdmodel import Group, PointParticle, ParticleType from .. import get_resource_path seqComplement = dict(A='T',G='C') diff --git a/mrdna/model/nbPot_arbd.py b/mrdna/model/nbPot_arbd.py new file mode 100644 index 0000000..f564a72 --- /dev/null +++ b/mrdna/model/nbPot_arbd.py @@ -0,0 +1,475 @@ +import numpy as np +import scipy.optimize as opt +from scipy.interpolate import interp1d +from scipy.signal import savgol_filter as savgol + +from arbdmodel.interactions import AbstractPotential +from .. import get_resource_path +from ..config import CACHE_DIR + +from scipy.optimize import curve_fit +from pathlib import Path + +## TODO: clean this file up a bit, make things protected + +## units "291 k K" kcal_mol +kT = 0.57827709 + +def _round_bp(bp): + return 10**(np.around(np.log10(bp)*100)/100.0) + +class AbstractNbDnaPotential(AbstractPotential): + + def __init__(self, *args,**kwargs): + AbstractPotential.__init__(self,*args,**kwargs) + + self.debye_length = self.debye_length0 # set by child class + + x,y = self.load_pmf() + self.target_x = x + self.target_y = y + base = 4 + self.param_x = np.logspace(np.log(15)/np.log(base),np.log(120)/np.log(base),80,base=base) + # print(self.param_x) + + self.target_x_interp = np.linspace(np.min(x),np.max(x),2*len(x)) # interpolate potential + self.target_x_interp = np.arange(23.5,38,0.1) # interpolate potential + self.target_y_interp = interp1d(x,y)(self.target_x_interp) + + self.bead_distribution_y = None + + """ Initialize caches """ + try: + self.single_bp_pair_pot = np.loadtxt( self.get_package_cache_file() ).T + self.param_x = self.single_bp_pair_pot[0,:] + assert( len(self.param_x > 2) ) + except: + pass + + def get_package_cache_file(self): + raise NotImplementedError() + + def load_pmf(self): + raise NotImplementedError() + + def load_bead_distributions(self): + raise NotImplementedError() + + def potential(self, r, typeA, typeB): + """ Implements interface for AbstractPotential """ + + if typeA.name[0] == "O" or typeB.name[0] == "O": + u = np.zeros( r.shape ) + else: + u = self.nbPot(r, 0.5*typeA.nts, 0.5*typeB.nts) + return u + + """ Remainder has to do with nbPot """ + def load_np(self,filename): + return np.load(filename)['arr_0'].tolist() + + def parametric_potential(self,x,*parms): + x0 = self.param_x + x = np.array(x) + u = interp1d(x0, np.array(parms), + bounds_error = False, fill_value="extrapolate", + assume_sorted=True)(x).T + u[x<13] = u[x<13] + 0.5 * (x[x<13]-13)**2 + + ## Debye correction + if self.debye_length != self.debye_length0: + ## units "e**2/(4 * pi * 80 epsilon0 AA)" kcal_mol + A = 4.1507964 + with np.errstate(divide='ignore',invalid='ignore'): + du = (A/x) * (np.exp(-x/self.debye_length) - np.exp(-x/self.debye_length0)) + + du[x < 10] = du[x>=10][0] + u = u + du + + return u-u[-1] + + def get_bead_distributions(self, interhelical_distances): + if self.bead_distribution_y is None: + self.load_bead_distributions() + return self.bead_distribution_x, self.bead_distribution_y(interhelical_distances) + + def iterative_fit(self): + try: + import bead_dist_data as bdd + x = bdd.bead_centers.x + except: + ## First iteration + x = np.arange(0,200,0.2) + f = self.get_target_force(x+2) + f[f>0.5] = 0.5 + u = np.cumsum(f[::-1])[::-1] + u = 0.5* u / (10*20) + np.savetxt("pair-pot.dat", np.array((x,u)).T) + return interp1d(x,u, + bounds_error = False, fill_value="extrapolate", + assume_sorted=True)(self.param_x).T + + x = x[x<50] + data = np.loadtxt("last/pair-pot.dat") + u_last = interp1d(data[:,0], data[:,1], + bounds_error = False, fill_value="extrapolate", + assume_sorted=True)(x).T + + particle_force = np.diff(u_last) / np.diff(x) + + for b in bdd.bead_dists: + ids = bdd.bead_centers < 10 + b[:,ids] = 0 + + R = bdd.interhelical_spacing + R,y = self.load_rau_pressure() + + P = self.get_pressure(R) + P_t = self.get_target_pressure(R) + dP = P_t-P + + np.savetxt("raw_pressure.dat", self.get_raw_pressure()) + np.savetxt("pressure.dat", np.array((R,P)).T) + np.savetxt("target_pressure.dat", np.array((R,P_t)).T) + + def fitFun(R,*uvals): + df_interp = dfFun(*uvals) + dP_local = self.get_virial_pressure(R, df_interp) + print( np.std(dP_local-dP) ) + return dP_local + + def duFun(x,*popt): + f = dfFun(*popt)(x) + u = -np.cumsum(f) * np.mean( np.diff(x) ) + u = u - u[-1] + return u + + def dfFun(x0,f0,x1_par,x2): + x1 = (1-x1_par)*x0 + x1_par*x2 + t = np.linspace(0,1,100) + x = x0*(1-t)**2 + 2*x1*(1-t)*t + x2*t**2 + f = f0*(1-t)**2 + return interp1d(x, f, + bounds_error = False, + fill_value=0, + assume_sorted=True) + + dP_sofar = np.zeros(dP.shape) + popt_list = [] + for i in range(3): + popt, pcov = opt.curve_fit(fitFun, R, dP-dP_sofar, + bounds=((10,-0.1,0.1,35), + (34,0.1,0.9,50)), + p0=(20,0,0.25,50)) + popt_list.append(popt) + dP_sofar += fitFun(R,*popt) + + du = np.zeros(x.shape) + for popt in popt_list: + du = du + duFun(x,*popt) + + du = savgol(du,int(2.5/(x[1]-x[0]))*2+1,3) + u = u_last+0.75*du + u = u-u[-1] + + np.savetxt("pair-pot.dat", np.array((x,u)).T) + + return interp1d(x,u, + bounds_error = False, fill_value="extrapolate", + assume_sorted=True)(self.param_x).T + + + def get_virial_pressure(self, R, bead_force): + """ Return pressure as fn of interhelical spacing """ + import bead_dist_data as bdd + + interhelical_spacing = bdd.interhelical_spacing + volume = (np.pi*np.array(bdd.radii)**2) * bdd.height + + # units "kcal_mol/AA**3" atm + # * 68568.409 + + xy = bdd.bead_centers[np.newaxis,:] + r = bdd.bead_centers[:,np.newaxis] + force = bead_force(r) + + """ Pxy = 1/(4V) Sum_ij (xy_ij)**2 * bead_force(r_ij) / r_ij """ + ## 2*count because count only includes i < j + pressure = [68568.409*np.sum( 2*count * (xy**2) * force / r ) / (4 * vol) + for vol,count in zip(volume,bdd.bead_dists)] + return pchip_interpolate(interhelical_spacing, pressure, R) + """ + return interp1d(interhelical_spacing, pressure, + bounds_error = False, + kind='cubic', + fill_value="extrapolate", + assume_sorted=True)(R) + + ## Fit double exponential + + try: + def fitfn(x,A,a,b): + return A*a**(-b**x) + def fit_wrap(x,*popt): + print("pressure:",pressure) + print(" ",fitfn(interhelical_spacing, *popt) - pressure) + return fitfn(x,*popt) + popt, pcov = opt.curve_fit(fit_wrap, interhelical_spacing, pressure, + bounds=((-np.inf,0,0), + (np.inf,np.inf,np.inf)), + p0=(0,1,1)) + return fitfn(R,*popt) + except: + return interp1d(interhelical_spacing, pressure, + bounds_error = False, + fill_value="extrapolate", + assume_sorted=True)(R) + """ + + + def get_interhelical_density(self,R): + data = np.loadtxt("last/density.dat") + r,rho = data.T + rho = rho / r**2 + rho = rho/np.trapz(rho,r) + filter_points = int(2.5/(r[1]-r[0]))*2 + 1 + rho = savgol(rho, filter_points, 3) # smooth density + return interp1d(r,rho, + bounds_error = False, fill_value=0, + assume_sorted=True)(R).T + + def get_raw_pressure(self): + from bead_dist_data import interhelical_spacing, pressure + distance = interhelical_spacing + assert( len(distance) > 2 ) + return distance,pressure + + def get_pressure(self,R): + distance,pressure = self.get_raw_pressure() + # distance,pressure = np.loadtxt("last/pressure.dat") + return pchip_interpolate(distance, pressure, R) + return interp1d(distance, pressure, + bounds_error = False, + kind='cubic', + fill_value="extrapolate", + assume_sorted=True)(R) + def fitfn(x,a,b): + return a**(-b**x) + + popt, pcov = opt.curve_fit(fitfn, distance, pressure, + bounds=(0,np.inf), + p0=(1,1)) + return fitfn(R,*popt) + + + def get_interhelical_force(self,R): + y = pressure = self.get_pressure(R) + x = R + force = (x*y / np.sqrt(3)) * 34 * 1.4583976e-05 # convert atm AA**2 to kcal/mol AA + return force + + def get_target_interhelical_density(self,R): + r,u = self.load_pmf() + rho = np.exp( -u/kT ) + rho = rho / np.trapz(rho,r) + return interp1d(r,rho, + bounds_error = False, fill_value=0, + assume_sorted=True)(R).T + + def get_target_force(self,R): + x,y = self.load_rau_force() + # return pchip_interpolate(x, y, R) + # return interp1d(x,y, + # bounds_error = False, + # # kind='cubic', + # fill_value="extrapolate", + # assume_sorted=True)(R) + + def fitfn(x,A,a): + return A * np.exp(-(x-18)/a) + popt,pcov = curve_fit(fitfn, x,y, p0=(10,20)) + return fitfn(R,*popt) + + def get_target_pressure(self,R): + x,y = self.load_rau_pressure() + return pchip_interpolate(x, y, R) + return interp1d(x,y, + bounds_error = False, + kind='cubic', + fill_value="extrapolate", + assume_sorted=True)(R) + def fitfn(x,A,a): + return A * np.exp(-(x-18)/a) + popt,pcov = curve_fit(fitfn, x,y, p0=(10,20)) + return fitfn(R,*popt) + + def load_rau_force(self): + x,y = self.load_rau_pressure() + y = (x*y / np.sqrt(3)) * 34 * 1.4583976e-05 # convert atm AA**2 to kcal/mol AA + return x,y + + def get_rounded_bp(self,bps1,bps2): + larger = np.max([bps1,bps2]) + smaller = np.min([bps1,bps2]) + smaller_shift, larger_shift = [_round_bp(bp) for bp in (smaller,larger)] + + key = (smaller_shift,larger_shift) + return smaller,larger,smaller_shift,larger_shift + + def nbPot(self, x, bps1, bps2): + x0,u0 = self.single_bp_pair_pot + u = self.parametric_potential(x,*u0) + u = u * bps1 * bps2 + assert(np.sum(np.isnan(u)) == 0) + return u + + +def _add_parameter_to_cache_dict( args ): + obj, dict_, key = args + cache_key = '{}:{}'.format(*key) + dict_[cache_key] = obj.fit_potential(*key) + return True + + +class nbDnaPotential100Mg(AbstractNbDnaPotential): + + def __init__(self,*args,**kwargs): + ## units "sqrt( 80 epsilon0 295 k K / ((4*100 mM + 200 mM) e**2/particle) )" AA + self.debye_length0 = 5.5771297 + AbstractNbDnaPotential.__init__(self,*args,**kwargs) + + def get_package_cache_file(self): + return get_resource_path("nbPot.rau_refined_100Mg.dat") + + def load_rau_pressure(self): + data = np.loadtxt("rau-data-100Mg.dat") + x = data[:,0] + y0 = data[:,1] + y = (10**y0) * 9.8692327e-07 # convert dyne/cm^2 to atm + return x,y + + def load_pmf(self): + d = np.loadtxt(get_resource_path("jj-nar-dna-pmf-100Mg.dat")) + x0 = d[:,1] + y0 = d[:,0] # kcal/mol for 10bp with 10bp, according to JY + y0 = y0 - np.mean(y0[x0 > 38.5]) # set 0 to tail of potential + filter_points = int(2.5/(x0[1]-x0[0]))*2 + 1 + y0 = savgol(y0, filter_points, 3) # smooth potential + return x0,y0 + + def load_bead_distributions(self): + from bead_dist_data import helix_bins, bead_bins + from bead_dist_data import helix_centers, bead_centers + import bead_dist_data as bdd + + self.bead_distribution_x = x = bdd.bead_centers + + all_x = [] + all_y = [] + for i in range(0,len(bdd.helix_centers),1): + y = bdd.bead_dists_for_helix_dist[i] + + # if y.sum() == 0: continue + if bdd.helix_centers[i] < 20: continue + all_x.append( bdd.helix_centers[i] ) + filter_points = int(5/(x[1]-x[0]))*2 + 1 + y = savgol( y, filter_points, 3) # smooth and normalize data + y[y<0] = 0 # ensure positive + y = 10*20 * y/np.trapz(y,x) # normalize + all_y.append( y ) + + self.bead_distribution_y = interp1d( np.array(all_x), np.array(all_y), + axis=0, + kind='linear', + bounds_error = False, fill_value="extrapolate", + assume_sorted=True) + + +class nbDnaPotential20Mg(AbstractNbDnaPotential): + + def __init__(self,*args,**kwargs): + ## units "sqrt( 80 epsilon0 295 k K / ((4*25 mM + 50 mM) e**2/particle) )" nm + ## 25 mM MgCl2, sorry about name of this class/files + self.debye_length0 = 11.154259 + AbstractNbDnaPotential.__init__(self,*args,**kwargs) + + def get_package_cache_file(self): + return get_resource_path("nbPot.rau_refined_20Mg_200Na.dat") + + def load_rau_pressure(self): + data = np.loadtxt("rau-data-20Mg.dat") + x = data[:,0] + y0 = data[:,1] + y = (10**y0) * 9.8692327e-07 # convert dyne/cm^2 to atm + return x,y + + def load_pmf(self): + d = np.loadtxt(get_resource_path("jj-nar-dna-pmf-20Mg-200Na.dat")) + x0 = d[:,1] + y0 = d[:,0] # kcal/mol for 10bp with 10bp, according to JY + y0 = y0 - np.mean(y0[x0 > 38.5]) # set 0 to tail of potential + filter_points = int(2.5/(x0[1]-x0[0]))*2 + 1 + y0 = savgol(y0, filter_points, 3) # smooth potential + return x0,y0 + + def load_bead_distributions(self): + import bead_dist_data as bdd + + self.bead_distribution_centers = bdd.bead_centers + self.bead_distribution_radii = bdd.radii + self.bead_distribution_counts = bdd.bead_dists + ... + + + for i in range(0,len(bdd.helix_centers),1): + y = bdd.bead_dists_for_helix_dist[i] + + # if y.sum() == 0: continue + if bdd.helix_centers[i] < 20: continue + all_x.append( bdd.helix_centers[i] ) + filter_points = int(5/(x[1]-x[0]))*2 + 1 + y = savgol( y, filter_points, 3) # smooth and normalize data + y[y<0] = 0 # ensure positive + y = 10*20 * y/np.trapz(y,x) # normalize + all_y.append( y ) + + self.bead_distribution_y = interp1d( np.array(all_x), np.array(all_y), + axis=0, + bounds_error = False, fill_value="extrapolate", + assume_sorted=True) + + +# nbDnaPotential100MgObj = nbDnaPotential100Mg() +nbDnaPotential = nbDnaPotential20MgObj = nbDnaPotential20Mg() + + +if __name__ == "__main__": + # import cProfile + # cProfile.run("_run()") + # _generate_cache() + # tmp = nbDnaPotential20Mg() + # tmp._write_cache() + # pass + + tmp = nbDnaPotential20Mg() + + # dR = 1 + # for R in np.arange(22,35,2): + # x,ys = tmp.get_bead_distributions(np.array([R-0.5*dR,R+0.5*dR])) + # y1,y2 = ys + # d_rho_dR = (y2-y1)/dR + # # plt.plot(x,d_rho_dR) + + # x = np.arange(20,50,1) + # tmp.nbPot(x,1,1) + + + # plt.gca().set_xlim((10,40)) + # plt.savefig("test.pdf") + + # x = np.arange(20,50,1) + # tmp.nbPot(x,1,1) + + nb = nbDnaPotential20MgObj + x = np.arange(10,50,1) + u = nb.nbPot(x,1,1) -- GitLab