diff --git a/mrdna/model/nbPot.py b/mrdna/model/nbPot.py index 697f675691cb1a00494974c373d1001b83000425..6941f479cb05100ec26c207886dc8e9741582683 100644 --- a/mrdna/model/nbPot.py +++ b/mrdna/model/nbPot.py @@ -30,21 +30,6 @@ def _maxForce(x,u0,maxForce=40): assert( np.isnan(u).sum() == 0 ) return u -d = np.loadtxt(get_resource_path("jj-nar-dna-pmf-100Mg.dat")) - -x0 = d[:,1] + _dxParam -y0 = d[:,0] # kcal/mol for 10bp with 10bp, according to JY -y0 = y0 - np.mean(y0[x0>(38.5 + _dxParam)]) # set 0 to tail of potential -y0 = savgol(y0, int(5.0/(x0[1]-x0[0])), 3) # smooth potential - -def parametric_potential(x,*parms): - x = np.array(x) - u = interp1d(x0, np.array(parms), - bounds_error = False, fill_value="extrapolate", - assume_sorted=True)(x).T - u[x>np.max(x0)] = 0 - return u - def integrate_potential(potfn,x,y,*parms): u = np.zeros(np.shape(x)) for h in y: @@ -53,124 +38,177 @@ def integrate_potential(potfn,x,y,*parms): u = u + potfn(r,*parms) # * x/r return u -xinterp = np.linspace(np.min(x0),np.max(x0),1000) # interpolate potentialb -yinterp = interp1d(x0,y0)(xinterp) - -def fit_potential(bp1,bp2): - assert(bp2 >= bp1) - - def fitFun(x,*parms): - ## 1-turn iteracting with at least 4 turns (more is not needed) - y = np.arange(-20,21)*(bp2*3.4) - return (10.0/bp1) * integrate_potential(parametric_potential,x,y,*parms) - - popt, pcov = opt.curve_fit(fitFun, xinterp, yinterp, p0=y0/10**2 ) # find fit to interpolated potential - return popt - # return lambda x: parametric_potential(x,*popt) def _round_bp(bp): return 10**(np.around(np.log10(bp)*100)/100.0) - -_pot_dict = {} -_pot_dict_full = {} -def nbPot_full(x,bps1,bps2): - larger = np.max([bps1,bps2]) - smaller = np.min([bps1,bps2]) - key = (smaller,larger) - if key not in _pot_dict: - _pot_dict_full[key] = fit_potential(*key) - u = parametric_potential(x,*_pot_dict_full[key]) - - u1 = _maxForce(x,u, 100) - # u2 = savgol(u1, int(5.0/(x0[1]-x0[0])), 3) - u2 = u - assert(np.sum(np.isnan(u2)) == 0) - return u2 - -def nbPot(x,bps1,bps2): - larger = np.max([bps1,bps2]) - smaller = np.min([bps1,bps2]) - smaller_shift, larger_shift = [_round_bp(bp) for bp in (smaller,larger)] - - key = (smaller_shift,larger_shift) - if key not in _pot_dict: - cache_file = CACHE_DIR / "nbPot-{}-{}.npz".format(*key) - try: - _pot_dict[key] = np.load(cache_file)['arr_0'].tolist() - except: - _pot_dict[key] = fit_potential(*key) - np.savez(cache_file, _pot_dict[key]) - u = parametric_potential(x,*_pot_dict[key]) - u = u * ( (smaller/smaller_shift)*(larger/larger_shift) ) +class AbstractNbDnaScheme(NonbondedScheme): - u1 = _maxForce(x,u, 100) - u2 = savgol(u1, int(5.0/(x0[1]-x0[0])), 3) - u2 = u - assert(np.sum(np.isnan(u2)) == 0) - return u2 + def __init__(self, *args,**kwargs): + NonbondedScheme.__init__(self,*args,**kwargs) -try: - _cached_params = np.load( get_resource_path("nbPot.npz") ) -except: - pass - - -def nbPot(x,bps1,bps2): - larger = np.max([bps1,bps2]) - smaller = np.min([bps1,bps2]) - smaller_shift, larger_shift = [_round_bp(bp) for bp in (smaller,larger)] - - key = (smaller_shift,larger_shift) - if key not in _pot_dict: - filename = "nbPot-{}-{}.npz".format(*key) - def load_np(filename): - return np.load(filename)['arr_0'].tolist() + x,y = self.load_pmf() + self.target_x = x + self.target_y = y + self.target_x_interp = np.linspace(np.min(x),np.max(x),1000) # interpolate potential + self.target_y_interp = interp1d(x,y)(self.target_x_interp) + + """ Initialize caches """ + self._pot_dict = dict() # cache including only what has already been acessed try: - _pot_dict[key] = _cached_params['{}:{}'.format(*key)] - except: - # print( "Unable to find cached parammeters for beads {}-{}".format(*key) ) - cache_file = CACHE_DIR / filename - try: - _pot_dict[key] = load_np(cache_file) - except: - _pot_dict[key] = fit_potential(*key) - np.savez(cache_file, _pot_dict[key]) + self._cached_params = np.load( self.get_package_cache_file() ) + except: + pass - u = parametric_potential(x,*_pot_dict[key]) - u = u * ( (smaller/smaller_shift)*(larger/larger_shift) ) + def get_package_cache_file(self): + raise NotImplementedError() - u1 = _maxForce(x,u, 100) - u2 = savgol(u1, int(5.0/(x0[1]-x0[0])), 3) - u2 = u - assert(np.sum(np.isnan(u2)) == 0) - return u2 + def load_pmf(self): + raise NotImplementedError() + def get_cache_file_for_pair(self,key): + raise NotImplementedError() -def _writeNonbondedParameterFiles(self, prefix): - x = np.arange(0, 50, 0.1) - for i,j,t1,t2 in self._particleTypePairIter(): - f = "%s.%s-%s.dat" % (prefix, t1, t2) + def potential(self, r, typeA, typeB): + """ Implements interface for NonbondedScheme """ - if t1 == "O" or t2 == "O": - y = np.zeros(np.shape(x)) + if typeA.name[0] == "O" or typeB.name[0] == "O": + u = np.zeros( r.shape ) else: - bps1,bps2 = [float( t[1:] )/10 for t in (t1,t2)] - y = nbPot(x, bps1, bps2) + u = self.nbPot(r, 0.5*typeA.nts, 0.5*typeB.nts) + return u - np.savetxt( f, np.array([x, y]).T ) - self._nbParamFiles.append(f) + """ Remainder has to do with nbPot """ + def load_np(self,filename): + return np.load(filename)['arr_0'].tolist() -class nbDnaScheme(NonbondedScheme): - def potential(self, r, typeA, typeB): - if typeA.name[0] == "O" or typeB.name[0] == "O": - u = np.zeros(np.shape(r)) - else: - u = nbPot(r, 0.5*typeA.nts, 0.5*typeB.nts) + def set_cache_params(self,key): + if key not in self._pot_dict: + try: + self._pot_dict[key] = self._cached_params['{}:{}'.format(*key)] + except: + cache_file = self.get_cache_file_for_pair(key) + try: + self._pot_dict[key] = self.load_np(cache_file) + except: + self._pot_dict[key] = self.fit_potential(*key) + np.savez(cache_file, self._pot_dict[key]) + + def parametric_potential(self,x,*parms): + x0 = self.target_x + x = np.array(x) + u = interp1d(x0, np.array(parms), + bounds_error = False, fill_value="extrapolate", + assume_sorted=True)(x).T + u[x>np.max(x0)] = 0 return u -nbDnaScheme = nbDnaScheme() -def _run(): + def fit_potential(self,bp1,bp2): + assert(bp2 >= bp1) + + def fitFun(x,*parms): + ## 1-turn iteracting with at least 4 turns (more is not needed) + y = np.arange(-20,21)*(bp2*3.4) + + return (10.0/bp1) * integrate_potential(self.parametric_potential, x,y,*parms) + + popt, pcov = opt.curve_fit(fitFun, self.target_x_interp, + self.target_y_interp, + p0=self.target_y/10**2 ) # find fit to interpolated potential + return popt + + def get_rounded_bp(self,bps1,bps2): + larger = np.max([bps1,bps2]) + smaller = np.min([bps1,bps2]) + smaller_shift, larger_shift = [_round_bp(bp) for bp in (smaller,larger)] + + key = (smaller_shift,larger_shift) + return smaller,larger,smaller_shift,larger_shift + + def nbPot(self, x, bps1, bps2): + smaller,larger,smaller_shift,larger_shift = self.get_rounded_bp(bps1,bps2) + key = (smaller_shift,larger_shift) + self.set_cache_params(key) + + u = self.parametric_potential(x,*self._pot_dict[key]) + u = u * ( (smaller/smaller_shift)*(larger/larger_shift) ) + + # u1 = _maxForce(x,u, 100) + # u2 = savgol(u1, int(5.0/(self.x0[1]-self.x0[0])), 3) + u2 = u + assert(np.sum(np.isnan(u2)) == 0) + return u2 + + """ Creating package cache """ + + def _write_cache(self): + keys = [] + for i in np.arange(0.4,5.5,0.01): + ir = _round_bp(i) + for j in np.arange(0.4,5.5,0.01): + if j < i: continue + keys.append( (ir,_round_bp(j)) ) + + keys = sorted(list(set(keys))) + print(len(keys)) + + new_cache_dict = dict() + + args = [(self, new_cache_dict, key) for key in keys] + + from multiprocessing import Pool + p = Pool(15) + print("Starting pool") + for arg in args: + _add_parameter_to_cache_dict(arg) + + p.map( _add_parameter_to_cache_dict, args ) + print("done with pool") + np.savez( self.get_package_cache_file(), **new_cache_dict) + + +def _add_parameter_to_cache_dict( args ): + obj, dict_, key = args + cache_key = '{}:{}'.format(*key) + dict_[cache_key] = obj.fit_potential(*key) + return True + + +class nbDnaScheme100Mg(AbstractNbDnaScheme): + def get_package_cache_file(self): + return get_resource_path("nbPot.jj-nar-dna-pmf-100Mg.npz") + + def load_pmf(self): + d = np.loadtxt(get_resource_path("jj-nar-dna-pmf-100Mg.dat")) + x0 = d[:,1] + _dxParam + y0 = d[:,0] # kcal/mol for 10bp with 10bp, according to JY + y0 = y0 - np.mean(y0[x0>(38.5 + _dxParam)]) # set 0 to tail of potential + y0 = savgol(y0, int(5.0/(x0[1]-x0[0])), 3) # smooth potential + return x0,y0 + + def get_cache_file_for_pair(self,key): + filename = "nbPot-100Mg-{}-{}.npz".format(*key) + return CACHE_DIR / filename + +class nbDnaScheme20Mg(AbstractNbDnaScheme): + def get_package_cache_file(self): + return get_resource_path("nbPot.jj-nar-dna-pmf-20Mg-200Na.npz") + + def load_pmf(self): + d = np.loadtxt(get_resource_path("jj-nar-dna-pmf-20Mg-200Na.dat")) + x0 = d[:,1] + _dxParam + y0 = d[:,0] # kcal/mol for 10bp with 10bp, according to JY + y0 = y0 - np.mean(y0[x0>(38.5 + _dxParam)]) # set 0 to tail of potential + y0 = savgol(y0, int(5.0/(x0[1]-x0[0])), 3) # smooth potential + return x0,y0 + + def get_cache_file_for_pair(self,key): + filename = "nbPot-20Mg-200Na-{}-{}.npz".format(*key) + return CACHE_DIR / filename + +nbDnaScheme = nbDnaScheme100Mg() + +def _plot_things(): np.savetxt("jj-filtered.dat", np.array((x0,y0)).T) x = np.arange(0, 50, 0.1) ## run some tests @@ -184,40 +222,11 @@ def _run(): y = (10.0/i)*integrate_potential(nbPot, x, np.arange(-20,20)*(3.4*j) , i, j) np.savetxt("test2-{}-{}.dat".format(i,j), np.array((x,y)).T), - -_new_cache_dict = dict() -def _add_parameter_to_cache_dict(key): - new_cache_dict['{}:{}'.format(*key)] = _get_potential_parameters(key) - -def _get_potential_parameters(key0): - bps1,bps2 = key0 - larger = np.max([bps1,bps2]) - smaller = np.min([bps1,bps2]) - smaller_shift, larger_shift = [_round_bp(bp) for bp in (smaller,larger)] - key = (smaller_shift,larger_shift) - params = fit_potential(*key) - return params - - -def _write_cache(filename): - from multiprocessing import Pool - keys = [] - for i in np.arange(0.4,5.5,0.01): - ir = _round_bp(i) - for j in np.arange(0.4,5.5,0.01): - if j < i: continue - keys.append( (ir,_round_bp(j)) ) - - keys = sorted(list(set(keys))) - print(len(keys)) - - p = Pool(15) - p.map( _add_parameter_to_cache_dict, keys ) - np.savez(filename, **_new_cache_dict) - if __name__ == "__main__": # import cProfile # cProfile.run("_run()") # _generate_cache() + tmp = nbDnaScheme20Mg() + tmp._write_cache() pass diff --git a/mrdna/resources/nbPot.npz b/mrdna/resources/nbPot.jj-nar-dna-pmf-100Mg.npz similarity index 100% rename from mrdna/resources/nbPot.npz rename to mrdna/resources/nbPot.jj-nar-dna-pmf-100Mg.npz diff --git a/mrdna/resources/nbPot.jj-nar-dna-pmf-20Mg-200Na.npz b/mrdna/resources/nbPot.jj-nar-dna-pmf-20Mg-200Na.npz new file mode 100644 index 0000000000000000000000000000000000000000..f596d0268c3ebc161b03c4b4d2c22d6c8dfd3dca Binary files /dev/null and b/mrdna/resources/nbPot.jj-nar-dna-pmf-20Mg-200Na.npz differ