nbPot.py 16.2 KB
Newer Older
1
2
3
4
5
import numpy as np
import scipy.optimize as opt
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter as savgol

6
7
from mrdna.model.nonbonded import NonbondedScheme
from mrdna import get_resource_path
8
9
from mrdna.config import CACHE_DIR

10
11
from scipy.optimize import curve_fit
from pathlib import Path
12

13
## TODO: clean this file up a bit, make things protected
14

15
16
## units "291 k K" kcal_mol
kT = 0.57827709
17

18
19
20
def _round_bp(bp):
    return 10**(np.around(np.log10(bp)*100)/100.0)

21
class AbstractNbDnaScheme(NonbondedScheme):
22

23
24
    def __init__(self, *args,**kwargs):
        NonbondedScheme.__init__(self,*args,**kwargs)
25

26
27
        self.debye_length = self.debye_length0 # set by child class

28
29
30
        x,y = self.load_pmf()
        self.target_x = x
        self.target_y = y
31
32
33
34
35
36
        base = 4
        self.param_x = np.logspace(np.log(15)/np.log(base),np.log(120)/np.log(base),80,base=base)
        # print(self.param_x)

        self.target_x_interp = np.linspace(np.min(x),np.max(x),2*len(x)) # interpolate potential
        self.target_x_interp = np.arange(23.5,38,0.1) # interpolate potential
37
        self.target_y_interp = interp1d(x,y)(self.target_x_interp)
38
39

        self.bead_distribution_y = None
40
41
        
        """ Initialize caches """
42
        try:
43
44
45
            self.single_bp_pair_pot = np.loadtxt( self.get_package_cache_file() ).T
            self.param_x = self.single_bp_pair_pot[0,:]
            assert( len(self.param_x > 2) )
46
47
        except:
            pass
48

49
50
    def get_package_cache_file(self):
        raise NotImplementedError()
51

52
53
    def load_pmf(self):
        raise NotImplementedError()
54

55
    def load_bead_distributions(self):
56
        raise NotImplementedError()
57

58
59
    def potential(self, r, typeA, typeB):
        """ Implements interface for NonbondedScheme """
60

61
62
        if typeA.name[0] == "O" or typeB.name[0] == "O":
            u = np.zeros( r.shape )
63
        else:
64
65
            u = self.nbPot(r, 0.5*typeA.nts, 0.5*typeB.nts)
        return u
66

67
68
69
    """ Remainder has to do with nbPot """
    def load_np(self,filename):
        return np.load(filename)['arr_0'].tolist()
70

71
    def parametric_potential(self,x,*parms):
72
        x0 = self.param_x
73
74
75
76
        x = np.array(x)
        u = interp1d(x0, np.array(parms),
                     bounds_error = False, fill_value="extrapolate",
                     assume_sorted=True)(x).T
77
        u[x<13] = u[x<13] + 0.5 * (x[x<13]-13)**2
78
79
80
81
82

        ## Debye correction
        if self.debye_length != self.debye_length0:
            ## units "e**2/(4 * pi * 80 epsilon0 AA)" kcal_mol
            A = 4.1507964
83
84
85
            with np.errstate(divide='ignore',invalid='ignore'):
                du = (A/x) * (np.exp(-x/self.debye_length) - np.exp(-x/self.debye_length0))
            
86
87
88
            du[x < 10] = du[x>=10][0]
            u = u + du

89
        return u-u[-1]
90

91
92
93
94
    def get_bead_distributions(self, interhelical_distances):
        if self.bead_distribution_y is None:
            self.load_bead_distributions()
        return self.bead_distribution_x, self.bead_distribution_y(interhelical_distances)
95

96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
    def iterative_fit(self):
        try:
            import bead_dist_data as bdd
            x = bdd.bead_centers.x
        except:
            ## First iteration
            x = np.arange(0,200,0.2)
            f = self.get_target_force(x+2)
            f[f>0.5] = 0.5
            u = np.cumsum(f[::-1])[::-1]
            u = 0.5* u / (10*20)
            np.savetxt("pair-pot.dat", np.array((x,u)).T)
            return interp1d(x,u,
                            bounds_error = False, fill_value="extrapolate",
                            assume_sorted=True)(self.param_x).T

        x = x[x<50]
        data = np.loadtxt("last/pair-pot.dat")
        u_last = interp1d(data[:,0], data[:,1],
                          bounds_error = False, fill_value="extrapolate",
                          assume_sorted=True)(x).T

        particle_force = np.diff(u_last) / np.diff(x)

        for b in bdd.bead_dists:
            ids = bdd.bead_centers < 10
            b[:,ids] = 0

        R = bdd.interhelical_spacing
        R,y = self.load_rau_pressure()

        P = self.get_pressure(R)
        P_t = self.get_target_pressure(R)
        dP = P_t-P

        np.savetxt("raw_pressure.dat", self.get_raw_pressure())
        np.savetxt("pressure.dat", np.array((R,P)).T)
        np.savetxt("target_pressure.dat", np.array((R,P_t)).T)

        def fitFun(R,*uvals):
            df_interp = dfFun(*uvals)
            dP_local = self.get_virial_pressure(R, df_interp)
            print( np.std(dP_local-dP) )
            return dP_local

        def duFun(x,*popt):
            f = dfFun(*popt)(x)
            u = -np.cumsum(f) * np.mean( np.diff(x) )
            u = u - u[-1]
            return u

        def dfFun(x0,f0,x1_par,x2):
            x1 = (1-x1_par)*x0 + x1_par*x2
            t = np.linspace(0,1,100)
            x = x0*(1-t)**2 + 2*x1*(1-t)*t + x2*t**2
            f = f0*(1-t)**2
            return interp1d(x, f,
                            bounds_error = False,
                            fill_value=0,
                            assume_sorted=True)

        dP_sofar = np.zeros(dP.shape)
        popt_list = []
        for i in range(3):
            popt, pcov = opt.curve_fit(fitFun, R, dP-dP_sofar,
                                       bounds=((10,-0.1,0.1,35),
                                               (34,0.1,0.9,50)),
                                       p0=(20,0,0.25,50))
            popt_list.append(popt)
            dP_sofar += fitFun(R,*popt)

        du = np.zeros(x.shape)
        for popt in popt_list:
            du = du + duFun(x,*popt)

        du = savgol(du,int(2.5/(x[1]-x[0]))*2+1,3)
        u = u_last+0.75*du
        u = u-u[-1]

        np.savetxt("pair-pot.dat", np.array((x,u)).T)

        return interp1d(x,u,
                        bounds_error = False, fill_value="extrapolate",
                          assume_sorted=True)(self.param_x).T


    def get_virial_pressure(self, R, bead_force):
        """ Return pressure as fn of interhelical spacing """
        import bead_dist_data as bdd

        interhelical_spacing = bdd.interhelical_spacing
        volume = (np.pi*np.array(bdd.radii)**2) * bdd.height

        # units "kcal_mol/AA**3" atm
        # * 68568.409

        xy = bdd.bead_centers[np.newaxis,:]
        r = bdd.bead_centers[:,np.newaxis]
        force = bead_force(r)

        """ Pxy = 1/(4V) Sum_ij (xy_ij)**2 * bead_force(r_ij) / r_ij """
        ## 2*count because count only includes i < j
        pressure = [68568.409*np.sum( 2*count * (xy**2) * force / r ) / (4 * vol)
                    for vol,count in zip(volume,bdd.bead_dists)]
        return pchip_interpolate(interhelical_spacing, pressure, R)
        """
        return interp1d(interhelical_spacing, pressure,
                        bounds_error = False,
                        kind='cubic',
                        fill_value="extrapolate",
                        assume_sorted=True)(R)

        ## Fit double exponential
209

210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
        try:
            def fitfn(x,A,a,b):
                return A*a**(-b**x)
            def fit_wrap(x,*popt):
                print("pressure:",pressure)
                print("  ",fitfn(interhelical_spacing, *popt) - pressure)
                return fitfn(x,*popt)
            popt, pcov = opt.curve_fit(fit_wrap, interhelical_spacing, pressure,
                                       bounds=((-np.inf,0,0),
                                               (np.inf,np.inf,np.inf)),
                                       p0=(0,1,1))
            return fitfn(R,*popt)
        except:
            return interp1d(interhelical_spacing, pressure,
                            bounds_error = False, 
                            fill_value="extrapolate",
                            assume_sorted=True)(R)
        """


    def get_interhelical_density(self,R):
        data = np.loadtxt("last/density.dat")
        r,rho = data.T 
        rho = rho / r**2
        rho = rho/np.trapz(rho,r)
        filter_points = int(2.5/(r[1]-r[0]))*2 + 1
        rho = savgol(rho, filter_points, 3) # smooth density
        return interp1d(r,rho,
                          bounds_error = False, fill_value=0,
                          assume_sorted=True)(R).T

    def get_raw_pressure(self):
        from bead_dist_data import interhelical_spacing, pressure
        distance = interhelical_spacing
        assert( len(distance) > 2 )
        return distance,pressure

    def get_pressure(self,R):
        distance,pressure = self.get_raw_pressure()
        # distance,pressure = np.loadtxt("last/pressure.dat")
        return pchip_interpolate(distance, pressure, R)
        return interp1d(distance, pressure,
                        bounds_error = False,
                        kind='cubic',
                        fill_value="extrapolate",
                        assume_sorted=True)(R)
        def fitfn(x,a,b):
            return a**(-b**x)

        popt, pcov = opt.curve_fit(fitfn, distance, pressure,
                                   bounds=(0,np.inf),
                                   p0=(1,1))
        return fitfn(R,*popt)


    def get_interhelical_force(self,R):
        y = pressure = self.get_pressure(R)
        x = R
        force = (x*y / np.sqrt(3)) * 34 * 1.4583976e-05 # convert atm AA**2 to kcal/mol AA
        return force

    def get_target_interhelical_density(self,R):
        r,u = self.load_pmf()
        rho = np.exp( -u/kT )  
        rho = rho / np.trapz(rho,r)
        return interp1d(r,rho,
                          bounds_error = False, fill_value=0,
                          assume_sorted=True)(R).T

    def get_target_force(self,R):
        x,y = self.load_rau_force()
        # return pchip_interpolate(x, y, R)
        # return interp1d(x,y,
        #                 bounds_error = False,
        #                 # kind='cubic',
        #                 fill_value="extrapolate",
        #                 assume_sorted=True)(R)

        def fitfn(x,A,a):
            return A * np.exp(-(x-18)/a)
        popt,pcov = curve_fit(fitfn, x,y, p0=(10,20))
        return fitfn(R,*popt)

    def get_target_pressure(self,R):
        x,y = self.load_rau_pressure()
        return pchip_interpolate(x, y, R)
        return interp1d(x,y,
                        bounds_error = False,
                        kind='cubic',
                        fill_value="extrapolate",
                        assume_sorted=True)(R)
        def fitfn(x,A,a):
            return A * np.exp(-(x-18)/a)
        popt,pcov = curve_fit(fitfn, x,y, p0=(10,20))
        return fitfn(R,*popt)

    def load_rau_force(self):
        x,y = self.load_rau_pressure()
        y = (x*y / np.sqrt(3)) * 34 * 1.4583976e-05 # convert atm AA**2 to kcal/mol AA
        return x,y
310
311
312
313
314
315
316
317
318
319

    def get_rounded_bp(self,bps1,bps2):
        larger  = np.max([bps1,bps2])
        smaller = np.min([bps1,bps2])
        smaller_shift, larger_shift = [_round_bp(bp) for bp in (smaller,larger)]

        key = (smaller_shift,larger_shift)
        return smaller,larger,smaller_shift,larger_shift

    def nbPot(self, x, bps1, bps2):
320
321
322
323
324
        x0,u0 = self.single_bp_pair_pot
        u = self.parametric_potential(x,*u0)
        u = u * bps1 * bps2
        assert(np.sum(np.isnan(u)) == 0)
        return u
325
326
327
328
329
330
331
332
333
334
335


def _add_parameter_to_cache_dict( args ):
    obj, dict_, key = args
    cache_key = '{}:{}'.format(*key)
    dict_[cache_key] = obj.fit_potential(*key)
    return True


class nbDnaScheme100Mg(AbstractNbDnaScheme):

336
337
338
339
340
    def __init__(self,*args,**kwargs):
        ## units "sqrt( 80 epsilon0 295 k K / ((4*100 mM + 200 mM) e**2/particle) )" AA
        self.debye_length0 = 5.5771297
        AbstractNbDnaScheme.__init__(self,*args,**kwargs)

341
342
343
344
345
346
347
348
349
350
    def get_package_cache_file(self):
        return get_resource_path("nbPot.rau_refined_100Mg.dat")

    def load_rau_pressure(self):
        data = np.loadtxt("rau-data-100Mg.dat")
        x = data[:,0]
        y0 = data[:,1]
        y = (10**y0) * 9.8692327e-07 # convert dyne/cm^2 to atm
        return x,y
    
351
352
    def load_pmf(self):
        d = np.loadtxt(get_resource_path("jj-nar-dna-pmf-100Mg.dat"))
353
        x0 = d[:,1]
354
        y0 = d[:,0]                     # kcal/mol for 10bp with 10bp, according to JY
355
356
357
        y0 = y0 - np.mean(y0[x0 > 38.5])    # set 0 to tail of potential
        filter_points = int(2.5/(x0[1]-x0[0]))*2 + 1
        y0 = savgol(y0, filter_points, 3) # smooth potential
358
359
        return x0,y0

360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
    def load_bead_distributions(self):
        from bead_dist_data import helix_bins, bead_bins
        from bead_dist_data import helix_centers, bead_centers
        import bead_dist_data as bdd

        self.bead_distribution_x = x = bdd.bead_centers

        all_x = []
        all_y = []
        for i in range(0,len(bdd.helix_centers),1):
            y = bdd.bead_dists_for_helix_dist[i]

            # if y.sum() == 0: continue
            if bdd.helix_centers[i] < 20: continue 
            all_x.append( bdd.helix_centers[i] )
            filter_points = int(5/(x[1]-x[0]))*2 + 1
            y = savgol( y, filter_points, 3) # smooth and normalize data
            y[y<0] = 0                                       # ensure positive
            y = 10*20 * y/np.trapz(y,x) # normalize
            all_y.append( y )

        self.bead_distribution_y = interp1d( np.array(all_x), np.array(all_y),
                                             axis=0,
                                             kind='linear',
                                             bounds_error = False, fill_value="extrapolate",
                                             assume_sorted=True)

387
388
389

class nbDnaScheme20Mg(AbstractNbDnaScheme):

390
391
392
393
394
395
    def __init__(self,*args,**kwargs):
        ## units "sqrt( 80 epsilon0 295 k K / ((4*25 mM + 50 mM) e**2/particle) )" nm
        ## 25 mM MgCl2, sorry about name of this class/files
        self.debye_length0 = 11.154259
        AbstractNbDnaScheme.__init__(self,*args,**kwargs)

396
397
398
399
400
401
402
403
404
405
    def get_package_cache_file(self):
        return get_resource_path("nbPot.rau_refined_20Mg_200Na.dat")

    def load_rau_pressure(self):
        data = np.loadtxt("rau-data-20Mg.dat")
        x = data[:,0]
        y0 = data[:,1]
        y = (10**y0) * 9.8692327e-07 # convert dyne/cm^2 to atm
        return x,y
    
406
407
    def load_pmf(self):
        d = np.loadtxt(get_resource_path("jj-nar-dna-pmf-20Mg-200Na.dat"))
408
        x0 = d[:,1]
409
        y0 = d[:,0]                     # kcal/mol for 10bp with 10bp, according to JY
410
411
412
        y0 = y0 - np.mean(y0[x0 > 38.5])    # set 0 to tail of potential
        filter_points = int(2.5/(x0[1]-x0[0]))*2 + 1
        y0 = savgol(y0, filter_points, 3) # smooth potential
413
414
        return x0,y0

415
416
    def load_bead_distributions(self):
        import bead_dist_data as bdd
417

418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
        self.bead_distribution_centers = bdd.bead_centers
        self.bead_distribution_radii = bdd.radii
        self.bead_distribution_counts = bdd.bead_dists
        ...
        

        for i in range(0,len(bdd.helix_centers),1):
            y = bdd.bead_dists_for_helix_dist[i]

            # if y.sum() == 0: continue
            if bdd.helix_centers[i] < 20: continue 
            all_x.append( bdd.helix_centers[i] )
            filter_points = int(5/(x[1]-x[0]))*2 + 1
            y = savgol( y, filter_points, 3) # smooth and normalize data
            y[y<0] = 0                                       # ensure positive
            y = 10*20 * y/np.trapz(y,x) # normalize
            all_y.append( y )
435

436
437
438
439
        self.bead_distribution_y = interp1d( np.array(all_x), np.array(all_y),
                                             axis=0,
                                             bounds_error = False, fill_value="extrapolate",
                                             assume_sorted=True)
440

441
442
443

# nbDnaScheme100MgObj = nbDnaScheme100Mg()
nbDnaScheme = nbDnaScheme20MgObj = nbDnaScheme20Mg()
444

445
    
446
if __name__ == "__main__":
447
448
449
    # import cProfile
    # cProfile.run("_run()")
    # _generate_cache()
450
451
452
453
    # tmp = nbDnaScheme20Mg()
    # tmp._write_cache()
    # pass

454
    tmp = nbDnaScheme20Mg()
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475

    # dR = 1
    # for R in np.arange(22,35,2):
    #     x,ys = tmp.get_bead_distributions(np.array([R-0.5*dR,R+0.5*dR]))
    #     y1,y2 = ys
    #     d_rho_dR = (y2-y1)/dR
    #     # plt.plot(x,d_rho_dR)
        
    # x = np.arange(20,50,1)
    # tmp.nbPot(x,1,1)
        
        
    # plt.gca().set_xlim((10,40))
    # plt.savefig("test.pdf")

    # x = np.arange(20,50,1)
    # tmp.nbPot(x,1,1)

    nb = nbDnaScheme20MgObj
    x = np.arange(10,50,1)
    u = nb.nbPot(x,1,1)